package pl.edu.icm.yadda.analysis.textr.transformers;

import edu.umass.cs.mallet.base.fst.Transducer;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import javax.activation.UnsupportedDataTypeException;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.HTMLLayout;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import pl.edu.icm.yadda.analysis.textr.model.BxBounds;
import pl.edu.icm.yadda.analysis.textr.model.BxChunk;
import pl.edu.icm.yadda.analysis.textr.model.BxDocument;
import pl.edu.icm.yadda.analysis.textr.model.BxLine;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxWord;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabel;
import pl.edu.icm.yadda.metadata.transformers.IMetadataReader;
import pl.edu.icm.yadda.metadata.transformers.MetadataFormat;
import pl.edu.icm.yadda.metadata.transformers.MetadataModel;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.7.3.jar:pl/edu/icm/yadda/analysis/textr/transformers/MargToTextrImporter.class */
public class MargToTextrImporter implements IMetadataReader<BxPage> {
    /** SLF4J logger of this importer; used below to warn about bounds with zero width or height. */
    private static final Logger log = LoggerFactory.getLogger(MargToTextrImporter.class);
    /**
     * Public, non-final constant with the value 3299; it is not read anywhere inside this class.
     * NOTE(review): presumably the reference page height of MARG scans (in pixels) - confirm against callers.
     */
    public static int STANDART_DOCUMENT_HEIGHT = 3299;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.7.3.jar:pl/edu/icm/yadda/analysis/textr/transformers/MargToTextrImporter$ComparablePair.class */
    /**
     * Mutable pair of two comparable values, ordered by the first component and then by the second.
     *
     * <p>A {@code null} component sorts before any non-null component, regardless of which side of the
     * comparison it is on. Two components are treated as equal when they are the same reference or
     * {@code equals} reports them equal.
     *
     * @param <X> type of the first component
     * @param <Y> type of the second component
     */
    public static class ComparablePair<X extends Comparable, Y extends Comparable> implements Comparable {
        X o1;
        Y o2;

        /**
         * Creates a pair from the two given components; either may be {@code null}.
         *
         * @param x first component
         * @param y second component
         */
        public ComparablePair(X x, Y y) {
            this.o1 = x;
            this.o2 = y;
        }

        /** @return the first component, possibly {@code null} */
        public X getO1() {
            return this.o1;
        }

        /** @param x new first component, may be {@code null} */
        public void setO1(X x) {
            this.o1 = x;
        }

        /** @return the second component, possibly {@code null} */
        public Y getO2() {
            return this.o2;
        }

        /** @param y new second component, may be {@code null} */
        public void setO2(Y y) {
            this.o2 = y;
        }

        /**
         * Two pairs are equal when they are of exactly the same class and both components are equal.
         */
        @Override
        @SuppressWarnings("rawtypes")
        public boolean equals(Object obj) {
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            ComparablePair other = (ComparablePair) obj;
            return sameValue(this.o1, other.o1) && sameValue(this.o2, other.o2);
        }

        @Override
        public int hashCode() {
            int firstHash = this.o1 != null ? this.o1.hashCode() : 0;
            int secondHash = this.o2 != null ? this.o2.hashCode() : 0;
            return (47 * ((47 * 7) + firstHash)) + secondHash;
        }

        /**
         * Compares by the first component and, when the first components are equal, by the second.
         *
         * @param obj the pair to compare with
         * @return negative, zero or positive as this pair sorts before, equal to or after {@code obj}
         * @throws NullPointerException if {@code obj} is {@code null}
         * @throws UnsupportedOperationException if {@code obj} is of a different class
         */
        @Override // java.lang.Comparable
        @SuppressWarnings("rawtypes")
        public int compareTo(Object obj) {
            if (obj == null) {
                throw new NullPointerException();
            }
            if (getClass() != obj.getClass()) {
                throw new UnsupportedOperationException("different classes");
            }
            ComparablePair other = (ComparablePair) obj;
            if (!sameValue(this.o1, other.o1)) {
                return compareNullsFirst(this.o1, other.o1);
            }
            if (sameValue(this.o2, other.o2)) {
                return 0;
            }
            return compareNullsFirst(this.o2, other.o2);
        }

        /** Returns true when both references are identical or {@code left.equals(right)} holds. */
        private static boolean sameValue(Object left, Object right) {
            return left == right || (left != null && left.equals(right));
        }

        /**
         * Null-safe comparison: {@code null} sorts first on either side, so swapping the arguments
         * flips the sign instead of throwing a NullPointerException.
         */
        @SuppressWarnings({"rawtypes", "unchecked"})
        private static int compareNullsFirst(Comparable left, Comparable right) {
            if (left == null) {
                return right == null ? 0 : -1;
            }
            if (right == null) {
                return 1;
            }
            return left.compareTo(right);
        }
    }

    /**
     * Reports the metadata format this reader consumes.
     *
     * @return {@code TrueVizUtils.MARG_FORMAT}
     */
    @Override
    public MetadataFormat getSourceFormat() {
        final MetadataFormat margFormat = TrueVizUtils.MARG_FORMAT;
        return margFormat;
    }

    /**
     * Reports the metadata model this reader produces.
     *
     * @return {@code BxDocumentTransformers.MODEL}
     */
    @Override
    public MetadataModel<BxPage> getTargetModel() {
        final MetadataModel<BxPage> pageModel = BxDocumentTransformers.MODEL;
        return pageModel;
    }

    /**
     * Reads a page from XML held in a string by delegating to the {@link Reader}-based overload.
     *
     * @param str the XML text to import
     * @param objArr extra arguments, passed through unchanged
     * @return the list produced by the {@link Reader}-based overload
     * @throws TransformationException if the delegate fails to read or parse the input
     */
    @Override
    public List<BxPage> read(String str, Object... objArr) throws TransformationException {
        Reader textSource = new StringReader(str);
        return read(textSource, objArr);
    }

    /**
     * Reads a single page from the given character stream.
     *
     * @param reader source of the XML to import
     * @param objArr extra arguments; not used by this implementation
     * @return a new list holding exactly the one imported page
     * @throws TransformationException wrapping any {@link IOException},
     *         {@link ParserConfigurationException} or {@link SAXException} raised while importing
     */
    @Override
    public List<BxPage> read(Reader reader, Object... objArr) throws TransformationException {
        final BxPage page;
        try {
            page = importSource(new InputSource(reader));
        } catch (IOException ioFailure) {
            throw new TransformationException(ioFailure);
        } catch (ParserConfigurationException parserFailure) {
            throw new TransformationException(parserFailure);
        } catch (SAXException saxFailure) {
            throw new TransformationException(saxFailure);
        }
        List<BxPage> pages = new ArrayList<>();
        pages.add(page);
        return pages;
    }

    /**
     * Collects the direct child elements of {@code element} whose tag name matches {@code str},
     * ignoring case. Non-element child nodes and deeper descendants are not considered.
     *
     * @param str tag name to look for
     * @param element parent whose direct children are scanned
     * @return matching children in document order; empty when there is none
     */
    private ArrayList<Element> getChildren(String str, Element element) {
        ArrayList<Element> matches = new ArrayList<>();
        NodeList nodes = element.getChildNodes();
        int nodeCount = nodes.getLength();
        for (int index = 0; index < nodeCount; index++) {
            Node candidate = nodes.item(index);
            if (!(candidate instanceof Element)) {
                continue;
            }
            Element child = (Element) candidate;
            if (child.getTagName().equalsIgnoreCase(str)) {
                matches.add(child);
            }
        }
        return matches;
    }

    /**
     * Builds the bounding box of all direct {@code Vertex} children of the given element.
     *
     * <p>Each vertex contributes its integer {@code x} and {@code y} attributes. The resulting bounds
     * start at the smallest x/y and span up to the largest x/y. When the box has zero width or zero
     * height, a warning listing the vertexes (sorted by x, then y) is logged.
     *
     * @param element element holding {@code Vertex} children
     * @return the bounding box, or {@code null} when the element has no {@code Vertex} child
     * @throws NumberFormatException if an {@code x} or {@code y} attribute is not a valid integer
     */
    private BxBounds parseElementContainingVertexes(Element element) {
        ArrayList<Element> vertexes = getChildren("Vertex", element);
        if (vertexes.isEmpty()) {
            return null;
        }
        List<ComparablePair<Integer, Integer>> points = new ArrayList<>(vertexes.size());
        int minX = Integer.MAX_VALUE;
        int maxX = Integer.MIN_VALUE;
        int minY = Integer.MAX_VALUE;
        int maxY = Integer.MIN_VALUE;
        for (Element vertex : vertexes) {
            int x = Integer.parseInt(vertex.getAttribute("x"));
            if (x < minX) {
                minX = x;
            }
            if (x > maxX) {
                maxX = x;
            }
            int y = Integer.parseInt(vertex.getAttribute("y"));
            if (y < minY) {
                minY = y;
            }
            if (y > maxY) {
                maxY = y;
            }
            points.add(new ComparablePair<Integer, Integer>(x, y));
        }
        BxBounds bounds = new BxBounds(minX, minY, maxX - minX, maxY - minY);
        // Plain zero check; the decompiled source compared against an unrelated library constant equal to 0.0.
        if (bounds.getHeight() == 0.0d || bounds.getWidth() == 0.0d) {
            // The sorted order only matters for the diagnostic output, so sort only when it is printed.
            Collections.sort(points);
            log.warn("problems with height or width points are:");
            for (ComparablePair<Integer, Integer> point : points) {
                log.warn("\t" + point.o1 + " , " + point.o2);
            }
        }
        return bounds;
    }

    /**
     * Converts a {@code Character} element into a chunk.
     *
     * <p>Bounds come from the first {@code CharacterCorners} child and the text from the
     * {@code Value} attribute of the first {@code GT_Text} child; a missing child yields {@code null}
     * for the corresponding part.
     *
     * @param element the character element
     * @return a new chunk built from the (possibly null) bounds and text
     */
    private BxChunk parseCharacterElement(Element element) {
        ArrayList<Element> cornerNodes = getChildren("CharacterCorners", element);
        BxBounds bounds = cornerNodes.isEmpty() ? null : parseElementContainingVertexes(cornerNodes.get(0));

        ArrayList<Element> textNodes = getChildren("GT_Text", element);
        String text = textNodes.isEmpty() ? null : textNodes.get(0).getAttribute("Value");

        return new BxChunk(bounds, text);
    }

    /**
     * Converts a {@code Word} element into a word.
     *
     * <p>Bounds are taken from the first {@code WordCorners} child when present; every direct
     * {@code Character} child is parsed and added as a chunk, in document order.
     *
     * @param element the word element
     * @return the populated word
     */
    private BxWord parseWordElement(Element element) {
        BxWord word = new BxWord();
        ArrayList<Element> cornerNodes = getChildren("WordCorners", element);
        if (!cornerNodes.isEmpty()) {
            word.setBounds(parseElementContainingVertexes(cornerNodes.get(0)));
        }
        for (Element characterNode : getChildren("Character", element)) {
            word.addChunks(parseCharacterElement(characterNode));
        }
        return word;
    }

    /**
     * Converts a {@code Line} element into a line.
     *
     * <p>Bounds are taken from the first {@code LineCorners} child when present; every direct
     * {@code Word} child is parsed and added, in document order.
     *
     * @param element the line element
     * @return the populated line
     */
    private BxLine parseLineElement(Element element) {
        BxLine line = new BxLine();
        ArrayList<Element> cornerNodes = getChildren("LineCorners", element);
        if (!cornerNodes.isEmpty()) {
            line.setBounds(parseElementContainingVertexes(cornerNodes.get(0)));
        }
        for (Element wordNode : getChildren("Word", element)) {
            line.addWord(parseWordElement(wordNode));
        }
        return line;
    }

    /**
     * Maps a {@code Classification} element to a zone label.
     *
     * <p>The label text is the {@code Value} attribute of the first {@code Category} child or, when
     * there is no such child, of the first {@code Type} child. The text is matched case-insensitively
     * against the known labels; anything else maps to {@link BxZoneLabel#UNKNOWN}.
     *
     * @param element the classification element
     * @return the matching label, or {@code null} when neither a {@code Category} nor a
     *         {@code Type} child exists (or the attribute value is {@code null})
     */
    private BxZoneLabel parseClassification(Element element) {
        ArrayList<Element> candidates = getChildren("Category", element);
        if (candidates.isEmpty()) {
            candidates = getChildren("Type", element);
        }
        if (candidates.isEmpty()) {
            return null;
        }
        String value = candidates.get(0).getAttribute("Value");
        if (value == null) {
            return null;
        }
        if (value.equalsIgnoreCase("Abstract")) {
            return BxZoneLabel.ABSTRACT;
        }
        if (value.equalsIgnoreCase("Affiliation")) {
            return BxZoneLabel.AFFILIATION;
        }
        if (value.equalsIgnoreCase("Author")) {
            return BxZoneLabel.AUTHOR;
        }
        if (value.equalsIgnoreCase("Footer")) {
            return BxZoneLabel.FOOTER;
        }
        if (value.equalsIgnoreCase("Header")) {
            return BxZoneLabel.HEADER;
        }
        // Literal instead of a logging-library constant that merely happened to hold the same text.
        if (value.equalsIgnoreCase("Title")) {
            return BxZoneLabel.TITLE;
        }
        return BxZoneLabel.UNKNOWN;
    }

    /**
     * Converts a {@code Zone} element into a zone.
     *
     * <p>The label comes from the first {@code Classification} child and the bounds from the first
     * {@code ZoneCorners} child, each only when present; every direct {@code Line} child is parsed
     * and added, in document order.
     *
     * @param element the zone element
     * @return the populated zone
     */
    private BxZone parseZoneNode(Element element) {
        BxZone zone = new BxZone();
        ArrayList<Element> classificationNodes = getChildren("Classification", element);
        if (!classificationNodes.isEmpty()) {
            zone.setLabel(parseClassification(classificationNodes.get(0)));
        }
        ArrayList<Element> cornerNodes = getChildren("ZoneCorners", element);
        if (!cornerNodes.isEmpty()) {
            zone.setBounds(parseElementContainingVertexes(cornerNodes.get(0)));
        }
        for (Element lineNode : getChildren("Line", element)) {
            zone.addLine(parseLineElement(lineNode));
        }
        return zone;
    }

    /**
     * Converts a {@code Page} element into a page.
     *
     * <p>Every direct {@code Zone} child is parsed and added. The page bounds are the smallest box
     * enclosing the bounds of all zones that have bounds; with no such zone the box is all zeros.
     * Afterwards the zone list returned by {@code getZones()} is sorted by y and then by x; zones
     * without bounds are placed last.
     * NOTE(review): the sort only affects the page if {@code getZones()} returns the live backing
     * list - confirm against {@code BxPage}.
     *
     * @param element the page element
     * @return the value returned by {@code setBounds} on the populated page
     */
    private BxPage parsePageNode(Element element) {
        BxPage page = new BxPage();
        double minX = 0.0d;
        double minY = 0.0d;
        double maxX = 0.0d;
        double maxY = 0.0d;
        boolean seeded = false;
        for (Element zoneNode : getChildren("Zone", element)) {
            BxZone zone = parseZoneNode(zoneNode);
            page.addZone(zone);
            BxBounds bounds = zone.getBounds();
            if (bounds == null) {
                // A zone without ZoneCorners/vertexes has no bounds; it cannot contribute to the extent.
                continue;
            }
            double left = bounds.getX();
            double top = bounds.getY();
            double right = bounds.getX() + bounds.getWidth();
            double bottom = bounds.getY() + bounds.getHeight();
            if (!seeded) {
                minX = left;
                minY = top;
                maxX = right;
                maxY = bottom;
                seeded = true;
                continue;
            }
            if (left < minX) {
                minX = left;
            }
            if (right > maxX) {
                maxX = right;
            }
            if (top < minY) {
                minY = top;
            }
            if (bottom > maxY) {
                maxY = bottom;
            }
        }
        Collections.sort(page.getZones(), new Comparator<BxZone>() {
            @Override
            public int compare(BxZone first, BxZone second) {
                BxBounds firstBounds = first.getBounds();
                BxBounds secondBounds = second.getBounds();
                if (firstBounds == null || secondBounds == null) {
                    if (firstBounds == secondBounds) {
                        return 0;
                    }
                    // Zones without bounds go after all zones that have bounds.
                    return firstBounds == null ? 1 : -1;
                }
                int byY = Double.compare(firstBounds.getY(), secondBounds.getY());
                if (byY != 0) {
                    return byY;
                }
                return Double.compare(firstBounds.getX(), secondBounds.getX());
            }
        });
        return page.setBounds(new BxBounds(minX, minY, maxX - minX, maxY - minY));
    }

    /**
     * Unimplemented stub: ignores its argument and always returns {@code null}.
     * It is not called from anywhere in this class.
     *
     * @param element the element that would represent a document; currently unused
     * @return always {@code null}
     */
    private BxDocument parseDocumentNode(Element element) {
        return null;
    }

    /**
     * Parses the XML source and converts it into a page.
     *
     * <p>Only a root element named {@code Page} (case-insensitive) is supported. Every other root,
     * including {@code Document}, is rejected, because document-level import is not implemented.
     *
     * @param inputSource XML input to parse
     * @return the page built from the root element
     * @throws IOException if reading fails, or (as {@code UnsupportedDataTypeException}) if the
     *         root element is not a {@code Page}
     * @throws ParserConfigurationException if the document builder cannot be created
     * @throws SAXException if the input is not well-formed XML
     */
    private BxPage importSource(InputSource inputSource) throws IOException, ParserConfigurationException, SAXException {
        Document parsed = TrueVizUtils.newDocumentBuilder().parse(inputSource);
        Element root = parsed.getDocumentElement();
        if ("Page".equalsIgnoreCase(root.getTagName())) {
            return parsePageNode(root);
        }
        // A "Document" root used to be tested in an empty, effect-free branch; it is unsupported like any other root.
        throw new UnsupportedDataTypeException("There were no example of this type contact kura for more info");
    }
}
