package pl.edu.icm.yadda.analysis.textr.transformers;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import pl.edu.icm.yadda.analysis.textr.model.BxBounds;
import pl.edu.icm.yadda.analysis.textr.model.BxChunk;
import pl.edu.icm.yadda.analysis.textr.model.BxLine;
import pl.edu.icm.yadda.analysis.textr.model.BxPage;
import pl.edu.icm.yadda.analysis.textr.model.BxWord;
import pl.edu.icm.yadda.analysis.textr.model.BxZone;
import pl.edu.icm.yadda.analysis.textr.model.BxZoneLabel;
import pl.edu.icm.yadda.analysis.textr.tools.BxBoundsBuilder;
import pl.edu.icm.yadda.analysis.textr.tools.BxModelUtils;
import pl.edu.icm.yadda.metadata.transformers.IMetadataReader;
import pl.edu.icm.yadda.metadata.transformers.MetadataFormat;
import pl.edu.icm.yadda.metadata.transformers.MetadataModel;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-0.1.3.jar:pl/edu/icm/yadda/analysis/textr/transformers/TrueVizToBxDocumentReader.class */
/**
 * Reads TrueViz XML and converts it into a list of {@link BxPage} objects.
 *
 * <p>The root element may be either a single {@code Page} or a container element whose tag
 * matches {@code StandardStructureTypes.DOCUMENT} and that holds {@code Page} children.
 * All tag names are matched case-insensitively, and only direct children of an element are
 * considered at each level (Page, Zone, Line, Word, Character).
 */
public class TrueVizToBxDocumentReader implements IMetadataReader<BxPage> {
    private static final Logger log = LoggerFactory.getLogger(TrueVizToBxDocumentReader.class);

    /** Returns the metadata format this reader consumes. */
    @Override // pl.edu.icm.yadda.metadata.transformers.IMetadataReader
    public MetadataFormat getSourceFormat() {
        return TrueVizUtils.TRUEVIZ_FORMAT;
    }

    /** Returns the metadata model this reader produces. */
    @Override // pl.edu.icm.yadda.metadata.transformers.IMetadataReader
    public MetadataModel<BxPage> getTargetModel() {
        return BxDocumentTransformers.MODEL;
    }

    /**
     * Parses TrueViz XML held in a string.
     *
     * @param str    the XML text
     * @param objArr extra arguments, passed through unchanged to {@link #read(Reader, Object...)}
     * @return the pages found in the document
     * @throws TransformationException if the XML cannot be read or parsed
     */
    @Override // pl.edu.icm.yadda.metadata.transformers.IMetadataReader
    public List<BxPage> read(String str, Object... objArr) throws TransformationException {
        return read(new StringReader(str), objArr);
    }

    /**
     * Parses TrueViz XML from a reader.
     *
     * <p>A root element other than the two recognised ones yields an empty list; a warning is
     * logged in that case so the situation is not completely silent.
     *
     * @param reader source of the XML text
     * @param objArr extra arguments; not used by this implementation
     * @return the pages found in the document, possibly empty
     * @throws TransformationException if reading fails, the parser cannot be configured, or the
     *                                 XML is not well-formed
     */
    @Override // pl.edu.icm.yadda.metadata.transformers.IMetadataReader
    public List<BxPage> read(Reader reader, Object... objArr) throws TransformationException {
        try {
            Document document = TrueVizUtils.newDocumentBuilder().parse(new InputSource(reader));
            Element root = document.getDocumentElement();
            String rootName = root.getTagName();
            List<BxPage> pages = new ArrayList<>();
            if ("Page".equalsIgnoreCase(rootName)) {
                pages.add(parsePageNode(root));
            } else if (StandardStructureTypes.DOCUMENT.equalsIgnoreCase(rootName)) {
                // NOTE(review): the PDFBox constant is presumably just the tag name "Document" - confirm.
                for (Element pageElement : getChildren("Page", root)) {
                    pages.add(parsePageNode(pageElement));
                }
            } else {
                log.warn("Unrecognized TrueViz root element <{}>; no pages were read", rootName);
            }
            return pages;
        } catch (IOException | ParserConfigurationException | SAXException e) {
            throw new TransformationException(e);
        }
    }

    /**
     * Collects the direct child elements of {@code element} whose tag name equals {@code str},
     * ignoring case. Non-element nodes (text, comments) are skipped.
     */
    private List<Element> getChildren(String str, Element element) {
        List<Element> matches = new ArrayList<>();
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node item = childNodes.item(i);
            if (item instanceof Element) {
                Element child = (Element) item;
                if (child.getTagName().equalsIgnoreCase(str)) {
                    matches.add(child);
                }
            }
        }
        return matches;
    }

    /**
     * Returns the first direct child element of {@code element} whose tag name equals
     * {@code name} (ignoring case), or {@code null} when there is none. Stops at the first
     * match so callers do not need to scan the child list twice.
     */
    private Element firstChild(String name, Element element) {
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node item = childNodes.item(i);
            if (item instanceof Element && ((Element) item).getTagName().equalsIgnoreCase(name)) {
                return (Element) item;
            }
        }
        return null;
    }

    /**
     * Builds bounds from the {@code Vertex} children of {@code element}, feeding each vertex's
     * {@code x} and {@code y} attributes to a {@link BxBoundsBuilder}.
     *
     * <p>A missing or non-numeric coordinate makes {@link Double#parseDouble(String)} throw a
     * {@link NumberFormatException}, which is not caught here.
     */
    private BxBounds parseElementContainingVertexes(Element element) {
        BxBoundsBuilder bxBoundsBuilder = new BxBoundsBuilder();
        for (Element vertex : getChildren("Vertex", element)) {
            double x = Double.parseDouble(vertex.getAttribute("x"));
            double y = Double.parseDouble(vertex.getAttribute("y"));
            bxBoundsBuilder.expand(x, y);
        }
        return bxBoundsBuilder.getBounds();
    }

    /**
     * Returns the bounds described by the first {@code cornersTag} child of {@code element},
     * or {@code null} when the element has no such child.
     */
    private BxBounds parseCorners(String cornersTag, Element element) {
        Element corners = firstChild(cornersTag, element);
        return corners == null ? null : parseElementContainingVertexes(corners);
    }

    /**
     * Converts a {@code Character} element into a chunk. Bounds come from the first
     * {@code CharacterCorners} child and text from the {@code Value} attribute of the first
     * {@code GT_Text} child; either is {@code null} when the corresponding child is absent.
     */
    private BxChunk parseCharacterElement(Element element) {
        BxBounds bounds = parseCorners("CharacterCorners", element);
        Element textElement = firstChild("GT_Text", element);
        String text = textElement == null ? null : textElement.getAttribute("Value");
        return new BxChunk(bounds, text);
    }

    /**
     * Converts a {@code Word} element into a word. Bounds are set only when a
     * {@code WordCorners} child exists; every {@code Character} child becomes a chunk.
     */
    private BxWord parseWordElement(Element element) {
        BxWord bxWord = new BxWord();
        BxBounds bounds = parseCorners("WordCorners", element);
        if (bounds != null || firstChild("WordCorners", element) != null) {
            // Second condition keeps the original behavior of calling setBounds whenever the
            // corners element exists, even if the bounds builder handed back null.
            bxWord.setBounds(bounds);
        }
        for (Element character : getChildren("Character", element)) {
            bxWord.addChunks(parseCharacterElement(character));
        }
        return bxWord;
    }

    /**
     * Converts a {@code Line} element into a line. Bounds are set only when a
     * {@code LineCorners} child exists; every {@code Word} child becomes a word.
     */
    private BxLine parseLineElement(Element element) {
        BxLine bxLine = new BxLine();
        Element corners = firstChild("LineCorners", element);
        if (corners != null) {
            bxLine.setBounds(parseElementContainingVertexes(corners));
        }
        for (Element word : getChildren("Word", element)) {
            bxLine.addWord(parseWordElement(word));
        }
        return bxLine;
    }

    /**
     * Resolves the zone label from a {@code Classification} element.
     *
     * <p>The first {@code Category} child is used, falling back to the first {@code Type}
     * child. Its {@code Value} attribute is upper-cased and looked up as a
     * {@link BxZoneLabel} constant.
     *
     * @return the matching label, {@link BxZoneLabel#UNKNOWN} when the value names no constant,
     *         or {@code null} when neither child element is present
     */
    private BxZoneLabel parseClassification(Element element) {
        Element labelElement = firstChild("Category", element);
        if (labelElement == null) {
            labelElement = firstChild("Type", element);
        }
        if (labelElement == null) {
            return null;
        }
        String value = labelElement.getAttribute("Value");
        if (value == null) {
            return null;
        }
        try {
            // Locale.ROOT keeps the mapping stable regardless of the JVM default locale
            // (e.g. Turkish would turn 'i' into a dotted capital and miss the enum constant).
            return BxZoneLabel.valueOf(value.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException ignored) {
            // Value does not name a known label; report it as UNKNOWN rather than failing.
            return BxZoneLabel.UNKNOWN;
        }
    }

    /**
     * Converts a {@code Zone} element into a zone. Label and bounds are set only when the
     * {@code Classification} and {@code ZoneCorners} children exist; every {@code Line} child
     * becomes a line.
     */
    private BxZone parseZoneNode(Element element) {
        BxZone bxZone = new BxZone();
        Element classification = firstChild("Classification", element);
        if (classification != null) {
            bxZone.setLabel(parseClassification(classification));
        }
        Element corners = firstChild("ZoneCorners", element);
        if (corners != null) {
            bxZone.setBounds(parseElementContainingVertexes(corners));
        }
        for (Element line : getChildren("Line", element)) {
            bxZone.addLine(parseLineElement(line));
        }
        return bxZone;
    }

    /**
     * Converts a {@code Page} element into a page: adds every {@code Zone} child, then lets
     * {@link BxBoundsBuilder#setBounds} and {@link BxModelUtils#sortZonesYX} post-process it.
     */
    private BxPage parsePageNode(Element element) {
        BxPage bxPage = new BxPage();
        for (Element zone : getChildren("Zone", element)) {
            bxPage.addZone(parseZoneNode(zone));
        }
        BxBoundsBuilder.setBounds(bxPage);
        BxModelUtils.sortZonesYX(bxPage);
        return bxPage;
    }
}
