package pl.edu.icm.cermine.pubmed;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import pl.edu.icm.cermine.PdfBxStructureExtractor;
import pl.edu.icm.cermine.evaluation.tools.CosineDistance;
import pl.edu.icm.cermine.evaluation.tools.SmithWatermanDistance;
import pl.edu.icm.cermine.evaluation.tools.StringTools;
import pl.edu.icm.cermine.evaluation.tools.XMLTools;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.metadata.zoneclassification.tools.ZoneLocaliser;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.structure.transformers.BxDocumentToTrueVizWriter;

/* loaded from: input_file:pl/edu/icm/cermine/pubmed/PubmedXMLGenerator.class */
public class PubmedXMLGenerator {
    /** When {@code true}, {@code printVerbose} and {@code printlnVerbose} write to standard output; otherwise they print nothing. */
    private boolean verbose = false;

    /**
     * A candidate label for a zone: the label itself, the tokens of the ground-truth entry it
     * came from, and the alignment score of that entry against the zone.
     *
     * <p>Equality and hashing consider only {@code label} and {@code alignment};
     * {@code entryTokens} is ignored by both.
     *
     * <p>Decompiler note: this class was originally declared {@code private}.
     */
    public static class LabelTrio {
        private BxZoneLabel label;
        private Double alignment;
        private List<String> entryTokens;

        /**
         * @param bxZoneLabel the candidate label
         * @param list        tokens of the ground-truth entry the score was computed for
         * @param d           alignment score of the entry against the zone
         */
        public LabelTrio(BxZoneLabel bxZoneLabel, List<String> list, Double d) {
            this.label = bxZoneLabel;
            this.entryTokens = list;
            this.alignment = d;
        }

        /** Combines the hashes of {@code label} and {@code alignment} (a null field contributes 0). */
        @Override
        public int hashCode() {
            int labelHash = (this.label != null) ? this.label.hashCode() : 0;
            int alignmentHash = (this.alignment != null) ? this.alignment.hashCode() : 0;
            int result = 31 + labelHash;
            return 31 * result + alignmentHash;
        }

        /** Two trios are equal when they have the same class, the same label and equal alignment. */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (obj.getClass() != getClass()) {
                return false;
            }
            LabelTrio other = (LabelTrio) obj;
            if (other.label != this.label) {
                return false;
            }
            if (this.alignment != null) {
                return this.alignment.equals(other.alignment);
            }
            return other.alignment == null;
        }
    }

    /**
     * Switches diagnostic output on or off.
     *
     * @param z {@code true} to let the verbose print helpers write to standard output
     */
    private void setVerbose(boolean z) {
        verbose = z;
    }

    /**
     * Prints {@code str} followed by a line break to standard output, but only in verbose mode.
     *
     * @param str the text to print
     */
    private void printlnVerbose(String str) {
        if (!this.verbose) {
            return;
        }
        System.out.println(str);
    }

    /**
     * Prints {@code str} without a trailing line break to standard output, but only in verbose mode.
     *
     * @param str the text to print
     */
    private void printVerbose(String str) {
        if (!this.verbose) {
            return;
        }
        System.out.print(str);
    }

    /**
     * Extracts the zone structure of a document and labels its zones by matching their text
     * against strings pulled out of the accompanying article XML.
     *
     * <p>The method works in three stages:
     * <ol>
     *   <li>Text fragments are read from the XML with XPath expressions rooted at
     *       {@code /article} and registered, together with the {@link BxZoneLabel} they stand
     *       for, in a {@code SmartHashMap}.</li>
     *   <li>Every registered fragment is scored against every zone with a Smith-Waterman
     *       distance (and a cosine distance).</li>
     *   <li>Each zone is labelled by the first of four increasingly permissive passes that
     *       succeeds; zones no pass can label get a {@code null} label and are then handed,
     *       page by page, to {@code infereLabels} until the number of unlabelled zones stops
     *       changing.</li>
     * </ol>
     *
     * @param inputStream  the document handed to {@link PdfBxStructureExtractor} (presumably a PDF)
     * @param inputStream2 the article XML the ground-truth strings are read from
     * @return the extracted document with labels set on its zones
     * @throws AnalysisException            declared by the original signature; source not visible here
     * @throws ParserConfigurationException if the XML parser cannot be configured
     * @throws SAXException                 if the XML cannot be parsed
     * @throws IOException                  if reading the XML fails
     * @throws XPathExpressionException     if an XPath expression cannot be evaluated
     * @throws TransformationException      declared by the original signature; source not visible here
     */
    public BxDocument generateTrueViz(InputStream inputStream, InputStream inputStream2) throws AnalysisException, ParserConfigurationException, SAXException, IOException, XPathExpressionException, TransformationException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Namespace processing, validation and DTD loading are all switched off.
        factory.setValidating(false);
        factory.setFeature("http://xml.org/sax/features/namespaces", false);
        factory.setFeature("http://xml.org/sax/features/validation", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document domDoc = factory.newDocumentBuilder().parse(inputStream2);
        BxDocument bxDoc = new PdfBxStructureExtractor().extractStructure(inputStream);
        // Fetched once: the zone list is not structurally modified anywhere in this method.
        List<BxZone> zones = bxDoc.asZones();
        int zoneCount = zones.size();

        // ---- Stage 1: collect ground-truth strings. Registration order is kept as is. ----
        SmartHashMap entries = new SmartHashMap();

        String abstractText = XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/abstract", domDoc));
        entries.putIf("Abstract " + abstractText, BxZoneLabel.MET_ABSTRACT);
        entries.putIf("Abstract", BxZoneLabel.MET_ABSTRACT);

        String title = xpString(xpath, "/article/front/article-meta/title-group/article-title", domDoc);
        entries.putIf(title, BxZoneLabel.MET_TITLE);
        entries.putIf(xpString(xpath, "/article/front/article-meta/title-group/article-subtitle", domDoc), BxZoneLabel.MET_TITLE);

        String journalTitle = xpString(xpath, "/article/front/journal-meta/journal-title", domDoc);
        if (journalTitle == null || journalTitle.isEmpty()) {
            journalTitle = xpString(xpath, "/article/front/journal-meta/journal-title-group/journal-title", domDoc);
        }
        entries.putIf(journalTitle, BxZoneLabel.MET_BIB_INFO);
        String journalPublisher = xpString(xpath, "/article/front/journal-meta/publisher/publisher-name", domDoc);
        entries.putIf(journalPublisher, BxZoneLabel.MET_BIB_INFO);
        entries.putIf(xpString(xpath, "/article/front/journal-meta/journal-id[@journal-id-type='publisher-id']", domDoc), BxZoneLabel.MET_BIB_INFO);
        String journalIssn = xpString(xpath, "/article/front/journal-meta/issn", domDoc);
        entries.putIf(journalIssn, BxZoneLabel.MET_BIB_INFO);

        String permissions = XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/permissions", domDoc));
        entries.putIf(permissions, BxZoneLabel.MET_COPYRIGHT);
        String license = XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/license", domDoc));
        entries.putIf(license, BxZoneLabel.MET_COPYRIGHT);

        List<String> articleType = XMLTools.extractTextAsList(xpNodes(xpath, "/article/@article-type", domDoc));
        articleType.add(XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/article-categories/subj-group", domDoc)));
        entries.putIf(articleType, BxZoneLabel.MET_TYPE);

        List<String> received = XMLTools.extractChildrenAsTextList(xpNode(xpath, "/article/front/article-meta/history/date[@date-type='received']", domDoc));
        registerDateVariants(entries, received);
        List<String> accepted = XMLTools.extractChildrenAsTextList(xpNode(xpath, "/article/front/article-meta/history/date[@date-type='accepted']", domDoc));
        registerDateVariants(entries, accepted);

        // With several pub-date elements the 'epub' and 'ppub' dates are used, otherwise 'collection'.
        boolean severalPubDates = xpNodes(xpath, "/article/front/article-meta/pub-date", domDoc).getLength() > 1;
        List<String> firstPubDate = XMLTools.extractChildrenAsTextList(xpNode(xpath,
                severalPubDates ? "/article/front/article-meta/pub-date[@pub-type='epub']" : "/article/front/article-meta/pub-date[@pub-type='collection']", domDoc));
        registerDateVariants(entries, firstPubDate);
        // A fresh empty list replaces the former unguarded clear() on a possibly-null list.
        List<String> pubDate = new ArrayList<String>();
        if (severalPubDates) {
            pubDate = XMLTools.extractChildrenAsTextList(xpNode(xpath, "/article/front/article-meta/pub-date[@pub-type='ppub']", domDoc));
        }
        registerDateVariants(entries, pubDate);

        String uri = xpString(xpath, "/article/front/article-meta/ext-link[@ext-link-type='uri']/xlink:href", domDoc);
        printlnVerbose(uri);
        entries.putIf(uri, BxZoneLabel.MET_ACCESS_DATA);
        String keywords = XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/kwd-group", domDoc));
        entries.putIf(keywords, BxZoneLabel.MET_KEYWORDS);
        String doi = xpString(xpath, "/article/front/article-meta/article-id[@pub-id-type='doi']", domDoc);
        entries.putIf("DOI " + doi, BxZoneLabel.MET_BIB_INFO);
        String volume = xpString(xpath, "/article/front/article-meta/volume", domDoc);
        entries.putIf("volume " + volume, BxZoneLabel.MET_BIB_INFO);
        entries.putIf("vol " + volume, BxZoneLabel.MET_BIB_INFO);
        String issue = xpString(xpath, "/article/front/article-meta/issue", domDoc);
        entries.putIf("number " + issue, BxZoneLabel.MET_BIB_INFO);
        entries.putIf("journal", BxZoneLabel.MET_BIB_INFO);
        entries.putIf("et al", BxZoneLabel.MET_BIB_INFO);

        List<String> authorNames = new ArrayList<String>();
        List<String> authorEmails = new ArrayList<String>();
        List<String> authorAffiliations = new ArrayList<String>();
        List<String> editors = new ArrayList<String>();

        String firstPage = xpString(xpath, "/article/front/article-meta/fpage", domDoc);
        String lastPage = xpString(xpath, "/article/front/article-meta/lpage", domDoc);
        entries.putIf("pages " + firstPage + " " + lastPage, BxZoneLabel.MET_BIB_INFO);
        entries.putIf("pp " + firstPage + " " + lastPage, BxZoneLabel.MET_BIB_INFO);
        entries.putIf(firstPage, BxZoneLabel.MET_BIB_INFO);
        entries.putIf(lastPage, BxZoneLabel.MET_BIB_INFO);
        // NOTE(review): the last page is registered twice and the first page never gets
        // OTH_PAGE_NUMBER; one call was probably meant for firstPage. Kept as is - confirm.
        entries.putIf(lastPage, BxZoneLabel.OTH_PAGE_NUMBER);
        entries.putIf(lastPage, BxZoneLabel.OTH_PAGE_NUMBER);
        try {
            int page = Integer.parseInt(firstPage);
            int last = Integer.parseInt(lastPage);
            // Registers firstPage+1 .. lastPage as page numbers.
            while (page < last) {
                page++;
                entries.putIf(String.valueOf(page), BxZoneLabel.OTH_PAGE_NUMBER);
            }
        } catch (NumberFormatException ignored) {
            // The page identifiers are not plain integers: no intermediate pages to register.
        }
        entries.putIf("page of", BxZoneLabel.OTH_PAGE_NUMBER);

        NodeList editorNodes = xpNodes(xpath, "/article/front/article-meta/contrib-group/contrib[@contrib-type='editor']", domDoc);
        for (int i = 0; i < editorNodes.getLength(); i++) {
            editors.add(XMLTools.extractTextFromNode(editorNodes.item(i)));
        }
        entries.putIf(StringTools.joinStrings(editors), BxZoneLabel.MET_EDITOR);

        NodeList authorNodes = xpNodes(xpath, "/article/front/article-meta/contrib-group/contrib[@contrib-type='author']", domDoc);
        for (int i = 0; i < authorNodes.getLength(); i++) {
            Node author = authorNodes.item(i);
            String givenNames = xpString(xpath, "name/given-names", author);
            String surname = xpString(xpath, "name/surname", author);
            // Absolute path: the same affiliations are collected once per author.
            List<String> groupAffiliations = XMLTools.extractTextAsList(xpNodes(xpath, "/article/front/article-meta/contrib-group/aff", domDoc));
            String email;
            try {
                email = xpString(xpath, "address/email", author);
            } catch (XPathExpressionException ignored) {
                email = "";
            }
            if (email.isEmpty()) {
                try {
                    email = xpString(xpath, "email", author);
                } catch (XPathExpressionException ignored) {
                    // Best effort: the author simply contributes no e-mail address.
                }
            }
            if (!email.isEmpty()) {
                authorEmails.add(email);
            }
            if (!groupAffiliations.isEmpty()) {
                authorAffiliations.addAll(groupAffiliations);
            }
            authorNames.add(givenNames + " " + surname);
        }
        entries.putIf(StringTools.joinStrings(authorNames), BxZoneLabel.MET_AUTHOR);
        authorAffiliations.addAll(XMLTools.extractTextAsList(xpNodes(xpath, "/article/front/article-meta/aff", domDoc)));
        entries.putIf(authorAffiliations, BxZoneLabel.MET_AFFILIATION);
        authorEmails.add(XMLTools.extractTextFromNodes(xpNodes(xpath, "/article/front/article-meta/author-notes/corresp", domDoc)));
        entries.putIf(authorEmails, BxZoneLabel.MET_CORRESPONDENCE);
        entries.putIf(XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/author-notes/corresp/fn", domDoc)), BxZoneLabel.MET_CORRESPONDENCE);
        // Only reported in verbose mode, never registered.
        String authorNotes = XMLTools.extractTextFromNode(xpNode(xpath, "/article/back/notes", domDoc));

        List<String> paragraphs = XMLTools.extractTextAsList(xpNodes(xpath, "/article/body//p", domDoc));
        entries.putIf(paragraphs, BxZoneLabel.BODY_CONTENT);
        entries.putIf(XMLTools.extractTextFromNodes(xpNodes(xpath, "/article/back/app-group//p", domDoc)), BxZoneLabel.BODY_CONTENT);
        List<String> sectionTitles = XMLTools.extractTextAsList(xpNodes(xpath, "/article/body//title", domDoc));
        entries.putIf(sectionTitles, BxZoneLabel.BODY_CONTENT);
        entries.putIf(XMLTools.extractTextAsList(xpNodes(xpath, "/article/back/app-group//title", domDoc)), BxZoneLabel.BODY_CONTENT);

        List<String> figures = XMLTools.extractTextAsList(xpNodes(xpath, "/article/floats-wrap//fig", domDoc));
        figures.addAll(XMLTools.extractTextAsList(xpNodes(xpath, "/article/floats-group//fig", domDoc)));
        figures.addAll(XMLTools.extractTextAsList(xpNodes(xpath, "/article/back//fig", domDoc)));
        figures.addAll(XMLTools.extractTextAsList(xpNodes(xpath, "/article/body//fig", domDoc)));
        figures.addAll(XMLTools.extractTextAsList(xpNodes(xpath, "/article/back/app-group//fig", domDoc)));
        entries.putIf(figures, BxZoneLabel.BODY_FIGURE);

        List<String> tableCaptions = new ArrayList<String>();
        List<String> tableBodies = new ArrayList<String>();
        List<String> tableFootnotes = new ArrayList<String>();
        NodeList tableNodes = xpNodes(xpath, "/article//table-wrap", domDoc);
        for (int i = 0; i < tableNodes.getLength(); i++) {
            Node table = tableNodes.item(i);
            String caption = xpString(xpath, "caption", table);
            tableCaptions.add(caption);
            String body = XMLTools.extractTextFromNode(xpNode(xpath, "table", table));
            tableBodies.add(body);
            List<String> footnotes = XMLTools.extractTextAsList(xpNodes(xpath, "table-wrap-foot/fn", table));
            tableFootnotes.addAll(footnotes);
            entries.putIf(caption, BxZoneLabel.BODY_TABLE);
            entries.putIf(body, BxZoneLabel.BODY_TABLE);
            entries.putIf(footnotes, BxZoneLabel.BODY_TABLE);
        }

        String financialDisclosure = XMLTools.extractTextFromNode(xpNode(xpath, "/article//fn[@fn-type='financial-disclosure']", domDoc));
        entries.putIf(financialDisclosure, BxZoneLabel.BODY_ACKNOWLEDGMENT);
        entries.putIf(XMLTools.extractTextFromNode(xpNode(xpath, "/article//fn[@fn-type='conflict']", domDoc)), BxZoneLabel.BODY_CONFLICT_STMT);
        entries.putIf(XMLTools.extractTextFromNode(xpNode(xpath, "/article/front/article-meta/permissions/copyright-statement", domDoc)), BxZoneLabel.MET_COPYRIGHT);
        entries.putIf(XMLTools.extractTextFromNode(xpNode(xpath, "/article/back/ack", domDoc)), BxZoneLabel.BODY_ACKNOWLEDGMENT);
        // Registered as a conflict statement but reported under "acknowledgement: " below.
        String backFootnote = XMLTools.extractTextFromNode(xpNode(xpath, "/article/back/fn-group/fn", domDoc));
        entries.putIf(backFootnote, BxZoneLabel.BODY_CONFLICT_STMT);
        entries.putIf(XMLTools.extractTextFromNode(xpNode(xpath, "/article/back/glossary", domDoc)), BxZoneLabel.BODY_GLOSSARY);

        NodeList formulaNodes = xpNodes(xpath, "/article/body//disp-formula", domDoc);
        for (int i = 0; i < formulaNodes.getLength(); i++) {
            Node formula = formulaNodes.item(i);
            entries.putIf(xpath.evaluate("label", formula), BxZoneLabel.BODY_EQUATION);
            // Everything except the label is joined into one equation entry.
            NodeList formulaParts = formula.getChildNodes();
            List<String> formulaText = new ArrayList<String>();
            for (int j = 0; j < formulaParts.getLength(); j++) {
                Node part = formulaParts.item(j);
                if (!part.getNodeName().equals("label")) {
                    formulaText.add(XMLTools.extractTextFromNode(part));
                }
            }
            entries.putIf(StringTools.joinStrings(formulaText), BxZoneLabel.BODY_EQUATION);
        }

        List<String> references = new ArrayList<String>();
        Node refList = xpNode(xpath, "/article/back/ref-list", domDoc);
        if (refList != null) {
            NodeList refNodes = refList.getChildNodes();
            for (int i = 0; i < refNodes.getLength(); i++) {
                references.add(XMLTools.extractTextFromNode(refNodes.item(i)));
            }
        }
        entries.putIf(StringTools.joinStrings(references), BxZoneLabel.REFERENCES);
        entries.put("references", BxZoneLabel.REFERENCES);

        // One extra entry made of every distinct word used by any bibliographic-info entry.
        Set<String> bibInfoWords = new HashSet<String>();
        for (Map.Entry<String, BxZoneLabel> entry : entries.entrySet()) {
            if (BxZoneLabel.MET_BIB_INFO.equals(entry.getValue())) {
                bibInfoWords.addAll(Arrays.asList(entry.getKey().split(" ")));
            }
        }
        entries.put(StringUtils.join(bibInfoWords, " "), BxZoneLabel.MET_BIB_INFO);

        printlnVerbose("journalTitle: " + journalTitle);
        printlnVerbose("journalPublisher: " + journalPublisher);
        printlnVerbose("journalISSNPublisher: " + journalIssn);
        printlnVerbose("articleType: " + articleType);
        printlnVerbose("received: " + received);
        printlnVerbose("accepted: " + accepted);
        printlnVerbose("pubdate: " + pubDate);
        printlnVerbose("permissions: " + permissions);
        printlnVerbose("license: " + license);
        printlnVerbose("title: " + title);
        printlnVerbose("abstract: " + abstractText);
        printlnVerbose("authorEmails: " + authorEmails);
        printlnVerbose("authorNames: " + authorNames);
        printlnVerbose("authorAff: " + authorAffiliations);
        printlnVerbose("authorNotes: " + authorNotes);
        printlnVerbose("editor: " + editors);
        printlnVerbose("keywords: " + keywords);
        printlnVerbose("DOI: " + doi);
        printlnVerbose("volume: " + volume);
        printlnVerbose("issue: " + issue);
        printlnVerbose("financial dis.: " + financialDisclosure);
        printlnVerbose("paragraphs: " + paragraphs);
        printlnVerbose("section titles: " + sectionTitles);
        printlnVerbose("tableBodies: " + tableBodies);
        printlnVerbose("tableCaptions: " + tableCaptions);
        printlnVerbose("tableFootnotes: " + tableFootnotes);
        printlnVerbose("figures: " + figures);
        printlnVerbose("acknowledgement: " + backFootnote);
        printlnVerbose("ref: " + references.size() + " " + references);

        // ---- Stage 2: score every entry against every zone. ----
        SmithWatermanDistance smithWaterman = new SmithWatermanDistance(0.1d, 0.1d);
        CosineDistance cosine = new CosineDistance();
        List<List<LabelTrio>> swLabelSim = new ArrayList<List<LabelTrio>>(zoneCount);
        // NOTE(review): the cosine scores are collected but never read by the passes below.
        List<List<LabelTrio>> cosLabelSim = new ArrayList<List<LabelTrio>>(zoneCount);
        // Zone text and tokens do not depend on the entry, so they are computed once per zone.
        String[] zoneTexts = new String[zoneCount];
        List<List<String>> scoringTokens = new ArrayList<List<String>>(zoneCount);
        for (int i = 0; i < zoneCount; i++) {
            swLabelSim.add(new ArrayList<LabelTrio>());
            cosLabelSim.add(new ArrayList<LabelTrio>());
            zoneTexts[i] = zones.get(i).toText();
            scoringTokens.add(StringTools.tokenize(StringTools.removeOrphantSpaces(StringTools.cleanLigatures(zoneTexts[i].toLowerCase()))));
        }
        for (Map.Entry<String, BxZoneLabel> entry : entries.entrySet()) {
            List<String> entryTokens = StringTools.tokenize(entry.getKey());
            printlnVerbose("--------------------");
            printlnVerbose(entry.getValue() + " " + entry.getKey() + "\n");
            // A bounded loop: the decompiled form never left the loop after the last zone.
            for (int i = 0; i < zoneCount; i++) {
                double swScore;
                double cosScore;
                if (zoneTexts[i].contains("www.biomedcentral.com")) {
                    // Zones carrying this address never match any entry.
                    swScore = 0.0d;
                    cosScore = 0.0d;
                } else {
                    swScore = smithWaterman.compare(entryTokens, scoringTokens.get(i));
                    cosScore = cosine.compare(entryTokens, scoringTokens.get(i));
                }
                printlnVerbose(swScore + " " + zoneTexts[i] + "\n\n");
                swLabelSim.get(i).add(new LabelTrio(entry.getValue(), entryTokens, Double.valueOf(swScore)));
                cosLabelSim.get(i).add(new LabelTrio(entry.getValue(), entryTokens, Double.valueOf(cosScore)));
            }
        }
        printlnVerbose("===========================");

        // ---- Stage 3: label the zones page by page. ----
        for (BxPage page : bxDoc.getPages()) {
            for (BxZone pageZone : page.getZones()) {
                int zoneIdx = zones.indexOf(pageZone);
                BxZone curZone = zones.get(zoneIdx);
                // Unlike the scoring tokens, these are produced without cleanLigatures.
                List<String> zoneTokens = StringTools.tokenize(StringTools.removeOrphantSpaces(curZone.toText().toLowerCase()));
                List<LabelTrio> candidates = swLabelSim.get(zoneIdx);
                // Each pass runs only if all earlier ones failed.
                boolean labelled = labelByClosestEntry(curZone, zoneTokens, candidates)
                        || labelByStrongestAlignment(curZone, zoneTokens, candidates)
                        || labelByCumulatedScore(curZone, zoneTokens, candidates)
                        || labelByTokenOverlap(curZone, zoneTokens, candidates);
                if (!labelled) {
                    curZone.setLabel((BxZoneLabel) null);
                }
                printlnVerbose(pageZone.getLabel() + " " + pageZone.toText() + "\n");
            }
            Map<BxZone, ZoneLocaliser> localisers = new HashMap<BxZone, ZoneLocaliser>();
            Set<BxZone> unlabelled = new HashSet<BxZone>();
            for (BxZone zone : page.getZones()) {
                if (zone.getLabel() == null) {
                    unlabelled.add(zone);
                    localisers.put(zone, new ZoneLocaliser(zone));
                }
            }
            // Repeat until a round leaves the number of unlabelled zones unchanged.
            int before;
            do {
                before = unlabelled.size();
                // NOTE(review): two calls per round, as in the original - confirm this is intended.
                infereLabels(unlabelled, localisers);
                infereLabels(unlabelled, localisers);
            } while (before != unlabelled.size());
        }
        printlnVerbose("=>=>=>=>=>=>=>=>=>=>=>=>=>=");
        return bxDoc;
    }

    /** Evaluates {@code expression} against {@code context} and returns the result as a string. */
    private static String xpString(XPath xpath, String expression, Object context) throws XPathExpressionException {
        return (String) xpath.evaluate(expression, context, XPathConstants.STRING);
    }

    /** Evaluates {@code expression} against {@code context} and returns the result as a single node. */
    private static Node xpNode(XPath xpath, String expression, Object context) throws XPathExpressionException {
        return (Node) xpath.evaluate(expression, context, XPathConstants.NODE);
    }

    /** Evaluates {@code expression} against {@code context} and returns the result as a node list. */
    private static NodeList xpNodes(XPath xpath, String expression, Object context) throws XPathExpressionException {
        return (NodeList) xpath.evaluate(expression, context, XPathConstants.NODESET);
    }

    /** Registers every string produced by {@code StringTools.produceDates} as MET_DATES; needs at least three date parts. */
    private static void registerDateVariants(SmartHashMap entries, List<String> dateParts) {
        if (dateParts == null || dateParts.size() < 3) {
            return;
        }
        for (Object variant : StringTools.produceDates(dateParts)) {
            entries.putIf((String) variant, BxZoneLabel.MET_DATES);
        }
    }

    /** Sorts {@code candidates} with {@code order} and then reverses the list, so the highest-ranked trio comes first. */
    private static void sortBestFirst(List<LabelTrio> candidates, Comparator<LabelTrio> order) {
        Collections.sort(candidates, order);
        Collections.reverse(candidates);
    }

    /** Orders trios by alignment divided by entry length; differences below {@code tolerance} fall back to entry length. */
    private static Comparator<LabelTrio> byNormalizedAlignment(final double tolerance) {
        return new Comparator<LabelTrio>() {
            @Override
            public int compare(LabelTrio a, LabelTrio b) {
                double diff = (a.alignment.doubleValue() / a.entryTokens.size()) - (b.alignment.doubleValue() / b.entryTokens.size());
                if (Math.abs(diff) < tolerance) {
                    return b.entryTokens.size() - a.entryTokens.size();
                }
                return diff > 0.0d ? 1 : -1;
            }
        };
    }

    /** Orders trios by raw alignment; differences below 1.0E-4 fall back to entry length. */
    private static Comparator<LabelTrio> byRawAlignment() {
        return new Comparator<LabelTrio>() {
            @Override
            public int compare(LabelTrio a, LabelTrio b) {
                double diff = a.alignment.doubleValue() - b.alignment.doubleValue();
                if (Math.abs(diff) < 1.0E-4d) {
                    return b.entryTokens.size() - a.entryTokens.size();
                }
                return diff > 0.0d ? 1 : -1;
            }
        };
    }

    /** Pass 0: accepts the best length-normalised candidate when zone and entry sizes agree and the normalised alignment exceeds 0.7. */
    private boolean labelByClosestEntry(BxZone zone, List<String> zoneTokens, List<LabelTrio> candidates) {
        sortBestFirst(candidates, byNormalizedAlignment(1.0E-4d));
        LabelTrio top = candidates.get(0);
        int zoneSize = zoneTokens.size();
        int entrySize = top.entryTokens.size();
        int larger = Math.max(zoneSize, entrySize);
        if (larger <= 0) {
            return false;
        }
        // NOTE(review): integer division - the quotient is 1 only when both sizes are equal and
        // 0 otherwise, so "> 0.7" really means "same token count". Kept as is; confirm whether
        // a fractional ratio was intended.
        int sizeRatio = Math.min(zoneSize, entrySize) / larger;
        if (sizeRatio > 0.7d && top.alignment.doubleValue() / entrySize > 0.7d) {
            zone.setLabel(top.label);
            printVerbose("0 ");
            return true;
        }
        return false;
    }

    /** Pass 1: accepts the candidate with the highest raw alignment when it exceeds half the zone's token count. */
    private boolean labelByStrongestAlignment(BxZone zone, List<String> zoneTokens, List<LabelTrio> candidates) {
        sortBestFirst(candidates, byRawAlignment());
        LabelTrio top = candidates.get(0);
        double alignmentPerZoneToken = top.alignment.doubleValue() / zoneTokens.size();
        printlnVerbose("-->" + alignmentPerZoneToken);
        if (alignmentPerZoneToken > 0.5d) {
            zone.setLabel(top.label);
            printVerbose("1 ");
            return true;
        }
        return false;
    }

    /** Pass 2: sums the normalised alignments per label and accepts the highest total when it reaches 0.5. */
    private boolean labelByCumulatedScore(BxZone zone, List<String> zoneTokens, List<LabelTrio> candidates) {
        Map<BxZoneLabel, Double> cumulated = new EnumMap<BxZoneLabel, Double>(BxZoneLabel.class);
        for (LabelTrio candidate : candidates) {
            double share = candidate.alignment.doubleValue() / Math.max(zoneTokens.size(), candidate.entryTokens.size());
            Double soFar = cumulated.get(candidate.label);
            cumulated.put(candidate.label, Double.valueOf(soFar == null ? share : soFar.doubleValue() + share));
        }
        double bestScore = Double.NEGATIVE_INFINITY;
        BxZoneLabel bestLabel = null;
        for (Map.Entry<BxZoneLabel, Double> total : cumulated.entrySet()) {
            if (total.getValue().doubleValue() > bestScore) {
                bestScore = total.getValue().doubleValue();
                bestLabel = total.getKey();
            }
        }
        if (bestScore >= 0.5d) {
            zone.setLabel(bestLabel);
            printVerbose("2 ");
            return true;
        }
        return false;
    }

    /** Pass 3: accepts the candidate sharing the most zone tokens (at least two), first exactly, then after stripping unusual characters. */
    private boolean labelByTokenOverlap(BxZone zone, List<String> zoneTokens, List<LabelTrio> candidates) {
        final String strippedChars = "[^0-9a-zA-Z,;\\.!\\?]";
        sortBestFirst(candidates, byNormalizedAlignment(0.001d));
        BxZoneLabel chosen = null;
        int bestOverlap = 0;
        for (LabelTrio candidate : candidates) {
            int overlap = 0;
            for (String zoneToken : zoneTokens) {
                if (candidate.entryTokens.contains(zoneToken)) {
                    overlap++;
                }
            }
            if (overlap > bestOverlap && overlap > 1) {
                chosen = candidate.label;
                bestOverlap = overlap;
            }
        }
        if (chosen == null) {
            for (LabelTrio candidate : candidates) {
                int overlap = 0;
                for (String zoneToken : zoneTokens) {
                    String strippedZoneToken = zoneToken.replaceAll(strippedChars, "");
                    // Each zone token counts once; the loop ends when the entry tokens run out
                    // (the decompiled form spun forever when no entry token matched).
                    for (String entryToken : candidate.entryTokens) {
                        if (strippedZoneToken.equals(entryToken.replaceAll(strippedChars, ""))) {
                            overlap++;
                            break;
                        }
                    }
                }
                if (overlap > bestOverlap && overlap > 1) {
                    chosen = candidate.label;
                    bestOverlap = overlap;
                }
            }
        }
        if (chosen == null) {
            return false;
        }
        zone.setLabel(chosen);
        return true;
    }

    /**
     * Makes one pass over the given zones and gives each zone that has no label the
     * label shared by a pair of its neighbours.
     *
     * <p>Neighbour pairs are tried in this order, and the first pair whose two labels
     * are the same object wins: left/right zones from the zone's {@link ZoneLocaliser},
     * then lower/upper zones from the localiser, then the zone's previous/next zones.
     * The label is copied as-is, so if both neighbours are themselves unlabelled the
     * zone is assigned {@code null} and is still treated as handled.
     *
     * <p>Every handled zone is removed from both {@code set} and {@code map}. Zones in
     * {@code set} that already carry a label are left untouched and stay in the set.
     *
     * @param set zones to examine; handled zones are removed from it
     * @param map localiser for each zone in {@code set}; entries of handled zones are removed
     */
    private void infereLabels(Set<BxZone> set, Map<BxZone, ZoneLocaliser> map) {
        Set<BxZone> handled = new HashSet<BxZone>();
        for (BxZone zone : set) {
            if (zone.getLabel() != null) {
                continue;
            }
            ZoneLocaliser localiser = map.get(zone);

            boolean found = false;
            BxZoneLabel inferred = null;
            if (localiser.getLeftZone() != null
                    && localiser.getRightZone() != null
                    && localiser.getLeftZone().getLabel() == localiser.getRightZone().getLabel()) {
                // Horizontal neighbours agree.
                inferred = localiser.getLeftZone().getLabel();
                found = true;
            } else if (localiser.getLowerZone() != null
                    && localiser.getUpperZone() != null
                    && localiser.getLowerZone().getLabel() == localiser.getUpperZone().getLabel()) {
                // Vertical neighbours agree.
                inferred = localiser.getLowerZone().getLabel();
                found = true;
            } else if (zone.hasNext()
                    && zone.hasPrev()
                    && ((BxZone) zone.getPrev()).getLabel() == ((BxZone) zone.getNext()).getLabel()) {
                // Previous and next zones agree.
                inferred = ((BxZone) zone.getPrev()).getLabel();
                found = true;
            }

            if (found) {
                zone.setLabel(inferred);
                printVerbose("3 ");
                handled.add(zone);
            }
        }

        // Removal is deferred until after the loop so the set is not modified while iterated.
        for (BxZone zone : handled) {
            map.remove(zone);
        }
        set.removeAll(handled);
    }

    /**
     * Command-line entry point. Recursively lists every {@code pdf} file under the given
     * directory and, for each one whose TrueViz path (as derived by {@link StringTools})
     * does not exist yet, runs {@link #generateTrueViz} on the PDF and its NLM file and
     * writes the resulting document next to it.
     *
     * <p>For each processed file three numbers are printed after the path: the percentage
     * of zones that received a label, the number of distinct metadata labels found, and a
     * 0-5 score counting the presence of REFERENCES, MET_AFFILIATION, MET_AUTHOR,
     * MET_BIB_INFO and MET_TITLE. The percentage is also embedded in the output file name
     * ({@code .xml} is replaced by {@code .<percentage>.cxml}).
     *
     * <p>A failure on one file is reported via its stack trace and does not stop the
     * processing of the remaining files. All streams opened for a file are closed even
     * when processing fails.
     *
     * @param strArr exactly one element: the directory to scan; otherwise a usage message
     *               is printed and the JVM exits with status 1
     */
    public static void main(String[] strArr) {
        if (strArr.length != 1) {
            System.err.println("Usage: <pubmed directory>");
            System.exit(1);
        }
        Iterator<?> pdfFiles = FileUtils.listFiles(new File(strArr[0]), new String[]{"pdf"}, true).iterator();
        while (pdfFiles.hasNext()) {
            try {
                String pdfPath = ((File) pdfFiles.next()).getPath();
                String nlmPath = StringTools.getNLMPath(pdfPath);
                String trueVizPath = StringTools.getTrueVizPath(nlmPath);
                // NOTE(review): the existence check is on the plain TrueViz path, while the
                // output below is written to "<name>.<percentage>.cxml" - confirm this is intended.
                if (new File(trueVizPath).exists()) {
                    continue;
                }
                System.out.print(pdfPath + " ");

                PubmedXMLGenerator generator = new PubmedXMLGenerator();
                generator.setVerbose(false);

                // Close both inputs no matter how generation ends; they used to be leaked.
                BxDocument document;
                FileInputStream pdfStream = new FileInputStream(pdfPath);
                try {
                    FileInputStream nlmStream = new FileInputStream(nlmPath);
                    try {
                        document = generator.generateTrueViz(pdfStream, nlmStream);
                    } finally {
                        nlmStream.close();
                    }
                } finally {
                    pdfStream.close();
                }

                EnumSet<BxZoneLabel> metadataLabels = EnumSet.noneOf(BxZoneLabel.class);
                boolean hasReferences = false;
                int totalZones = 0;
                int labelledZones = 0;
                for (BxZone zone : document.asZones()) {
                    totalZones++;
                    BxZoneLabel label = zone.getLabel();
                    if (label == null) {
                        continue;
                    }
                    labelledZones++;
                    if (label.isOfCategoryOrGeneral(BxZoneLabelCategory.CAT_METADATA)) {
                        metadataLabels.add(label);
                    }
                    if (BxZoneLabel.REFERENCES.equals(label)) {
                        hasReferences = true;
                    }
                }

                // One point for each key label that was recognised at least once.
                int score = hasReferences ? 1 : 0;
                if (metadataLabels.contains(BxZoneLabel.MET_AFFILIATION)) {
                    score++;
                }
                if (metadataLabels.contains(BxZoneLabel.MET_AUTHOR)) {
                    score++;
                }
                if (metadataLabels.contains(BxZoneLabel.MET_BIB_INFO)) {
                    score++;
                }
                if (metadataLabels.contains(BxZoneLabel.MET_TITLE)) {
                    score++;
                }
                int labelledPercent = totalZones > 0 ? (labelledZones * 100) / totalZones : 0;
                System.out.print(labelledPercent + " " + metadataLabels.size() + " " + score);

                // Serialize before opening the file so a serialization failure does not
                // leave an empty output file behind.
                String serialized = new BxDocumentToTrueVizWriter().write(document.getPages(), new Object[0]);
                String outputPath = trueVizPath.replace(".xml", "." + labelledPercent + ".cxml");
                BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath));
                try {
                    writer.write(serialized);
                } finally {
                    writer.close();
                }
                System.out.println(" done");
            } catch (Exception e) {
                // Best effort: report the failure and carry on with the next file.
                e.printStackTrace();
            }
        }
    }
}
