package pl.edu.icm.cermine.pubmed;

import com.google.common.collect.Lists;
import com.itextpdf.text.Annotation;
import com.itextpdf.text.html.HtmlTags;
import com.itextpdf.text.xml.xmp.PdfSchema;
import edu.umass.cs.mallet.base.fst.Transducer;
import edu.umass.cs.mallet.projects.seg_plus_coref.coreference.Citation;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import pl.edu.icm.cermine.content.cleaning.ContentCleaner;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.structure.model.BxPage;
import pl.edu.icm.cermine.structure.model.BxZone;
import pl.edu.icm.cermine.structure.model.BxZoneLabel;
import pl.edu.icm.cermine.structure.model.BxZoneLabelCategory;
import pl.edu.icm.cermine.structure.transformers.BxDocumentToTrueVizWriter;
import pl.edu.icm.cermine.structure.transformers.TrueVizToBxDocumentReader;
import pl.edu.icm.cermine.tools.SmartHashMap;
import pl.edu.icm.cermine.tools.TextUtils;
import pl.edu.icm.cermine.tools.distance.CosineDistance;
import pl.edu.icm.cermine.tools.distance.SmithWatermanDistance;

/* loaded from: input_file:pl/edu/icm/cermine/pubmed/RuleBasedPubmedXMLGenerator.class */
public class RuleBasedPubmedXMLGenerator {
    // When true, printlnVerbose(String) echoes its argument to System.out;
    // when false (the default) those diagnostic messages are suppressed.
    private boolean verbose = false;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:pl/edu/icm/cermine/pubmed/RuleBasedPubmedXMLGenerator$LabelTrio.class */
    /**
     * Immutable holder tying together a zone label, the token list of the
     * entry that label was derived from, and a numeric alignment score.
     *
     * <p>Equality and hashing consider only {@code label} and
     * {@code alignment}; {@code entryTokens} is deliberately left out of both,
     * so two trios with different token lists but the same label and score
     * compare equal.
     */
    public static class LabelTrio {
        private final BxZoneLabel label;
        private final Double alignment;
        private final List<String> entryTokens;

        /**
         * Creates a trio.
         *
         * @param bxZoneLabel label associated with the entry (may be null)
         * @param list        tokens of the entry the label belongs to
         * @param d           alignment score computed for the entry (may be null)
         */
        public LabelTrio(BxZoneLabel bxZoneLabel, List<String> list, Double d) {
            this.label = bxZoneLabel;
            this.entryTokens = list;
            this.alignment = d;
        }

        /**
         * Hash built from {@code label} and {@code alignment} with the usual
         * 31-multiplier scheme; a null component contributes 0.
         */
        @Override
        public int hashCode() {
            int labelHash = (this.label != null) ? this.label.hashCode() : 0;
            int alignmentHash = (this.alignment != null) ? this.alignment.hashCode() : 0;
            int partial = 31 + labelHash;
            return 31 * partial + alignmentHash;
        }

        /**
         * Two trios are equal when they are of the exact same class, refer to
         * the same label instance, and carry equal (or both null) alignments.
         */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (obj.getClass() != getClass()) {
                return false;
            }
            LabelTrio other = (LabelTrio) obj;
            // Labels are compared by identity, not by equals().
            if (other.label != this.label) {
                return false;
            }
            if (this.alignment == null) {
                return other.alignment == null;
            }
            return this.alignment.equals(other.alignment);
        }
    }

    /**
     * Switches diagnostic output on or off.
     *
     * @param z {@code true} to let {@code printlnVerbose} print its messages,
     *          {@code false} to silence them
     */
    private void setVerbose(boolean z) {
        verbose = z;
    }

    /**
     * Prints a diagnostic line to standard output, but only while verbose
     * mode is enabled; otherwise the call does nothing.
     *
     * @param str the message to print
     */
    private void printlnVerbose(String str) {
        if (!verbose) {
            return;
        }
        System.out.println(str);
    }

    public BxDocument generateTrueViz(InputStream inputStream, InputStream inputStream2) throws AnalysisException, ParserConfigurationException, SAXException, IOException, XPathExpressionException, TransformationException {
        Double valueOf;
        Double valueOf2;
        String str;
        XPath newXPath = XPathFactory.newInstance().newXPath();
        DocumentBuilderFactory newInstance = DocumentBuilderFactory.newInstance();
        newInstance.setValidating(false);
        newInstance.setFeature("http://xml.org/sax/features/namespaces", false);
        newInstance.setFeature("http://xml.org/sax/features/validation", false);
        newInstance.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        newInstance.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document parse = newInstance.newDocumentBuilder().parse(inputStream2);
        BxDocument pages = new BxDocument().setPages(new TrueVizToBxDocumentReader().read(new InputStreamReader(inputStream, "UTF-8"), new Object[0]));
        ArrayList newArrayList = Lists.newArrayList(pages.asZones());
        Integer valueOf3 = Integer.valueOf(newArrayList.size());
        SmartHashMap smartHashMap = new SmartHashMap();
        String extractTextFromNode = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/abstract", parse, XPathConstants.NODE));
        smartHashMap.putIf("Abstract " + extractTextFromNode, BxZoneLabel.MET_ABSTRACT);
        smartHashMap.putIf("Abstract", BxZoneLabel.MET_ABSTRACT);
        String str2 = (String) newXPath.evaluate("/article/front/article-meta/title-group/article-title", parse, XPathConstants.STRING);
        smartHashMap.putIf(str2, BxZoneLabel.MET_TITLE);
        smartHashMap.putIf((String) newXPath.evaluate("/article/front/article-meta/title-group/article-subtitle", parse, XPathConstants.STRING), BxZoneLabel.MET_TITLE);
        String str3 = (String) newXPath.evaluate("/article/front/journal-meta/journal-title", parse, XPathConstants.STRING);
        if (str3 == null || str3.isEmpty()) {
            str3 = (String) newXPath.evaluate("/article/front/journal-meta/journal-title-group/journal-title", parse, XPathConstants.STRING);
        }
        smartHashMap.putIf(str3, BxZoneLabel.MET_BIB_INFO);
        String str4 = (String) newXPath.evaluate("/article/front/journal-meta/publisher/publisher-name", parse, XPathConstants.STRING);
        smartHashMap.putIf(str4, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf((String) newXPath.evaluate("/article/front/journal-meta/journal-id[@journal-id-type='publisher-id']", parse, XPathConstants.STRING), BxZoneLabel.MET_BIB_INFO);
        String str5 = (String) newXPath.evaluate("/article/front/journal-meta/issn", parse, XPathConstants.STRING);
        smartHashMap.putIf(str5, BxZoneLabel.MET_BIB_INFO);
        String extractTextFromNode2 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/permissions", parse, XPathConstants.NODE));
        smartHashMap.putIf(extractTextFromNode2, BxZoneLabel.MET_COPYRIGHT);
        String extractTextFromNode3 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/license", parse, XPathConstants.NODE));
        smartHashMap.putIf(extractTextFromNode3, BxZoneLabel.MET_COPYRIGHT);
        List<String> extractTextAsList = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/@article-type", parse, XPathConstants.NODESET));
        extractTextAsList.add(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/article-categories/subj-group", parse, XPathConstants.NODE)));
        smartHashMap.putIf(extractTextAsList, BxZoneLabel.MET_TYPE);
        List<String> extractChildrenAsTextList = XMLTools.extractChildrenAsTextList((Node) newXPath.evaluate("/article/front/article-meta/history/date[@date-type='received']", parse, XPathConstants.NODE));
        if (!extractChildrenAsTextList.isEmpty() && extractChildrenAsTextList.size() >= 3) {
            Iterator<String> it = TextUtils.produceDates(extractChildrenAsTextList).iterator();
            while (it.hasNext()) {
                smartHashMap.putIf(it.next(), BxZoneLabel.MET_DATES);
            }
        }
        List<String> extractChildrenAsTextList2 = XMLTools.extractChildrenAsTextList((Node) newXPath.evaluate("/article/front/article-meta/history/date[@date-type='accepted']", parse, XPathConstants.NODE));
        if (!extractChildrenAsTextList2.isEmpty() && extractChildrenAsTextList2.size() >= 3) {
            Iterator<String> it2 = TextUtils.produceDates(extractChildrenAsTextList2).iterator();
            while (it2.hasNext()) {
                smartHashMap.putIf(it2.next(), BxZoneLabel.MET_DATES);
            }
        }
        List<String> extractChildrenAsTextList3 = ((NodeList) newXPath.evaluate("/article/front/article-meta/pub-date", parse, XPathConstants.NODESET)).getLength() > 1 ? XMLTools.extractChildrenAsTextList((Node) newXPath.evaluate("/article/front/article-meta/pub-date[@pub-type='epub']", parse, XPathConstants.NODE)) : XMLTools.extractChildrenAsTextList((Node) newXPath.evaluate("/article/front/article-meta/pub-date[@pub-type='collection']", parse, XPathConstants.NODE));
        if (extractChildrenAsTextList3 != null && extractChildrenAsTextList3.size() >= 3) {
            Iterator<String> it3 = TextUtils.produceDates(extractChildrenAsTextList3).iterator();
            while (it3.hasNext()) {
                smartHashMap.putIf(it3.next(), BxZoneLabel.MET_DATES);
            }
        }
        if (extractChildrenAsTextList3 != null) {
            extractChildrenAsTextList3.clear();
        }
        if (((NodeList) newXPath.evaluate("/article/front/article-meta/pub-date", parse, XPathConstants.NODESET)).getLength() > 1) {
            extractChildrenAsTextList3 = XMLTools.extractChildrenAsTextList((Node) newXPath.evaluate("/article/front/article-meta/pub-date[@pub-type='ppub']", parse, XPathConstants.NODE));
        }
        if (extractChildrenAsTextList3 != null && extractChildrenAsTextList3.size() >= 3) {
            Iterator<String> it4 = TextUtils.produceDates(extractChildrenAsTextList3).iterator();
            while (it4.hasNext()) {
                smartHashMap.putIf(it4.next(), BxZoneLabel.MET_DATES);
            }
        }
        String str6 = (String) newXPath.evaluate("/article/front/article-meta/ext-link[@ext-link-type='uri']/xlink:href", parse, XPathConstants.STRING);
        printlnVerbose(str6);
        smartHashMap.putIf(str6, BxZoneLabel.MET_ACCESS_DATA);
        String extractTextFromNode4 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/kwd-group", parse, XPathConstants.NODE));
        smartHashMap.putIf(extractTextFromNode4, BxZoneLabel.MET_KEYWORDS);
        String str7 = (String) newXPath.evaluate("/article/front/article-meta/article-id[@pub-id-type='doi']", parse, XPathConstants.STRING);
        smartHashMap.putIf("DOI " + str7, BxZoneLabel.MET_BIB_INFO);
        String str8 = (String) newXPath.evaluate("/article/front/article-meta/volume", parse, XPathConstants.STRING);
        smartHashMap.putIf("volume " + str8, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf("vol " + str8, BxZoneLabel.MET_BIB_INFO);
        String str9 = (String) newXPath.evaluate("/article/front/article-meta/issue", parse, XPathConstants.STRING);
        smartHashMap.putIf("number " + str9, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf(Citation.journal, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf("et al", BxZoneLabel.MET_BIB_INFO);
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        ArrayList arrayList4 = new ArrayList();
        String str10 = (String) newXPath.evaluate("/article/front/article-meta/fpage", parse, XPathConstants.STRING);
        String str11 = (String) newXPath.evaluate("/article/front/article-meta/lpage", parse, XPathConstants.STRING);
        smartHashMap.putIf("pages " + str10 + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + str11, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf("pp " + str10 + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + str11, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf(str10, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf(str11, BxZoneLabel.MET_BIB_INFO);
        smartHashMap.putIf(str11, BxZoneLabel.OTH_PAGE_NUMBER);
        smartHashMap.putIf(str11, BxZoneLabel.OTH_PAGE_NUMBER);
        try {
            int parseInt = Integer.parseInt(str10);
            int parseInt2 = Integer.parseInt(str11);
            while (parseInt < parseInt2) {
                parseInt++;
                smartHashMap.putIf(String.valueOf(parseInt), BxZoneLabel.OTH_PAGE_NUMBER);
            }
        } catch (NumberFormatException e) {
        }
        smartHashMap.putIf("page of", BxZoneLabel.OTH_PAGE_NUMBER);
        NodeList nodeList = (NodeList) newXPath.evaluate("/article/front/article-meta/contrib-group/contrib[@contrib-type='editor']", parse, XPathConstants.NODESET);
        for (int i = 0; i < nodeList.getLength(); i++) {
            arrayList4.add(XMLTools.extractTextFromNode(nodeList.item(i)));
        }
        smartHashMap.putIf(TextUtils.joinStrings(arrayList4), BxZoneLabel.MET_EDITOR);
        NodeList nodeList2 = (NodeList) newXPath.evaluate("/article/front/article-meta/contrib-group/contrib[@contrib-type='author']", parse, XPathConstants.NODESET);
        for (int i2 = 0; i2 < nodeList2.getLength(); i2++) {
            Node item = nodeList2.item(i2);
            String str12 = (String) newXPath.evaluate("name/given-names", item, XPathConstants.STRING);
            String str13 = (String) newXPath.evaluate("name/surname", item, XPathConstants.STRING);
            List<String> extractTextAsList2 = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/front/article-meta/contrib-group/aff", parse, XPathConstants.NODESET));
            try {
                str = (String) newXPath.evaluate("address/email", item, XPathConstants.STRING);
            } catch (XPathExpressionException e2) {
                str = "";
            }
            if (str.isEmpty()) {
                try {
                    str = (String) newXPath.evaluate("email", item, XPathConstants.STRING);
                } catch (XPathExpressionException e3) {
                }
            }
            if (!str.isEmpty()) {
                arrayList2.add(str);
            }
            if (!extractTextAsList2.isEmpty()) {
                arrayList3.addAll(extractTextAsList2);
            }
            arrayList.add(str12 + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + str13);
        }
        smartHashMap.putIf(TextUtils.joinStrings(arrayList), BxZoneLabel.MET_AUTHOR);
        arrayList3.addAll(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/front/article-meta/aff", parse, XPathConstants.NODESET)));
        smartHashMap.putIf(arrayList3, BxZoneLabel.MET_AFFILIATION);
        arrayList2.add(XMLTools.extractTextFromNodes((NodeList) newXPath.evaluate("/article/front/article-meta/author-notes/corresp", parse, XPathConstants.NODESET)));
        smartHashMap.putIf(arrayList2, BxZoneLabel.MET_CORRESPONDENCE);
        smartHashMap.putIf(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/author-notes/corresp/fn", parse, XPathConstants.NODE)), BxZoneLabel.MET_CORRESPONDENCE);
        String extractTextFromNode5 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/back/notes", parse, XPathConstants.NODE));
        List<String> extractTextAsList3 = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/body//p", parse, XPathConstants.NODESET));
        smartHashMap.putIf(extractTextAsList3, BxZoneLabel.BODY_CONTENT);
        smartHashMap.putIf(XMLTools.extractTextFromNodes((NodeList) newXPath.evaluate("/article/back/app-group//p", parse, XPathConstants.NODESET)), BxZoneLabel.BODY_CONTENT);
        List<String> extractTextAsList4 = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/body//title", parse, XPathConstants.NODESET));
        smartHashMap.putIf(extractTextAsList4, BxZoneLabel.BODY_CONTENT);
        smartHashMap.putIf(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/back/app-group//title", parse, XPathConstants.NODESET)), BxZoneLabel.BODY_CONTENT);
        List<String> extractTextAsList5 = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/floats-wrap//fig", parse, XPathConstants.NODESET));
        extractTextAsList5.addAll(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/floats-group//fig", parse, XPathConstants.NODESET)));
        extractTextAsList5.addAll(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/back//fig", parse, XPathConstants.NODESET)));
        extractTextAsList5.addAll(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/body//fig", parse, XPathConstants.NODESET)));
        extractTextAsList5.addAll(XMLTools.extractTextAsList((NodeList) newXPath.evaluate("/article/back/app-group//fig", parse, XPathConstants.NODESET)));
        smartHashMap.putIf(extractTextAsList5, BxZoneLabel.BODY_FIGURE);
        ArrayList arrayList5 = new ArrayList();
        ArrayList arrayList6 = new ArrayList();
        ArrayList arrayList7 = new ArrayList();
        NodeList nodeList3 = (NodeList) newXPath.evaluate("/article//table-wrap", parse, XPathConstants.NODESET);
        int i3 = 0;
        while (true) {
            Integer num = i3;
            if (num.intValue() >= nodeList3.getLength()) {
                break;
            }
            Node item2 = nodeList3.item(num.intValue());
            String str14 = (String) newXPath.evaluate("caption", item2, XPathConstants.STRING);
            arrayList5.add(str14);
            String extractTextFromNode6 = XMLTools.extractTextFromNode((Node) newXPath.evaluate(HtmlTags.TABLE, item2, XPathConstants.NODE));
            arrayList6.add(extractTextFromNode6);
            List<String> extractTextAsList6 = XMLTools.extractTextAsList((NodeList) newXPath.evaluate("table-wrap-foot/fn", item2, XPathConstants.NODESET));
            arrayList7.addAll(extractTextAsList6);
            smartHashMap.putIf(str14, BxZoneLabel.BODY_TABLE);
            smartHashMap.putIf(extractTextFromNode6, BxZoneLabel.BODY_TABLE);
            smartHashMap.putIf(extractTextAsList6, BxZoneLabel.BODY_TABLE);
            i3 = Integer.valueOf(num.intValue() + 1);
        }
        String extractTextFromNode7 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article//fn[@fn-type='financial-disclosure']", parse, XPathConstants.NODE));
        smartHashMap.putIf(extractTextFromNode7, BxZoneLabel.BODY_ACKNOWLEDGMENT);
        smartHashMap.putIf(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article//fn[@fn-type='conflict']", parse, XPathConstants.NODE)), BxZoneLabel.BODY_CONFLICT_STMT);
        smartHashMap.putIf(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/front/article-meta/permissions/copyright-statement", parse, XPathConstants.NODE)), BxZoneLabel.MET_COPYRIGHT);
        smartHashMap.putIf(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/back/ack", parse, XPathConstants.NODE)), BxZoneLabel.BODY_ACKNOWLEDGMENT);
        String extractTextFromNode8 = XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/back/fn-group/fn", parse, XPathConstants.NODE));
        smartHashMap.putIf(extractTextFromNode8, BxZoneLabel.BODY_CONFLICT_STMT);
        smartHashMap.putIf(XMLTools.extractTextFromNode((Node) newXPath.evaluate("/article/back/glossary", parse, XPathConstants.NODE)), BxZoneLabel.BODY_GLOSSARY);
        NodeList nodeList4 = (NodeList) newXPath.evaluate("/article/body//disp-formula", parse, XPathConstants.NODESET);
        for (int i4 = 0; i4 < nodeList4.getLength(); i4++) {
            Node item3 = nodeList4.item(i4);
            smartHashMap.putIf(newXPath.evaluate("label", item3), BxZoneLabel.BODY_EQUATION);
            NodeList childNodes = item3.getChildNodes();
            ArrayList arrayList8 = new ArrayList();
            for (int i5 = 0; i5 < childNodes.getLength(); i5++) {
                Node item4 = childNodes.item(i5);
                if (!item4.getNodeName().equals("label")) {
                    arrayList8.add(XMLTools.extractTextFromNode(item4));
                }
            }
            smartHashMap.putIf(TextUtils.joinStrings(arrayList8), BxZoneLabel.BODY_EQUATION);
        }
        ArrayList arrayList9 = new ArrayList();
        Node node = (Node) newXPath.evaluate("/article/back/ref-list", parse, XPathConstants.NODE);
        if (node != null) {
            int i6 = 0;
            while (true) {
                Integer num2 = i6;
                if (num2.intValue() >= node.getChildNodes().getLength()) {
                    break;
                }
                arrayList9.add(XMLTools.extractTextFromNode(node.getChildNodes().item(num2.intValue())));
                i6 = Integer.valueOf(num2.intValue() + 1);
            }
        }
        smartHashMap.putIf(TextUtils.joinStrings(arrayList9), BxZoneLabel.REFERENCES);
        smartHashMap.put("references", BxZoneLabel.REFERENCES);
        HashSet hashSet = new HashSet();
        for (Map.Entry entry : smartHashMap.entrySet()) {
            if (BxZoneLabel.MET_BIB_INFO.equals(entry.getValue())) {
                hashSet.addAll(Arrays.asList(((String) entry.getKey()).split(HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR)));
            }
        }
        smartHashMap.put(StringUtils.join(hashSet, HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR), BxZoneLabel.MET_BIB_INFO);
        printlnVerbose("journalTitle: " + str3);
        printlnVerbose("journalPublisher: " + str4);
        printlnVerbose("journalISSNPublisher: " + str5);
        printlnVerbose("articleType: " + extractTextAsList);
        printlnVerbose("received: " + extractChildrenAsTextList);
        printlnVerbose("accepted: " + extractChildrenAsTextList2);
        printlnVerbose("pubdate: " + extractChildrenAsTextList3);
        printlnVerbose("permissions: " + extractTextFromNode2);
        printlnVerbose("license: " + extractTextFromNode3);
        printlnVerbose("title: " + str2);
        printlnVerbose("abstract: " + extractTextFromNode);
        printlnVerbose("authorEmails: " + arrayList2);
        printlnVerbose("authorNames: " + arrayList);
        printlnVerbose("authorAff: " + arrayList3);
        printlnVerbose("authorNotes: " + extractTextFromNode5);
        printlnVerbose("editor: " + arrayList4);
        printlnVerbose("keywords: " + extractTextFromNode4);
        printlnVerbose("DOI: " + str7);
        printlnVerbose("volume: " + str8);
        printlnVerbose("issue: " + str9);
        printlnVerbose("financial dis.: " + extractTextFromNode7);
        printlnVerbose("paragraphs: " + extractTextAsList3);
        printlnVerbose("section titles: " + extractTextAsList4);
        printlnVerbose("tableBodies: " + arrayList6);
        printlnVerbose("tableCaptions: " + arrayList5);
        printlnVerbose("tableFootnotes: " + arrayList7);
        printlnVerbose("figures: " + extractTextAsList5);
        printlnVerbose("acknowledgement: " + extractTextFromNode8);
        printlnVerbose("ref: " + arrayList9.size() + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + arrayList9);
        SmithWatermanDistance smithWatermanDistance = new SmithWatermanDistance(0.1d, 0.1d);
        CosineDistance cosineDistance = new CosineDistance();
        ArrayList arrayList10 = new ArrayList(valueOf3.intValue());
        ArrayList arrayList11 = new ArrayList(valueOf3.intValue());
        int i7 = 0;
        while (true) {
            Integer num3 = i7;
            if (num3.intValue() >= valueOf3.intValue()) {
                break;
            }
            arrayList10.add(new ArrayList());
            arrayList11.add(new ArrayList());
            i7 = Integer.valueOf(num3.intValue() + 1);
        }
        for (Map.Entry entry2 : smartHashMap.entrySet()) {
            List<String> list = TextUtils.tokenize((String) entry2.getKey());
            printlnVerbose("--------------------");
            printlnVerbose(entry2.getValue() + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + ((String) entry2.getKey()) + "\n");
            int i8 = 0;
            while (true) {
                Integer num4 = i8;
                if (num4.intValue() < valueOf3.intValue()) {
                    BxZone bxZone = (BxZone) newArrayList.get(num4.intValue());
                    List<String> list2 = TextUtils.tokenize(TextUtils.removeOrphantSpaces(TextUtils.cleanLigatures(bxZone.toText().toLowerCase(Locale.ENGLISH))));
                    if (bxZone.toText().contains("www.biomedcentral.com")) {
                        valueOf = Double.valueOf(Transducer.ZERO_COST);
                        valueOf2 = Double.valueOf(Transducer.ZERO_COST);
                    } else {
                        valueOf = Double.valueOf(smithWatermanDistance.compare(list, list2));
                        valueOf2 = Double.valueOf(cosineDistance.compare(list, list2));
                    }
                    printlnVerbose(valueOf + HelpFormatter.DEFAULT_LONG_OPT_SEPARATOR + ((BxZone) newArrayList.get(num4.intValue())).toText() + "\n\n");
                    ((List) arrayList10.get(num4.intValue())).add(new LabelTrio((BxZoneLabel) entry2.getValue(), list, valueOf));
                    ((List) arrayList11.get(num4.intValue())).add(new LabelTrio((BxZoneLabel) entry2.getValue(), list, valueOf2));
                    i8 = Integer.valueOf(num4.intValue() + 1);
                }
            }
        }
        Iterator<BxPage> it5 = pages.iterator();
        while (it5.hasNext()) {
            BxPage next = it5.next();
            boolean z = true;
            while (z) {
                z = false;
                boolean z2 = false;
                Iterator<BxZone> it6 = next.iterator();
                while (it6.hasNext()) {
                    BxZone next2 = it6.next();
                    BxZoneLabel label = next2.getLabel();
                    double d = 0.0d;
                    double d2 = 0.0d;
                    for (LabelTrio labelTrio : (List) arrayList10.get(newArrayList.indexOf(next2))) {
                        if (labelTrio.label.equals(BxZoneLabel.MET_TITLE)) {
                            d = labelTrio.alignment.doubleValue() / labelTrio.entryTokens.size();
                        }
                        if (labelTrio.label.equals(BxZoneLabel.MET_AUTHOR)) {
                            d2 = labelTrio.alignment.doubleValue() / labelTrio.entryTokens.size();
                        }
                    }
                    String lowerCase = ContentCleaner.cleanAllAndBreaks(next2.toText()).toLowerCase(Locale.ENGLISH);
                    int childrenCount = next2.childrenCount();
                    int indexOf = Lists.newArrayList(pages).indexOf(next2.getParent());
                    BxLine firstChild = next2.getFirstChild();
                    if (indexOf == 0 && ((next2.getLabel().equals(BxZoneLabel.MET_TITLE) || next2.getLabel().equals(BxZoneLabel.BODY_CONTENT)) && d >= 0.7d && d2 >= 0.4d)) {
                        next2.setLabel(BxZoneLabel.MET_TITLE_AUTHOR);
                    }
                    if (childrenCount == 2 && lowerCase.contains(Annotation.PAGE) && lowerCase.contains("of") && lowerCase.contains("page number not for")) {
                        next2.setLabel(BxZoneLabel.OTH_PAGE_NUMBER);
                    }
                    if (childrenCount == 1 && (lowerCase.contains("page number not for") || (lowerCase.contains(Annotation.PAGE) && lowerCase.contains("of")))) {
                        next2.setLabel(BxZoneLabel.OTH_PAGE_NUMBER);
                    }
                    if (indexOf == 0 && !next2.getLabel().isOfCategory(BxZoneLabelCategory.CAT_METADATA) && childrenCount < 11 && (lowerCase.contains("department") || lowerCase.contains("university"))) {
                        next2.setLabel(BxZoneLabel.MET_AFFILIATION);
                    }
                    if (indexOf > 0 && next2.getLabel().equals(BxZoneLabel.MET_COPYRIGHT)) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if (childrenCount < 5 && firstChild.toText().length() < 11 && firstChild.toText().startsWith("Figure") && next2.getLabel().equals(BxZoneLabel.BODY_CONTENT)) {
                        next2.setLabel(BxZoneLabel.BODY_FIGURE);
                    }
                    if (indexOf > 0 && next2.getLabel().equals(BxZoneLabel.MET_TITLE)) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if (indexOf > 0 && next2.hasPrev() && next2.hasNext() && ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN) || next2.getLabel().equals(BxZoneLabel.MET_DATES) || next2.getLabel().equals(BxZoneLabel.BODY_ACKNOWLEDGMENT)) && ((next2.getPrev().getLabel().equals(BxZoneLabel.BODY_TABLE) || next2.getNext().getLabel().equals(BxZoneLabel.BODY_TABLE)) && next2.getWidth() < 100.0d))) {
                        if (next2.getPrev().getLabel().equals(BxZoneLabel.BODY_TABLE) && next2.getNext().getLabel().equals(BxZoneLabel.BODY_TABLE)) {
                            next2.setLabel(BxZoneLabel.BODY_TABLE);
                        }
                        if (next2.getPrev().getLabel().equals(BxZoneLabel.BODY_TABLE)) {
                            double x = next2.getPrev().getX() + (next2.getPrev().getWidth() / 2.0d);
                            double y = next2.getPrev().getY() + (next2.getPrev().getHeight() / 2.0d);
                            double x2 = next2.getX() + (next2.getWidth() / 2.0d);
                            double y2 = next2.getY() + (next2.getHeight() / 2.0d);
                            if (Math.abs(x - x2) < 200.0d && Math.abs(y - y2) < 200.0d) {
                                next2.setLabel(BxZoneLabel.BODY_TABLE);
                            }
                        }
                        if (next2.getNext().getLabel().equals(BxZoneLabel.BODY_TABLE)) {
                            double x3 = next2.getNext().getX() + (next2.getNext().getWidth() / 2.0d);
                            double y3 = next2.getNext().getY() + (next2.getNext().getHeight() / 2.0d);
                            double x4 = next2.getX() + (next2.getWidth() / 2.0d);
                            double y4 = next2.getY() + (next2.getHeight() / 2.0d);
                            if (Math.abs(x3 - x4) < 200.0d && Math.abs(y3 - y4) < 200.0d) {
                                next2.setLabel(BxZoneLabel.BODY_TABLE);
                            }
                        }
                    }
                    if (indexOf > 1 && (next2.getLabel().equals(BxZoneLabel.MET_AFFILIATION) || next2.getLabel().equals(BxZoneLabel.MET_ABSTRACT))) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if (indexOf == 0 && childrenCount < 10 && (lowerCase.startsWith("citation:") || lowerCase.contains(" volume ") || lowerCase.contains("vol\\. ") || lowerCase.contains("doi"))) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if (indexOf == 0 && (lowerCase.startsWith("editor:") || lowerCase.startsWith("academic editor:"))) {
                        next2.setLabel(BxZoneLabel.MET_EDITOR);
                    }
                    if (indexOf == 0 && lowerCase.startsWith("copyright:")) {
                        next2.setLabel(BxZoneLabel.MET_COPYRIGHT);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.MET_DATES) && lowerCase.contains(Citation.volume) && lowerCase.contains("issue")) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.MET_AUTHOR) || next2.getLabel().equals(BxZoneLabel.REFERENCES) || next2.getLabel().equals(BxZoneLabel.MET_DATES)) && childrenCount < 6 && (next2.getY() < 100.0d || next2.getParent().getHeight() - next2.getY() < 100.0d)) {
                        BxPage parent = next2.getParent();
                        if (indexOf > 0) {
                            Iterator<BxZone> it7 = parent.getPrev().iterator();
                            while (it7.hasNext()) {
                                BxZone next3 = it7.next();
                                if (next3.toText().replaceAll("[^a-zA-Z]", "").equals(next2.toText().replaceAll("[^a-zA-Z]", "")) && Math.abs(next3.getY() - next2.getY()) < 10.0d) {
                                    next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                                }
                            }
                        }
                        if (indexOf < pages.childrenCount() - 1) {
                            Iterator<BxZone> it8 = parent.getNext().iterator();
                            while (it8.hasNext()) {
                                BxZone next4 = it8.next();
                                if (next4.toText().replaceAll("[^a-zA-Z]", "").equals(next2.toText().replaceAll("[^a-zA-Z]", "")) && Math.abs(next4.getY() - next2.getY()) < 10.0d) {
                                    next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                                }
                            }
                        }
                        if (indexOf > 1) {
                            Iterator<BxZone> it9 = parent.getPrev().getPrev().iterator();
                            while (it9.hasNext()) {
                                BxZone next5 = it9.next();
                                if (next5.toText().replaceAll("[^a-zA-Z]", "").equals(next2.toText().replaceAll("[^a-zA-Z]", "")) && Math.abs(next5.getY() - next2.getY()) < 10.0d) {
                                    next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                                }
                            }
                        }
                        if (indexOf < pages.childrenCount() - 2) {
                            Iterator<BxZone> it10 = parent.getNext().getNext().iterator();
                            while (it10.hasNext()) {
                                BxZone next6 = it10.next();
                                if (next6.toText().replaceAll("[^a-zA-Z]", "").equals(next2.toText().replaceAll("[^a-zA-Z]", "")) && Math.abs(next6.getY() - next2.getY()) < 10.0d) {
                                    next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                                }
                            }
                        }
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN) || next2.getLabel().equals(BxZoneLabel.MET_BIB_INFO) || next2.getLabel().equals(BxZoneLabel.REFERENCES)) && lowerCase.matches("d?[0-9]+") && lowerCase.length() <= 4 && (next2.getY() < 100.0d || next2.getParent().getHeight() - next2.getY() < 100.0d)) {
                        next2.setLabel(BxZoneLabel.OTH_PAGE_NUMBER);
                    }
                    if (lowerCase.equals("acknowledgments")) {
                        next2.setLabel(BxZoneLabel.BODY_ACKNOWLEDGMENT);
                    }
                    if (lowerCase.startsWith("introduction") && next2.hasPrev() && !next2.getPrev().toText().equalsIgnoreCase(BeanDefinitionParserDelegate.ABSTRACT_ATTRIBUTE)) {
                        z2 = true;
                    }
                    if (z2 && next2.getLabel().equals(BxZoneLabel.MET_ABSTRACT)) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if (indexOf == 0 && next2.getLabel().equals(BxZoneLabel.REFERENCES) && !lowerCase.equals("references") && (!next2.hasPrev() || !next2.getPrev().toText().toLowerCase(Locale.ENGLISH).equals("references"))) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.REFERENCES) && childrenCount < 10 && !lowerCase.matches(".*[1-2][09][0-9][0-9].*") && next2.hasNext() && next2.hasPrev() && next2.getPrev().getLabel().equals(BxZoneLabel.BODY_CONTENT) && next2.getNext().getLabel().equals(BxZoneLabel.BODY_CONTENT)) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.MET_ABSTRACT) && next2.hasPrev() && next2.getPrev().getLabel().equals(BxZoneLabel.MET_ABSTRACT) && next2.getX() + 10.0d < next2.getPrev().getX() && next2.getWidth() * 2.0d < next.getWidth()) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.MET_ABSTRACT) && next2.hasPrev() && next2.getPrev().getLabel().equals(BxZoneLabel.BODY_CONTENT) && !lowerCase.startsWith(BeanDefinitionParserDelegate.ABSTRACT_ATTRIBUTE) && next2.getWidth() * 2.0d < next.getWidth()) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && next2.hasPrev() && next2.getPrev().getLabel().equals(BxZoneLabel.REFERENCES) && (lowerCase.matches("[1-9][0-9]?[0-9]?\\.?") || lowerCase.matches(".*[1-2][0-9][0-9][0-9].*"))) {
                        next2.setLabel(BxZoneLabel.REFERENCES);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.REFERENCES) || next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && (lowerCase.startsWith("doi") || lowerCase.startsWith("cite this article"))) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && firstChild.toText().equalsIgnoreCase("author details")) {
                        next2.setLabel(BxZoneLabel.MET_AFFILIATION);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && (firstChild.toText().toLowerCase(Locale.ENGLISH).equals("acknowledgments") || firstChild.toText().toLowerCase(Locale.ENGLISH).equals("acknowledgements"))) {
                        next2.setLabel(BxZoneLabel.BODY_ACKNOWLEDGMENT);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.MET_TITLE) && next2.getY() * 2.0d > next.getHeight()) {
                        next2.setLabel(BxZoneLabel.BODY_CONTENT);
                    }
                    if ((next2.getY() < 100.0d || next2.getParent().getHeight() - next2.getY() < 100.0d) && lowerCase.matches("sup-[0-9][0-9]?")) {
                        next2.setLabel(BxZoneLabel.OTH_PAGE_NUMBER);
                    }
                    if ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && firstChild.toText().equalsIgnoreCase("references")) {
                        next2.setLabel(BxZoneLabel.REFERENCES);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) && (firstChild.toText().matches("F[iI][gG][uU][rR][eE] [0-9IV][0-9IV]?[0-9IV]?[\\.:] [A-Z].*") || firstChild.toText().matches("F[iI][gG]\\. [0-9IV][0-9IV]?[0-9IV]?[\\.:] [A-Z].*") || firstChild.toText().matches("F[iI][gG][uU][rR][eE] [0-9IV][0-9IV]?[0-9IV]?\\.") || firstChild.toText().matches("F[iI][gG]\\. [0-9IV][0-9IV]?[0-9IV]?\\.") || firstChild.toText().matches("F[iI][gG][uU][rR][eE] [0-9IV][0-9IV]?[0-9IV]?") || firstChild.toText().matches("F[iI][gG]\\. [0-9IV][0-9IV]?[0-9IV]?"))) {
                        next2.setLabel(BxZoneLabel.BODY_FIGURE);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) && (firstChild.toText().matches("T[aA][bB][lL][eE] [0-9IV][0-9IV]?[0-9IV]?[\\.:] [A-Z].*") || firstChild.toText().matches("T[aA][bB][lL][eE] [0-9IV][0-9IV]?[0-9IV]?\\.?"))) {
                        next2.setLabel(BxZoneLabel.BODY_TABLE);
                    }
                    if (next2.getLabel().equals(BxZoneLabel.BODY_ACKNOWLEDGMENT) && lowerCase.contains("this article is distributed")) {
                        next2.setLabel(BxZoneLabel.MET_COPYRIGHT);
                    }
                    if (indexOf == 0 && !next2.getLabel().isOfCategory(BxZoneLabelCategory.CAT_METADATA) && lowerCase.contains(Citation.journal)) {
                        next2.setLabel(BxZoneLabel.MET_BIB_INFO);
                    }
                    if (indexOf == 0 && !next2.getLabel().isOfCategory(BxZoneLabelCategory.CAT_METADATA) && lowerCase.contains("correspondence")) {
                        next2.setLabel(BxZoneLabel.MET_CORRESPONDENCE);
                    }
                    if (indexOf == 0 && ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && lowerCase.contains("accepted") && lowerCase.contains("published"))) {
                        next2.setLabel(BxZoneLabel.MET_DATES);
                    }
                    if (indexOf == 0 && childrenCount < 10 && ((next2.getLabel().equals(BxZoneLabel.BODY_CONTENT) || next2.getLabel().equals(BxZoneLabel.OTH_UNKNOWN)) && next2.hasPrev() && (next2.getY() - next2.getHeight()) - next2.getPrev().getY() < 4.0d && Math.abs(firstChild.getHeight() - next2.getPrev().getFirstChild().getHeight()) < 0.5d && !next2.getPrev().getLabel().equals(BxZoneLabel.MET_KEYWORDS))) {
                        next2.setLabel(next2.getPrev().getLabel());
                    }
                    if (indexOf == pages.childrenCount() - 1 && (lowerCase.startsWith("publish with") || lowerCase.contains("will be the most significant development") || lowerCase.contains("disseminating the results of biomedical") || lowerCase.contains("sir paul nurse") || lowerCase.contains("your research papers") || lowerCase.contains("available free of charge") || lowerCase.contains("peer reviewed and published") || lowerCase.contains("cited in pubmed and archived") || lowerCase.contains("you keep the copyright") || lowerCase.contains("submit your manuscript") || lowerCase.contains("submit your next manuscript") || lowerCase.contains("online submission") || lowerCase.contains("peer review") || lowerCase.contains("space constraints") || lowerCase.contains("publication on acceptance") || lowerCase.contains("inclusion in pubmed") || lowerCase.contains("freely available") || lowerCase.contains("publication history"))) {
                        next2.setLabel(BxZoneLabel.OTH_UNKNOWN);
                    }
                    if (lowerCase.startsWith("funding:") || firstChild.toText().equals("Funding")) {
                        next2.setLabel(BxZoneLabel.BODY_ACKNOWLEDGMENT);
                    }
                    if (lowerCase.startsWith("conflicts of interest") || lowerCase.startsWith("conflict of interest") || lowerCase.startsWith("competing interests") || (next2.hasPrev() && (next2.getPrev().toText().toLowerCase(Locale.ENGLISH).equals("conflicts of interest") || next2.getPrev().toText().toLowerCase(Locale.ENGLISH).equals("conflict of interest") || next2.getPrev().toText().toLowerCase(Locale.ENGLISH).equals("competing interests")))) {
                        next2.setLabel(BxZoneLabel.BODY_CONFLICT_STMT);
                    }
                    z = z || !label.equals(next2.getLabel());
                }
                boolean z3 = false;
                Iterator<BxZone> it11 = next.iterator();
                while (it11.hasNext()) {
                    BxZone next7 = it11.next();
                    BxZoneLabel label2 = next7.getLabel();
                    String lowerCase2 = ContentCleaner.cleanAllAndBreaks(next7.toText()).toLowerCase(Locale.ENGLISH);
                    if (BxZoneLabel.MET_AUTHOR.equals(next7.getLabel()) && z3 && ((lowerCase2.contains("email") && lowerCase2.contains("@")) || lowerCase2.startsWith("correspondence"))) {
                        next7.setLabel(BxZoneLabel.MET_CORRESPONDENCE);
                    }
                    if (BxZoneLabel.MET_AUTHOR.equals(next7.getLabel()) || BxZoneLabel.MET_TITLE_AUTHOR.equals(next7.getLabel())) {
                        z3 = true;
                    }
                    z = z || !label2.equals(next7.getLabel());
                }
            }
        }
        return pages;
    }

    /**
     * Command-line entry point. Recursively lists the files under the given directory whose
     * extension is {@code PdfSchema.DEFAULT_XPATH_ID}, and for each one that does not yet have a
     * sibling {@code .cxml-corr} file, runs {@link RuleBasedPubmedXMLGenerator#generateTrueViz}
     * on the sibling {@code .cxml} file and the NLM file located by {@code TextUtils.getNLMPath},
     * then writes the result as UTF-8 to the {@code .cxml-corr} file.
     *
     * <p>Files that already have a {@code .cxml-corr} sibling are counted as done and skipped
     * silently; a progress line is printed after every file that is actually processed.
     *
     * @param strArr exactly one element: the directory to scan; otherwise a usage message is
     *               printed to stderr and the JVM exits with status 1
     * @throws FileNotFoundException if the NLM or {@code .cxml} input cannot be opened
     * @throws IOException if reading an input or writing the output fails
     * @throws TransformationException declared for the TrueViz read/write steps
     * @throws AnalysisException declared for the generation step
     * @throws ParserConfigurationException declared for the generation step
     * @throws SAXException declared for the generation step
     * @throws XPathExpressionException declared for the generation step
     */
    public static void main(String[] strArr) throws FileNotFoundException, AnalysisException, ParserConfigurationException, SAXException, IOException, XPathExpressionException, TransformationException {
        if (strArr.length != 1) {
            System.err.println("Usage: <pubmed directory>");
            System.exit(1);
        }
        // NOTE(review): DEFAULT_XPATH_ID is presumably the string "pdf" (the paths below are
        // rewritten from ".pdf") -- confirm, and consider a plain literal here.
        Collection<File> listFiles = FileUtils.listFiles(new File(strArr[0]), new String[]{PdfSchema.DEFAULT_XPATH_ID}, true);
        int i = 0;
        for (File pdfFile : listFiles) {
            String path = pdfFile.getPath();
            String nlmPath = TextUtils.getNLMPath(path);
            // NOTE(review): the pattern is unanchored, so the FIRST ".pdf" in the path is
            // replaced, even if it sits in a directory name. Left unchanged so the derived
            // names keep matching existing files -- confirm before anchoring with "$".
            String cxmlPath = path.replaceFirst("\\.pdf", ".cxml");
            String correctedPath = path.replaceFirst("\\.pdf", ".cxml-corr");
            if (new File(correctedPath).exists()) {
                // Already produced by an earlier run: count it towards progress and move on.
                i++;
                continue;
            }
            System.out.println(path);

            BxDocument document;
            // Both inputs are closed as soon as generation finishes (or fails); previously
            // they were never closed, leaking two descriptors per processed file.
            try (FileInputStream nlmStream = new FileInputStream(nlmPath);
                    FileInputStream cxmlStream = new FileInputStream(cxmlPath)) {
                RuleBasedPubmedXMLGenerator generator = new RuleBasedPubmedXMLGenerator();
                generator.setVerbose(false);
                document = generator.generateTrueViz(cxmlStream, nlmStream);
            }
            i++;

            // Serialize BEFORE creating the output file: if serialization throws, no empty
            // ".cxml-corr" is left behind to be mistaken for a finished result on the next run.
            String trueViz = new BxDocumentToTrueVizWriter().write(Lists.newArrayList(document), new Object[0]);
            try (FileOutputStream fileOut = new FileOutputStream(correctedPath);
                    OutputStreamWriter writer = new OutputStreamWriter(fileOut, "UTF-8")) {
                writer.write(trueViz);
            }
            System.out.println("Progress: " + i + " out of " + listFiles.size() + " (" + ((i * 100.0d) / listFiles.size()) + "%)");
        }
    }
}
