package pl.edu.icm.cermine.evaluation;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.lang.StringUtils;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import pl.edu.icm.cermine.evaluation.tools.EvaluationUtils;
import pl.edu.icm.cermine.evaluation.tools.MetadataList;
import pl.edu.icm.cermine.evaluation.tools.MetadataRelation;
import pl.edu.icm.cermine.evaluation.tools.MetadataSingle;
import pl.edu.icm.cermine.evaluation.tools.NlmIterator;
import pl.edu.icm.cermine.evaluation.tools.NlmPair;
import pl.edu.icm.cermine.evaluation.tools.PrecisionRecall;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.tools.XMLTools;

/* loaded from: input_file:pl/edu/icm/cermine/evaluation/BwmetaGrobidFinalMetadataExtractionEvaluation.class */
/**
 * Compares metadata found in an "extracted" XML document (queried with TEI-style
 * XPaths such as {@code //teiHeader//...}) against an "original" BWMETA document
 * (queried with {@code /bwmeta/element/...} XPaths), for every pair delivered by
 * an {@link NlmIterator}, and prints per-document results and, outside CSV mode,
 * an aggregate precision/recall summary.
 */
public final class BwmetaGrobidFinalMetadataExtractionEvaluation {

    /** Default mode: a numbered header and the extracted file's path are printed per document. */
    private static final int MODE_VERBOSE = 0;

    /** CSV mode: a header row plus one row per document is printed; the summary is skipped. */
    private static final int MODE_CSV = 1;

    /**
     * Mode selected by the {@code q} command-line flag. This class prints no per-document
     * header in this mode; what the {@code print} methods of the metadata objects do with
     * it is defined elsewhere (presumably "quiet" -- confirm against those classes).
     */
    private static final int MODE_QUIET = 2;

    private static final String BWMETA_AUTHOR_XPATH = "/bwmeta/element/contributor[@role='author']";

    private static final String EXTRACTED_AUTHOR_XPATH = "//teiHeader//sourceDesc/biblStruct//author/persName";

    private static final String BWMETA_ARTICLE_POSITION_XPATH =
            "/bwmeta/element/structure/current[@level='bwmeta1.level.hierarchy_Journal_Article']/@position";

    /**
     * Evaluates every document pair of the iterator and prints the results to standard output.
     *
     * @param i           output mode: {@code 0} (default), {@code 1} (CSV) or {@code 2} ({@code q} flag)
     * @param nlmIterator source of (original, extracted) file pairs
     * @throws IOException if one of the files cannot be opened or read
     */
    // The accumulator lists stay raw: the parameter type of PrecisionRecall.build(...) is not
    // visible from this file, so a narrower element type could make the calls below fail to compile.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void evaluate(int i, NlmIterator nlmIterator) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        final int mode = i;
        DocumentBuilder documentBuilder = newNonValidatingBuilder();

        List titles = new ArrayList();
        List authors = new ArrayList();
        List affiliations = new ArrayList();
        List authorsAffiliations = new ArrayList();
        List emails = new ArrayList();
        List authorsEmails = new ArrayList();
        List abstracts = new ArrayList();
        List keywords = new ArrayList();
        List journals = new ArrayList();
        List volumes = new ArrayList();
        List issues = new ArrayList();
        List pages = new ArrayList();
        List years = new ArrayList();
        List dois = new ArrayList();
        List references = new ArrayList();

        if (mode == MODE_CSV) {
            System.out.println("path,gro_title,gro_abstract,gro_keywords,gro_authors,gro_affs,gro_autaff,gro_email,gro_autemail,gro_journal,gro_volume,gro_issue,gro_pages,gro_year,gro_doi,gro_refs,one");
        }

        int docIndex = 0;
        Iterator<NlmPair> pairs = nlmIterator.iterator();
        while (pairs.hasNext()) {
            NlmPair pair = pairs.next();
            docIndex++;
            if (mode == MODE_VERBOSE) {
                System.out.println("");
                System.out.println(">>>>>>>>> " + docIndex);
                System.out.println(pair.getExtractedNlm().getPath());
            }
            if (mode == MODE_CSV) {
                System.out.print(pair.getOriginalNlm().getPath() + ",");
            }
            try {
                Document expectedDoc = parseAndClose(documentBuilder, new FileInputStream(pair.getOriginalNlm()));
                Document extractedDoc = parseAndClose(documentBuilder, new FileInputStream(pair.getExtractedNlm()));

                MetadataSingle title = new MetadataSingle(expectedDoc, "/bwmeta/element/name[not(@type)]", extractedDoc, "//teiHeader//titleStmt/title");
                title.setComp(EvaluationUtils.swComparator);
                titles.add(title);
                title.print(mode, "title");

                MetadataSingle abstrakt = new MetadataSingle(expectedDoc, "/bwmeta/element/description[@type='abstract']", extractedDoc, "//teiHeader//abstract/p");
                abstrakt.setComp(EvaluationUtils.swComparator);
                abstracts.add(abstrakt);
                abstrakt.print(mode, "abstract");

                MetadataList keyword = new MetadataList(expectedDoc, "/bwmeta/element/tags[@type='keyword']/tag", extractedDoc, "//teiHeader//keywords//term");
                keywords.add(keyword);
                keyword.print(mode, "keywords");

                // Both author node lists are reused by the author, author-affiliation and
                // author-email sections below; the documents are not modified in between.
                List<Node> expectedAuthorNodes = XMLTools.extractNodes(expectedDoc, BWMETA_AUTHOR_XPATH);
                List<Node> extractedAuthorNodes = XMLTools.extractNodes(extractedDoc, EXTRACTED_AUTHOR_XPATH);

                List<String> expectedAuthors = new ArrayList<String>();
                for (Node authorNode : expectedAuthorNodes) {
                    // Authors without a canonical name are skipped, as in the relation sections.
                    String canonical = canonicalName(XMLTools.extractChildrenNodesFromNode(authorNode, "name"));
                    if (canonical != null) {
                        expectedAuthors.add(canonical);
                    }
                }
                List<String> extractedAuthors = new ArrayList<String>();
                for (Node persName : extractedAuthorNodes) {
                    extractedAuthors.add(fullName(XMLTools.extractChildrenTextFromNode(persName, "forename"), XMLTools.extractChildrenTextFromNode(persName, "surname")));
                }
                MetadataList author = new MetadataList(expectedAuthors, extractedAuthors);
                author.setComp(EvaluationUtils.authorComparator);
                authors.add(author);
                author.print(mode, "author");

                // Affiliation texts are de-duplicated through a set before comparison.
                MetadataList affiliation = new MetadataList(Lists.newArrayList(Sets.newHashSet(XMLTools.extractTextAsList(expectedDoc, "/bwmeta/element/affiliation/text"))), Lists.newArrayList(Sets.newHashSet(XMLTools.extractTextAsList(extractedDoc, "//teiHeader//sourceDesc/biblStruct//author/affiliation"))));
                affiliation.setComp(EvaluationUtils.cosineComparator());
                affiliations.add(affiliation);
                affiliation.print(mode, "affiliation");

                MetadataRelation authorAffiliation = new MetadataRelation();
                authorAffiliation.setComp1(EvaluationUtils.authorComparator);
                authorAffiliation.setComp2(EvaluationUtils.cosineComparator());
                // Affiliation id -> affiliation text, used to resolve each author's affiliation-ref.
                HashMap<String, String> affiliationTextById = new HashMap<String, String>();
                for (Node affiliationNode : XMLTools.extractNodes(expectedDoc, "/bwmeta/element/affiliation")) {
                    affiliationTextById.put(affiliationNode.getAttributes().getNamedItem("id").getNodeValue(), (String) XMLTools.extractChildrenTextFromNode(affiliationNode, "text").get(0));
                }
                for (Node authorNode : expectedAuthorNodes) {
                    String canonical = canonicalName(XMLTools.extractChildrenNodesFromNode(authorNode, "name"));
                    if (canonical == null) {
                        continue;
                    }
                    for (Node refNode : XMLTools.extractChildrenNodesFromNode(authorNode, "affiliation-ref")) {
                        String affiliationText = affiliationTextById.get(refNode.getAttributes().getNamedItem("ref").getNodeValue());
                        if (affiliationText != null) {
                            authorAffiliation.addExpected(new MetadataRelation.StringRelation(canonical, affiliationText));
                        }
                    }
                }
                for (Node persName : extractedAuthorNodes) {
                    String name = fullName(XMLTools.extractChildrenTextFromNode(persName, "forename"), XMLTools.extractChildrenTextFromNode(persName, "surname"));
                    for (Node affiliationNode : siblingsNamed(persName, "affiliation")) {
                        authorAffiliation.addExtracted(new MetadataRelation.StringRelation(name, XMLTools.extractTextFromNode(affiliationNode)));
                    }
                }
                authorsAffiliations.add(authorAffiliation);
                authorAffiliation.print(mode, "author - affiliation");

                MetadataList email = new MetadataList(expectedDoc, "/bwmeta/element/contributor[@role='author']/attribute[@key='contact-email']/value", extractedDoc, "//teiHeader//sourceDesc/biblStruct//author/email");
                email.setComp(EvaluationUtils.emailComparator);
                emails.add(email);
                email.print(mode, "email");

                MetadataRelation authorEmail = new MetadataRelation();
                authorEmail.setComp1(EvaluationUtils.authorComparator);
                authorEmail.setComp2(EvaluationUtils.emailComparator);
                for (Node authorNode : expectedAuthorNodes) {
                    String canonical = canonicalName(XMLTools.extractChildrenNodesFromNode(authorNode, "name"));
                    if (canonical == null) {
                        continue;
                    }
                    for (Node attributeNode : XMLTools.extractChildrenNodesFromNode(authorNode, "attribute")) {
                        if ("contact-email".equals(attributeNode.getAttributes().getNamedItem("key").getNodeValue())) {
                            authorEmail.addExpected(new MetadataRelation.StringRelation(canonical, (String) XMLTools.extractChildrenTextFromNode(attributeNode, "value").get(0)));
                        }
                    }
                }
                for (Node persName : extractedAuthorNodes) {
                    String name = fullName(XMLTools.extractChildrenTextFromNode(persName, "forename"), XMLTools.extractChildrenTextFromNode(persName, "surname"));
                    for (Node emailNode : siblingsNamed(persName, "email")) {
                        authorEmail.addExtracted(new MetadataRelation.StringRelation(name, XMLTools.extractTextFromNode(emailNode)));
                    }
                }
                authorsEmails.add(authorEmail);
                authorEmail.print(mode, "author - email");

                MetadataSingle journal = new MetadataSingle(expectedDoc, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Journal']/name[@type='canonical']", extractedDoc, "//monogr/title[@level='j' and @type='main']");
                journal.setComp(EvaluationUtils.journalComparator);
                journals.add(journal);
                journal.print(mode, "journal title");

                MetadataSingle volume = new MetadataSingle(expectedDoc, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Volume']/name[@type='canonical']", extractedDoc, "//monogr/imprint/biblScope[@unit='volume']");
                volumes.add(volume);
                volume.print(mode, "volume");

                MetadataSingle issue = new MetadataSingle(expectedDoc, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Number']/name[@type='canonical']", extractedDoc, "//monogr/imprint/biblScope[@unit='issue']");
                issues.add(issue);
                issue.print(mode, "issue");

                // The expected side holds one "from-to" position; the extracted side has separate
                // @from/@to attributes. Both are normalised to "from--to" before comparison.
                MetadataSingle pageFrom = new MetadataSingle(expectedDoc, BWMETA_ARTICLE_POSITION_XPATH, extractedDoc, "//monogr/imprint/biblScope[@unit='page']/@from");
                MetadataSingle pageTo = new MetadataSingle(expectedDoc, BWMETA_ARTICLE_POSITION_XPATH, extractedDoc, "//monogr/imprint/biblScope[@unit='page']/@to");
                String expectedPages = pageFrom.hasExpected() ? pageFrom.getExpectedValue().replaceAll("-", "--") : "";
                String extractedPages = (pageFrom.hasExtracted() && pageTo.hasExtracted())
                        ? pageFrom.getExtractedValue() + "--" + pageTo.getExtractedValue()
                        : "";
                MetadataSingle page = new MetadataSingle(expectedPages, extractedPages);
                pages.add(page);
                page.print(mode, "pages");

                List<String> expectedDate = EvaluationUtils.removeLeadingZerosFromDate(XMLTools.extractTextAsList(expectedDoc, "/bwmeta/element/structure/ancestor[@level='bwmeta1.level.hierarchy_Journal_Year']/name[@type='canonical']"));
                List<Node> publishedDates = XMLTools.extractNodes(extractedDoc, "//teiHeader//date[@type='published']");
                List<String> extractedDate = new ArrayList<String>();
                if (!publishedDates.isEmpty()) {
                    Node dateNode = publishedDates.get(0);
                    // Prefer the "when" attribute over the element text when it is present.
                    String dateText = dateNode.getTextContent();
                    Node whenAttribute = dateNode.getAttributes().getNamedItem("when");
                    if (whenAttribute != null) {
                        dateText = whenAttribute.getTextContent();
                    }
                    extractedDate = EvaluationUtils.removeLeadingZerosFromDate(Lists.newArrayList(dateText.split("-")));
                }
                MetadataSingle year = new MetadataSingle(StringUtils.join(expectedDate, "---"), StringUtils.join(extractedDate, "---"));
                year.setComp(EvaluationUtils.yearComparator);
                years.add(year);
                year.print(mode, "year");

                MetadataSingle doi = new MetadataSingle(expectedDoc, "/bwmeta/element/id[@scheme='bwmeta1.id-class.DOI']/@value", extractedDoc, "//teiHeader//idno[@type='DOI']");
                dois.add(doi);
                doi.print(mode, "DOI");

                List<String> expectedReferences = new ArrayList<String>();
                for (Node referenceNode : XMLTools.extractNodes(expectedDoc, "//relation[@type='reference-to']/attribute[@key='reference-text']/value")) {
                    expectedReferences.add(XMLTools.extractTextFromNode(referenceNode).trim());
                }
                List<String> extractedReferences = new ArrayList<String>();
                for (Node referenceNode : XMLTools.extractNodes(extractedDoc, "//listBibl/biblStruct")) {
                    extractedReferences.add(XMLTools.extractTextFromNode(referenceNode).trim());
                }
                MetadataList reference = new MetadataList(expectedReferences, extractedReferences);
                reference.setComp(EvaluationUtils.cosineComparator(0.6d));
                references.add(reference);
                reference.print(mode, "references");

                if (mode == MODE_CSV) {
                    System.out.println("1");
                }
            } catch (SAXException e) {
                // The document is skipped and the counter rolled back so the numbering stays contiguous.
                docIndex--;
                if (mode == MODE_CSV) {
                    // Terminate the row already started with "path," so the next row is not appended to it.
                    System.out.println();
                }
                System.err.println("Skipping " + pair.getOriginalNlm().getPath() + ": " + e.getMessage());
            }
        }

        if (mode != MODE_CSV) {
            System.out.println("==== Summary (" + nlmIterator.size() + " docs)====");
            PrecisionRecall titlePR = new PrecisionRecall().build(titles);
            titlePR.print("Title");
            PrecisionRecall abstractPR = new PrecisionRecall().build(abstracts);
            abstractPR.print("Abstract");
            PrecisionRecall keywordsPR = new PrecisionRecall().build(keywords);
            keywordsPR.print("Keywords");
            PrecisionRecall authorsPR = new PrecisionRecall().build(authors);
            authorsPR.print("Authors");
            PrecisionRecall affiliationsPR = new PrecisionRecall().build(affiliations);
            affiliationsPR.print("Affiliations");
            new PrecisionRecall().build(authorsAffiliations).print("Author - affiliation");
            PrecisionRecall emailsPR = new PrecisionRecall().build(emails);
            emailsPR.print("Emails");
            new PrecisionRecall().build(authorsEmails).print("Author - email");
            new PrecisionRecall().build(journals).print("Journal");
            new PrecisionRecall().build(volumes).print("Volume");
            new PrecisionRecall().build(issues).print("Issue");
            new PrecisionRecall().build(pages).print("Pages");
            PrecisionRecall yearPR = new PrecisionRecall().build(years);
            yearPR.print("Year");
            PrecisionRecall doiPR = new PrecisionRecall().build(dois);
            doiPR.print("DOI");
            new PrecisionRecall().build(references).print("References");

            // Only these eight categories contribute to the averages.
            List<PrecisionRecall> averaged = Lists.newArrayList(titlePR, authorsPR, affiliationsPR, emailsPR, abstractPR, keywordsPR, yearPR, doiPR);
            double precisionSum = 0.0d;
            double recallSum = 0.0d;
            double f1Sum = 0.0d;
            for (PrecisionRecall result : averaged) {
                precisionSum += result.getPrecision().doubleValue();
                recallSum += result.getRecall().doubleValue();
                f1Sum += result.getF1().doubleValue();
            }
            int count = averaged.size();
            System.out.printf("Average precision\t\t%4.2f\n", 100.0d * (precisionSum / count));
            System.out.printf("Average recall\t\t%4.2f\n", 100.0d * (recallSum / count));
            System.out.printf("Average F1 score\t\t%4.2f\n", 100.0d * (f1Sum / count));
        }
    }

    /** Creates a DOM builder with namespaces, validation and DTD loading switched off. */
    private static DocumentBuilder newNonValidatingBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setFeature("http://xml.org/sax/features/namespaces", false);
        factory.setFeature("http://xml.org/sax/features/validation", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory.newDocumentBuilder();
    }

    /** Parses the stream into a DOM document and closes the stream whether or not parsing succeeds. */
    private static Document parseAndClose(DocumentBuilder builder, FileInputStream input) throws IOException, SAXException {
        try {
            return builder.parse(input);
        } finally {
            input.close();
        }
    }

    /** Returns the text of the first node whose {@code type} attribute is {@code canonical}, or {@code null} if there is none. */
    private static String canonicalName(List<Node> nameNodes) {
        for (Node nameNode : nameNodes) {
            Node type = nameNode.getAttributes().getNamedItem("type");
            if (type != null && "canonical".equals(type.getTextContent())) {
                return nameNode.getTextContent();
            }
        }
        return null;
    }

    /** Joins the forenames and the surnames with single spaces into one "forenames surnames" string. */
    private static String fullName(List<String> forenames, List<String> surnames) {
        return StringUtils.join(forenames, " ") + " " + StringUtils.join(surnames, " ");
    }

    /** Returns, in document order, the children of the node's parent whose node name equals {@code name}. */
    private static List<Node> siblingsNamed(Node node, String name) {
        List<Node> matches = new ArrayList<Node>();
        NodeList siblings = node.getParentNode().getChildNodes();
        for (int index = 0; index < siblings.getLength(); index++) {
            Node sibling = siblings.item(index);
            if (name.equals(sibling.getNodeName())) {
                matches.add(sibling);
            }
        }
        return matches;
    }

    /**
     * Command-line entry point.
     *
     * @param strArr {@code <input dir> <orig extension> <extract extension>} followed by an
     *               optional {@code csv} or {@code q} flag; any other fourth argument leaves
     *               the default mode in place
     */
    public static void main(String[] strArr) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        if (strArr.length != 3 && strArr.length != 4) {
            System.out.println("Usage: BwmetaGrobidFinalMetadataExtractionEvaluation <input dir> <orig extension> <extract extension> [csv|q]");
            return;
        }
        String inputDir = strArr[0];
        String originalExtension = strArr[1];
        String extractedExtension = strArr[2];
        int mode = MODE_VERBOSE;
        if (strArr.length == 4) {
            if ("csv".equals(strArr[3])) {
                mode = MODE_CSV;
            } else if ("q".equals(strArr[3])) {
                mode = MODE_QUIET;
            }
        }
        new BwmetaGrobidFinalMetadataExtractionEvaluation().evaluate(mode, new NlmIterator(inputDir, originalExtension, extractedExtension));
    }
}
