package pl.edu.icm.cermine.evaluation;

import com.google.common.collect.Lists;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import pl.edu.icm.cermine.evaluation.tools.EvaluationUtils;
import pl.edu.icm.cermine.evaluation.tools.MetadataList;
import pl.edu.icm.cermine.evaluation.tools.NlmIterator;
import pl.edu.icm.cermine.evaluation.tools.NlmPair;
import pl.edu.icm.cermine.evaluation.tools.PrecisionRecall;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.tools.XMLTools;

/* loaded from: input_file:pl/edu/icm/cermine/evaluation/GrobidFinalTextExtractionEvaluation.class */
/**
 * Compares section headers found in pairs of XML documents and reports the result.
 *
 * <p>For every pair supplied by an {@link NlmIterator}, the "original" document is queried with
 * {@code /article/body//sec/title} and the "extracted" document with
 * {@code /TEI/text/body/div/head}. The header texts are normalized, a fixed set of
 * non-content headers (references, acknowledgements, ...) is dropped, and both lists are handed
 * to a {@link MetadataList} for comparison.
 *
 * <p>Output goes to {@code System.out}; the output mode is selected by the first argument of
 * {@link #evaluate(int, NlmIterator)}.
 */
public final class GrobidFinalTextExtractionEvaluation {

    /** Output mode: per-document progress lines, followed by a summary (the default). */
    private static final int MODE_VERBOSE = 0;

    /** Output mode: a CSV header line and one CSV row per document, no summary. */
    private static final int MODE_CSV = 1;

    /** Output mode selected by the {@code q} argument; this class then prints only the summary. */
    private static final int MODE_QUIET = 2;

    /** XPath selecting section titles in the original document. */
    private static final String ORIGINAL_HEADERS_XPATH = "/article/body//sec/title";

    /** XPath selecting section heads in the extracted document. */
    private static final String EXTRACTED_HEADERS_XPATH = "/TEI/text/body/div/head";

    /** Titles nested in more than this many consecutive {@code sec} ancestors are ignored. */
    private static final int MAX_SECTION_DEPTH = 3;

    /** Matches every character that is neither an ASCII letter nor a space. */
    private static final Pattern NON_LETTER_OR_SPACE = Pattern.compile("[^a-zA-Z ]");

    /** Normalized header texts that are excluded from the comparison. */
    private static final Set<String> EXCLUDED_HEADERS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "references",
                    "acknowledgements",
                    "acknowledgments",
                    "conflicts of interest",
                    "declaration of interest",
                    "appendix",
                    "conflict of interest statement",
                    "conflict of interest",
                    "funding",
                    "authors contributions",
                    "competing interests")));

    /**
     * Runs the header comparison over all document pairs and prints the results.
     *
     * <p>Modes: {@code 0} prints a progress block per document and a final summary; {@code 1}
     * prints a CSV header ({@code path,gro_header,one}) and one row per document without a
     * summary; any other value prints no per-document lines from this method but still prints
     * the summary. The mode is also forwarded to {@link MetadataList#print}.
     *
     * <p>A pair whose processing raises a {@link SAXException} is skipped: it is reported on
     * {@code System.err} and contributes nothing to the results.
     *
     * @param i           the output mode (see above)
     * @param nlmIterator source of the document pairs to evaluate
     * @throws ParserConfigurationException if the XML parser cannot be configured
     * @throws IOException                  if a document file cannot be opened or read
     * @throws XPathExpressionException     declared for the XPath-based node extraction
     * @throws AnalysisException            declared for callers; not raised directly here
     * @throws TransformationException      declared for callers; not raised directly here
     * @throws SAXException                 declared for callers; parse failures are handled here
     * @throws JDOMException                declared for callers; not raised directly here
     * @throws TransformerException         declared for callers; not raised directly here
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void evaluate(int i, NlmIterator nlmIterator) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        final int mode = i;
        DocumentBuilder documentBuilder = createDocumentBuilder();

        // Kept as a raw ArrayList on purpose: the parameter type expected by
        // PrecisionRecall.build is not visible from this file.
        ArrayList results = new ArrayList();

        if (mode == MODE_CSV) {
            System.out.println("path,gro_header,one");
        }

        int docIndex = 0;
        Iterator<NlmPair> pairs = nlmIterator.iterator();
        while (pairs.hasNext()) {
            NlmPair pair = pairs.next();
            docIndex++;
            if (mode == MODE_VERBOSE) {
                System.out.println("");
                System.out.println(">>>>>>>>> " + docIndex);
                System.out.println(pair.getExtractedNlm().getPath());
            }
            try {
                Document originalDoc = parseFile(documentBuilder, pair.getOriginalNlm());
                Document extractedDoc = parseFile(documentBuilder, pair.getExtractedNlm());

                List<String> originalHeaders = new ArrayList<String>();
                for (Node titleNode : XMLTools.extractNodes(originalDoc, ORIGINAL_HEADERS_XPATH)) {
                    if (sectionDepth(titleNode) <= MAX_SECTION_DEPTH) {
                        String header = normalizeHeader(XMLTools.extractTextFromNode(titleNode));
                        if (isProper(header)) {
                            originalHeaders.add(header);
                        }
                    }
                }

                List<String> extractedHeaders = new ArrayList<String>();
                for (Node headNode : XMLTools.extractNodes(extractedDoc, EXTRACTED_HEADERS_XPATH)) {
                    String header = normalizeHeader(XMLTools.extractTextFromNode(headNode));
                    if (isProper(header)) {
                        extractedHeaders.add(header);
                    }
                }

                MetadataList comparison = new MetadataList(
                        removeReferences(originalHeaders), removeReferences(extractedHeaders));
                comparison.setComp(EvaluationUtils.swComparator);

                // The CSV path prefix is emitted only once both documents were processed, so a
                // failing pair no longer leaves a dangling "path," that merges with the next row.
                if (mode == MODE_CSV) {
                    System.out.print(pair.getOriginalNlm().getPath() + ",");
                }
                comparison.print(mode, "Headers");
                results.add(comparison);
                if (mode == MODE_CSV) {
                    System.out.println("1");
                }
            } catch (SAXException e) {
                // Best effort: skip this pair and reuse its index for the next one,
                // but say so instead of failing silently.
                docIndex--;
                System.err.println("Skipping document pair (XML processing failed): "
                        + pair.getOriginalNlm().getPath() + " / "
                        + pair.getExtractedNlm().getPath() + ": " + e);
            }
        }

        if (mode != MODE_CSV) {
            System.out.println("==== Summary (" + nlmIterator.size() + " docs)====");
            new PrecisionRecall().build(results).print("Headers");
        }
    }

    /**
     * Creates a non-validating DOM parser with namespace processing and DTD loading disabled.
     *
     * @return the configured parser
     * @throws ParserConfigurationException if a feature is not supported or no parser can be built
     */
    private static DocumentBuilder createDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setFeature("http://xml.org/sax/features/namespaces", false);
        factory.setFeature("http://xml.org/sax/features/validation", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory.newDocumentBuilder();
    }

    /**
     * Parses a file into a DOM document and always closes the underlying stream.
     *
     * @param builder the parser to use
     * @param file    the XML file to read
     * @return the parsed document
     * @throws IOException  if the file cannot be opened or read
     * @throws SAXException if the content is not parseable XML
     */
    private static Document parseFile(DocumentBuilder builder, File file) throws IOException, SAXException {
        FileInputStream input = new FileInputStream(file);
        try {
            return builder.parse(input);
        } finally {
            input.close();
        }
    }

    /**
     * Counts the unbroken chain of {@code sec} ancestors directly above a node.
     *
     * @param titleNode the node whose ancestors are inspected
     * @return the number of consecutive {@code sec} ancestors, starting at the parent
     */
    private static int sectionDepth(Node titleNode) {
        int depth = 0;
        Node ancestor = titleNode.getParentNode();
        while (ancestor != null && "sec".equals(ancestor.getNodeName())) {
            ancestor = ancestor.getParentNode();
            depth++;
        }
        return depth;
    }

    /**
     * Normalizes a header text: trims it, lower-cases it and removes every character that is
     * not an ASCII letter or a space.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM's
     * default locale.
     *
     * <p>NOTE(review): trimming happens before the character filter, so a numbered heading such
     * as {@code "1. References"} becomes {@code " references"} (leading space) and is not
     * matched by the exclusion list. Confirm whether that is intended before changing it, as it
     * affects evaluation results.
     *
     * @param rawText the header text as extracted from the document
     * @return the normalized header text
     */
    private static String normalizeHeader(String rawText) {
        String lowered = rawText.trim().toLowerCase(Locale.ROOT);
        return NON_LETTER_OR_SPACE.matcher(lowered).replaceAll("");
    }

    /**
     * Tells whether a normalized header should take part in the comparison.
     *
     * @param str the normalized header text
     * @return {@code false} if the text is one of the excluded headers, {@code true} otherwise
     */
    private boolean isProper(String str) {
        return !EXCLUDED_HEADERS.contains(str);
    }

    /**
     * Returns a new list containing only the headers accepted by {@link #isProper(String)}.
     *
     * @param list the headers to filter; not modified
     * @return a new, mutable list with the excluded headers removed, in the original order
     */
    private List<String> removeReferences(List<String> list) {
        List<String> kept = new ArrayList<String>(list.size());
        for (String header : list) {
            if (isProper(header)) {
                kept.add(header);
            }
        }
        return kept;
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects three arguments (input directory, original-file extension, extracted-file
     * extension) and an optional fourth one: {@code csv} selects CSV output, {@code q} selects
     * mode 2; any other value leaves the default mode 0. With a different number of arguments a
     * usage message is printed and nothing is evaluated.
     *
     * @param strArr the command-line arguments
     * @throws AnalysisException            propagated from {@link #evaluate(int, NlmIterator)}
     * @throws IOException                  propagated from {@link #evaluate(int, NlmIterator)}
     * @throws TransformationException      propagated from {@link #evaluate(int, NlmIterator)}
     * @throws ParserConfigurationException propagated from {@link #evaluate(int, NlmIterator)}
     * @throws SAXException                 propagated from {@link #evaluate(int, NlmIterator)}
     * @throws JDOMException                propagated from {@link #evaluate(int, NlmIterator)}
     * @throws XPathExpressionException     propagated from {@link #evaluate(int, NlmIterator)}
     * @throws TransformerException         propagated from {@link #evaluate(int, NlmIterator)}
     */
    public static void main(String[] strArr) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        if (strArr.length != 3 && strArr.length != 4) {
            System.out.println("Usage: GrobidFinalTextExtractionEvaluation <input dir> <orig extension> <extract extension> [csv|q]");
            return;
        }
        String inputDir = strArr[0];
        String originalExtension = strArr[1];
        String extractedExtension = strArr[2];

        int mode = MODE_VERBOSE;
        if (strArr.length == 4) {
            if ("csv".equals(strArr[3])) {
                mode = MODE_CSV;
            } else if ("q".equals(strArr[3])) {
                mode = MODE_QUIET;
            }
        }
        new GrobidFinalTextExtractionEvaluation()
                .evaluate(mode, new NlmIterator(inputDir, originalExtension, extractedExtension));
    }
}
