package pl.edu.icm.cermine.evaluation;

import com.google.common.collect.Lists;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import pl.edu.icm.cermine.evaluation.tools.EvaluationUtils;
import pl.edu.icm.cermine.evaluation.tools.MetadataList;
import pl.edu.icm.cermine.evaluation.tools.MetadataRelation;
import pl.edu.icm.cermine.evaluation.tools.NlmIterator;
import pl.edu.icm.cermine.evaluation.tools.NlmPair;
import pl.edu.icm.cermine.evaluation.tools.PrecisionRecall;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.exception.TransformationException;
import pl.edu.icm.cermine.tools.XMLTools;

/* loaded from: input_file:pl/edu/icm/cermine/evaluation/ParsCitFinalTextExtractionEvaluation.class */
public final class ParsCitFinalTextExtractionEvaluation {
    public void evaluate(int i, NlmIterator nlmIterator) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        DocumentBuilderFactory newInstance = DocumentBuilderFactory.newInstance();
        newInstance.setValidating(false);
        newInstance.setFeature("http://xml.org/sax/features/namespaces", false);
        newInstance.setFeature("http://xml.org/sax/features/validation", false);
        newInstance.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        newInstance.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        DocumentBuilder newDocumentBuilder = newInstance.newDocumentBuilder();
        SAXBuilder sAXBuilder = new SAXBuilder("org.apache.xerces.parsers.SAXParser");
        sAXBuilder.setValidation(false);
        sAXBuilder.setFeature("http://xml.org/sax/features/validation", false);
        sAXBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        sAXBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        ArrayList arrayList4 = new ArrayList();
        ArrayList arrayList5 = new ArrayList();
        ArrayList arrayList6 = new ArrayList();
        if (i == 1) {
            System.out.println("path,pcit_header,pcit_hlevel,pcit_header0,pcit_header1,pcit_header2,pcit_header3,one");
        }
        int i2 = 0;
        Iterator<NlmPair> it = nlmIterator.iterator();
        while (it.hasNext()) {
            NlmPair next = it.next();
            i2++;
            if (i == 0) {
                System.out.println("");
                System.out.println(">>>>>>>>> " + i2);
                System.out.println(next.getExtractedNlm().getPath());
            }
            if (i == 1) {
                System.out.print(next.getOriginalNlm().getPath() + ",");
            }
            try {
                Document parse = newDocumentBuilder.parse(new FileInputStream(next.getOriginalNlm()));
                Document parse2 = newDocumentBuilder.parse(new FileInputStream(next.getExtractedNlm()));
                StringBuilder sb = new StringBuilder();
                for (Node node : XMLTools.extractNodes(parse, "/article/body//sec")) {
                    String trim = (XMLTools.extractChildrenNodesFromNode(node, "title").isEmpty() ? "-" : XMLTools.extractTextFromNode((Node) XMLTools.extractChildrenNodesFromNode(node, "title").get(0))).toLowerCase().replaceAll("[^a-zA-Z ]", "").trim();
                    if (isProper(trim)) {
                        Node parentNode = node.getParentNode();
                        if ("body".equals(parentNode.getNodeName()) || "sec".equals(parentNode.getNodeName())) {
                            int i3 = 0;
                            while ("sec".equals(parentNode.getNodeName())) {
                                parentNode = parentNode.getParentNode();
                                i3++;
                            }
                            if (i3 < 3) {
                                while (i3 > 0) {
                                    sb.append(" ");
                                    i3--;
                                }
                                sb.append(trim);
                                sb.append("\n");
                            }
                        }
                    }
                }
                String trim2 = sb.toString().trim();
                StringBuilder sb2 = new StringBuilder();
                NodeList childNodes = ((Node) XMLTools.extractNodes(parse2, "//algorithm[@name='SectLabel']/variant").get(0)).getChildNodes();
                for (int i4 = 0; i4 < childNodes.getLength(); i4++) {
                    Node item = childNodes.item(i4);
                    if ("sectionHeader".equals(item.getNodeName())) {
                        String trim3 = item.getTextContent().toLowerCase().replaceAll("[^a-zA-Z ]", "").trim();
                        if (isProper(trim3)) {
                            sb2.append(trim3);
                            sb2.append("\n");
                        }
                    } else if ("subsectionHeader".equals(item.getNodeName())) {
                        String trim4 = item.getTextContent().toLowerCase().replaceAll("[^a-zA-Z ]", "").trim();
                        if (isProper(trim4)) {
                            sb2.append(" ");
                            sb2.append(trim4);
                            sb2.append("\n");
                        }
                    } else if ("subsubsectionHeader".equals(item.getNodeName())) {
                        String trim5 = item.getTextContent().toLowerCase().replaceAll("[^a-zA-Z ]", "").trim();
                        if (isProper(trim5)) {
                            sb2.append("  ");
                            sb2.append(trim5);
                            sb2.append("\n");
                        }
                    }
                }
                String trim6 = sb2.toString().trim();
                ArrayList arrayList7 = new ArrayList();
                ArrayList arrayList8 = new ArrayList();
                for (String str : trim2.split("\n")) {
                    arrayList7.add(str.trim());
                }
                for (String str2 : trim6.split("\n")) {
                    arrayList8.add(str2.trim());
                }
                MetadataList metadataList = new MetadataList(removeReferences(arrayList7), removeReferences(arrayList8));
                metadataList.setComp(EvaluationUtils.swComparator);
                metadataList.print(i, "Headers");
                arrayList2.add(metadataList);
                MetadataRelation metadataRelation = new MetadataRelation();
                metadataRelation.setComp2(EvaluationUtils.swComparator);
                for (String str3 : trim2.split("\n")) {
                    String str4 = "1";
                    if (str3.startsWith("  ")) {
                        str4 = "3";
                    } else if (str3.startsWith(" ")) {
                        str4 = "2";
                    }
                    metadataRelation.addExpected(new MetadataRelation.StringRelation(str4, str3.trim()));
                }
                for (String str5 : trim6.split("\n")) {
                    String str6 = "1";
                    if (str5.startsWith("  ")) {
                        str6 = "3";
                    } else if (str5.startsWith(" ")) {
                        str6 = "2";
                    }
                    metadataRelation.addExtracted(new MetadataRelation.StringRelation(str6, str5.trim()));
                }
                metadataRelation.print(i, "Headers levels");
                arrayList.add(metadataRelation);
                ArrayList arrayList9 = new ArrayList();
                ArrayList arrayList10 = new ArrayList();
                if (!trim2.isEmpty()) {
                    arrayList10.add(trim2);
                }
                if (!trim6.isEmpty()) {
                    arrayList9.add(trim6);
                }
                MetadataList metadataList2 = new MetadataList(arrayList10, arrayList9);
                metadataList2.setComp(EvaluationUtils.headerComparator(EvaluationUtils.swComparator));
                metadataList2.print(i, "Headers 0");
                arrayList3.add(metadataList2);
                ArrayList arrayList11 = new ArrayList();
                ArrayList arrayList12 = new ArrayList();
                StringBuilder sb3 = new StringBuilder();
                for (String str7 : trim2.split("\n")) {
                    if (!str7.startsWith(" ")) {
                        if (!sb3.toString().isEmpty()) {
                            arrayList12.add(sb3.toString().trim());
                        }
                        sb3 = new StringBuilder();
                    }
                    sb3.append(str7);
                    sb3.append("\n");
                }
                if (!sb3.toString().isEmpty()) {
                    arrayList12.add(sb3.toString().trim());
                }
                StringBuilder sb4 = new StringBuilder();
                for (String str8 : trim6.split("\n")) {
                    if (!str8.startsWith(" ")) {
                        if (!sb4.toString().isEmpty()) {
                            arrayList11.add(sb4.toString().trim());
                        }
                        sb4 = new StringBuilder();
                    }
                    sb4.append(str8);
                    sb4.append("\n");
                }
                if (!sb4.toString().isEmpty()) {
                    arrayList11.add(sb4.toString().trim());
                }
                MetadataList metadataList3 = new MetadataList(arrayList12, arrayList11);
                metadataList3.setComp(EvaluationUtils.headerComparator(EvaluationUtils.swComparator));
                metadataList3.print(i, "Headers 1");
                arrayList4.add(metadataList3);
                ArrayList arrayList13 = new ArrayList();
                ArrayList arrayList14 = new ArrayList();
                StringBuilder sb5 = new StringBuilder();
                for (String str9 : trim2.split("\n")) {
                    if (str9.startsWith("  ")) {
                        sb5.append(str9);
                        sb5.append("\n");
                    } else if (str9.startsWith(" ")) {
                        if (!sb5.toString().isEmpty()) {
                            arrayList14.add(sb5.toString().trim());
                        }
                        sb5 = new StringBuilder();
                        sb5.append(str9);
                        sb5.append("\n");
                    }
                }
                if (!sb5.toString().isEmpty()) {
                    arrayList14.add(sb5.toString().trim());
                }
                StringBuilder sb6 = new StringBuilder();
                for (String str10 : trim6.split("\n")) {
                    if (str10.startsWith("  ")) {
                        sb6.append(str10);
                        sb6.append("\n");
                    } else if (str10.startsWith(" ")) {
                        if (!sb6.toString().isEmpty()) {
                            arrayList13.add(sb6.toString().trim());
                        }
                        sb6 = new StringBuilder();
                        sb6.append(str10);
                        sb6.append("\n");
                    }
                }
                if (!sb6.toString().isEmpty()) {
                    arrayList13.add(sb6.toString().trim());
                }
                MetadataList metadataList4 = new MetadataList(arrayList14, arrayList13);
                metadataList4.setComp(EvaluationUtils.headerComparator(EvaluationUtils.swComparator));
                metadataList4.print(i, "Headers 2");
                arrayList5.add(metadataList4);
                ArrayList arrayList15 = new ArrayList();
                ArrayList arrayList16 = new ArrayList();
                for (String str11 : trim2.split("\n")) {
                    if (str11.startsWith("  ")) {
                        arrayList16.add(str11);
                    }
                }
                for (String str12 : trim6.split("\n")) {
                    if (str12.startsWith("  ")) {
                        arrayList15.add(str12);
                    }
                }
                MetadataList metadataList5 = new MetadataList(arrayList16, arrayList15);
                metadataList5.setComp(EvaluationUtils.headerComparator(EvaluationUtils.swComparator));
                metadataList5.print(i, "Headers 3");
                arrayList6.add(metadataList5);
                if (i == 1) {
                    System.out.println("1");
                }
            } catch (SAXException e) {
                i2--;
            }
        }
        if (i != 1) {
            System.out.println("==== Summary (" + nlmIterator.size() + " docs)====");
            new PrecisionRecall().build(arrayList).print("Level - header");
            new PrecisionRecall().build(arrayList2).print("Headers");
            new PrecisionRecall().build(arrayList3).print("Headers 0");
            new PrecisionRecall().build(arrayList4).print("Headers 1");
            new PrecisionRecall().build(arrayList5).print("Headers 2");
            new PrecisionRecall().build(arrayList6).print("Headers 3");
        }
    }

    private boolean isProper(String str) {
        return !Lists.newArrayList(new String[]{"references", "acknowledgements", "acknowledgments", "conflicts of interest", "declaration of interest", "appendix", "conflict of interest statement", "conflict of interest", "funding", "authors contributions", "competing interests"}).contains(str);
    }

    private List<String> removeReferences(List<String> list) {
        ArrayList newArrayList = Lists.newArrayList(list);
        for (String str : list) {
            if (!isProper(str)) {
                newArrayList.remove(str);
            }
        }
        return newArrayList;
    }

    public static void main(String[] strArr) throws AnalysisException, IOException, TransformationException, ParserConfigurationException, SAXException, JDOMException, XPathExpressionException, TransformerException {
        if (strArr.length != 3 && strArr.length != 4) {
            System.out.println("Usage: FinalMetadataExtractionEvaluation <input dir> <orig extension> <extract extension>");
            return;
        }
        String str = strArr[0];
        String str2 = strArr[1];
        String str3 = strArr[2];
        int i = 0;
        if (strArr.length == 4 && strArr[3].equals("csv")) {
            i = 1;
        }
        if (strArr.length == 4 && strArr[3].equals("q")) {
            i = 2;
        }
        new ParsCitFinalTextExtractionEvaluation().evaluate(i, new NlmIterator(str, str2, str3));
    }
}
