package eu.eudml.util.nlm;

import eu.eudml.service.EudmlServiceException;
import eu.eudml.service.idmanager.IdManagerFacade;
import eu.eudml.service.idmanager.Identifier;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.codec.digest.DigestUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.XPath;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.dom4j.tree.DefaultElement;
import org.jaxen.JaxenException;
import org.jaxen.NamespaceContext;
import org.jaxen.SimpleNamespaceContext;
import org.jaxen.dom4j.Dom4jXPath;
import org.xml.sax.SAXException;

/* loaded from: input_file:eu/eudml/util/nlm/NlmProcessorHelper.class */
/**
 * Static helpers for working with NLM XML documents: adding EuDML journal, issue and article
 * identifiers to a document, normalising the values those identifiers are built from, evaluating
 * XPath expressions and applying XSLT stylesheets.
 */
public class NlmProcessorHelper {
    /** Separator placed between the journal key and the issue key of an issue id. */
    public static final String EUDML_ID_SEPARATOR = "-";
    /** Identifier type written on the journal-id and issue-id elements created by this class. */
    public static final String EUDML_ID_TYPE = "eudml-id";

    // Names of the identifier elements and of their type attributes.
    public static final String ARTICLE_ID_ELEMENT = "article-id";
    public static final String JOURNAL_ID_ELEMENT = "journal-id";
    public static final String ISSUE_ID_ELEMENT = "issue-id";
    public static final String ATTR_ART_ID_TYPE = "pub-id-type";
    public static final String ATTR_JOURNAL_ID_TYPE = "journal-id-type";
    public static final String ATTR_ISSUE_ID_TYPE = "pub-id-type";
    public static final String ATTR_ISSN_PUB_TYPE = "pub-type";

    /** Prefix that the {@code XPATH_NS_*} expressions use for the document's root namespace. */
    public static final String XPATH_NS_PREFIX = "nlm";
    public static final String XPATH_NS_ARTICLE_META = "//nlm:front/nlm:article-meta";
    public static final String XPATH_NS_ARTICLE_ID = "//nlm:front/nlm:article-meta/nlm:article-id";
    public static final String XPATH_NS_JOURNAL_META = "//nlm:front/nlm:journal-meta";
    public static final String XPATH_NS_JOURNAL_ID = "//nlm:front/nlm:journal-meta/nlm:journal-id";
    public static final String XPATH_NS_ISSN = "//nlm:front/nlm:journal-meta/nlm:issn";
    public static final String XPATH_NS_JOURNAL_TITLE = "//nlm:front/nlm:journal-meta/nlm:journal-title-group/nlm:journal-title";
    public static final String XPATH_NS_ISSUE = "//nlm:front/nlm:article-meta/nlm:issue";

    // Expressions without a namespace prefix on the element names.
    public static final String XPATH_ARTICLE_META = "//front/article-meta";
    public static final String XPATH_PDF_CONTENT_URL = "//front/article-meta/self-uri[@content-type='application/pdf']/@xlink:href";
    public static final String XPATH_PDF_FULLTEXT_URL = "//front/article-meta/ext-link[@ext-link-type='eudml-fulltext:application/pdf']/@xlink:href";
    public static final String XPATH_LANG = "//@xml:lang";
    /** Same expression as {@link #XPATH_PDF_CONTENT_URL}. */
    public static final String XPATH_PDF_COMPRESSED_CONTENT_URL = "//front/article-meta/self-uri[@content-type='application/pdf']/@xlink:href";
    public static final Namespace XLINK_NAMESPACE = Namespace.get("xlink", "http://www.w3.org/1999/xlink");

    public static final String ISSN_TYPE_EPUB = "epub";
    public static final String ISSN_TYPE_PPUB = "ppub";
    public static final String ENHANCED_BY = "enhanced-by";

    /** Common prefix of the journal and issue ids built by this class. */
    private static final String EUDML_URN_PREFIX = "urn:eudml:doc:";

    /**
     * Parses an NLM document and adds EuDML identifiers to it.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>The prefix {@link #XPATH_NS_PREFIX} is bound to the namespace URI of the root element
     *       and used for all lookups.</li>
     *   <li>A journal key is derived from the ISSN (see {@link #getIssnNode}) or, when no ISSN is
     *       present, from the MD5 of the journal title. A {@code journal-id} of type
     *       {@link #EUDML_ID_TYPE} is inserted before the first existing {@code journal-id} of the
     *       first {@code journal-meta}, or appended when there is none.</li>
     *   <li>All existing {@code article-id} elements are passed as {@link Identifier}s to
     *       {@code idManagerFacade.requestId}; the returned identifier becomes a new
     *       {@code article-id}.</li>
     *   <li>An {@code issue-id} is built from the journal key and the issue text, the issue's
     *       {@code seq} attribute, or {@code "unknown"}, in that order of preference.</li>
     *   <li>Both new elements are added to the first {@code article-meta}.</li>
     * </ol>
     *
     * <p>NOTE(review): the document is parsed with a default-configured {@code SAXReader}; if the
     * input can come from untrusted sources, DTD/entity handling should be reviewed.
     *
     * @param reader source of the NLM XML
     * @param idManagerFacade service asked for the article identifier
     * @param z when {@code true}, more than one {@code article-meta} element is rejected
     * @param z2 when {@code true} the result is pretty-printed, otherwise written in compact form
     *        with text trimming disabled
     * @param str preferred value of the ISSN {@code pub-type} attribute, may be {@code null}
     * @return the identifier returned by the id manager together with the serialized document
     * @throws EudmlServiceException presumably raised by the id manager; not thrown directly here
     * @throws DocumentException when parsing fails, when {@code article-meta} or
     *         {@code journal-meta} is missing, when neither ISSN nor journal title is present, or
     *         when writing the result fails
     * @throws SAXException declared for interface compatibility; not thrown directly here
     * @throws JaxenException when an XPath expression cannot be evaluated
     */
    public static IdentifiedNLM enrichNLMWithCustomArticleId(Reader reader, IdManagerFacade idManagerFacade, boolean z, boolean z2, String str) throws EudmlServiceException, DocumentException, SAXException, JaxenException {
        Document document = new SAXReader().read(reader);
        String namespaceUri = document.getRootElement().getNamespaceURI();
        Map<String, String> prefixes = new HashMap<String, String>();
        prefixes.put(XPATH_NS_PREFIX, namespaceUri);
        NamespaceContext namespaces = new SimpleNamespaceContext(prefixes);
        DocumentFactory factory = DocumentFactory.getInstance();

        List<?> articleMetas = namespacedXPath(XPATH_NS_ARTICLE_META, namespaces).selectNodes(document);
        int articleMetaCount = articleMetas == null ? 0 : articleMetas.size();
        if (articleMetaCount == 0 || (z && articleMetaCount > 1)) {
            throw new DocumentException("single article meta expected in NLM document, found " + articleMetaCount);
        }

        List<?> journalMetas = namespacedXPath(XPATH_NS_JOURNAL_META, namespaces).selectNodes(document);
        if (journalMetas == null || journalMetas.isEmpty()) {
            throw new DocumentException("Unable to generate journal id! No journal-meta element defined!");
        }

        // The journal key is the digits of the ISSN, or the MD5 of the journal title as a fallback.
        String journalKey;
        Node issnNode = getIssnNode(document, namespaces, str);
        if (issnNode != null) {
            journalKey = normalizeISSN(issnNode.getText());
        } else {
            Node titleNode = (Node) namespacedXPath(XPATH_NS_JOURNAL_TITLE, namespaces).selectSingleNode(document);
            if (titleNode == null) {
                throw new DocumentException("Unable to generate journal id! Neither issn number nor journal title  could be found within journal-meta element!");
            }
            journalKey = normalizeJournalTitle(titleNode.getText());
        }

        Element journalId = factory.createElement(JOURNAL_ID_ELEMENT, namespaceUri);
        journalId.addAttribute(ATTR_JOURNAL_ID_TYPE, EUDML_ID_TYPE);
        journalId.setText(buildJournalId(journalKey));
        Element journalMeta = (Element) journalMetas.get(0);
        int journalAnchor = indexOfFirstElementNamed(liveContent(journalMeta), JOURNAL_ID_ELEMENT);
        if (journalAnchor >= 0) {
            liveContent(journalMeta).add(journalAnchor, journalId);
        } else {
            journalMeta.add(journalId);
        }

        // Hand every identifier already present in the document to the id manager.
        List<?> existingArticleIds = namespacedXPath(XPATH_NS_ARTICLE_ID, namespaces).selectNodes(document);
        List<Identifier> knownIdentifiers = new ArrayList<Identifier>(existingArticleIds.size());
        for (Object candidate : existingArticleIds) {
            if (!(candidate instanceof DefaultElement)) {
                throw new DocumentException("invalid element instance " + candidate.getClass() + ", expected " + DefaultElement.class);
            }
            DefaultElement existingId = (DefaultElement) candidate;
            knownIdentifiers.add(new Identifier(existingId.attributeValue(ATTR_ART_ID_TYPE), existingId.getText()));
        }
        Identifier eudmlId = idManagerFacade.requestId(knownIdentifiers);

        Element eudmlArticleId = factory.createElement(ARTICLE_ID_ELEMENT, namespaceUri);
        eudmlArticleId.addAttribute(ATTR_ART_ID_TYPE, eudmlId.getType());
        eudmlArticleId.setText(eudmlId.getValue());

        Node issueNode = (Node) namespacedXPath(XPATH_NS_ISSUE, namespaces).selectSingleNode(document);
        Element issueId = factory.createElement(ISSUE_ID_ELEMENT, namespaceUri);
        issueId.addAttribute(ATTR_ISSUE_ID_TYPE, EUDML_ID_TYPE);
        issueId.setText(buildIssueId(journalKey, resolveIssueKey(issueNode)));

        Element articleMeta = (Element) articleMetas.get(0);
        List<Node> articleMetaContent = liveContent(articleMeta);
        int articleAnchor = indexOfFirstElementNamed(articleMetaContent, ARTICLE_ID_ELEMENT);
        if (articleAnchor >= 0) {
            // Insert exactly once. Both elements go in at the same index, so the issue-id ends up
            // directly in front of the new article-id, which precedes the existing article-id.
            articleMetaContent.add(articleAnchor, eudmlArticleId);
            articleMetaContent.add(articleAnchor, issueId);
        } else {
            articleMeta.add(eudmlArticleId);
            articleMeta.add(issueId);
        }

        try {
            StringWriter serialized = new StringWriter();
            OutputFormat format = z2 ? OutputFormat.createPrettyPrint() : disableTrimming(OutputFormat.createCompactFormat());
            new XMLWriter(serialized, format).write(document);
            return new IdentifiedNLM(eudmlId, serialized.toString());
        } catch (IOException e) {
            throw new DocumentException("unable to write NLM to output!", e);
        }
    }

    /**
     * Finds the ISSN element to derive the journal key from.
     *
     * @param document document to search
     * @param namespaceContext context that binds the {@link #XPATH_NS_PREFIX} prefix
     * @param str preferred {@code pub-type} value; when {@code null} the first ISSN is used
     * @return the first ISSN whose {@code pub-type} equals {@code str}, otherwise the first ISSN,
     *         or {@code null} when the document has no ISSN
     * @throws JaxenException when the XPath expression cannot be evaluated
     */
    protected static Node getIssnNode(Document document, NamespaceContext namespaceContext, String str) throws JaxenException {
        Dom4jXPath issnPath = namespacedXPath(XPATH_NS_ISSN, namespaceContext);
        if (str == null) {
            return (Node) issnPath.selectSingleNode(document);
        }
        List<?> issnNodes = issnPath.selectNodes(document);
        if (issnNodes.isEmpty()) {
            return null;
        }
        for (Object candidate : issnNodes) {
            if ((candidate instanceof DefaultElement) && str.equals(((DefaultElement) candidate).attributeValue(ATTR_ISSN_PUB_TYPE))) {
                return (Node) candidate;
            }
        }
        // No ISSN of the preferred type: fall back to the first one.
        return (Node) issnNodes.get(0);
    }

    /**
     * Switches text trimming off on the given format.
     *
     * @param outputFormat format to modify
     * @return the same, modified instance
     */
    protected static OutputFormat disableTrimming(OutputFormat outputFormat) {
        outputFormat.setTrimText(false);
        return outputFormat;
    }

    /**
     * Builds a journal id.
     *
     * @param str journal key
     * @return {@code "urn:eudml:doc:"} followed by the key
     */
    public static String buildJournalId(String str) {
        return EUDML_URN_PREFIX + str;
    }

    /**
     * Builds an issue id.
     *
     * @param str journal key
     * @param str2 issue key
     * @return {@code "urn:eudml:doc:"}, the journal key, {@link #EUDML_ID_SEPARATOR} and the issue key
     */
    public static String buildIssueId(String str, String str2) {
        return EUDML_URN_PREFIX + str + EUDML_ID_SEPARATOR + str2;
    }

    /**
     * @param obj object to test
     * @return {@code true} when {@code obj} is a {@link DefaultElement} named
     *         {@link #ARTICLE_ID_ELEMENT}
     */
    protected static boolean isArtIdElement(Object obj) {
        return isElementNamed(obj, ARTICLE_ID_ELEMENT);
    }

    /**
     * Strips every non-digit character from an ISSN.
     *
     * @param str raw ISSN text, must not be {@code null}
     * @return the digits of {@code str}
     */
    public static String normalizeISSN(String str) {
        return str.replaceAll("\\D", "");
    }

    /**
     * @param str journal title
     * @return the hexadecimal MD5 digest of the title
     */
    public static String normalizeJournalTitle(String str) {
        return DigestUtils.md5Hex(str);
    }

    /**
     * Turns an issue label into an id-safe key: lower-cased, every character outside
     * {@code [a-z_0-9]} replaced by an underscore, runs of underscores collapsed, and a leading or
     * trailing underscore removed.
     *
     * @param str issue label, must not be {@code null}
     * @return the normalised key, possibly empty
     */
    public static String normalizeIssue(String str) {
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        return str.toLowerCase(Locale.ROOT).replaceAll("[^a-z_0-9]", "_").replaceAll("_+", "_").replaceAll("_$", "").replaceAll("^_", "");
    }

    /**
     * @param obj object to test
     * @return {@code true} when {@code obj} is a {@link DefaultElement} named
     *         {@link #JOURNAL_ID_ELEMENT}
     */
    protected static boolean isJournalIdElement(Object obj) {
        return isElementNamed(obj, JOURNAL_ID_ELEMENT);
    }

    /**
     * Evaluates an XPath expression against a document and returns its string value.
     *
     * @param document document to evaluate against
     * @param str XPath expression
     * @param namespaceArr namespaces whose prefixes may be used in the expression
     * @return the string value of the expression
     */
    public static String stringValue(Document document, String str, Namespace... namespaceArr) {
        XPath xpath = document.createXPath(str);
        xpath.setNamespaceURIs(nsMap(namespaceArr));
        return xpath.valueOf(document);
    }

    /**
     * Transforms a document with an XSLT stylesheet loaded from the classpath, using the
     * {@code net.sf.saxon.TransformerFactoryImpl} factory.
     *
     * @param document document to transform
     * @param str classpath resource name of the stylesheet
     * @return the transformation result
     * @throws TransformerException when the stylesheet resource does not exist or the
     *         transformation fails
     */
    public static Document applyStylesheet(Document document, String str) throws TransformerException {
        InputStream stylesheet = NlmProcessorHelper.class.getClassLoader().getResourceAsStream(str);
        if (stylesheet == null) {
            throw new TransformerException("stylesheet resource not found on classpath: " + str);
        }
        try {
            Transformer transformer = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", null).newTransformer(new StreamSource(stylesheet));
            DocumentResult result = new DocumentResult();
            transformer.transform(new DocumentSource(document), result);
            return result.getDocument();
        } finally {
            try {
                stylesheet.close();
            } catch (IOException ignored) {
                // Closing is best effort; a failure here must not mask the transformation outcome.
            }
        }
    }

    /** Maps each namespace prefix to its URI. */
    private static Map<String, String> nsMap(Namespace... namespaceArr) {
        Map<String, String> urisByPrefix = new HashMap<String, String>(namespaceArr.length);
        for (Namespace namespace : namespaceArr) {
            urisByPrefix.put(namespace.getPrefix(), namespace.getURI());
        }
        return urisByPrefix;
    }

    /**
     * Parses NLM XML held in a string.
     *
     * @param str XML text
     * @return the parsed document
     * @throws DocumentException when the text cannot be parsed
     */
    public static Document parseNLM(String str) throws DocumentException {
        return DocumentHelper.parseText(str);
    }

    /**
     * Extracts the PDF URL from NLM XML: the value of {@link #XPATH_PDF_FULLTEXT_URL}, or, when
     * that is empty, the value of {@link #XPATH_PDF_CONTENT_URL}.
     *
     * @param str NLM XML text
     * @return the URL, or an empty string when neither expression yields a value
     * @throws DocumentException when the text cannot be parsed
     */
    public static String getPDFURLforEnhancing(String str) throws DocumentException {
        // Parse once; both lookups read the same document.
        Document document = parseNLM(str);
        String url = stringValue(document, XPATH_PDF_FULLTEXT_URL, XLINK_NAMESPACE);
        if (url.isEmpty()) {
            url = stringValue(document, XPATH_PDF_CONTENT_URL, XLINK_NAMESPACE);
        }
        return url;
    }

    /** Creates an XPath for {@code expression} that resolves prefixes through {@code namespaces}. */
    private static Dom4jXPath namespacedXPath(String expression, NamespaceContext namespaces) throws JaxenException {
        Dom4jXPath xpath = new Dom4jXPath(expression);
        xpath.setNamespaceContext(namespaces);
        return xpath;
    }

    /** Returns the content list of {@code parent}; insertions through it are applied to the element. */
    @SuppressWarnings("unchecked") // content() may be declared with a raw List return type
    private static List<Node> liveContent(Element parent) {
        return parent.content();
    }

    /** Tells whether {@code obj} is a {@link DefaultElement} with the given local name. */
    private static boolean isElementNamed(Object obj, String elementName) {
        return (obj instanceof DefaultElement) && elementName.equals(((DefaultElement) obj).getName());
    }

    /** Returns the index of the first element named {@code elementName} in {@code content}, or -1. */
    private static int indexOfFirstElementNamed(List<?> content, String elementName) {
        for (int i = 0; i < content.size(); i++) {
            if (isElementNamed(content.get(i), elementName)) {
                return i;
            }
        }
        return -1;
    }

    /** Picks the issue key: the issue text, else its {@code seq} attribute, else {@code "unknown"}. */
    private static String resolveIssueKey(Node issueNode) {
        if (issueNode != null) {
            String text = issueNode.getText();
            if (text != null && text.trim().length() > 0) {
                return normalizeIssue(text);
            }
            if (issueNode instanceof DefaultElement) {
                String seq = ((DefaultElement) issueNode).attributeValue("seq");
                if (seq != null) {
                    return normalizeIssue(seq);
                }
            }
        }
        return normalizeIssue("unknown");
    }
}
