package eu.eudml.util.nlm;

import eu.eudml.service.EudmlServiceException;
import eu.eudml.service.idmanager.IdManagerFacade;
import eu.eudml.service.idmanager.Identifier;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamSource;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.XPath;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.dom4j.tree.DefaultElement;
import org.jaxen.JaxenException;
import org.jaxen.NamespaceContext;
import org.jaxen.SimpleNamespaceContext;
import org.jaxen.dom4j.Dom4jXPath;
import org.xml.sax.SAXException;

/* loaded from: input_file:eu/eudml/util/nlm/NlmProcessorHelper.class */
public class NlmProcessorHelper {

    /* loaded from: input_file:eu/eudml/util/nlm/NlmProcessorHelper$TYPE_OF_DOCUMENT.class */
    public enum TYPE_OF_DOCUMENT {
        ARTICLE_FROM_JOURNAL,
        ARTICLE_FROM_BOOK,
        BOOK
    }

    /* loaded from: input_file:eu/eudml/util/nlm/NlmProcessorHelper$TYPE_OF_GENERATED_IDS.class */
    /**
     * Keys of the identifier map produced by {@code generateCustomIdsForNLM} and consumed by
     * {@code enrichNLMWithCustomIds}; each constant names the kind of identifier stored under it.
     */
    public enum TYPE_OF_GENERATED_IDS {
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateArticleId}. */
        GENERATED_ARTICLE_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateBookId}. */
        GENERATED_BOOK_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateMbookId}. */
        GENERATED_MBOOK_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateJournalId}. */
        GENERATED_JOURNAL_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateIssueId}. */
        GENERATED_ISSUE_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateVolumeId}. */
        GENERATED_VOLUME_ID,
        /** Identifier produced by {@code NlmProcessorHelperCommon.generateYearId}. */
        GENERATED_YEAR_ID
    }

    /**
     * Generates the full set of identifiers for the document, including the identifier of the
     * document itself.
     *
     * <p>Delegates to the four-argument overload with the "skip own id" flag set to {@code false}.
     *
     * @param document        parsed NLM document
     * @param idManagerFacade id manager handed on to the id generators
     * @param str             value forwarded unchanged to the article/book id generator
     * @return map from id kind to generated identifier
     */
    public static Map<TYPE_OF_GENERATED_IDS, Identifier> generateCustomIdsForNLM(Document document, IdManagerFacade idManagerFacade, String str) throws EudmlServiceException, DocumentException, SAXException, JaxenException, IOException {
        Map<TYPE_OF_GENERATED_IDS, Identifier> generatedIds = generateCustomIdsForNLM(document, idManagerFacade, str, false);
        return generatedIds;
    }

    /**
     * Generates only the identifiers of the enclosing entities (journal/year/volume/issue or
     * book/mbook); the identifier of the document itself is not generated.
     *
     * <p>NOTE(review): the {@code z} argument is not consulted. The delegate is always invoked
     * with {@code true}, so passing {@code false} here has no effect. Confirm with callers
     * whether that is intended before changing it.
     *
     * @param document        parsed NLM document
     * @param idManagerFacade id manager handed on to the id generators
     * @param z               currently ignored
     * @return map from id kind to generated identifier
     */
    public static Map<TYPE_OF_GENERATED_IDS, Identifier> generateCustomIdsForNLM(Document document, IdManagerFacade idManagerFacade, boolean z) throws EudmlServiceException, DocumentException, SAXException, JaxenException, IOException {
        Map<TYPE_OF_GENERATED_IDS, Identifier> generatedIds = generateCustomIdsForNLM(document, idManagerFacade, null, true);
        return generatedIds;
    }

    /**
     * Generates identifiers for the document according to its type.
     *
     * <ul>
     *   <li>Journal article: article id (unless {@code z}), then journal, year, volume and issue
     *       ids; the year/volume/issue generators receive the journal id value.</li>
     *   <li>Article from a book: article id (unless {@code z}), book id if book-meta is present,
     *       mbook id if mbook-meta is present.</li>
     *   <li>Book: book id (unless {@code z}), mbook id if mbook-meta is present.</li>
     * </ul>
     *
     * @param document        parsed NLM document
     * @param idManagerFacade id manager handed on to the id generators
     * @param str             value forwarded to the generator of the document's own id
     * @param z               when {@code true} the document's own id is not generated
     * @return map from id kind to generated identifier
     * @throws DocumentException if the document type is not one of the handled kinds
     */
    protected static Map<TYPE_OF_GENERATED_IDS, Identifier> generateCustomIdsForNLM(Document document, IdManagerFacade idManagerFacade, String str, boolean z) throws EudmlServiceException, DocumentException, SAXException, JaxenException, IOException {
        final TYPE_OF_DOCUMENT kind = getDocumentType(document);
        final boolean generateOwnId = !z;
        final Map<TYPE_OF_GENERATED_IDS, Identifier> ids = new HashMap<TYPE_OF_GENERATED_IDS, Identifier>(0);
        switch (kind) {
            case ARTICLE_FROM_JOURNAL: {
                if (generateOwnId) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_ARTICLE_ID, NlmProcessorHelperCommon.generateArticleId(document, idManagerFacade, str, true));
                }
                // All generators run before any of their results is stored, as in the original flow.
                Identifier journalId = NlmProcessorHelperCommon.generateJournalId(document, idManagerFacade);
                String journalIdValue = journalId.getValue();
                Identifier yearId = NlmProcessorHelperCommon.generateYearId(document, journalIdValue);
                Identifier volumeId = NlmProcessorHelperCommon.generateVolumeId(document, journalIdValue);
                Identifier issueId = NlmProcessorHelperCommon.generateIssueId(document, journalIdValue);
                ids.put(TYPE_OF_GENERATED_IDS.GENERATED_JOURNAL_ID, journalId);
                ids.put(TYPE_OF_GENERATED_IDS.GENERATED_YEAR_ID, yearId);
                ids.put(TYPE_OF_GENERATED_IDS.GENERATED_VOLUME_ID, volumeId);
                ids.put(TYPE_OF_GENERATED_IDS.GENERATED_ISSUE_ID, issueId);
                break;
            }
            case ARTICLE_FROM_BOOK: {
                if (generateOwnId) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_ARTICLE_ID, NlmProcessorHelperCommon.generateArticleId(document, idManagerFacade, str, false));
                }
                if (checkIfHasBookMeta(document)) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_BOOK_ID, NlmProcessorHelperCommon.generateBookId(document, idManagerFacade, null, false));
                }
                if (checkIfHasMBookMeta(document)) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_MBOOK_ID, NlmProcessorHelperCommon.generateMbookId(document, idManagerFacade));
                }
                break;
            }
            case BOOK: {
                if (generateOwnId) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_BOOK_ID, NlmProcessorHelperCommon.generateBookId(document, idManagerFacade, str, true));
                }
                if (checkIfHasMBookMeta(document)) {
                    ids.put(TYPE_OF_GENERATED_IDS.GENERATED_MBOOK_ID, NlmProcessorHelperCommon.generateMbookId(document, idManagerFacade));
                }
                break;
            }
            default:
                throw new DocumentException("Document is neither an article nor a book");
        }
        return ids;
    }

    /**
     * Classifies the document by its root element and the presence of book metadata.
     *
     * @param document parsed NLM document
     * @return {@code BOOK} when the root element is not an article; otherwise
     *         {@code ARTICLE_FROM_BOOK} when book-meta is present, else {@code ARTICLE_FROM_JOURNAL}
     * @throws JaxenException if the book-meta XPath cannot be evaluated
     */
    public static TYPE_OF_DOCUMENT getDocumentType(Document document) throws JaxenException {
        Element root = document.getRootElement();
        if (!checkIfArticle(root)) {
            return TYPE_OF_DOCUMENT.BOOK;
        }
        return checkIfHasBookMeta(document)
                ? TYPE_OF_DOCUMENT.ARTICLE_FROM_BOOK
                : TYPE_OF_DOCUMENT.ARTICLE_FROM_JOURNAL;
    }

    /**
     * Writes every identifier from {@code map} into the document and serializes the result.
     *
     * @param document parsed NLM document; it is modified in place by the add-id helpers
     * @param z        {@code true} for pretty-printed output, {@code false} for compact output
     *                 with text trimming disabled
     * @param map      identifiers to add, keyed by kind
     * @return the serialized document paired with the article id (article root) or the book id
     *         (book root); the identifier is {@code null} when the map has no such entry
     * @throws DocumentException if the root is neither "article" nor "book", or serialization fails
     */
    public static IdentifiedNLM enrichNLMWithCustomIds(Document document, boolean z, Map<TYPE_OF_GENERATED_IDS, Identifier> map) throws EudmlServiceException, DocumentException, SAXException, JaxenException {
        final Element root = document.getRootElement();
        final String nsUri = root.getNamespaceURI();

        Map<String, String> prefixToUri = new HashMap<String, String>();
        prefixToUri.put(NlmConstants.XPATH_NS_PREFIX, nsUri);
        final SimpleNamespaceContext nsContext = new SimpleNamespaceContext(prefixToUri);
        final DocumentFactory factory = DocumentFactory.getInstance();

        // False only for an article carrying book-meta; forwarded to the article/issue/volume/year helpers.
        final boolean notBookArticle = !(checkIfArticle(root) && checkIfHasBookMeta(document));

        for (Map.Entry<TYPE_OF_GENERATED_IDS, Identifier> entry : map.entrySet()) {
            final Identifier id = entry.getValue();
            switch (entry.getKey()) {
                case GENERATED_ARTICLE_ID:
                    NlmProcessorHelperCommon.addArticleIdToNlm(document, factory, nsContext, nsUri, id, notBookArticle);
                    break;
                case GENERATED_JOURNAL_ID:
                    NlmProcessorHelperCommon.addJournalIdToNlm(document, factory, nsContext, nsUri, id);
                    break;
                case GENERATED_BOOK_ID:
                    NlmProcessorHelperCommon.addBookIdToNlm(document, factory, nsContext, nsUri, id);
                    break;
                case GENERATED_MBOOK_ID:
                    NlmProcessorHelperCommon.addMbookIdToNlm(document, factory, nsContext, nsUri, id);
                    break;
                case GENERATED_ISSUE_ID:
                    NlmProcessorHelperCommon.addIssueIdToNlm(document, factory, nsContext, nsUri, id, notBookArticle);
                    break;
                case GENERATED_VOLUME_ID:
                    NlmProcessorHelperCommon.addVolumeIdToNlm(document, factory, nsContext, nsUri, id, notBookArticle);
                    break;
                case GENERATED_YEAR_ID:
                    NlmProcessorHelperCommon.addYearIdToNlm(document, factory, nsContext, nsUri, id, notBookArticle);
                    break;
            }
        }

        final Identifier articleId = map.get(TYPE_OF_GENERATED_IDS.GENERATED_ARTICLE_ID);
        final Identifier bookId = map.get(TYPE_OF_GENERATED_IDS.GENERATED_BOOK_ID);
        final String serialized;
        try {
            serialized = documentToString(document, z);
        } catch (IOException e) {
            throw new DocumentException("unable to write NLM to output!", e);
        }
        if (checkIfArticle(root)) {
            return new IdentifiedNLM(articleId, serialized);
        }
        if (checkIfBook(root)) {
            return new IdentifiedNLM(bookId, serialized);
        }
        throw new DocumentException("it is not a book and not an article");
    }

    /**
     * Parses XML from the given reader into a dom4j document.
     *
     * <p>NOTE(review): the {@code SAXReader} is used with its default configuration. If the
     * input can come from untrusted sources, confirm how external entities / DTDs should be
     * handled.
     *
     * @param reader character source containing the XML
     * @return the parsed document
     * @throws DocumentException if parsing fails
     */
    public static Document getDocument(Reader reader) throws DocumentException {
        SAXReader saxReader = new SAXReader();
        Document parsed = saxReader.read(reader);
        return parsed;
    }

    /**
     * Serializes the document to a string.
     *
     * @param document document to serialize
     * @param z        {@code true} for dom4j's pretty-print format; {@code false} for the compact
     *                 format with text trimming switched off
     * @return the XML text
     * @throws IOException if the writer fails
     */
    public static String documentToString(Document document, boolean z) throws IOException {
        StringWriter buffer = new StringWriter();
        OutputFormat format;
        if (z) {
            format = OutputFormat.createPrettyPrint();
        } else {
            format = disableTrimming(OutputFormat.createCompactFormat());
        }
        XMLWriter xmlWriter = new XMLWriter(buffer, format);
        xmlWriter.write(document);
        return buffer.toString();
    }

    /**
     * Reads the provider name from the document and lower-cases it.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default
     * locale (for example, a Turkish default locale would otherwise map {@code I} to a dotless
     * {@code ı}).
     *
     * @param document parsed NLM document
     * @return the lower-cased string value of the provider-name XPath, or {@code null} if the
     *         XPath evaluation yields {@code null}
     */
    public static String getDocumentProvider(Document document) throws JaxenException, EudmlServiceException {
        String provider = stringValue(document, NlmConstants.XPATH_PROVIDER_NAME, NlmConstants.XLINK_NAMESPACE);
        if (provider == null) {
            return null;
        }
        return provider.toLowerCase(Locale.ROOT);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Finds the first ISSN element whose pub-type attribute matches the requested type.
     *
     * <p>An element without the pub-type attribute is treated as a match when the requested type
     * equals {@code NlmConstants.ISSN_TYPE_PPUB}. Only {@code DefaultElement} results are
     * considered; other node kinds are skipped.
     *
     * @param document         parsed NLM document
     * @param namespaceContext namespace bindings for the ISSN XPath
     * @param str              requested ISSN pub-type
     * @return the matching node, or {@code null} if none matches
     * @throws JaxenException if the XPath cannot be compiled or evaluated
     */
    public static Node getIssnNode(Document document, NamespaceContext namespaceContext, String str) throws JaxenException {
        Dom4jXPath issnQuery = new Dom4jXPath(NlmConstants.XPATH_NS_ISSN);
        issnQuery.setNamespaceContext(namespaceContext);
        List<?> candidates = issnQuery.selectNodes(document);
        for (Object candidate : candidates) {
            if (!(candidate instanceof DefaultElement)) {
                continue;
            }
            DefaultElement issn = (DefaultElement) candidate;
            String pubType = issn.attributeValue(NlmConstants.ATTR_ISSN_PUB_TYPE);
            boolean exactMatch = str.equals(pubType);
            if (exactMatch || (str.equals(NlmConstants.ISSN_TYPE_PPUB) && pubType == null)) {
                return issn;
            }
        }
        return null;
    }

    /**
     * Switches off text trimming on the given format and returns the same instance so the call
     * can be used inline.
     *
     * @param outputFormat format to modify in place
     * @return {@code outputFormat}
     */
    protected static OutputFormat disableTrimming(OutputFormat outputFormat) {
        final OutputFormat untrimmed = outputFormat;
        untrimmed.setTrimText(false);
        return untrimmed;
    }

    /**
     * Strips every character that is not an ASCII digit ({@code 0}-{@code 9}).
     *
     * <p>NOTE(review): a trailing {@code X} check character is removed as well; confirm this is
     * the intended normalization.
     *
     * @param str raw ISSN text; must not be {@code null}
     * @return the digits of {@code str} in their original order
     */
    public static String normalizeISSN(String str) {
        final int length = str.length();
        StringBuilder digits = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            if (c >= '0' && c <= '9') {
                digits.append(c);
            }
        }
        return digits.toString();
    }

    /**
     * Strips every character that is not an ASCII digit ({@code 0}-{@code 9}).
     *
     * <p>NOTE(review): an {@code X} check character is removed as well; confirm this is the
     * intended normalization.
     *
     * @param str raw ISBN text; must not be {@code null}
     * @return the digits of {@code str} in their original order
     */
    public static String normalizeISBN(String str) {
        StringBuilder kept = new StringBuilder();
        for (char c : str.toCharArray()) {
            boolean asciiDigit = '0' <= c && c <= '9';
            if (asciiDigit) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Normalizes an issue label for use inside an identifier.
     *
     * <p>Steps: lower-case; replace every character outside {@code [a-z_0-9]} with
     * {@code NlmConstants.EUDML_ID_SEPARATOR}; collapse runs of underscores into one separator;
     * drop a single trailing and a single leading underscore. (The underscore patterns suggest
     * the separator is {@code "_"} — TODO confirm against {@code NlmConstants}.)
     *
     * <p>Lower-casing uses {@link Locale#ROOT}. With the default-locale variant, a Turkish JVM
     * locale turns {@code I} into {@code ı}, which the ASCII filter then replaces with the
     * separator, so the same label would yield different ids on different machines.
     *
     * @param str raw issue label; must not be {@code null}
     * @return normalized label
     */
    public static String normalizeIssue(String str) {
        String lowered = str.toLowerCase(Locale.ROOT);
        String separated = lowered.replaceAll("[^a-z_0-9]", NlmConstants.EUDML_ID_SEPARATOR);
        String collapsed = separated.replaceAll("_+", NlmConstants.EUDML_ID_SEPARATOR);
        return collapsed.replaceAll("_$", "").replaceAll("^_", "");
    }

    /**
     * Evaluates an XPath expression against the document and returns its string value.
     *
     * @param document     document to query
     * @param str          XPath expression
     * @param namespaceArr namespaces whose prefixes may be used in the expression
     * @return the string value of the expression as computed by dom4j
     */
    public static String stringValue(Document document, String str, Namespace... namespaceArr) {
        final XPath query = document.createXPath(str);
        final Map<String, String> prefixes = nsMap(namespaceArr);
        query.setNamespaceURIs(prefixes);
        return query.valueOf(document);
    }

    /**
     * Evaluates an XPath expression and converts each selected node to a string.
     *
     * <p>Attributes contribute their value; every other node contributes its XML serialization.
     *
     * @param document     document to query
     * @param str          XPath expression
     * @param namespaceArr namespaces whose prefixes may be used in the expression
     * @return one string per selected node, in selection order
     * @throws RuntimeException if a result is not a dom4j node
     */
    public static List<String> stringValues(Document document, String str, Namespace... namespaceArr) {
        XPath query = document.createXPath(str);
        query.setNamespaceURIs(nsMap(namespaceArr));
        List<?> selected = query.selectNodes(document);
        List<String> values = new ArrayList<String>();
        for (Object item : selected) {
            // Attribute must be tested first: it is itself a Node.
            if (item instanceof Attribute) {
                values.add(((Attribute) item).getValue());
                continue;
            }
            if (item instanceof Node) {
                values.add(((Node) item).asXML());
                continue;
            }
            throw new RuntimeException("Unexpected type of answer");
        }
        return values;
    }

    /**
     * Transforms the document with an XSLT stylesheet loaded from the classpath, using the
     * Saxon transformer factory.
     *
     * <p>The stylesheet stream is closed once the transformer has been built, and a missing
     * resource is reported explicitly instead of surfacing as an obscure failure inside the
     * XSLT compiler.
     *
     * @param document document to transform
     * @param str      classpath resource name of the stylesheet
     * @return the transformation result as a new document
     * @throws TransformerException if the stylesheet cannot be found, compiled or applied
     */
    public static Document applyStylesheet(Document document, String str) throws TransformerException {
        TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", null);
        InputStream stylesheet = NlmProcessorHelper.class.getClassLoader().getResourceAsStream(str);
        if (stylesheet == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            throw new TransformerException("Stylesheet resource not found on classpath: " + str);
        }
        Transformer transformer;
        try {
            transformer = factory.newTransformer(new StreamSource(stylesheet));
        } finally {
            try {
                stylesheet.close();
            } catch (IOException ignored) {
                // Best effort: a failed close must not mask the compilation outcome.
            }
        }
        DocumentSource source = new DocumentSource(document);
        DocumentResult result = new DocumentResult();
        transformer.transform(source, result);
        return result.getDocument();
    }

    /**
     * Builds a prefix-to-URI map from the given namespaces; a later namespace overrides an
     * earlier one that has the same prefix.
     *
     * @param namespaceArr namespaces to index
     * @return mutable map keyed by namespace prefix
     */
    private static Map<String, String> nsMap(Namespace... namespaceArr) {
        final int count = namespaceArr.length;
        Map<String, String> byPrefix = new HashMap<String, String>(count);
        for (int i = 0; i < count; i++) {
            Namespace ns = namespaceArr[i];
            byPrefix.put(ns.getPrefix(), ns.getURI());
        }
        return byPrefix;
    }

    /**
     * Parses NLM XML text into a dom4j document.
     *
     * @param str XML text
     * @return the parsed document
     * @throws DocumentException if the text is not well-formed XML
     */
    public static Document parseNLM(String str) throws DocumentException {
        final Document parsed = DocumentHelper.parseText(str);
        return parsed;
    }

    /**
     * Parses NLM text and extracts full-text URLs selected by the given XPath.
     *
     * <p>The XPath may use the xlink namespace and the {@code NlmConstants.XPATH_NS_PREFIX}
     * prefix, which is bound to the namespace of the document's root element.
     *
     * @param str  NLM XML text
     * @param str2 XPath selecting the elements that carry the URLs
     * @return one {@code FulltextURL} per selected element
     * @throws DocumentException if {@code str} cannot be parsed
     */
    public static List<FulltextURL> getURLsForFulltext(String str, String str2) throws DocumentException {
        final Document nlm = parseNLM(str);
        final String rootNamespaceUri = nlm.getRootElement().getNamespaceURI();
        final Namespace rootNamespace = Namespace.get(NlmConstants.XPATH_NS_PREFIX, rootNamespaceUri);
        return getFulltextURLs(nlm, str2, NlmConstants.XLINK_NAMESPACE, rootNamespace);
    }

    /**
     * Selects elements with the given XPath and converts each one to a {@code FulltextURL}.
     *
     * @param document     document to query
     * @param str          XPath expression; must select elements only
     * @param namespaceArr namespaces whose prefixes may be used in the expression
     * @return converted URLs in selection order
     * @throws RuntimeException if the XPath selects something other than an element
     */
    private static List<FulltextURL> getFulltextURLs(Document document, String str, Namespace... namespaceArr) {
        XPath query = document.createXPath(str);
        query.setNamespaceURIs(nsMap(namespaceArr));
        List<?> selected = query.selectNodes(document);
        List<FulltextURL> urls = new ArrayList<FulltextURL>();
        for (Object item : selected) {
            if (item instanceof Element) {
                urls.add(elementToFulltextURL((Element) item));
            } else {
                throw new RuntimeException("Unexpected type of answer");
            }
        }
        return urls;
    }

    /**
     * Builds a {@code FulltextURL} from an element's attributes: the specific-use attribute is
     * parsed into the URL's specific uses and the href attribute becomes its href. Other
     * attributes are ignored.
     *
     * @param element element carrying the URL attributes
     * @return the populated URL object
     */
    private static FulltextURL elementToFulltextURL(Element element) {
        final FulltextURL url = new FulltextURL();
        for (Iterator<?> attributes = element.attributeIterator(); attributes.hasNext();) {
            final Attribute attr = (Attribute) attributes.next();
            final String attrName = attr.getName();
            if (FulltextURL.SPECIFIC_USE_ATTRIBUTE.equals(attrName)) {
                url.parseAndSetSpecificUses(attr.getValue());
                continue;
            }
            if (FulltextURL.HREF_ATTRIBUTE.equals(attrName)) {
                url.setHref(attr.getValue());
            }
        }
        return url;
    }

    /**
     * Tells whether the element's name is exactly {@code "article"}.
     *
     * @param element element to test (typically the document root)
     * @return {@code true} if the name equals "article"
     */
    public static boolean checkIfArticle(Element element) {
        final String elementName = element.getName();
        return elementName.equals("article");
    }

    /**
     * Tells whether the element's name is exactly {@code "book"}.
     *
     * @param element element to test (typically the document root)
     * @return {@code true} if the name equals "book"
     */
    public static boolean checkIfBook(Element element) {
        final String elementName = element.getName();
        return elementName.equals("book");
    }

    /**
     * Tells whether the book-meta XPath ({@code NlmConstants.XPATH_NS_BOOK_META}) selects at
     * least one node in the document.
     *
     * @param document parsed NLM document
     * @return {@code true} if book metadata is present
     * @throws JaxenException if the XPath cannot be compiled or evaluated
     */
    protected static boolean checkIfHasBookMeta(Document document) throws JaxenException {
        Dom4jXPath bookMetaQuery = new Dom4jXPath(NlmConstants.XPATH_NS_BOOK_META);
        bookMetaQuery.setNamespaceContext(NlmProcessorHelperCommon.getNamespaceContext(document));
        List<?> hits = bookMetaQuery.selectNodes(document);
        return !hits.isEmpty();
    }

    /**
     * Tells whether the mbook-meta XPath ({@code NlmConstants.XPATH_NS_MBOOK_META}) selects at
     * least one node in the document.
     *
     * @param document parsed NLM document
     * @return {@code true} if mbook metadata is present
     * @throws JaxenException if the XPath cannot be compiled or evaluated
     */
    protected static boolean checkIfHasMBookMeta(Document document) throws JaxenException {
        Dom4jXPath mbookMetaQuery = new Dom4jXPath(NlmConstants.XPATH_NS_MBOOK_META);
        mbookMetaQuery.setNamespaceContext(NlmProcessorHelperCommon.getNamespaceContext(document));
        List<?> hits = mbookMetaQuery.selectNodes(document);
        return !hits.isEmpty();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the single node selected by the given ISBN XPath.
     *
     * @param document         parsed NLM document
     * @param namespaceContext namespace bindings for the XPath
     * @param str              XPath expression locating the ISBN
     * @return the selected node, or {@code null} if nothing is selected
     * @throws EudmlServiceException if more than one node is selected
     * @throws JaxenException        if the XPath cannot be compiled or evaluated
     */
    public static Node getIsbnNode(Document document, NamespaceContext namespaceContext, String str) throws JaxenException, EudmlServiceException {
        Dom4jXPath isbnQuery = new Dom4jXPath(str);
        isbnQuery.setNamespaceContext(namespaceContext);
        List<?> matches = isbnQuery.selectNodes(document);
        if (matches.size() > 1) {
            throw new EudmlServiceException("more than one ISBN, is this ok?" + document.asXML());
        }
        return matches.isEmpty() ? null : (Node) matches.get(0);
    }

    /**
     * Normalizes a title for comparison.
     *
     * <p>Steps: remove every character that is not a Unicode letter, a Unicode number or an
     * ASCII space; collapse runs of control/separator characters into one space; strip such
     * characters from both ends; lower-case.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the same title normalizes identically
     * regardless of the JVM default locale (a Turkish default locale would otherwise map
     * {@code I} to a dotless {@code ı}).
     *
     * @param str raw title; must not be {@code null}
     * @return normalized title
     */
    public static String normalizeTitle(String str) {
        String lettersDigitsSpaces = str.replaceAll("[^\\p{L}\\p{N} ]", "");
        String singleSpaced = lettersDigitsSpaces.replaceAll("(\\p{C}|\\p{Z})+", " ");
        String trimmed = singleSpaced.replaceAll("^(\\p{C}|\\p{Z})+|(\\p{C}|\\p{Z})+$", "");
        return trimmed.toLowerCase(Locale.ROOT);
    }

    /**
     * Substitutes the literal {@code "unknown"} for a {@code null} argument.
     *
     * @param str value that may be {@code null}
     * @return {@code str} itself when non-null (an empty string is returned unchanged),
     *         otherwise {@code "unknown"}
     */
    public static String nullToUnknown(String str) {
        if (str != null) {
            return str;
        }
        return "unknown";
    }

    /**
     * Collects the identifiers of the requested kind that are already present in the document.
     *
     * <p>Each selected element yields an {@code Identifier} built from its id-type attribute
     * value and its text.
     *
     * @param document              parsed NLM document
     * @param type_of_generated_ids kind of id to read; only article, journal, book and mbook
     *                              ids are supported
     * @return the identifiers found; empty if the document has none
     * @throws DocumentException if the id kind is not supported, if article ids are requested
     *                           for a document that is not an article, or if the XPath selects
     *                           something other than a {@code DefaultElement}
     * @throws JaxenException    if an XPath cannot be compiled or evaluated
     */
    public static Set<Identifier> getDocumentIds(Document document, TYPE_OF_GENERATED_IDS type_of_generated_ids) throws JaxenException, DocumentException {
        TYPE_OF_DOCUMENT documentType = getDocumentType(document);
        Dom4jXPath idQuery;
        String typeAttribute;
        switch (type_of_generated_ids) {
            case GENERATED_ARTICLE_ID:
                if (documentType != TYPE_OF_DOCUMENT.ARTICLE_FROM_JOURNAL
                        && documentType != TYPE_OF_DOCUMENT.ARTICLE_FROM_BOOK) {
                    // Previously this path left the XPath unset and failed below with a
                    // NullPointerException; report it through the declared exception instead.
                    throw new DocumentException("Article ids requested for a document that is not an article");
                }
                idQuery = new Dom4jXPath("//nlm:front/nlm:article-meta/nlm:article-id");
                typeAttribute = "pub-id-type";
                break;
            case GENERATED_JOURNAL_ID:
                idQuery = new Dom4jXPath(NlmConstants.XPATH_NS_JOURNAL_ID);
                typeAttribute = NlmConstants.ATTR_JOURNAL_ID_TYPE;
                break;
            case GENERATED_BOOK_ID:
                idQuery = new Dom4jXPath(NlmConstants.XPATH_NS_BOOK_ID);
                typeAttribute = "pub-id-type";
                break;
            case GENERATED_MBOOK_ID:
                idQuery = new Dom4jXPath(NlmConstants.XPATH_NS_MBOOK_ID);
                typeAttribute = "pub-id-type";
                break;
            default:
                throw new DocumentException("Not allowed type of id");
        }
        idQuery.setNamespaceContext(NlmProcessorHelperCommon.getNamespaceContext(document));
        Set<Identifier> found = new HashSet<Identifier>(0);
        for (Object obj : idQuery.selectNodes(document)) {
            if (!(obj instanceof DefaultElement)) {
                throw new DocumentException("invalid element instance " + obj.getClass() + ", expected " + DefaultElement.class);
            }
            DefaultElement idElement = (DefaultElement) obj;
            found.add(new Identifier(idElement.attributeValue(typeAttribute), idElement.getText()));
        }
        return found;
    }

    /**
     * Keeps only the identifiers whose type is contained in the given set of types.
     *
     * @param set  identifiers to filter; not modified
     * @param set2 accepted identifier types
     * @return a new set with the matching identifiers
     */
    public static Set<Identifier> filterIdsByIdType(Set<Identifier> set, Set<String> set2) {
        Set<Identifier> accepted = new HashSet<Identifier>(0);
        for (Iterator<Identifier> ids = set.iterator(); ids.hasNext();) {
            Identifier candidate = ids.next();
            if (!set2.contains(candidate.getType())) {
                continue;
            }
            accepted.add(candidate);
        }
        return accepted;
    }

    /**
     * Keeps only the identifiers of a single type; convenience form of the set-based overload.
     *
     * @param set identifiers to filter; not modified
     * @param str the accepted identifier type
     * @return a new set with the matching identifiers
     */
    public static Set<Identifier> filterIdsByIdType(Set<Identifier> set, String str) {
        Set<String> singleType = new HashSet<String>(0);
        singleType.add(str);
        Set<Identifier> accepted = filterIdsByIdType(set, singleType);
        return accepted;
    }

    /**
     * Collects the "extra" identifiers of the document together with its regular ids.
     *
     * <p>Articles (from a journal or from a book) use the article extra-id XPath and article
     * ids; books use the book extra-id XPath and book ids. Extra-id results that are not
     * {@code DefaultElement}s are skipped.
     *
     * @param document parsed NLM document
     * @return union of the extra ids and the ids returned by {@code getDocumentIds}
     * @throws DocumentException if the document type is not handled
     * @throws JaxenException    if an XPath cannot be compiled or evaluated
     */
    public static Set<Identifier> extractExtraIds(Document document) throws JaxenException, DocumentException, EudmlServiceException {
        final Set<Identifier> collected = new HashSet<Identifier>();
        final Dom4jXPath extraIdQuery;
        final TYPE_OF_GENERATED_IDS ownIdKind;
        switch (getDocumentType(document)) {
            case ARTICLE_FROM_JOURNAL:
            case ARTICLE_FROM_BOOK:
                // Both article flavours share the same XPath and id kind.
                extraIdQuery = new Dom4jXPath(NlmConstants.XPATH_NS_ARTICLE_EXTRA_ID);
                ownIdKind = TYPE_OF_GENERATED_IDS.GENERATED_ARTICLE_ID;
                break;
            case BOOK:
                extraIdQuery = new Dom4jXPath(NlmConstants.XPATH_NS_BOOK_EXTRA_ID);
                ownIdKind = TYPE_OF_GENERATED_IDS.GENERATED_BOOK_ID;
                break;
            default:
                throw new DocumentException("Document is neither article nor book");
        }
        extraIdQuery.setNamespaceContext(NlmProcessorHelperCommon.getNamespaceContext(document));
        for (Object hit : extraIdQuery.selectNodes(document)) {
            if (!(hit instanceof DefaultElement)) {
                continue;
            }
            DefaultElement link = (DefaultElement) hit;
            String idType = link.attributeValue(NlmConstants.ATTR_ART_EXTLINK_ID_TYPE);
            collected.add(new Identifier(idType, link.getText()));
        }
        collected.addAll(getDocumentIds(document, ownIdKind));
        return collected;
    }
}
