package pl.edu.icm.yadda.imports.bwnjournal;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang.StringUtils;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import pl.edu.icm.yadda.common.utils.Utils;
import pl.edu.icm.yadda.imports.utils.Dom4jUtils;
import pl.edu.icm.yadda.repo.id.YaddaIdConstants;
import pl.edu.icm.yadda.repo.model.Affiliation;
import pl.edu.icm.yadda.repo.model.Content;
import pl.edu.icm.yadda.repo.model.Contributor;
import pl.edu.icm.yadda.repo.model.Element;
import pl.edu.icm.yadda.repo.model.Location;
import pl.edu.icm.yadda.repo.model.ModelUtils;
import pl.edu.icm.yadda.repo.model.Name;
import pl.edu.icm.yadda.repo.model.builder.AttributableBuilder;
import pl.edu.icm.yadda.repo.model.builder.ContributorBuilder;
import pl.edu.icm.yadda.repo.model.builder.DescriptableBuilder;
import pl.edu.icm.yadda.repo.model.builder.ElementBuilder;
import pl.edu.icm.yadda.tools.textcat.LanguageIdentifierBean;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.12.5.jar:pl/edu/icm/yadda/imports/bwnjournal/BwnjournalParser.class */
public class BwnjournalParser {
    /** Classifies the language of publisher names lacking an explicit one; {@code null} if its construction failed. */
    private LanguageIdentifierBean langIdentifier;
    /** File passed to the most recent {@code parse(File, Map)} call; prefixes the collected classification errors. */
    private File currentFile;
    /** Rejected PACS codes (with file path), de-duplicated and kept in first-seen order. */
    private final Set<String> pacsErrors = new LinkedHashSet<String>();
    /** Rejected MSC codes (with file path), de-duplicated and kept in first-seen order. */
    private final Set<String> mscErrors = new LinkedHashSet<String>();
    private static final Logger log = LoggerFactory.getLogger(BwnjournalParser.class);
    /** Language-code mapping loaded by the static initializer from {@code iso-639-2_to_iso-639-1.properties}. */
    private static final Properties LANG_MAP = new Properties();

    /* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.12.5.jar:pl/edu/icm/yadda/imports/bwnjournal/BwnjournalParser$NoEntityResolver.class */
    public static class NoEntityResolver implements EntityResolver {
        @Override // org.xml.sax.EntityResolver
        public InputSource resolveEntity(String str, String str2) throws SAXException, IOException {
            InputStream resourceAsStream = getClass().getClassLoader().getResourceAsStream("pl/edu/icm/yadda/imports/bwnjournal/bwnjournal-1.0.dtd");
            if (resourceAsStream == null) {
                BwnjournalParser.log.error("resource pl/edu/icm/yadda/imports/bwnjournal/bwnjournal-1.0.dtd not found");
            }
            return new InputSource(resourceAsStream);
        }
    }

    /**
     * Creates a parser. A failure to construct the language identifier is logged
     * and leaves the identifier unset ({@code null}) rather than aborting construction.
     */
    public BwnjournalParser() {
        LanguageIdentifierBean identifier = null;
        try {
            identifier = new LanguageIdentifierBean();
        } catch (Exception e) {
            log.error("Exception while initializing language identifier", (Throwable) e);
        }
        this.langIdentifier = identifier;
    }

    /**
     * Parses the file at the given path into {@code map}. A path that does not
     * exist is only logged; nothing is parsed and no exception is raised.
     *
     * @param str path of the XML file to parse
     * @param map target map of elements keyed by external id
     * @throws ParserException if parsing the existing file fails
     */
    public void parse(String str, Map<String, Element> map) throws ParserException {
        File source = new File(str);
        if (!source.exists()) {
            log.error("File not exists " + str);
            return;
        }
        parse(source, map);
    }

    /**
     * Reads the XML file with a validating reader and transforms all of its
     * {@code journal} children, then all of its {@code article} children, into {@code map}.
     *
     * @param file XML file to parse; also remembered as the current file
     * @param map  target map of elements keyed by external id
     * @throws ParserException if the document cannot be read or any transformation fails
     */
    public void parse(File file, Map<String, Element> map) throws ParserException {
        this.currentFile = file;

        SAXReader reader = new SAXReader();
        reader.setEntityResolver(new NoEntityResolver());
        reader.setIncludeExternalDTDDeclarations(false);
        reader.setIncludeInternalDTDDeclarations(false);
        reader.setValidation(true);

        org.dom4j.Element root;
        try {
            root = reader.read(file).getRootElement();
        } catch (DocumentException e) {
            throw new ParserException("Could not get XML root element (" + file.getAbsolutePath() + DefaultExpressionEngine.DEFAULT_INDEX_END, e);
        }

        try {
            // Journals go first: articles are validated against journals already in the map.
            for (Iterator<?> journals = root.elements("journal").iterator(); journals.hasNext();) {
                transformJournal((org.dom4j.Element) journals.next(), map);
            }
            for (Iterator<?> articles = root.elements("article").iterator(); articles.hasNext();) {
                transformArticle((org.dom4j.Element) articles.next(), map);
            }
        } catch (Exception e) {
            // Any failure is wrapped so the message carries the offending file.
            throw new ParserException("Parse error (" + file.getAbsolutePath() + DefaultExpressionEngine.DEFAULT_INDEX_END, e);
        }
    }

    /**
     * Converts a {@code journal} node into a journal-level element and stores it in
     * {@code map}, unless an entry with the same journal id is already present.
     *
     * @param element the {@code journal} XML node
     * @param map     target map of elements keyed by external id
     * @throws ParserException if the node has no {@code id} attribute or its content is invalid
     */
    private void transformJournal(org.dom4j.Element element, Map<String, Element> map) throws ParserException {
        final String rawId = element.attributeValue("id");
        if (rawId == null) {
            throw new ParserException("Journal without id");
        }
        final String journalId = BwnjournalIdGenerator.getJournalId(rawId);
        if (!map.containsKey(journalId)) {
            Element journal = new Element();
            ElementBuilder builder = new ElementBuilder(journal);
            builder.setExtId(journalId).setLangs(mapLangs(element.attributeValue("langs")));
            updateDescriptions(builder, element);
            updateNames(builder, element);
            updateIdentifiers(builder, element);
            // The journal's parent level is its publisher.
            String publisherId = findPublisher(element, rawId, map);
            builder.addLevel(YaddaIdConstants.ID_LEVEL_JOURNAL_JOURNAL, publisherId);
            map.put(journalId, journal);
        }
    }

    /**
     * Copies every {@code description} child of {@code element} into the builder and
     * selects the default description: the first one, unless a later one carries
     * {@code default="yes"} (the last such one wins).
     *
     * @param elementBuilder builder receiving the descriptions
     * @param element        XML node whose {@code description} children are read
     */
    private void updateDescriptions(ElementBuilder elementBuilder, org.dom4j.Element element) {
        String defaultText = null;
        String defaultLang = null;
        for (Object node : element.elements("description")) {
            org.dom4j.Element description = (org.dom4j.Element) node;
            String lang = mapLang(description.attributeValue("lang"));
            String text = description.getTextTrim();
            // Guard on the text, not the language: a first description without a
            // language must not be displaced by later non-default descriptions.
            if (defaultText == null || "yes".equals(Dom4jUtils.getAttributeValue(description, "default"))) {
                defaultText = text;
                defaultLang = lang;
            }
            // Reuse the values computed above so mapLang() does not warn twice per node.
            elementBuilder.addDescription(text, lang);
        }
        if (defaultText != null) {
            elementBuilder.setDefaultDescription(defaultText, defaultLang);
        }
    }

    /**
     * Copies every {@code name} child of {@code element} into the builder and picks
     * the default name: the first one, unless a later one carries {@code default="yes"}.
     *
     * @param descriptableBuilder builder receiving the names
     * @param element             XML node whose {@code name} children are read
     */
    private void updateNames(DescriptableBuilder descriptableBuilder, org.dom4j.Element element) {
        String chosenText = null;
        String chosenLang = null;
        for (Iterator<?> names = element.elements("name").iterator(); names.hasNext();) {
            org.dom4j.Element nameNode = (org.dom4j.Element) names.next();
            final String lang = mapLang(nameNode.attributeValue("lang"));
            final String text = nameNode.getTextTrim();
            boolean flaggedDefault = chosenText == null
                    || "yes".equals(Dom4jUtils.getAttributeValue(nameNode, "default"));
            if (flaggedDefault) {
                chosenText = text;
                chosenLang = lang;
            }
            descriptableBuilder.addName(text, lang);
        }
        if (chosenText == null) {
            return;
        }
        descriptableBuilder.setDefaultName(chosenText, chosenLang);
    }

    /**
     * Registers every {@code id} child of {@code element} as an ISSN identifier.
     * Only the {@code issn} class (case-insensitive, surrounding whitespace ignored) is accepted.
     *
     * @param elementBuilder builder receiving the identifiers
     * @param element        XML node whose {@code id} children are read
     * @throws ParserException if an {@code id} has no class or a class other than ISSN
     */
    private void updateIdentifiers(ElementBuilder elementBuilder, org.dom4j.Element element) throws ParserException {
        for (Object node : element.elements("id")) {
            org.dom4j.Element idNode = (org.dom4j.Element) node;
            String idClass = idNode.attributeValue("class");
            if (Utils.emptyStr(idClass)) {
                throw new ParserException("id without class");
            }
            // Locale.ROOT keeps the comparison stable under default locales with
            // special casing rules (e.g. Turkish, where "I" lower-cases to a dotless i).
            if (!"issn".equals(idClass.trim().toLowerCase(Locale.ROOT))) {
                throw new ParserException("Unknown id class (" + idClass + DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            elementBuilder.addIdentifier("bwmeta1.id-class.ISSN", idNode.getTextTrim());
        }
    }

    /**
     * Resolves the publisher of a journal from its single {@code contributor} child
     * (role {@code publisher}, exactly one {@code institution}) and registers a
     * publisher element in {@code map} when none exists yet for the derived id.
     *
     * @param element the {@code journal} XML node
     * @param str     the journal's raw id, used only in error messages
     * @param map     target map of elements keyed by external id
     * @return the external id of the publisher element
     * @throws ParserException if the contributor/institution structure is missing,
     *                         duplicated, has an unexpected role, or lacks a name
     */
    private String findPublisher(org.dom4j.Element element, String str, Map<String, Element> map) throws ParserException {
        List<?> contributors = element.elements("contributor");
        if (contributors.isEmpty()) {
            throw new ParserException("Journal without contributor (journalId:" + str + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        if (contributors.size() > 1) {
            throw new ParserException("Journal has more than one contributor");
        }
        org.dom4j.Element contributorNode = (org.dom4j.Element) contributors.get(0);
        String role = Dom4jUtils.getAttributeValue(contributorNode, "role");
        if (!"publisher".equals(role)) {
            throw new ParserException("Unknown journal's contributor role (journalId:" + str + " role:" + role + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        List<?> institutions = contributorNode.elements("institution");
        if (institutions.isEmpty()) {
            throw new ParserException("Journal's contributor has no institution (journalId:" + str + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        if (institutions.size() > 1) {
            throw new ParserException("More than one instituion specified in journal's contributor (journalId:" + str + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        org.dom4j.Element institution = (org.dom4j.Element) institutions.get(0);

        // The names are loaded into the candidate element first so that the
        // default name can be determined before the publisher id is derived.
        Element publisher = new Element();
        ElementBuilder publisherBuilder = new ElementBuilder(publisher);
        updateNames(publisherBuilder, institution);
        Name defaultName = ModelUtils.getDefaultName(publisher);
        if (defaultName == null || Utils.blankStr(defaultName.getText())) {
            throw new ParserException("Journal's contributor has institution without name (journalId:" + str + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        String publisherName = defaultName.getText();
        String publisherId = BwnjournalIdGenerator.getPublisherId(publisherName);
        if (!map.containsKey(publisherId)) {
            publisher.setExtId(publisherId);
            publisherBuilder.addLevel(YaddaIdConstants.ID_LEVEL_JOURNAL_PUBLISHER, null);
            String lang = defaultName.getLang();
            if (Utils.emptyStr(lang)) {
                // The constructor leaves langIdentifier null when its setup failed;
                // in that case keep the language as-is instead of throwing an NPE.
                if (this.langIdentifier != null) {
                    lang = this.langIdentifier.classify(publisherName);
                } else {
                    log.warn("Language identifier unavailable; language of publisher '" + publisherName + "' left unset");
                }
            }
            publisher.setLangs(lang);

            Contributor contributor = new Contributor();
            contributor.setRole("publisher");
            contributor.setTitle(publisherName);
            AttributableBuilder.addAttribute(contributor, "institution.name", publisherName);
            String address = Dom4jUtils.getSubelementText(institution, "address");
            if (!Utils.emptyStr(address)) {
                AttributableBuilder.addAttribute(contributor, "institution.address", address);
            }
            publisher.addContributor(contributor);
            map.put(publisherId, publisher);
        }
        return publisherId;
    }

    /**
     * Converts an {@code article} node into a {@link BwnjournalElement} and stores it
     * in {@code map} under the generated article id.
     *
     * @param element the {@code article} XML node
     * @param map     target map of elements keyed by external id
     * @throws ParserException if the article has no id, duplicates an existing id,
     *                         or any of its parts is invalid
     */
    private void transformArticle(org.dom4j.Element element, Map<String, Element> map) throws ParserException {
        final String rawId = Dom4jUtils.getAttributeValue(element, "id");
        if (Utils.emptyStr(rawId)) {
            throw new ParserException("Article without id!");
        }
        final String articleId = BwnjournalIdGenerator.getArticleId(rawId);
        if (map.containsKey(articleId)) {
            throw new ParserException("Two articles with the same id (" + rawId + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }

        BwnjournalElement article = new BwnjournalElement();
        article.setBwnjournalId(rawId);
        article.setExtId(articleId);
        article.setLangs(mapLangs(element.attributeValue("langs")));

        ElementBuilder builder = new ElementBuilder(article);
        // Hierarchy first, then the descriptive parts in a fixed order.
        updateJournalHierarchy(article, element, map);
        updateDescriptions(builder, element);
        updateNames(builder, element);
        updateIdentifiers(builder, element);
        updateContributors(builder, element);
        updateDates(builder, element);
        updateKeywords(builder, element);
        updateCategories(builder, element);
        updateCites(builder, element);
        updateRemoteContent(builder, element);

        map.put(articleId, article);
    }

    /**
     * Remembers a rejected PACS code together with the path of the current file.
     *
     * @param str the rejected code
     */
    private void logPacsError(String str) {
        final String location = this.currentFile.getAbsolutePath();
        final String entry = location + ":\t[" + str + "]";
        this.pacsErrors.add(entry);
    }

    /**
     * Remembers a rejected MSC code together with the path of the current file.
     *
     * @param str the rejected code
     */
    private void logMscError(String str) {
        final String location = this.currentFile.getAbsolutePath();
        final String entry = location + ":\t[" + str + "]";
        this.mscErrors.add(entry);
    }

    /** Accepted shape of an MSC code: two digits, an upper-case letter or '-', then two of digit/'x'/'p'. */
    private static final Pattern MSC_CODE = Pattern.compile("[0-9]{2}[A-Z\\-][0-9xp]{2}");

    /** Accepted shape of a PACS code: three dot-separated pairs of non-whitespace characters. */
    private static final Pattern PACS_CODE = Pattern.compile("\\S{2}\\.\\S{2}\\.\\S{2}");

    /**
     * Reads the {@code categories} children of {@code element} and adds the well-formed
     * MSC and PACS codes found in their {@code c} children to the builder. Malformed
     * codes are collected for later reporting rather than rejected with an exception.
     * Category groups without a {@code type} attribute are ignored.
     *
     * @param elementBuilder builder receiving the classification codes
     * @param element        XML node whose {@code categories} children are read
     */
    private void updateCategories(ElementBuilder elementBuilder, org.dom4j.Element element) {
        for (Object node : element.elements("categories")) {
            org.dom4j.Element categories = (org.dom4j.Element) node;
            String type = categories.attributeValue("type");
            if (type == null) {
                continue;
            }
            // Locale.ROOT: the prefix test must not depend on the default locale's casing rules.
            if (type.toLowerCase(Locale.ROOT).startsWith("mathematics subject classification")) {
                for (Iterator<?> codes = categories.elements("c").iterator(); codes.hasNext();) {
                    String code = ((org.dom4j.Element) codes.next()).getTextTrim();
                    if (MSC_CODE.matcher(code).matches()) {
                        elementBuilder.addMscNumber(code);
                    } else {
                        logMscError(code);
                    }
                }
            }
            if ("PACS".equals(type)) {
                for (Iterator<?> codes = categories.elements("c").iterator(); codes.hasNext();) {
                    String code = ((org.dom4j.Element) codes.next()).getTextTrim();
                    if (PACS_CODE.matcher(code).matches()) {
                        elementBuilder.addPacsNumber(code);
                    } else {
                        logPacsError(code);
                    }
                }
            }
        }
    }

    /**
     * Writes all collected invalid PACS codes to the log at WARN level, preceded by
     * a header line. Logs nothing when no invalid code was collected.
     */
    public void logPacsErrors() {
        if (!this.pacsErrors.isEmpty()) {
            log.warn("Following PACS numbers are invalid");
            for (String entry : this.pacsErrors) {
                log.warn(entry);
            }
        }
    }

    /**
     * Writes all collected invalid MSC codes to the log at WARN level, preceded by
     * a header line. Logs nothing when no invalid code was collected.
     */
    public void logMscErrors() {
        if (!this.mscErrors.isEmpty()) {
            log.warn("Following MSC numbers are invalid");
            for (String entry : this.mscErrors) {
                log.warn(entry);
            }
        }
    }

    /**
     * Adds every {@code k} entry of every {@code keywords} group to the builder,
     * tagged with the mapped language of its group.
     *
     * @param elementBuilder builder receiving the keywords
     * @param element        XML node whose {@code keywords} children are read
     */
    private void updateKeywords(ElementBuilder elementBuilder, org.dom4j.Element element) {
        for (Iterator<?> groups = element.elements("keywords").iterator(); groups.hasNext();) {
            org.dom4j.Element group = (org.dom4j.Element) groups.next();
            final String groupLang = mapLang(group.attributeValue("lang"));
            for (Object keyword : group.elements("k")) {
                String text = ((org.dom4j.Element) keyword).getTextTrim();
                elementBuilder.addKeyword(text, groupLang);
            }
        }
    }

    /**
     * Adds every {@code date} child of {@code element} to the builder, passing the
     * trimmed text together with the {@code type} attribute.
     *
     * @param elementBuilder builder receiving the dates
     * @param element        XML node whose {@code date} children are read
     */
    private void updateDates(ElementBuilder elementBuilder, org.dom4j.Element element) {
        for (Iterator<?> dates = element.elements("date").iterator(); dates.hasNext();) {
            org.dom4j.Element dateNode = (org.dom4j.Element) dates.next();
            final String value = dateNode.getTextTrim();
            final String type = dateNode.attributeValue("type");
            elementBuilder.addDate(value, type);
        }
    }

    /**
     * Adds every {@code contributor} child of {@code element} to the builder. Each
     * contributor receives a zero-padded, zero-based five-digit index reflecting
     * document order, plus role, title, optional person names, an optional e-mail
     * contact and an optional affiliation built from the institution's name and address.
     *
     * @param elementBuilder builder receiving the contributors
     * @param element        XML node whose {@code contributor} children are read
     */
    private void updateContributors(ElementBuilder elementBuilder, org.dom4j.Element element) {
        int position = 0;
        for (Object node : element.elements("contributor")) {
            org.dom4j.Element source = (org.dom4j.Element) node;
            ContributorBuilder contributor = new ContributorBuilder();
            // Locale.ROOT: the index must always be ASCII digits, whatever the default locale.
            contributor.setIndex(String.format(Locale.ROOT, "%05d", Integer.valueOf(position)));
            position++;
            contributor.setRole(source.attributeValue("role"));
            contributor.setTitle(source.attributeValue("title"));

            org.dom4j.Element person = source.element("person");
            if (person != null) {
                String firstName = person.attributeValue("firstname");
                String surname = person.attributeValue("surname");
                if (firstName != null) {
                    contributor.setFirstName(firstName);
                }
                if (surname != null) {
                    contributor.setLastName(surname);
                }
            }

            // Only the first contact element is inspected, and only e-mail contacts are kept.
            org.dom4j.Element contact = source.element("contact");
            if (contact != null && "email".equals(contact.attributeValue("type"))) {
                contributor.addAttribute("contact.email", contact.getTextTrim());
            }

            org.dom4j.Element affiliationNode = source.element("affiliation");
            org.dom4j.Element institution = affiliationNode == null ? null : affiliationNode.element("institution");
            if (institution != null) {
                Affiliation affiliation = new Affiliation();
                new AttributableBuilder(affiliation).addAttribute("text", describeInstitution(institution));
                contributor.addAffiliation(affiliation);
            }

            elementBuilder.addContributor(contributor.build());
        }
    }

    /** Joins the institution's name and address (whichever are present) with ", ". */
    private static String describeInstitution(org.dom4j.Element institution) {
        StringBuilder text = new StringBuilder();
        org.dom4j.Element name = institution.element("name");
        if (name != null) {
            text.append(name.getText());
        }
        org.dom4j.Element address = institution.element("address");
        if (address != null) {
            if (text.length() > 0) {
                text.append(", ");
            }
            text.append(address.getText());
        }
        return text.toString();
    }

    /**
     * Adds one content entry per {@code contents} child that has at least one remote
     * location. A {@code location} counts as remote when its {@code localisation}
     * child carries {@code remote="yes"}; all other locations are skipped.
     *
     * @param elementBuilder builder receiving the contents
     * @param element        XML node whose {@code contents} children are read
     */
    private void updateRemoteContent(ElementBuilder elementBuilder, org.dom4j.Element element) {
        for (Object contentsNode : element.elements("contents")) {
            Content content = new Content();
            for (Object locationNode : ((org.dom4j.Element) contentsNode).elements("location")) {
                org.dom4j.Element locationElement = (org.dom4j.Element) locationNode;
                org.dom4j.Element format = locationElement.element("format");
                org.dom4j.Element localisation = locationElement.element("localisation");
                // Constant-first equals: a missing "remote" attribute means "not remote", not an NPE.
                if (localisation == null || !"yes".equals(localisation.attributeValue("remote"))) {
                    continue;
                }
                Location location = new Location();
                location.setName(locationElement.attributeValue("name"));
                location.setLocalisationRemote(true);
                location.setLocalisationType(localisation.attributeValue("type"));
                location.setLocalisationAddress(localisation.getTextTrim());
                if (format != null) {
                    location.setFormatType(format.attributeValue("type"));
                    location.setFormatSize(format.attributeValue("size"));
                    location.setFormatText(format.getTextTrim());
                }
                content.addLocation(location);
            }
            if (!content.getLocationSet().isEmpty()) {
                elementBuilder.addContent(content);
            }
        }
    }

    /**
     * Places the article in the journal / year / volume / number hierarchy described
     * by its {@code hierarchy} child. Missing year, volume and number elements are
     * created in {@code map}; the journal itself must already be present there. The
     * article level is added with a single position, a page range, or no position at
     * all depending on the {@code position} child.
     *
     * @param bwnjournalElement article element being built
     * @param element           the {@code article} XML node
     * @param map               target map of elements keyed by external id
     * @throws ParserException if the hierarchy, the journal reference, the year, the
     *                         volume or the number is missing, or the journal is unknown
     */
    private void updateJournalHierarchy(BwnjournalElement bwnjournalElement, org.dom4j.Element element, Map<String, Element> map) throws ParserException {
        final org.dom4j.Element hierarchy = element.element("hierarchy");
        if (hierarchy == null) {
            throw new ParserException("Article without 'hierarchy' element");
        }

        final org.dom4j.Element journalRef = hierarchy.element("journal-ref");
        String journalKey = null;
        if (journalRef != null) {
            journalKey = Dom4jUtils.getAttributeValue(journalRef, "ref");
        }
        if (Utils.emptyStr(journalKey)) {
            throw new ParserException("Article without journal reference (" + bwnjournalElement.getBwnjournalId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        final String journalId = BwnjournalIdGenerator.getJournalId(journalKey);
        if (!map.containsKey(journalId)) {
            throw new ParserException("Article points to unknown journal (article:" + bwnjournalElement.getBwnjournalId() + ", journal:" + journalKey + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }

        final String year = Dom4jUtils.getAttributeValue(hierarchy, "year");
        if (Utils.emptyStr(year)) {
            throw new ParserException("Article without 'year' reference (" + bwnjournalElement.getBwnjournalId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        final String volume = Dom4jUtils.getAttributeValue(hierarchy, "volume");
        if (Utils.emptyStr(volume)) {
            throw new ParserException("Article without 'volume' reference (" + bwnjournalElement.getBwnjournalId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }
        final String number = Dom4jUtils.getAttributeValue(hierarchy, "number");
        if (Utils.emptyStr(number)) {
            throw new ParserException("Article without 'number' reference (" + bwnjournalElement.getBwnjournalId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
        }

        // Each level is registered on demand, parented to the level above it.
        final String yearId = BwnjournalIdGenerator.getYearId(journalKey, year);
        resolveJournalHierarchyElement(yearId, journalId, year, YaddaIdConstants.ID_LEVEL_JOURNAL_YEAR, map);
        final String volumeId = BwnjournalIdGenerator.getVolumeId(journalKey, year, volume);
        resolveJournalHierarchyElement(volumeId, yearId, volume, YaddaIdConstants.ID_LEVEL_JOURNAL_VOLUME, map);
        final String numberId = BwnjournalIdGenerator.getNumberId(journalKey, year, volume, number);
        resolveJournalHierarchyElement(numberId, volumeId, number, YaddaIdConstants.ID_LEVEL_JOURNAL_NUMBER, map);
        bwnjournalElement.setParentExtid(numberId);

        final org.dom4j.Element position = hierarchy.element("position");
        final String from = position == null ? null : position.attributeValue("from");
        final String to = position == null ? null : position.attributeValue("to");

        final boolean positioned = !Utils.emptyStr(from) && !Utils.emptyStr(to);
        if (!positioned) {
            log.warn("Article without 'position' in hierarchy (" + bwnjournalElement.getBwnjournalId() + DefaultExpressionEngine.DEFAULT_INDEX_END);
            ElementBuilder.addLevel(bwnjournalElement, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, numberId);
            return;
        }
        if (from.equals(to)) {
            ElementBuilder.addLevelWithPosition(bwnjournalElement, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, numberId, from);
        } else {
            ElementBuilder.addLevelWithRange(bwnjournalElement, YaddaIdConstants.ID_LEVEL_JOURNAL_ARTICLE, numberId, from, to);
        }
    }

    /**
     * Ensures a hierarchy element with the given external id exists in {@code map},
     * creating it with the given default name, level and parent when absent.
     *
     * @param str  external id of the hierarchy element
     * @param str2 external id of its parent
     * @param str3 default name of the element (set with an empty language)
     * @param str4 hierarchy level identifier
     * @param map  target map of elements keyed by external id
     * @return {@code str}, unchanged
     */
    private String resolveJournalHierarchyElement(String str, String str2, String str3, String str4, Map<String, Element> map) {
        if (map.containsKey(str)) {
            return str;
        }
        Element level = new Element();
        level.setExtId(str);
        ElementBuilder.setDefaultName(level, str3, "");
        ElementBuilder.addLevel(level, str4, str2);
        map.put(str, level);
        return str;
    }

    /**
     * Adds the {@code cite} entries of the {@code references} child to the builder,
     * each with its {@code text} attribute and a zero-padded, zero-based three-digit
     * index. Does nothing when there is no {@code references} child.
     *
     * @param elementBuilder builder receiving the citations
     * @param element        XML node whose {@code references} child is read
     */
    private void updateCites(ElementBuilder elementBuilder, org.dom4j.Element element) {
        org.dom4j.Element references = element.element("references");
        if (references == null) {
            return;
        }
        List<?> cites = references.elements("cite");
        for (int i = 0; i < cites.size(); i++) {
            org.dom4j.Element cite = (org.dom4j.Element) cites.get(i);
            // Locale.ROOT: the index must always be ASCII digits, whatever the default locale.
            String index = String.format(Locale.ROOT, "%03d", Integer.valueOf(i));
            elementBuilder.addCite(cite.attributeValue("text"), index);
        }
    }

    /**
     * Maps each whitespace-separated language code in {@code str} through
     * {@link #mapLang(String)} and joins the results with single spaces.
     *
     * @param str whitespace-separated language codes; may be blank
     * @return the mapped codes joined by spaces, or {@code null} for blank input
     */
    private String mapLangs(String str) {
        if (Utils.blankStr(str)) {
            return null;
        }
        final String[] codes = StringUtils.split(str);
        final String[] mapped = new String[codes.length];
        int idx = 0;
        for (String code : codes) {
            mapped[idx++] = mapLang(code);
        }
        return StringUtils.join((Object[]) mapped, ' ');
    }

    /**
     * Normalizes a single language code. Surrounding whitespace is ignored for the
     * lookup. A code found in the language map is replaced by its upper-cased
     * mapping; any other two-character code is upper-cased as is; anything else is
     * reported with a warning and returned unchanged.
     *
     * @param str language code as found in the document; may be {@code null}
     * @return the normalized code, the original string when unrecognized, or
     *         {@code null} for {@code null} input
     */
    private String mapLang(String str) {
        if (str == null) {
            return null;
        }
        // Trim once so the map lookup, the length check and the result all agree.
        String code = str.trim();
        String mapped = LANG_MAP.getProperty(code);
        if (mapped != null) {
            // Locale.ROOT: language codes must not be upper-cased with locale rules
            // (under a Turkish default locale "it" would otherwise get a dotted capital I).
            return mapped.toUpperCase(Locale.ROOT);
        }
        if (code.length() == 2) {
            return code.toUpperCase(Locale.ROOT);
        }
        log.warn("Unknown language code: " + str);
        return str;
    }

    static {
        try {
            LANG_MAP.load(BwnjournalParser.class.getResourceAsStream("/pl/edu/icm/yadda/iso-639-2_to_iso-639-1.properties"));
        } catch (IOException e) {
            log.error("Couldn't load the language code mapping", (Throwable) e);
            throw new RuntimeException("Couldn't load the language code mapping", e);
        }
    }
}
