package pl.edu.icm.yadda.imports.cejsh;

import com.google.common.annotations.VisibleForTesting;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang.StringUtils;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.jdbc.core.simple.ParameterizedRowMapper;
import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
import pl.edu.icm.yadda.aas.StaticXACMLInitializer;
import pl.edu.icm.yadda.bwmeta.converters.BwmetaConverterNewToDL;
import pl.edu.icm.yadda.bwmeta.model.YConstants;
import pl.edu.icm.yadda.bwmeta.model.YContributor;
import pl.edu.icm.yadda.bwmeta.model.YDescription;
import pl.edu.icm.yadda.bwmeta.model.YElement;
import pl.edu.icm.yadda.bwmeta.model.YId;
import pl.edu.icm.yadda.bwmeta.model.YLanguage;
import pl.edu.icm.yadda.bwmeta.model.YTagList;
import pl.edu.icm.yadda.imports.commons.DataBatch;
import pl.edu.icm.yadda.imports.commons.IMetadataSource;
import pl.edu.icm.yadda.imports.commons.MetadataPart;
import pl.edu.icm.yadda.imports.utils.YModelToolbox;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;
import pl.edu.icm.yadda.parsing.deprec.auxil.Feature2;
import pl.edu.icm.yadda.repo.model.ContributorConstants;

/* loaded from: input_file:WEB-INF/lib/bwmeta-import-1.12.6.jar:pl/edu/icm/yadda/imports/cejsh/CEJSHMetadataSource.class */
/**
 * Metadata source that reads CEJSH papers from the {@code PSJC.PAPER} table
 * (plus {@code PSJC.KEYWORD} and {@code PSJC.AUTHOR}) through a
 * {@link SimpleJdbcTemplate} and converts every paper row into a
 * {@link MetadataPart} holding the publisher/journal/year/volume/issue/article
 * element chain.
 *
 * <p>NOTE(review): the {@code Feature2.*}, {@code SchemaSymbols.ATTVAL_ID},
 * {@code DefaultExpressionEngine.DEFAULT_INDEX_END} and
 * {@code StaticXACMLInitializer.HTTP_PREFIX} constants used below as column
 * names / literals look like decompiler substitutions for plain string
 * literals with the same value. They are kept untouched here; confirm against
 * the original sources before replacing them.
 */
public class CEJSHMetadataSource implements IMetadataSource {
    private static final Logger log = LoggerFactory.getLogger(CEJSHMetadataSource.class);

    // Attribute keys attached to the generated article elements.
    private static final String AT_CEJSH_PAPERTYPE = "cejsh.paper-type";
    private static final String AT_CEJSH_EMAIL = "cejsh.journal-email";
    private static final String AT_CEJSH_POR = "cejsh.publication-order-reference";

    // SQL statements; the WHERE_* fragments are appended to PAPERS_SQL.
    private static final String PAPERS_SQL = "SELECT * FROM PSJC.PAPER";
    private static final String WHERE_STAMP = " WHERE ? <= ZMIANA AND ZMIANA < ?";
    private static final String WHERE_ID = " WHERE ID = ?";
    private static final String KEYWORDS_SQL = "SELECT DISTINCT KEYVAL FROM PSJC.KEYWORD WHERE ID = ?";
    private static final String PUBLISHERS_SQL = "SELECT F05 FROM PSJC.PAPER WHERE F01 = ? GROUP BY F05 ORDER BY max(ZMIANA)";
    private static final String EDITORIAL_OFFICES_SQL = "SELECT F06 FROM PSJC.PAPER WHERE F01 = ? GROUP BY F06 ORDER BY max(ZMIANA)";
    private static final String AUTHORS_SQL = "SELECT DISTINCT NAME,SURNAME FROM PSJC.AUTHOR WHERE ID = ?";

    /** Control characters stripped by {@link #normalize(String)}; compiled once. */
    private static final Pattern CONTROL_CHARS = Pattern.compile("\\p{Cntrl}");

    private static final BwmetaConverterNewToDL bc = new BwmetaConverterNewToDL();
    private static final YModelToolbox y = new YModelToolbox();

    /** Maps the first column to its normalized form; a SQL NULL becomes the empty string. */
    private static final ParameterizedRowMapper<String> KEYWORDS_MAPPER = new ParameterizedRowMapper<String>() {
        @Override
        public String mapRow(ResultSet resultSet, int i) throws SQLException {
            return StringUtils.defaultString(CEJSHMetadataSource.normalize(resultSet.getString(1)));
        }
    };

    /** Parent element handed to every publisher element. */
    private static final YElement CEJSH = new YElement("CEJSH");

    /** Maps the first column to its trimmed value; a SQL NULL becomes the empty string. */
    private static final ParameterizedRowMapper<String> STRING_MAPPER = new ParameterizedRowMapper<String>() {
        @Override
        public String mapRow(ResultSet resultSet, int i) throws SQLException {
            String value = resultSet.getString(1);
            return value == null ? "" : value.trim();
        }
    };

    @VisibleForTesting
    static final String[] EMPTY_ARRAY = new String[0];

    /** Squeezed discipline name (upper-case letters only) to CEJSH category code. */
    private static final Map<String, String> CEJSH_CATEGORIES;

    /** Lower-case language name, including misspellings found in the data, to language. */
    private static final Map<String, YLanguage> CEJSH_LANGS;

    // The lookup tables are built once per class load instead of being
    // re-populated by every constructor call, and are exposed read-only.
    static {
        Map<String, String> categories = new HashMap<>();
        categories.put("ANTHROPOLOGY", "ANTHROPOLOGY");
        categories.put("ARCHAEOLOGY", "ARCHAEOLOGY");
        categories.put("ARTSANDARCHITECTURE", "ARTS_&_ARCHITECTURE");
        categories.put("ECONOMICS", "ECONOMICS");
        categories.put("ETHNOLOGY", "ETHNOLOGY");
        categories.put("EDUCATION", "EDUCATION");
        categories.put("HISTORY", "HISTORY");
        categories.put("LAWANDADMINISTRATION", "LAW_&_ADMINISTRATION");
        categories.put("LIBRARYANDINFORMATIONSCIENCE", "LIBRARY_&_INFORMATION_SCIENCE");
        categories.put("MEDIAANDCOMMUNICATION", "MEDIA_&_COMMUNICATION");
        categories.put("PHILOLOGYANDLINGUISTICS", "PHILOLOGY_&_LINGUISTICS");
        categories.put("PHILOSOPHY", "PHILOSOPHY");
        categories.put("POLITICALSCIENCES", "POLITICAL_SCIENCES");
        categories.put("PSYCHOLOGY", "PSYCHOLOGY");
        categories.put("SOCIOLOGY", "SOCIOLOGY");
        categories.put("SCIENCEOFSCIENCE", "SCIENCE_OF_SCIENCE");
        categories.put("THEOLOGY", "THEOLOGY");
        categories.put("OTHERSOCIALSCIENCES", "OTHER_SOCIAL_SCIENCES");
        CEJSH_CATEGORIES = Collections.unmodifiableMap(categories);

        Map<String, YLanguage> languages = new HashMap<>();
        languages.put("belarussian", YLanguage.Belarusian);
        languages.put("belorussian", YLanguage.Belarusian);
        languages.put("bulgarian", YLanguage.Bulgarian);
        languages.put("croatian", YLanguage.Croatian);
        languages.put("czech", YLanguage.Czech);
        languages.put("english", YLanguage.English);
        languages.put("estonian", YLanguage.Estonian);
        languages.put("french", YLanguage.French);
        languages.put("german", YLanguage.German);
        languages.put("hungarian", YLanguage.Hungarian);
        languages.put("italian", YLanguage.Italian);
        languages.put("latin", YLanguage.Latin);
        languages.put("latvian", YLanguage.Latvian);
        languages.put("lithuanian", YLanguage.Lithuanian);
        languages.put("macedonian", YLanguage.Macedonian);
        languages.put("polis", YLanguage.Polish);
        languages.put("polish", YLanguage.Polish);
        languages.put("russia", YLanguage.Russian);
        languages.put("russian", YLanguage.Russian);
        languages.put("ruthenian", YLanguage.Uncoded);
        languages.put("serbian", YLanguage.Serbian);
        languages.put("slovak", YLanguage.Slovak);
        languages.put("slovenian", YLanguage.Slovenian);
        languages.put("spanish", YLanguage.Spanish);
        languages.put("ukrainian", YLanguage.Ukrainian);
        CEJSH_LANGS = Collections.unmodifiableMap(languages);
    }

    private final SimpleJdbcTemplate jdbc;
    private boolean isVerifyingDL;
    private boolean publisherElements = true;

    // NOTE(review): nothing in this class ever puts into 'journals', so the
    // lookup in processJournal always misses and the journal (and publisher)
    // element is rebuilt for every paper. Populating it would change which
    // paper row the journal attributes are taken from, so it is left as-is.
    private final Map<String, YElement> journals = new HashMap<>();
    private final Map<String, YElement> publishers = new HashMap<>();

    /**
     * Creates a source reading from the database behind the given template.
     *
     * @param simpleJdbcTemplate template used for every query issued by this source
     */
    public CEJSHMetadataSource(SimpleJdbcTemplate simpleJdbcTemplate) {
        this.jdbc = simpleJdbcTemplate;
    }

    /**
     * Enables or disables the trial conversion of every generated element
     * with {@link BwmetaConverterNewToDL}.
     */
    public void setVerifyingDL(boolean z) {
        this.isVerifyingDL = z;
    }

    public boolean isVerifyingDL() {
        return this.isVerifyingDL;
    }

    /**
     * Controls whether the publisher element is included in the produced
     * parts. When disabled, the publisher element additionally gets an empty
     * structure set.
     */
    public void setPublisherElements(boolean z) {
        this.publisherElements = z;
    }

    public boolean getPublisherElements() {
        return this.publisherElements;
    }

    /**
     * Loads and converts the single paper with the given ID.
     *
     * @param str value of the {@code ID} column of the paper
     * @return the converted paper together with its ancestor elements
     */
    @Override // pl.edu.icm.yadda.imports.commons.IDataSource
    public MetadataPart getData(String str) {
        return processPaper(preprocess(this.jdbc.queryForMap(PAPERS_SQL + WHERE_ID, str)));
    }

    /**
     * Loads and converts all papers whose {@code ZMIANA} stamp lies in
     * {@code [date, date2)}.
     *
     * @param date  inclusive lower bound; {@code null} means the epoch
     * @param date2 exclusive upper bound; {@code null} means "now"
     * @return a batch with one part per matching paper
     */
    @Override // pl.edu.icm.yadda.imports.commons.IDataSource
    public DataBatch<MetadataPart> getBatch(Date date, Date date2) {
        Date from = date != null ? date : new Date(0L);
        Date until = date2 != null ? date2 : new Date();
        List<Map<String, Object>> rows = this.jdbc.queryForList(PAPERS_SQL + WHERE_STAMP, from, until);
        ArrayList<MetadataPart> parts = new ArrayList<>(rows.size());
        for (Map<String, Object> row : rows) {
            parts.add(processPaper(preprocess(row)));
        }
        return new DataBatch<>(parts, null);
    }

    /**
     * Removes all control characters and trims the result.
     *
     * @param str text to clean, may be {@code null}
     * @return the cleaned text, or {@code null} when {@code str} is {@code null}
     */
    public static String normalize(String str) {
        if (str == null) {
            return null;
        }
        return CONTROL_CHARS.matcher(str).replaceAll("").trim();
    }

    /**
     * Converts a raw row into a map of normalized strings. {@code null}
     * values are preserved as {@code null}.
     */
    private Map<String, String> preprocess(Map<String, Object> map) {
        Map<String, String> normalized = new HashMap<>(map.size());
        for (Map.Entry<String, Object> entry : map.entrySet()) {
            Object value = entry.getValue();
            normalized.put(entry.getKey(), value == null ? null : normalize(value.toString()));
        }
        return normalized;
    }

    /**
     * Builds the full element chain for one paper row: journal entities
     * first, then optional year/volume/issue levels, then the article.
     */
    private MetadataPart processPaper(Map<String, String> map) {
        String id = map.get(SchemaSymbols.ATTVAL_ID);
        MetadataPart part = new MetadataPart();
        part.setId(id);

        LinkedList<YElement> journalEntities = processJournal(map);
        part.setEntities(journalEntities);
        YElement journal = journalEntities.getLast();

        // Each level is optional; an absent (null or empty) column is skipped
        // and the article hangs off the deepest level that is present.
        YElement parent = journal;
        parent = appendLevel(part, YConstants.EXT_LEVEL_JOURNAL_YEAR, map.get(Feature2.JEST_NAWIASEM_KLAMROWYM_POCZ), parent);
        parent = appendLevel(part, YConstants.EXT_LEVEL_JOURNAL_VOLUME, map.get(Feature2.JEST_NAWIASEM_KATOWYM_POCZ), parent);
        parent = appendLevel(part, YConstants.EXT_LEVEL_JOURNAL_ISSUE, map.get(Feature2.JEST_DWUKROPKIEM), parent);

        PublicationOrderReference por = parsePublicationOrderReference(map, id);
        String discipline = map.get(Feature2.JEST_AMPERSANDEM);

        // The chain is kept as one expression so the helper calls (which log
        // and query the database) run in the same order as before.
        YElement article = y.element(YConstants.EXT_LEVEL_JOURNAL_ARTICLE,
                        y.canonicalName(YLanguage.Undetermined, map.get(Feature2.JEST_ZNAKIEM_ZAPYTANIA)), parent)
                .addAttribute(AT_CEJSH_PAPERTYPE, map.get(Feature2.JEST_NAWIASEM_OKRAGLYM_POCZ))
                .addAttribute(y.attribute(AT_CEJSH_POR, por.getOriginalValue()))
                .addAttribute(y.attribute(AT_CEJSH_EMAIL, journal.getOneAttributeSimpleValue(YConstants.AT_CONTACT_EMAIL)))
                .addCategoryRef(y.categoryRef(YConstants.EXT_CLASSIFICATION_CEJSH, codeOfDiscipline(discipline, id)))
                .addContent(y.contentFile("1", YConstants.FT_ELEMENT_WEBPAGE, "text/html", maybeURI(map.get("STRONAWWW"), id)))
                .setContributors(authors(map.get(Feature2.JEST_CARET), id, por, map.get("EMAIL")))
                .addDescription(new YDescription(YLanguage.English, map.get(Feature2.JEST_ASTERIKSEM), "abstract"))
                .addId(new YId(YConstants.EXT_SCHEME_CEJSH, id))
                .setLanguages(languagesOf(map.get(Feature2.JEST_SLASHEM), id))
                .addTagList(new YTagList(YLanguage.English, YConstants.TG_SUBJECT_PRIMARY).addValue(discipline))
                .addTagList(new YTagList(YLanguage.English, "keyword")
                        .setValues(this.jdbc.query(KEYWORDS_SQL, KEYWORDS_MAPPER, id)));

        // If the POR carries a contact that ended up on no author, keep it
        // on the article itself instead of dropping it.
        if (por.hasContact() && !anyContributorHasContact(article)) {
            log.info(id + ": POR matched no authors: " + por.getOriginalValue());
            article.addAttribute(y.attribute("baztech.autor.adress", por.getContact()));
        }

        article.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal").getCurrent()
                .setPosition(map.get(Feature2.JEST_SREDNIKIEM));
        verifyDL(article);
        part.addEntity(article);
        return part;
    }

    /**
     * Adds one hierarchy level below {@code parent} when {@code name} is
     * non-empty and returns the element the next level should hang off.
     */
    private YElement appendLevel(MetadataPart part, String level, String name, YElement parent) {
        if (StringUtils.isEmpty(name)) {
            return parent;
        }
        YElement element = createElement(level, name, parent);
        verifyDL(element);
        part.addEntity(element);
        return element;
    }

    /** Tells whether any contributor of the element carries a contact-location attribute. */
    private boolean anyContributorHasContact(YElement element) {
        for (YContributor contributor : element.getContributors()) {
            if (!contributor.getAttributes(YConstants.AT_CONTACT_LOCATION).isEmpty()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Wraps the {@code F70} column in a {@link PublicationOrderReference},
     * logging values that are non-empty but yield no contact.
     */
    private PublicationOrderReference parsePublicationOrderReference(Map<String, String> map, String str) {
        String rawValue = map.get("F70");
        PublicationOrderReference reference = new PublicationOrderReference(rawValue);
        if (StringUtils.isNotEmpty(rawValue) && !reference.hasContact()) {
            log.info(str + ": Ignoring an apparently non-contact POR: " + rawValue);
        }
        return reference;
    }

    /**
     * Returns the entities above the paper: the publisher element (only when
     * publisher elements are enabled) followed by the journal element.
     */
    private LinkedList<YElement> processJournal(Map<String, String> map) {
        String journalTitle = map.get(Feature2.TAG_STARTOWY);
        YElement journal = this.journals.get(journalTitle);
        if (journal == null) {
            journal = buildJournal(map, journalTitle);
        }
        LinkedList<YElement> entities = new LinkedList<>();
        if (this.publisherElements) {
            entities.add(this.publishers.get(journalTitle));
        }
        entities.add(journal);
        return entities;
    }

    /**
     * Builds the publisher and journal elements for the given journal title
     * and stores the publisher element in {@link #publishers}.
     */
    private YElement buildJournal(Map<String, String> map, String journalTitle) {
        List<YContributor> publisherContributors = new ArrayList<>();
        for (String publisherName : this.jdbc.query(PUBLISHERS_SQL, STRING_MAPPER, journalTitle)) {
            publisherContributors.add(publisherContributor(publisherName));
        }
        if (publisherContributors.isEmpty()) {
            // The lookup uses the normalized title and can therefore miss the
            // raw F01 value; previously this ended in NoSuchElementException.
            // PUBLISHERS_SQL selects F05, so the row's own F05 is used instead.
            log.warn(journalTitle + ": No publisher rows found for the journal, using the paper's own F05");
            publisherContributors.add(publisherContributor(StringUtils.defaultString(map.get("F05"))));
        }

        // The query is ordered by max(ZMIANA), so the last row is the most
        // recently changed publisher; it names the publisher element.
        YContributor currentPublisher = publisherContributors.get(publisherContributors.size() - 1);
        YElement publisher = y.element(YConstants.EXT_LEVEL_JOURNAL_PUBLISHER, currentPublisher.getDefaultName(), CEJSH)
                .addContributor(currentPublisher);
        if (!this.publisherElements) {
            publisher.setStructures(Collections.emptySet());
        }
        verifyDL(publisher);
        this.publishers.put(journalTitle, publisher);

        YElement journal = y.element(YConstants.EXT_LEVEL_JOURNAL_JOURNAL,
                        y.canonicalName(YLanguage.Undetermined, journalTitle), publisher)
                .addAttribute("journal.frequency", map.get(Feature2.JEST_NAWIASEM_KWADRATOWYM_KONC))
                .addId(y.id("bwmeta1.id-class.ISSN", map.get(Feature2.JEST_NAWIASEM_KWADRATOWYM_POCZ)))
                .addName(y.name(YLanguage.Undetermined, map.get(Feature2.TAG_KONCOWY), YConstants.NM_ALTERNATIVE));
        for (YContributor contributor : publisherContributors) {
            journal.addContributor(contributor);
        }
        for (String office : this.jdbc.query(EDITORIAL_OFFICES_SQL, STRING_MAPPER, journalTitle)) {
            if (!office.isEmpty()) {
                journal.addContributor(new YContributor(ContributorConstants.ROLE_EDITORIAL_OFFICE, true)
                        .addName(y.canonicalName(YLanguage.Undetermined, office)));
            }
        }
        String journalEmail = map.get("CZASEMAIL");
        if (StringUtils.isNotEmpty(journalEmail)) {
            journal.addAttribute(YConstants.AT_CONTACT_EMAIL, journalEmail);
        }
        verifyDL(journal);
        return journal;
    }

    /** Creates an institutional "publisher" contributor with the given canonical name. */
    private static YContributor publisherContributor(String name) {
        return new YContributor("publisher", true).addName(y.canonicalName(YLanguage.Undetermined, name));
    }

    /**
     * Returns the given text as a URI string, prefixed with the HTTP prefix
     * when it has no scheme; {@code null} when it is empty or not a valid URI.
     */
    private String maybeURI(String str, String str2) {
        if (StringUtils.isEmpty(str)) {
            return null;
        }
        try {
            URI uri = new URI(str);
            return httpPrefixIfEmptyScheme(uri) + uri.toString();
        } catch (URISyntaxException e) {
            log.error(str2 + ": Invalid URI: " + str);
            return null;
        }
    }

    private String httpPrefixIfEmptyScheme(URI uri) {
        return uri.getScheme() == null ? StaticXACMLInitializer.HTTP_PREFIX : "";
    }

    /**
     * Turns the comma-separated author column into contributors.
     *
     * <p>E-mails from {@code emailColumn} are assigned to authors by position.
     * For each author the {@code PSJC.AUTHOR} table is searched for a row of
     * this paper whose surname equals one of the author's name tokens; only a
     * unique hit is used to add surname/forename names. The first author whose
     * surname occurs as a whole word in the POR name receives the POR contact.
     *
     * @param authorColumn comma-separated author names, may be {@code null}
     * @param paperId      ID of the paper, used for the lookup and for logging
     * @param por          parsed publication order reference
     * @param emailColumn  semicolon-separated e-mail addresses, may be blank
     * @return the contributors in column order; empty when there are none
     */
    private ArrayList<YContributor> authors(String authorColumn, String paperId, PublicationOrderReference por, String emailColumn) {
        ArrayList<YContributor> contributors = new ArrayList<>();
        if (authorColumn == null) {
            return contributors;
        }
        String[] emails = extractEmailsFrom(emailColumn);
        int nextEmail = 0;
        boolean contactAssigned = false;

        for (String fullName : authorColumn.split(" *, *")) {
            if (fullName.isEmpty()) {
                continue;
            }
            YContributor author = new YContributor("author", false)
                    .addName(y.canonicalName(YLanguage.NoLinguisticContent, fullName));
            contributors.add(author);
            if (nextEmail < emails.length) {
                addContributorEmail(author, emails[nextEmail]);
                nextEmail++;
            }

            String[] nameTokens = StringUtils.split(fullName);
            if (nameTokens.length == 0) {
                // Whitespace-only name: the IN (...) list would hold one
                // placeholder more than the parameters supplied.
                continue;
            }
            Object[] params = new Object[nameTokens.length + 1];
            params[0] = paperId;
            System.arraycopy(nameTokens, 0, params, 1, nameTokens.length);
            String surnameFilter = " AND SURNAME IN (?" + StringUtils.repeat(",?", nameTokens.length - 1)
                    + DefaultExpressionEngine.DEFAULT_INDEX_END;
            List<Map<String, Object>> rows = this.jdbc.queryForList(AUTHORS_SQL + surnameFilter, params);
            if (rows.size() != 1) {
                continue;
            }

            Map<String, Object> row = rows.get(0);
            String forenames = StringUtils.defaultString(normalize((String) row.get("NAME")));
            String surname = StringUtils.defaultString(normalize((String) row.get("SURNAME")));
            author.addName(y.name(YLanguage.NoLinguisticContent, surname, "surname"))
                    .addName(y.name(YLanguage.NoLinguisticContent, forenames, "forenames"));
            for (String forename : StringUtils.split(forenames)) {
                author.addName(y.name(YLanguage.NoLinguisticContent, forename, YConstants.NM_FORENAME));
            }

            String porName = por.getName();
            // The surname is data, not a pattern: quote it so that regex
            // metacharacters in a name cannot break or distort the match.
            if (porName != null && porName.matches(".*\\b" + Pattern.quote(surname) + "\\b.*")) {
                if (contactAssigned) {
                    log.warn(paperId + ": A second author matching the POR name: " + fullName + " -> " + porName);
                } else {
                    addContributorAttributes(author, por);
                    contactAssigned = true;
                }
            }
        }

        if (contributors.size() == 1 && !contactAssigned && por.hasContact()) {
            YContributor onlyAuthor = contributors.get(0);
            log.warn(paperId + ": the only author did not match the POR name (adding contact anyway): "
                    + onlyAuthor.getDefaultName().getText() + " -> " + por.getName());
            addContributorAttributes(onlyAuthor, por);
        }
        return contributors;
    }

    private void addContributorEmail(YContributor yContributor, String str) {
        yContributor.addAttribute(y.attribute(YConstants.AT_CONTACT_EMAIL, str));
    }

    /**
     * Splits a semicolon-separated list of e-mail addresses and trims every item.
     *
     * @param str raw column value, may be {@code null} or blank
     * @return the trimmed items, or {@link #EMPTY_ARRAY} for blank input
     */
    @VisibleForTesting
    String[] extractEmailsFrom(String str) {
        if (StringUtils.isBlank(str)) {
            return EMPTY_ARRAY;
        }
        String[] emails = StringUtils.split(str, ";");
        for (int i = 0; i < emails.length; i++) {
            emails[i] = emails[i].trim();
        }
        return emails;
    }

    /** Copies the POR contact, if there is one, onto the contributor as its contact location. */
    private void addContributorAttributes(YContributor yContributor, PublicationOrderReference publicationOrderReference) {
        if (publicationOrderReference.hasContact()) {
            yContributor.addAttribute(y.attribute(YConstants.AT_CONTACT_LOCATION, publicationOrderReference.getContact()));
        }
    }

    /**
     * Maps a free-text discipline name onto a CEJSH category code.
     *
     * <p>The name is reduced to upper-case letters (after cutting a trailing
     * parenthesised part and unifying a few synonyms) and compared with the
     * known keys, accepting a Levenshtein distance of at most 1.
     *
     * @param discipline raw discipline text, may be {@code null}
     * @param paperId    ID of the paper, used for logging only
     * @return the category code, or the empty string when nothing matches
     */
    private String codeOfDiscipline(String discipline, String paperId) {
        if (discipline == null) {
            return "";
        }
        String squeezed = discipline.replaceFirst("\\(.*", "")
                .replaceAll("&|/|@", "AND")
                .replaceAll("STUDIES", "SCIENCES")
                // Must spell the key exactly; the former "LINGUSTICS" used up
                // the whole distance budget for the linguistics category.
                .replaceAll("LANGUAGE", "LINGUISTICS")
                .replaceAll("[^A-Z]", "");
        if (squeezed.isEmpty()) {
            return "";
        }
        int bestDistance = 1;
        String bestKey = null;
        for (String candidate : CEJSH_CATEGORIES.keySet()) {
            int distance = StringUtils.getLevenshteinDistance(squeezed, candidate);
            if (distance <= bestDistance) {
                bestDistance = distance;
                if (bestKey != null) {
                    log.warn(paperId + ": More than one reasonable match for \"" + discipline + "\", discarding \""
                            + CEJSH_CATEGORIES.get(bestKey) + "\"");
                }
                bestKey = candidate;
            }
        }
        if (bestKey != null) {
            return CEJSH_CATEGORIES.get(bestKey);
        }
        log.error(paperId + ": No match for category \"" + discipline + "\"");
        return "";
    }

    /**
     * When verification is enabled, runs the element through the DL
     * converter to prove it is convertible.
     *
     * @throws RuntimeException wrapping the {@link TransformationException} on failure
     */
    private void verifyDL(YElement yElement) {
        if (!this.isVerifyingDL) {
            return;
        }
        try {
            bc.convert(yElement);
        } catch (TransformationException e) {
            throw new RuntimeException("Element cannot be converted to the DL model", e);
        }
    }

    /**
     * Parses a list of language names separated by spaces, commas or
     * ampersands. Unknown or empty items yield {@link YLanguage#Undetermined};
     * unknown non-empty items are additionally logged.
     *
     * @param languageColumn raw column value; {@code null} is treated like the empty string
     * @param paperId        ID of the paper, used for logging only
     */
    private Collection<YLanguage> languagesOf(String languageColumn, String paperId) {
        List<YLanguage> languages = new ArrayList<>(2);
        for (String token : StringUtils.defaultString(languageColumn).split("[ ,&]+")) {
            // Locale.ROOT keeps the lookup independent of the default locale
            // (a Turkish locale would lower-case 'I' to a dotless 'i').
            YLanguage language = CEJSH_LANGS.get(token.toLowerCase(Locale.ROOT));
            if (language != null) {
                languages.add(language);
                continue;
            }
            if (!token.isEmpty()) {
                log.error(paperId + ": Unknown language '" + token + "'");
            }
            languages.add(YLanguage.Undetermined);
        }
        return languages;
    }

    /** Creates an element of the given level whose canonical name is {@code str2}, below {@code yElement}. */
    private YElement createElement(String str, String str2, YElement yElement) {
        return y.element(str, y.canonicalName(YLanguage.NoLinguisticContent, str2), yElement);
    }

    /**
     * Always returns {@code null}.
     * NOTE(review): presumably token-based batching is not supported by this
     * source; confirm against the IDataSource contract.
     */
    @Override // pl.edu.icm.yadda.imports.commons.IDataSource
    public DataBatch<MetadataPart> getBatch(Serializable serializable) {
        return null;
    }

    @Override // pl.edu.icm.yadda.imports.commons.IDataSource
    public boolean isRandomAccessSupported() {
        return true;
    }

    @Override // pl.edu.icm.yadda.imports.commons.IDataSource
    public boolean isSequentialAccessSupported() {
        return true;
    }

    @Override // pl.edu.icm.yadda.imports.commons.IMetadataSource
    public boolean doKnowsAboutFiles() {
        return false;
    }
}
