package pl.edu.icm.synat.importer.cejsh.converter;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.hsqldb.Tokens;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.synat.application.model.bwmeta.YContributor;
import pl.edu.icm.synat.application.model.bwmeta.YDescription;
import pl.edu.icm.synat.application.model.bwmeta.YElement;
import pl.edu.icm.synat.application.model.bwmeta.YExportable;
import pl.edu.icm.synat.application.model.bwmeta.YId;
import pl.edu.icm.synat.application.model.bwmeta.YLanguage;
import pl.edu.icm.synat.application.model.bwmeta.YTagList;
import pl.edu.icm.synat.application.model.bwmeta.constants.YConstants;
import pl.edu.icm.synat.application.model.bwmeta.utils.YModelToolbox;
import pl.edu.icm.synat.importer.cejsh.datasource.CEJSHRetrieverConstants;
import pl.edu.icm.synat.importer.core.datasource.jdbc.model.SQLEntity;
import pl.edu.icm.synat.importer.core.io.DataRepository;
import pl.edu.icm.synat.importer.core.model.DictionaryCriterion;
import pl.edu.icm.synat.importer.core.model.DictionaryEntry;

/* loaded from: input_file:WEB-INF/lib/synat-importer-yadda-1.2-alpha-4.jar:pl/edu/icm/synat/importer/cejsh/converter/CejshArticleParser.class */
/**
 * Converts one CEJSH article row ({@link SQLEntity}) into the list of bwmeta elements that
 * describe it: the (optional) publisher element, the journal, the year / volume / number levels
 * that are present in the row, and finally the article itself.
 *
 * <p>Journal and publisher elements are cached per journal name in {@link #journals} and
 * {@link #publishers}; a journal seen before is returned from the cache instead of being rebuilt.
 * The caches are plain {@link HashMap}s and this class performs no synchronization on them.
 */
public class CejshArticleParser implements SQLEntityParser<List<YExportable>> {
    protected Logger log = LoggerFactory.getLogger(CejshArticleParser.class);

    /** When {@code true} the publisher element is emitted in front of the journal element. */
    private boolean publisherElements = true;

    /** Journal elements already built, keyed by the journal name (column {@code F01}). */
    protected Map<String, YElement> journals = new HashMap<>();

    /** Publisher elements already built, keyed by the journal name (column {@code F01}). */
    protected Map<String, YElement> publishers = new HashMap<>();

    /** Source of the dictionary lookups (publishers, editorial offices, keywords, authors). */
    protected DataRepository dataRepository;

    private static final YModelToolbox y = new YModelToolbox();

    /** Root element used as the parent of every publisher element. */
    private static final YElement CEJSH = new YElement("CEJSH");

    // NOTE(review): not referenced anywhere inside this class; kept in case it is used reflectively.
    private static final Pattern NO_ADDRESS_PATTERN = Pattern.compile("deceased|no +(postal +)?address +given|address +not +given|contact +the +(journal +)?editor|contact +the +(journal +)?publisher|no data|not applicable");

    /** Separator between author names in the author column. */
    private static final Pattern AUTHOR_SEPARATOR = Pattern.compile(" *, *");

    /** Separator between language names in the language column. */
    private static final Pattern LANGUAGE_SEPARATOR = Pattern.compile("[ ,&]+");

    /**
     * Builds the element chain for one article row.
     *
     * @param sQLEntity row whose {@code getFields()} map holds the article columns
     * @return publisher (if enabled), journal, the year / volume / number levels whose columns
     *         ({@code F12}, {@code F14}, {@code F16}) are non-empty, and the article, in that order
     * @throws NoSuchElementException if the journal has no publisher entry in the dictionary
     */
    @Override
    public List<YExportable> parseSqlEntity(SQLEntity sQLEntity) {
        Map<String, Object> fields = sQLEntity.getFields();
        String articleId = stringField(fields, "ID");
        List<YExportable> result = new LinkedList<>();

        LinkedList<YElement> journalChain = processJournal(fields);
        result.addAll(journalChain);
        YElement journal = journalChain.getLast();

        // Each optional hierarchy level becomes the parent of the next one.
        YElement parent = journal;
        parent = appendLevel(result, "bwmeta1.level.hierarchy_Journal_Year", stringField(fields, "F12"), parent);
        parent = appendLevel(result, "bwmeta1.level.hierarchy_Journal_Volume", stringField(fields, "F14"), parent);
        parent = appendLevel(result, "bwmeta1.level.hierarchy_Journal_Number", stringField(fields, "F16"), parent);

        // The publication-order-reference name and address are always empty here, so authors()
        // is called with "" for both and never attaches a contact location on its own.
        YElement article = y.element("bwmeta1.level.hierarchy_Journal_Article",
                        y.canonicalName(YLanguage.Undetermined, stringField(fields, "F21")), parent)
                .addAttribute("cejsh.paper-type", stringField(fields, "F10"))
                .addAttribute(y.attribute("cejsh.publication-order-reference", ""))
                .addAttribute(y.attribute(CejshConstants.AT_CEJSH_EMAIL, journal.getOneAttributeSimpleValue(YConstants.AT_CONTACT_EMAIL)))
                .addCategoryRef(y.categoryRef(YConstants.EXT_CLASSIFICATION_CEJSH, codeOfDiscipline(stringField(fields, "F26"), articleId)))
                .addContent(y.contentFile("1", YConstants.FT_ELEMENT_WEBPAGE, "text/html", maybeURI(stringField(fields, "STRONAWWW"), articleId)))
                .setContributors(authors(stringField(fields, "F31"), articleId, "", ""))
                .addDescription(new YDescription(YLanguage.English, stringField(fields, "F29"), "abstract"))
                .addId(new YId(YConstants.EXT_SCHEME_CEJSH, articleId))
                .setLanguages(languagesOf(stringField(fields, "F24"), articleId))
                .addTagList(new YTagList(YLanguage.English, YConstants.TG_SUBJECT_PRIMARY).addValue(stringField(fields, "F26")))
                .addTagList(new YTagList(YLanguage.English, "keyword").setValues(retrieveKeywords(articleId)));

        article.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal").getCurrent().setPosition(stringField(fields, "F23"));
        result.add(article);
        return result;
    }

    /**
     * Returns the journal element for the row, building and caching it (together with its
     * publisher element) on first use.
     *
     * @param map column map of the article row; the journal name is read from {@code F01}
     * @return the publisher element (only when publisher elements are enabled) followed by the
     *         journal element; the journal is always the last entry
     * @throws NoSuchElementException if the dictionary holds no publisher for the journal
     */
    protected LinkedList<YElement> processJournal(Map<String, Object> map) {
        String journalName = (String) map.get("F01");
        YElement journal = this.journals.get(journalName);
        if (journal == null) {
            journal = createJournal(journalName, map);
            this.journals.put(journalName, journal);
        }

        LinkedList<YElement> chain = new LinkedList<>();
        if (this.publisherElements) {
            chain.add(this.publishers.get(journalName));
        }
        chain.add(journal);
        return chain;
    }

    /**
     * Collects the keyword values registered in the dictionary for an article.
     *
     * @param str article identifier used as the {@code ID} criterion
     * @return the {@code KEYVAL} attribute of every matching dictionary entry, in query order
     */
    protected List<String> retrieveKeywords(String str) {
        List<DictionaryEntry> entries = this.dataRepository.queryDictionary("keyword", new DictionaryCriterion("ID", str));
        List<String> keywords = new ArrayList<>(entries.size());
        for (DictionaryEntry entry : entries) {
            keywords.add((String) entry.getAttributes().get("KEYVAL"));
        }
        return keywords;
    }

    /** Builds the publisher and journal elements for a journal that is not cached yet. */
    private YElement createJournal(String journalName, Map<String, Object> map) {
        LinkedList<YContributor> publisherContributors = new LinkedList<>();
        for (DictionaryEntry entry : this.dataRepository.queryDictionary("publisher", new DictionaryCriterion("F01", journalName))) {
            publisherContributors.add(new YContributor("publisher", true)
                    .addName(y.canonicalName(YLanguage.Undetermined, (String) entry.getAttributes().get("F05"))));
        }
        if (publisherContributors.isEmpty()) {
            // Same exception type LinkedList.getLast() would raise, but with a usable message.
            throw new NoSuchElementException("No publisher found in dictionary for journal \"" + journalName + "\"");
        }

        // The last publisher returned by the dictionary names the publisher element.
        YContributor mainPublisher = publisherContributors.getLast();
        YElement publisher = y.element("bwmeta1.level.hierarchy_Journal_Publisher", mainPublisher.getOneName(), CEJSH)
                .addContributor(mainPublisher);
        if (!this.publisherElements) {
            publisher.setStructures(Collections.emptySet());
        }
        this.publishers.put(journalName, publisher);

        YElement journal = y.element("bwmeta1.level.hierarchy_Journal_Journal", y.canonicalName(YLanguage.Undetermined, journalName), publisher)
                .addAttribute("journal.frequency", (String) map.get("F09"))
                .addId(y.id("bwmeta1.id-class.ISSN", (String) map.get("F08")))
                .addName(y.name(YLanguage.Undetermined, (String) map.get("F02"), YConstants.NM_ALTERNATIVE));
        for (YContributor contributor : publisherContributors) {
            journal.addContributor(contributor);
        }

        for (DictionaryEntry entry : this.dataRepository.queryDictionary(CEJSHRetrieverConstants.ENTITY_TYPE_EDITORIAL_OFFICE, new DictionaryCriterion("F01", journalName))) {
            String office = (String) entry.getAttributes().get("F06");
            if (StringUtils.isNotEmpty(office)) {
                journal.addContributor(new YContributor(YConstants.CR_EDITORIAL_OFFICE, true)
                        .addName(y.canonicalName(YLanguage.Undetermined, office)));
            }
        }

        String contactEmail = (String) map.get("CZASEMAIL");
        if (StringUtils.isNotEmpty(contactEmail)) {
            journal.addAttribute(YConstants.AT_CONTACT_EMAIL, contactEmail);
        }
        return journal;
    }

    /**
     * Adds one hierarchy level under {@code parent} when its name is present.
     *
     * @return the new level element, or {@code parent} unchanged when {@code name} is null or empty
     */
    private YElement appendLevel(List<YExportable> result, String level, String name, YElement parent) {
        if (StringUtils.isEmpty(name)) {
            return parent;
        }
        YElement element = createElement(level, name, parent);
        result.add(element);
        return element;
    }

    /**
     * Builds the author contributors from a comma-separated author list.
     *
     * <p>For each author exactly one matching dictionary entry is required before surname and
     * forenames are added. The first author whose surname occurs as a whole word in
     * {@code porName} receives {@code porAddress} as contact location; when there is only one
     * author and a non-empty address, the address is attached even without a name match.
     *
     * @param authorList comma-separated author names; {@code null} is treated as empty
     * @param articleId  article identifier, used for the dictionary query and log messages
     * @param porName    name to match surnames against ("POR name" in the log messages)
     * @param porAddress contact location to attach to the matching author
     */
    private List<YContributor> authors(String authorList, String articleId, String porName, String porAddress) {
        List<YContributor> contributors = new ArrayList<>();
        boolean porMatched = false;
        for (String authorName : AUTHOR_SEPARATOR.split(StringUtils.defaultString(authorList))) {
            if (authorName.isEmpty()) {
                continue;
            }
            YContributor author = new YContributor("author", false).addName(y.canonicalName(YLanguage.NoLinguisticContent, authorName));
            contributors.add(author);

            List<DictionaryEntry> matches = this.dataRepository.queryDictionary("author", new DictionaryCriterion("ID", articleId), new DictionaryCriterion("", authorName));
            if (matches.size() != 1) {
                continue;
            }
            String forenames = normalize((String) matches.get(0).getAttributes().get("NAME"));
            String surname = normalize((String) matches.get(0).getAttributes().get("SURNAME"));
            author.addName(y.name(YLanguage.NoLinguisticContent, surname, "surname"))
                    .addName(y.name(YLanguage.NoLinguisticContent, forenames, YConstants.NM_FORENAMES));
            for (String forename : StringUtils.split(forenames)) {
                author.addName(y.name(YLanguage.NoLinguisticContent, forename, YConstants.NM_FORENAME));
            }

            // The surname is data, not a pattern: quote it so regex metacharacters cannot
            // break the match or raise PatternSyntaxException.
            if (porName.matches(".*\\b" + Pattern.quote(surname) + "\\b.*")) {
                if (porMatched) {
                    this.log.warn(articleId + ": A second author matching the POR name: " + authorName + " -> " + porName);
                } else {
                    author.addAttribute(y.attribute(YConstants.AT_CONTACT_LOCATION, porAddress));
                    porMatched = true;
                }
            }
        }

        if (contributors.size() == 1 && contributors.get(0).getAttributes(YConstants.AT_CONTACT_LOCATION).isEmpty() && !porAddress.isEmpty()) {
            this.log.warn(articleId + ": the only author did not match the POR name (adding contact anyway): " + contributors.get(0).getOneName().getText() + " -> " + porName);
            contributors.get(0).addAttribute(y.attribute(YConstants.AT_CONTACT_LOCATION, porAddress));
        }
        return contributors;
    }

    /**
     * Maps a free-text discipline name onto a CEJSH category code.
     *
     * <p>The name is reduced to upper-case letters after a few textual substitutions and compared
     * with every key of {@code CejshConstants.CEJSH_CATEGORIES}; a key within Levenshtein
     * distance 1 is accepted. When several keys qualify, the one found last with the smallest
     * distance so far wins and a warning is logged for each one it displaces.
     *
     * @param discipline discipline text from the row; {@code null} is treated as empty
     * @param articleId  article identifier used in log messages
     * @return the category code, or {@code ""} when the text is empty or nothing matches
     */
    private String codeOfDiscipline(String discipline, String articleId) {
        // NOTE(review): "LINGUSTICS" looks like a misspelling, but it is matched fuzzily against
        // the category keys, so it is left untouched — confirm against CEJSH_CATEGORIES.
        String normalized = StringUtils.defaultString(discipline)
                .replaceFirst("\\(.*", "")
                .replaceAll("&|/|@", "AND")
                .replaceAll("STUDIES", "SCIENCES")
                .replaceAll("LANGUAGE", "LINGUSTICS")
                .replaceAll("[^A-Z]", "");
        if (normalized.isEmpty()) {
            return "";
        }

        int bestDistance = 1;
        String bestKey = null;
        for (String candidate : CejshConstants.CEJSH_CATEGORIES.keySet()) {
            int distance = StringUtils.getLevenshteinDistance(normalized, candidate);
            if (distance <= bestDistance) {
                bestDistance = distance;
                if (bestKey != null) {
                    this.log.warn(articleId + ": More than one reasonable match for \"" + discipline + "\", discarding \"" + CejshConstants.CEJSH_CATEGORIES.get(bestKey) + "\"");
                }
                bestKey = candidate;
            }
        }
        if (bestKey != null) {
            return CejshConstants.CEJSH_CATEGORIES.get(bestKey);
        }
        this.log.error(articleId + ": No match for category \"" + discipline + "\"");
        return "";
    }

    /**
     * Translates a list of language names into {@link YLanguage} values.
     *
     * @param languages language names separated by spaces, commas or ampersands;
     *                  {@code null} is treated as empty
     * @param articleId article identifier used in log messages
     * @return one entry per token; tokens missing from {@code CejshConstants.CEJSH_LANGS}
     *         (including an empty token) yield {@link YLanguage#Undetermined}
     */
    private Collection<YLanguage> languagesOf(String languages, String articleId) {
        List<YLanguage> result = new ArrayList<>(2);
        for (String token : LANGUAGE_SEPARATOR.split(StringUtils.defaultString(languages))) {
            // Locale.ROOT keeps the lookup key stable regardless of the JVM default locale.
            String key = token.toLowerCase(Locale.ROOT);
            if (CejshConstants.CEJSH_LANGS.containsKey(key)) {
                result.add(CejshConstants.CEJSH_LANGS.get(key));
            } else {
                if (!token.isEmpty()) {
                    this.log.error(articleId + ": Unknown language '" + token + "'");
                }
                result.add(YLanguage.Undetermined);
            }
        }
        return result;
    }

    /** Strips control characters and surrounding whitespace from {@code str}. */
    private String normalize(String str) {
        return str.replaceAll("\\p{Cntrl}", "").trim();
    }

    /**
     * Validates a URI string.
     *
     * @param uri       candidate URI text, may be {@code null}
     * @param articleId article identifier used in the log message
     * @return the URI text as rendered by {@link URI#toString()}, or {@code null} when the input
     *         is {@code null} or not a syntactically valid URI (the latter is logged as an error)
     */
    private String maybeURI(String uri, String articleId) {
        if (uri == null) {
            // new URI(null) would throw NullPointerException; a missing web page is not an error.
            return null;
        }
        try {
            return new URI(uri).toString();
        } catch (URISyntaxException e) {
            this.log.error(articleId + ": Invalid URI: " + uri);
            return null;
        }
    }

    /** Creates an element of the given level whose canonical name is {@code name}. */
    private YElement createElement(String level, String name, YElement parent) {
        return y.element(level, y.canonicalName(YLanguage.NoLinguisticContent, name), parent);
    }

    /** Reads a column value as a string; returns {@code null} when the column is absent. */
    private static String stringField(Map<String, Object> fields, String key) {
        return (String) fields.get(key);
    }
}
