package eu.eudml.enhancement.bibref;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.bwmeta.RelationsToElements;
import pl.edu.icm.yadda.bwmeta.model.YAncestor;
import pl.edu.icm.yadda.bwmeta.model.YConstants;
import pl.edu.icm.yadda.bwmeta.model.YContributor;
import pl.edu.icm.yadda.bwmeta.model.YElement;
import pl.edu.icm.yadda.bwmeta.model.YId;
import pl.edu.icm.yadda.bwmeta.model.YName;
import pl.edu.icm.yadda.bwmeta.model.YStructure;
import pl.edu.icm.yadda.imports.transformers.NlmToYTransformer;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;
import pl.edu.icm.yadda.service.search.query.SearchOperator;
import pl.edu.icm.yadda.service.search.query.SearchQuery;
import pl.edu.icm.yadda.service.search.query.criteria.BooleanCriterion;
import pl.edu.icm.yadda.service.search.query.criteria.FieldCriterion;
import pl.edu.icm.yadda.service.search.searching.FieldRequest;
import pl.edu.icm.yadda.service.search.searching.ResultField;
import pl.edu.icm.yadda.service.search.searching.ResultsFormat;
import pl.edu.icm.yadda.service.search.searching.SearchResult;
import pl.edu.icm.yadda.service.search.searching.SearchResults;
import pl.edu.icm.yadda.service2.search.ISearchService;
import pl.edu.icm.yadda.service2.search.SearchIndexRequest;
import pl.edu.icm.yadda.tools.abbr.AbbreviationDirectory;

/* loaded from: input_file:eu/eudml/enhancement/bibref/MetadataBibReferenceMatcher.class */
public class MetadataBibReferenceMatcher implements BibReferenceMatcher {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(MetadataBibReferenceMatcher.class);
    /** Name of the search index that queries are sent to; assigned through setIndexName. */
    private String indexName;
    /** Search service used to execute queries; assigned through setSearchService. */
    private ISearchService searchService;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:eu/eudml/enhancement/bibref/MetadataBibReferenceMatcher$AuthorSimpleMetadata.class */
    /**
     * Holder for one author's name parts: a surname and the given names.
     * Either part may be {@code null}; no validation is performed.
     */
    public static class AuthorSimpleMetadata {
        private final String surname;
        private final String givennames;

        /**
         * @param str  the surname (stored as-is)
         * @param str2 the given names (stored as-is)
         */
        public AuthorSimpleMetadata(String str, String str2) {
            surname = str;
            givennames = str2;
        }

        /** @return the surname exactly as passed to the constructor */
        public String getSurname() {
            return surname;
        }

        /** @return the given names exactly as passed to the constructor */
        public String getGivennames() {
            return givennames;
        }

        /**
         * Builds the comparison form of the name: the surname, followed by {@code ", "} and the
         * upper-case letters of the given names when there are any.
         *
         * @return e.g. {@code "Smith, JP"} for surname "Smith" and given names "John Paul";
         *         just the surname when the given names contain no upper-case letter
         */
        public String getNormalized() {
            String initials = "";
            if (givennames != null) {
                // Drop every character that is not an upper-case letter, leaving the initials.
                initials = givennames.replaceAll("\\P{Lu}", "");
            }
            StringBuilder normalized = new StringBuilder();
            // A null surname is rendered as the text "null", exactly like string concatenation.
            normalized.append(surname);
            if (!initials.isEmpty()) {
                normalized.append(", ");
            }
            return normalized.append(initials).toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:eu/eudml/enhancement/bibref/MetadataBibReferenceMatcher$DocSimpleMetadata.class */
    /**
     * Metadata of a whole document: everything inherited from {@link SimpleMetadata}
     * plus the list of the document's bibliographic references.
     */
    public static class DocSimpleMetadata extends SimpleMetadata {
        /** References in the order they were added. */
        private final List<SimpleMetadata> references = new ArrayList<SimpleMetadata>();

        private DocSimpleMetadata() {
            super();
        }

        /** @return the live (mutable) list of references; never {@code null} */
        public List<SimpleMetadata> getReferences() {
            return references;
        }

        /**
         * Appends one reference to the end of the list.
         *
         * @param simpleMetadata the reference metadata to append
         */
        public void addReference(SimpleMetadata simpleMetadata) {
            references.add(simpleMetadata);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:eu/eudml/enhancement/bibref/MetadataBibReferenceMatcher$SimpleMetadata.class */
    /**
     * Simplified bibliographic record used for matching: identifiers, authors, title and
     * journal coordinates (journal, volume, issue, year), together with the id of the owning
     * document and a position within it. The enclosing matcher sets position 0 for the
     * document itself and 1, 2, ... for its references.
     */
    public static class SimpleMetadata {
        private final List<String> ids = new ArrayList<String>();
        private final List<AuthorSimpleMetadata> authors = new ArrayList<AuthorSimpleMetadata>();
        private String docId;
        private int position;
        private String title;
        private String journal;
        private String volume;
        private String issue;
        private String year;

        private SimpleMetadata() {
        }

        // ----- identifiers and authors -----

        /**
         * Records an identifier unless an equal one is already present.
         *
         * @param str the identifier to record
         */
        public void addId(String str) {
            if (!ids.contains(str)) {
                ids.add(str);
            }
        }

        /** @return the live list of recorded identifiers, in insertion order */
        public List<String> getIds() {
            return ids;
        }

        /**
         * Appends an author built from the given name parts.
         *
         * @param str  the surname
         * @param str2 the given names
         */
        public void addAuthor(String str, String str2) {
            authors.add(new AuthorSimpleMetadata(str, str2));
        }

        /** @return the live list of authors, in insertion order */
        public List<AuthorSimpleMetadata> getAuthors() {
            return authors;
        }

        // ----- owning document and position -----

        public String getDocId() {
            return docId;
        }

        public void setDocId(String str) {
            docId = str;
        }

        public int getPosition() {
            return position;
        }

        public void setPosition(int i) {
            position = i;
        }

        // ----- descriptive fields -----

        public String getTitle() {
            return title;
        }

        public void setTitle(String str) {
            title = str;
        }

        public String getJournal() {
            return journal;
        }

        public void setJournal(String str) {
            journal = str;
        }

        /** @return the value of {@code AbbreviationDirectory.getHash} applied to the journal name */
        public String getJournalHash() {
            return AbbreviationDirectory.getHash(journal);
        }

        public String getVolume() {
            return volume;
        }

        public void setVolume(String str) {
            volume = str;
        }

        public String getIssue() {
            return issue;
        }

        public void setIssue(String str) {
            issue = str;
        }

        public String getYear() {
            return year;
        }

        public void setYear(String str) {
            year = str;
        }
    }

    /**
     * Matches every bibliographic reference of the given NLM document against the index.
     * For each reference the author/journal/year match is tried first; only when it yields
     * nothing is the author/year (with title) match used. At most one triple per reference is
     * kept: the first one returned by the match set's iterator.
     *
     * @param str the NLM document
     * @return the matched triples, or {@code null} when no document metadata could be extracted
     * @throws TransformationException propagated from the NLM metadata extraction
     */
    @Override // eu.eudml.enhancement.bibref.BibReferenceMatcher
    public Set<BibReferenceTriple> matchBibReferencedIds(String str) throws TransformationException {
        DocSimpleMetadata document = getDocumentMetadataFromNLM(str);
        if (document == null) {
            return null;
        }
        Set<BibReferenceTriple> matched = new HashSet<BibReferenceTriple>();
        for (SimpleMetadata reference : document.getReferences()) {
            Set<BibReferenceTriple> candidates = matchByAuthorJournalYear(reference);
            if (candidates.isEmpty()) {
                // Fall back to the looser query, which is then verified with the title.
                candidates = matchByAuthorYear(reference);
            }
            if (!candidates.isEmpty()) {
                matched.add(candidates.iterator().next());
            }
        }
        return matched;
    }

    /**
     * Finds indexed references that point at the given NLM document, combining the results of
     * the author/journal/year match and the author/year (with title) match.
     *
     * @param str the NLM document
     * @return the union of both match sets, or {@code null} when no document metadata could be
     *         extracted
     * @throws TransformationException propagated from the NLM metadata extraction
     */
    @Override // eu.eudml.enhancement.bibref.BibReferenceMatcher
    public Set<BibReferenceTriple> matchBibReferencingIds(String str) throws TransformationException {
        DocSimpleMetadata document = getDocumentMetadataFromNLM(str);
        if (document == null) {
            return null;
        }
        Set<BibReferenceTriple> referencing = new HashSet<BibReferenceTriple>();
        referencing.addAll(matchByAuthorJournalYear(document));
        referencing.addAll(matchByAuthorYear(document));
        return referencing;
    }

    /**
     * Parses an NLM record and extracts simplified metadata for the first element found at the
     * journal-article level, together with metadata for each element produced by
     * {@code RelationsToElements.convert} for that article (treated here as its references).
     *
     * <p>The document gets position 0; references get positions 1, 2, ... in iteration order
     * and carry the document's EuDML id as their doc id.
     *
     * <p>Elements that have no journal hierarchy (or no current node in it) are skipped rather
     * than causing a {@link NullPointerException}.
     *
     * @param str the NLM document
     * @return the document metadata, or {@code null} when no journal-article element is found
     * @throws TransformationException propagated from the NLM transformer
     */
    private DocSimpleMetadata getDocumentMetadataFromNLM(String str) throws TransformationException {
        for (YElement yElement : new NlmToYTransformer().read(str, new Object[0])) {
            if (!(yElement instanceof YElement)) {
                continue;
            }
            YStructure journalStructure = yElement.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
            if (journalStructure == null || journalStructure.getCurrent() == null) {
                // No journal hierarchy: cannot be a journal article, so look at the next element.
                continue;
            }
            // Constant-first equals tolerates a missing level.
            if (!YConstants.EXT_LEVEL_JOURNAL_ARTICLE.equals(journalStructure.getCurrent().getLevel())) {
                continue;
            }

            String documentId = yElement.getId("bwmeta1.id-class.eudml-id");
            DocSimpleMetadata document = new DocSimpleMetadata();
            document.setDocId(documentId);
            document.setPosition(0);
            fillDocumentMetadata(yElement, document);

            int position = 1;
            for (YElement referenceElement : RelationsToElements.convert(yElement)) {
                SimpleMetadata reference = new SimpleMetadata();
                fillDocumentMetadata(referenceElement, reference);
                reference.setDocId(documentId);
                reference.setPosition(position);
                document.addReference(reference);
                position++;
            }
            return document;
        }
        return null;
    }

    /**
     * Copies the fields used for matching from a Y-model element into the given metadata
     * holder: authors, identifiers, title, journal/volume/issue names and publication year.
     * Fields whose source is absent are left untouched.
     *
     * @param yElement       the element to read from
     * @param simpleMetadata the holder to populate
     */
    private void fillDocumentMetadata(YElement yElement, SimpleMetadata simpleMetadata) {
        // Authors: keep a contributor when at least one of surname / forenames is available.
        for (YContributor contributor : yElement.getContributors()) {
            YName surnameName = contributor.getOneName("surname");
            YName forenamesName = contributor.getOneName("forenames");
            String surname = null;
            if (surnameName != null) {
                surname = surnameName.getText();
            }
            String forenames = null;
            if (forenamesName != null) {
                forenames = forenamesName.getText();
            }
            if (surname == null && forenames == null) {
                continue;
            }
            simpleMetadata.addAuthor(surname, forenames);
        }

        // Identifiers are stored as "<scheme>###<value>".
        for (YId id : yElement.getIds()) {
            simpleMetadata.addId(id.getScheme() + "###" + id.getValue());
        }

        YName titleName = yElement.getOneName("canonical");
        if (titleName != null) {
            simpleMetadata.setTitle(titleName.getText());
        }

        YStructure journalStructure = yElement.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        if (journalStructure != null) {
            YAncestor journalAncestor = journalStructure.getAncestor(YConstants.EXT_LEVEL_JOURNAL_JOURNAL);
            if (journalAncestor != null) {
                YName journalName = journalAncestor.getOneName("canonical");
                String journal = null;
                if (journalName != null) {
                    journal = journalName.getText();
                }
                simpleMetadata.setJournal(journal);
            }

            YAncestor volumeAncestor = journalStructure.getAncestor(YConstants.EXT_LEVEL_JOURNAL_VOLUME);
            if (volumeAncestor != null) {
                YName volumeName = volumeAncestor.getOneName("canonical");
                String volume = null;
                if (volumeName != null) {
                    volume = volumeName.getText();
                }
                simpleMetadata.setVolume(volume);
            }

            YAncestor issueAncestor = journalStructure.getAncestor(YConstants.EXT_LEVEL_JOURNAL_ISSUE);
            if (issueAncestor != null) {
                YName issueName = issueAncestor.getOneName("canonical");
                String issue = null;
                if (issueName != null) {
                    String issueText = issueName.getText();
                    // The placeholder "[unknown]" is treated the same as a missing issue.
                    if (!issueText.equals("[unknown]")) {
                        issue = issueText;
                    }
                }
                simpleMetadata.setIssue(issue);
            }
        }

        if (yElement.getDate("published") != null) {
            int publishedYear = yElement.getDate("published").getYear();
            simpleMetadata.setYear(Integer.toString(publishedYear));
        }
    }

    /**
     * Runs the author/journal/year query for the given record and turns every verified search
     * result into a {@link BibReferenceTriple}.
     *
     * <p>The direction depends on the record's position:
     * <ul>
     *   <li>position &gt; 0 (a reference): results accepted by {@code isDocResult} become
     *       triples (record's doc id, record's position, result's doc id);</li>
     *   <li>position == 0 (a document): results accepted by {@code isRefResult} become triples
     *       (result's bibrefSource, result's bibrefPosition, record's doc id).</li>
     * </ul>
     * Triples whose document id equals their reference id (self-references) are dropped.
     *
     * @param simpleMetadata the reference or document record to match
     * @return the matched triples; empty when nothing matches, never {@code null}
     */
    private Set<BibReferenceTriple> matchByAuthorJournalYear(SimpleMetadata simpleMetadata) {
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        int position = simpleMetadata.getPosition();
        for (SearchResult searchResult : searchByAuthorJournalYear(simpleMetadata)) {
            if (position > 0 && isDocResult(searchResult) && matchesMetadata(simpleMetadata, searchResult)) {
                BibReferenceTriple referenced = new BibReferenceTriple(simpleMetadata.getDocId(), position, searchResult.getDocId());
                if (!referenced.getDocumentId().equals(referenced.getBibReferenceId())) {
                    matches.add(referenced);
                    log.debug("MetadataBibReferenceMatcher: referenced document found by authors, journal, year: {}", referenced);
                }
            } else if (position == 0 && isRefResult(searchResult) && matchesMetadata(simpleMetadata, searchResult)) {
                try {
                    BibReferenceTriple referencing = new BibReferenceTriple(getOneValue(searchResult, "bibrefSource"), Integer.parseInt(getOneValue(searchResult, "bibrefPosition")), simpleMetadata.getDocId());
                    if (!referencing.getDocumentId().equals(referencing.getBibReferenceId())) {
                        matches.add(referencing);
                        log.debug("MetadataBibReferenceMatcher: referencing document found by authors, journal, year: {}", referencing);
                    }
                } catch (NumberFormatException e) {
                    // isRefResult has already parsed this value, so this is unexpected:
                    // skip the result, but leave a trace instead of hiding the problem.
                    log.warn("MetadataBibReferenceMatcher: skipping search result with unparsable bibrefPosition: " + getOneValue(searchResult, "bibrefPosition"), e);
                }
            }
        }
        return matches;
    }

    /**
     * Runs the author/year query for the given record and turns every search result that also
     * passes the title check ({@code matchesMetadataWithTitle}) into a
     * {@link BibReferenceTriple}.
     *
     * <p>The direction depends on the record's position:
     * <ul>
     *   <li>position &gt; 0 (a reference): results accepted by {@code isDocResult} become
     *       triples (record's doc id, record's position, result's doc id);</li>
     *   <li>position == 0 (a document): results accepted by {@code isRefResult} become triples
     *       (result's bibrefSource, result's bibrefPosition, record's doc id).</li>
     * </ul>
     * Triples whose document id equals their reference id (self-references) are dropped.
     *
     * @param simpleMetadata the reference or document record to match
     * @return the matched triples; empty when nothing matches, never {@code null}
     */
    private Set<BibReferenceTriple> matchByAuthorYear(SimpleMetadata simpleMetadata) {
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        int position = simpleMetadata.getPosition();
        for (SearchResult searchResult : searchByAuthorYear(simpleMetadata)) {
            if (position > 0 && isDocResult(searchResult) && matchesMetadataWithTitle(simpleMetadata, searchResult)) {
                BibReferenceTriple referenced = new BibReferenceTriple(simpleMetadata.getDocId(), position, searchResult.getDocId());
                if (!referenced.getDocumentId().equals(referenced.getBibReferenceId())) {
                    matches.add(referenced);
                    log.debug("MetadataBibReferenceMatcher: referenced document found by authors, year: {}", referenced);
                }
            } else if (position == 0 && isRefResult(searchResult) && matchesMetadataWithTitle(simpleMetadata, searchResult)) {
                try {
                    BibReferenceTriple referencing = new BibReferenceTriple(getOneValue(searchResult, "bibrefSource"), Integer.parseInt(getOneValue(searchResult, "bibrefPosition")), simpleMetadata.getDocId());
                    if (!referencing.getDocumentId().equals(referencing.getBibReferenceId())) {
                        matches.add(referencing);
                        log.debug("MetadataBibReferenceMatcher: referencing document found by authors, year: {}", referencing);
                    }
                } catch (NumberFormatException e) {
                    // isRefResult has already parsed this value, so this is unexpected:
                    // skip the result, but leave a trace instead of hiding the problem.
                    log.warn("MetadataBibReferenceMatcher: skipping search result with unparsable bibrefPosition: " + getOneValue(searchResult, "bibrefPosition"), e);
                }
            }
        }
        return matches;
    }

    /**
     * Queries the index for records whose journal hash, publication year and every known
     * author surname all match the given record (criteria are combined with AND).
     *
     * @param simpleMetadata the record providing the search terms
     * @return the search results; a new empty list when the record has no journal, no year or
     *         no authors
     */
    private List<SearchResult> searchByAuthorJournalYear(SimpleMetadata simpleMetadata) {
        List<AuthorSimpleMetadata> authors = simpleMetadata.getAuthors();
        boolean incomplete = simpleMetadata.getJournal() == null
                || simpleMetadata.getYear() == null
                || authors == null
                || authors.isEmpty();
        if (incomplete) {
            return new ArrayList<SearchResult>();
        }

        BooleanCriterion allOf = new BooleanCriterion();
        allOf.setOperator(SearchOperator.AND);
        allOf.addCriterion(new FieldCriterion("journalHash", simpleMetadata.getJournalHash()));
        allOf.addCriterion(new FieldCriterion("publishedYear", simpleMetadata.getYear()));
        for (AuthorSimpleMetadata author : authors) {
            String surname = author.getSurname();
            if (surname == null) {
                // Authors known only by given names contribute no criterion.
                continue;
            }
            allOf.addCriterion(new FieldCriterion("authorCoauthorSurname", surname));
        }

        SearchQuery query = new SearchQuery();
        query.addCriterion(allOf);
        return searchByQuery(query);
    }

    /**
     * Queries the index for records whose publication year and every known author surname
     * match the given record (criteria are combined with AND). The journal is not part of
     * this query.
     *
     * @param simpleMetadata the record providing the search terms
     * @return the search results; a new empty list when the record has no year or no authors
     */
    private List<SearchResult> searchByAuthorYear(SimpleMetadata simpleMetadata) {
        List<AuthorSimpleMetadata> authors = simpleMetadata.getAuthors();
        boolean incomplete = simpleMetadata.getYear() == null
                || authors == null
                || authors.isEmpty();
        if (incomplete) {
            return new ArrayList<SearchResult>();
        }

        BooleanCriterion allOf = new BooleanCriterion();
        allOf.setOperator(SearchOperator.AND);
        allOf.addCriterion(new FieldCriterion("publishedYear", simpleMetadata.getYear()));
        for (AuthorSimpleMetadata author : authors) {
            String surname = author.getSurname();
            if (surname == null) {
                // Authors known only by given names contribute no criterion.
                continue;
            }
            allOf.addCriterion(new FieldCriterion("authorCoauthorSurname", surname));
        }

        SearchQuery query = new SearchQuery();
        query.addCriterion(allOf);
        return searchByQuery(query);
    }

    /**
     * Executes the query against the configured index, requesting the result fields that the
     * matching code reads afterwards.
     *
     * @param searchQuery the query to execute
     * @return the results reported by the search service
     */
    private List<SearchResult> searchByQuery(SearchQuery searchQuery) {
        String[] requestedFields = {
            "bibrefPosition",
            "bibrefSource",
            "authorCoauthorNormalized",
            "defName",
            "journalName",
            "volume",
            "number",
            "publishedYear"
        };
        FieldRequest[] fieldRequests = new FieldRequest[requestedFields.length];
        for (int i = 0; i < requestedFields.length; i++) {
            fieldRequests[i] = new FieldRequest(requestedFields[i]);
        }

        SearchIndexRequest request = new SearchIndexRequest();
        request.setIndexName(indexName);
        request.setQuery(searchQuery);
        request.setResultsFormat(new ResultsFormat(fieldRequests));

        SearchResults results = (SearchResults) searchService.search(request).getResult();
        return results.getResults();
    }

    /**
     * Verifies a search result against the record: the result must list the same number of
     * normalized authors as the record, every record author's normalized form must be among
     * them, and journal, volume, issue and year must each pass their check.
     *
     * @param simpleMetadata the record being matched
     * @param searchResult   the candidate search result
     * @return {@code true} when all checks pass
     */
    private boolean matchesMetadata(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        List<String> indexedAuthors = getAllValues(searchResult, "authorCoauthorNormalized");
        if (indexedAuthors == null) {
            return false;
        }
        List<AuthorSimpleMetadata> authors = simpleMetadata.getAuthors();
        if (indexedAuthors.size() != authors.size()) {
            return false;
        }
        for (AuthorSimpleMetadata author : authors) {
            if (!indexedAuthors.contains(author.getNormalized())) {
                return false;
            }
        }

        if (!passesSubsequence(simpleMetadata.getJournal(), getAllValues(searchResult, "journalName"))) {
            return false;
        }
        if (!passesExact(simpleMetadata.getVolume(), getAllValues(searchResult, "volume"))) {
            return false;
        }
        if (!passesExact(simpleMetadata.getIssue(), getAllValues(searchResult, "number"))) {
            return false;
        }
        return passesExact(simpleMetadata.getYear(), getAllValues(searchResult, "publishedYear"));
    }

    /**
     * Same as {@code matchesMetadata}, with an additional similarity check of the record's
     * title against the result's "defName" values.
     *
     * @param simpleMetadata the record being matched
     * @param searchResult   the candidate search result
     * @return {@code true} when the metadata check and the title check both pass
     */
    private boolean matchesMetadataWithTitle(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        if (!matchesMetadata(simpleMetadata, searchResult)) {
            return false;
        }
        List<String> indexedTitles = getAllValues(searchResult, "defName");
        return passesSimilarity(simpleMetadata.getTitle(), indexedTitles);
    }

    /**
     * Case-insensitive, whitespace-trimmed equality check of a value against a list of
     * candidates. A missing value or a missing/empty candidate list counts as a pass.
     *
     * @param str  the value to look for; may be {@code null}
     * @param list the candidate values; may be {@code null}
     * @return {@code true} when there is nothing to compare or some candidate equals the value
     */
    private boolean passesExact(String str, List<String> list) {
        boolean nothingToCompare = str == null || list == null || list.isEmpty();
        if (nothingToCompare) {
            return true;
        }
        String wanted = str.toLowerCase(Locale.ENGLISH).trim();
        for (String candidate : list) {
            String normalizedCandidate = candidate.toLowerCase(Locale.ENGLISH).trim();
            if (normalizedCandidate.equals(wanted)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks a value against a list of candidates using
     * {@code AbbreviationDirectory.checkIfSubsequence} in both directions, after reducing both
     * sides to lower-case letters and ASCII digits. A missing value or a missing/empty
     * candidate list counts as a pass.
     *
     * @param str  the value to look for; may be {@code null}
     * @param list the candidate values; may be {@code null}
     * @return {@code true} when there is nothing to compare or some candidate passes the
     *         subsequence check in either direction
     */
    private boolean passesSubsequence(String str, List<String> list) {
        boolean nothingToCompare = str == null || list == null || list.isEmpty();
        if (nothingToCompare) {
            return true;
        }
        String key = str.replaceAll("[^\\p{L}0-9]++", "").toLowerCase(Locale.ENGLISH).trim();
        for (String candidate : list) {
            String candidateKey = candidate.replaceAll("[^\\p{L}0-9]++", "").toLowerCase(Locale.ENGLISH).trim();
            if (AbbreviationDirectory.checkIfSubsequence(key, candidateKey)) {
                return true;
            }
            if (AbbreviationDirectory.checkIfSubsequence(candidateKey, key)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Fuzzy check of a value against a list of candidates. Both sides are reduced to
     * lower-case letters and ASCII digits; a candidate passes when
     * {@code AbbreviationDirectory.checkIfSubsequence} accepts the pair in either direction,
     * or when both reduced strings are longer than 20 characters and their Levenshtein
     * distance is at most 5. A missing value or a missing/empty candidate list counts as a
     * pass.
     *
     * @param str  the value to look for; may be {@code null}
     * @param list the candidate values; may be {@code null}
     * @return {@code true} when there is nothing to compare or some candidate is similar enough
     */
    private boolean passesSimilarity(String str, List<String> list) {
        boolean nothingToCompare = str == null || list == null || list.isEmpty();
        if (nothingToCompare) {
            return true;
        }
        String key = str.replaceAll("[^\\p{L}0-9]++", "").toLowerCase(Locale.ENGLISH).trim();
        for (String candidate : list) {
            String candidateKey = candidate.replaceAll("[^\\p{L}0-9]++", "").toLowerCase(Locale.ENGLISH).trim();
            boolean subsequenceMatch = AbbreviationDirectory.checkIfSubsequence(key, candidateKey)
                    || AbbreviationDirectory.checkIfSubsequence(candidateKey, key);
            if (subsequenceMatch) {
                return true;
            }
            // The edit-distance test is only applied to long strings.
            boolean bothLong = key.length() > 20 && candidateKey.length() > 20;
            if (bothLong && StringUtils.getLevenshteinDistance(candidateKey, key) <= 5) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the search result's "bibrefPosition" field parses to exactly 0.
     *
     * @param searchResult the result to inspect
     * @return {@code true} only when the field is present, numeric and equal to 0
     */
    private boolean isDocResult(SearchResult searchResult) {
        String rawPosition = getOneValue(searchResult, "bibrefPosition");
        if (rawPosition == null) {
            return false;
        }
        try {
            return Integer.parseInt(rawPosition) == 0;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Tells whether the search result has a "bibrefSource" value and a "bibrefPosition" value
     * that parses to a number greater than 0.
     *
     * @param searchResult the result to inspect
     * @return {@code true} only when both fields are present and the position is a positive
     *         integer
     */
    private boolean isRefResult(SearchResult searchResult) {
        String source = getOneValue(searchResult, "bibrefSource");
        String rawPosition = getOneValue(searchResult, "bibrefPosition");
        if (source == null || rawPosition == null) {
            return false;
        }
        try {
            return Integer.parseInt(rawPosition) > 0;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Returns the first value of the first result field with the given name.
     *
     * @param searchResult the result whose fields are scanned
     * @param str          the field name to look for
     * @return the first value, or {@code null} when the field is absent or has no values
     */
    private String getOneValue(SearchResult searchResult, String str) {
        for (ResultField field : searchResult.getFields()) {
            if (!field.getName().equals(str)) {
                continue;
            }
            // Only the first field with this name is considered.
            String[] values = field.getValues();
            boolean hasValue = values != null && values.length != 0;
            return hasValue ? values[0] : null;
        }
        return null;
    }

    /**
     * Returns all values of the first result field with the given name, as a fixed-size list
     * view of the field's value array.
     *
     * @param searchResult the result whose fields are scanned
     * @param str          the field name to look for
     * @return the values, or {@code null} when the field is absent or has no values
     */
    private List<String> getAllValues(SearchResult searchResult, String str) {
        for (ResultField field : searchResult.getFields()) {
            if (!field.getName().equals(str)) {
                continue;
            }
            // Only the first field with this name is considered.
            String[] values = field.getValues();
            boolean hasValue = values != null && values.length != 0;
            return hasValue ? Arrays.asList(values) : null;
        }
        return null;
    }

    /**
     * Sets the name of the search index that queries are sent to.
     *
     * @param str the index name
     */
    public void setIndexName(String str) {
        indexName = str;
    }

    /**
     * Sets the search service used to execute queries.
     *
     * @param iSearchService the search service
     */
    public void setSearchService(ISearchService iSearchService) {
        searchService = iSearchService;
    }
}
