package pl.edu.icm.yadda.analysis.bibref.manual;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.yadda.analysis.bibref.manual.search.SearchStrategy;
import pl.edu.icm.yadda.bwmeta.model.YConstants;
import pl.edu.icm.yadda.client.indexing.IndexFields;
import pl.edu.icm.yadda.metadata.transformers.TransformationException;
import pl.edu.icm.yadda.service.search.searching.ResultField;
import pl.edu.icm.yadda.service.search.searching.SearchResult;
import pl.edu.icm.yadda.tools.abbr.AbbreviationDirectory;
import pl.edu.icm.yadda.tools.bibref.model.AuthorSimpleMetadata;
import pl.edu.icm.yadda.tools.bibref.model.BibReferenceTriple;
import pl.edu.icm.yadda.tools.bibref.model.DocSimpleMetadata;
import pl.edu.icm.yadda.tools.bibref.model.SimpleMetadata;

/* loaded from: input_file:WEB-INF/lib/yadda-analysis-impl-1.10.0.jar:pl/edu/icm/yadda/analysis/bibref/manual/MetadataBibReferenceMatcher.class */
/**
 * {@link BibReferenceMatcher} that links documents and bibliographic references by comparing
 * metadata (authors, journal, volume, issue, year and optionally title) against the results
 * returned by a {@link SearchStrategy}.
 *
 * <p>A {@link SimpleMetadata} whose position is greater than zero is treated as a reference and is
 * matched against search results whose {@code bibrefPosition} field is {@code 0}. A
 * {@link SimpleMetadata} whose position is zero is matched against search results that carry a
 * {@code bibrefSource} field and a positive {@code bibrefPosition}.
 *
 * <p>The search strategy must be injected with {@link #setSearchStrategy(SearchStrategy)} before
 * any matching method is called.
 */
public class MetadataBibReferenceMatcher implements BibReferenceMatcher {
    private static final Logger log = LoggerFactory.getLogger(MetadataBibReferenceMatcher.class);

    /** Name of the result field holding the id of the document that contains a reference. */
    private static final String FIELD_BIBREF_SOURCE = "bibrefSource";
    /** Name of the result field holding the position of a reference within its source. */
    private static final String FIELD_BIBREF_POSITION = "bibrefPosition";
    private static final String FIELD_VOLUME = "volume";
    private static final String FIELD_ISSUE = "number";

    private static final String CRITERIA_AUTHOR_JOURNAL_YEAR = "authors, journal, year";
    private static final String CRITERIA_AUTHOR_YEAR = "authors, year";

    /** Everything that is neither a Unicode letter nor an ASCII digit; compiled once and reused. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{L}0-9]++");

    /** Both normalized titles must be longer than this for the edit-distance comparison to apply. */
    private static final int MIN_LENGTH_FOR_EDIT_DISTANCE = 20;
    /** Largest Levenshtein distance at which two normalized titles still count as similar. */
    private static final int MAX_EDIT_DISTANCE = 5;

    private final MetadataReader metadataReader = new MetadataReader(YConstants.EXT_SCHEMA_EUDML);
    private SearchStrategy searchStrategy;

    /**
     * Reads the document passed in {@code str} and, for each of its references, tries to find the
     * referenced document. The author/journal/year search is tried first; the author/year search
     * (which additionally compares titles) is used only when the first one yields nothing. At most
     * one triple is contributed per reference.
     *
     * @param str input handed to {@code MetadataReader.readFromNLMFile}
     * @return the matched triples, or {@code null} when the reader returns no metadata
     * @throws TransformationException propagated from the metadata reader
     */
    @Override // pl.edu.icm.yadda.analysis.bibref.manual.BibReferenceMatcher
    public Set<BibReferenceTriple> matchBibReferencedIds(String str) throws TransformationException {
        DocSimpleMetadata document = this.metadataReader.readFromNLMFile(str);
        if (document == null) {
            return null;
        }
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        for (SimpleMetadata reference : document.getReferences()) {
            Set<BibReferenceTriple> candidates = matchByAuthorJournalYear(reference);
            if (candidates.isEmpty()) {
                // Fall back to the search without journal, which also compares titles.
                candidates = matchByAuthorYear(reference);
            }
            if (!candidates.isEmpty()) {
                // Only one candidate is kept for every reference.
                matches.add(candidates.iterator().next());
            }
        }
        return matches;
    }

    /**
     * Reads the document passed in {@code str} and collects the triples produced for it by both
     * the author/journal/year search and the author/year search.
     *
     * @param str input handed to {@code MetadataReader.readFromNLMFile}
     * @return the union of matches from both searches, or {@code null} when the reader returns no
     *         metadata
     * @throws TransformationException propagated from the metadata reader
     */
    @Override // pl.edu.icm.yadda.analysis.bibref.manual.BibReferenceMatcher
    public Set<BibReferenceTriple> matchBibReferencingIds(String str) throws TransformationException {
        DocSimpleMetadata document = this.metadataReader.readFromNLMFile(str);
        if (document == null) {
            return null;
        }
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        matches.addAll(matchByAuthorJournalYear(document));
        matches.addAll(matchByAuthorYear(document));
        return matches;
    }

    /** Matches {@code simpleMetadata} against the results of the author/journal/year search. */
    private Set<BibReferenceTriple> matchByAuthorJournalYear(SimpleMetadata simpleMetadata) {
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        for (SearchResult searchResult : this.searchStrategy.searchByAuthorJournalYear(simpleMetadata)) {
            collectMatch(simpleMetadata, searchResult, false, CRITERIA_AUTHOR_JOURNAL_YEAR, matches);
        }
        return matches;
    }

    /**
     * Matches {@code simpleMetadata} against the results of the author/year search; because this
     * search is broader, the title is compared as well.
     */
    private Set<BibReferenceTriple> matchByAuthorYear(SimpleMetadata simpleMetadata) {
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        for (SearchResult searchResult : this.searchStrategy.searchByAuthorYear(simpleMetadata)) {
            collectMatch(simpleMetadata, searchResult, true, CRITERIA_AUTHOR_YEAR, matches);
        }
        return matches;
    }

    /**
     * Checks one search result against {@code metadata} and, when it matches, adds the resulting
     * triple to {@code matches}. Triples whose document id equals their reference id are dropped.
     *
     * @param metadata     metadata being matched
     * @param searchResult candidate returned by the search strategy
     * @param requireTitle whether the title has to be compared in addition to the other fields
     * @param criteria     human-readable name of the search, used only in the debug log
     * @param matches      accumulator receiving accepted triples
     */
    private void collectMatch(SimpleMetadata metadata, SearchResult searchResult, boolean requireTitle,
            String criteria, Set<BibReferenceTriple> matches) {
        int position = metadata.getPosition();
        if (position > 0) {
            if (isDocResult(searchResult) && matches(metadata, searchResult, requireTitle)) {
                BibReferenceTriple triple =
                        new BibReferenceTriple(metadata.getDocId(), position, searchResult.getDocId());
                if (!isSelfReference(triple)) {
                    matches.add(triple);
                    log.debug("MetadataBibReferenceMatcher: referenced document found by {}: {}", criteria, triple);
                }
            }
        } else if (position == 0) {
            if (isRefResult(searchResult) && matches(metadata, searchResult, requireTitle)) {
                Integer referencePosition = readPosition(searchResult);
                if (referencePosition == null) {
                    // Not expected after isRefResult succeeded; skip the result instead of failing.
                    log.debug("MetadataBibReferenceMatcher: skipping search result without numeric {}",
                            FIELD_BIBREF_POSITION);
                    return;
                }
                BibReferenceTriple triple = new BibReferenceTriple(
                        getOneValue(searchResult, FIELD_BIBREF_SOURCE), referencePosition.intValue(), metadata.getDocId());
                if (!isSelfReference(triple)) {
                    matches.add(triple);
                    log.debug("MetadataBibReferenceMatcher: referencing document found by {}: {}", criteria, triple);
                }
            }
        }
    }

    /** Returns whether the triple's document id equals its bibliographic reference id. */
    private boolean isSelfReference(BibReferenceTriple triple) {
        return triple.getDocumentId().equals(triple.getBibReferenceId());
    }

    /** Dispatches to the metadata comparison with or without the title check. */
    private boolean matches(SimpleMetadata metadata, SearchResult searchResult, boolean requireTitle) {
        return requireTitle
                ? matchesMetadataWithTitle(metadata, searchResult)
                : matchesMetadata(metadata, searchResult);
    }

    /**
     * Compares authors, journal, volume, issue and year. The result must list exactly as many
     * supernormalized authors as the metadata and contain each of them; the remaining fields are
     * only compared when both sides provide a value.
     */
    private boolean matchesMetadata(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        List<String> indexedAuthors = getAllValues(searchResult, MyIndexFields.F_AUTHOR_COAUTHOR_SUPERNORMALIZED);
        if (indexedAuthors == null || indexedAuthors.size() != simpleMetadata.getAuthors().size()) {
            return false;
        }
        for (AuthorSimpleMetadata author : simpleMetadata.getAuthors()) {
            String supernormalized = new AuthorSimpleMetadataSupernormalized(author).getSupernormalized();
            if (!indexedAuthors.contains(supernormalized)) {
                return false;
            }
        }
        return passesSubsequence(simpleMetadata.getJournal(), getAllValues(searchResult, IndexFields.F_JOURNAL_NAME))
                && passesExact(simpleMetadata.getVolume(), getAllValues(searchResult, FIELD_VOLUME))
                && passesExact(simpleMetadata.getIssue(), getAllValues(searchResult, FIELD_ISSUE))
                && passesExact(simpleMetadata.getYear(), getAllValues(searchResult, IndexFields.F_DATE_PUBLISHED_YEAR));
    }

    /** Same as {@link #matchesMetadata} with an additional title similarity check. */
    private boolean matchesMetadataWithTitle(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        return matchesMetadata(simpleMetadata, searchResult)
                && passesSimilarity(simpleMetadata.getTitle(), getAllValues(searchResult, IndexFields.F_DEF_NAME));
    }

    /**
     * Case-insensitive, whitespace-trimmed equality against any of the candidates.
     *
     * @return {@code true} when either side is missing (nothing to compare) or a candidate is equal
     */
    private boolean passesExact(String str, List<String> list) {
        if (str == null || list == null || list.isEmpty()) {
            return true;
        }
        String expected = str.toLowerCase(Locale.ENGLISH).trim();
        for (String candidate : list) {
            // Null entries cannot match anything; skip them rather than throw.
            if (candidate != null && expected.equals(candidate.toLowerCase(Locale.ENGLISH).trim())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the normalized value and any normalized candidate pass
     * {@code AbbreviationDirectory.checkIfSubsequence} in either argument order.
     *
     * @return {@code true} when either side is missing (nothing to compare) or a candidate passes
     */
    private boolean passesSubsequence(String str, List<String> list) {
        if (str == null || list == null || list.isEmpty()) {
            return true;
        }
        String expected = normalize(str);
        for (String candidate : list) {
            if (candidate != null && isSubsequenceEitherWay(expected, normalize(candidate))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Like {@link #passesSubsequence}, and additionally accepts a candidate when both normalized
     * strings are longer than {@value #MIN_LENGTH_FOR_EDIT_DISTANCE} characters and their
     * Levenshtein distance is at most {@value #MAX_EDIT_DISTANCE}.
     *
     * @return {@code true} when either side is missing (nothing to compare) or a candidate passes
     */
    private boolean passesSimilarity(String str, List<String> list) {
        if (str == null || list == null || list.isEmpty()) {
            return true;
        }
        String expected = normalize(str);
        for (String candidate : list) {
            if (candidate == null) {
                continue;
            }
            String actual = normalize(candidate);
            if (isSubsequenceEitherWay(expected, actual)) {
                return true;
            }
            if (expected.length() > MIN_LENGTH_FOR_EDIT_DISTANCE
                    && actual.length() > MIN_LENGTH_FOR_EDIT_DISTANCE
                    && StringUtils.getLevenshteinDistance(actual, expected) <= MAX_EDIT_DISTANCE) {
                return true;
            }
        }
        return false;
    }

    /** Strips everything except letters and ASCII digits, lower-cases and trims the value. */
    private static String normalize(String value) {
        return NON_ALPHANUMERIC.matcher(value).replaceAll("").toLowerCase(Locale.ENGLISH).trim();
    }

    /** Applies {@code AbbreviationDirectory.checkIfSubsequence} in both argument orders. */
    private static boolean isSubsequenceEitherWay(String first, String second) {
        return AbbreviationDirectory.checkIfSubsequence(first, second)
                || AbbreviationDirectory.checkIfSubsequence(second, first);
    }

    /** A result counts as a document when its {@code bibrefPosition} field parses to zero. */
    private boolean isDocResult(SearchResult searchResult) {
        Integer position = readPosition(searchResult);
        return position != null && position.intValue() == 0;
    }

    /**
     * A result counts as a reference when it has a {@code bibrefSource} value and its
     * {@code bibrefPosition} field parses to a positive number.
     */
    private boolean isRefResult(SearchResult searchResult) {
        if (getOneValue(searchResult, FIELD_BIBREF_SOURCE) == null) {
            return false;
        }
        Integer position = readPosition(searchResult);
        return position != null && position.intValue() > 0;
    }

    /**
     * Reads the {@code bibrefPosition} field as an integer.
     *
     * @return the parsed position, or {@code null} when the field is absent or not a valid integer
     */
    private Integer readPosition(SearchResult searchResult) {
        String raw = getOneValue(searchResult, FIELD_BIBREF_POSITION);
        if (raw == null) {
            return null;
        }
        try {
            return Integer.valueOf(Integer.parseInt(raw));
        } catch (NumberFormatException e) {
            log.debug("MetadataBibReferenceMatcher: non-numeric {} value: {}", FIELD_BIBREF_POSITION, raw);
            return null;
        }
    }

    /**
     * Returns the first value of the first field named {@code str}, or {@code null} when there is
     * no such field or it has no values.
     */
    private String getOneValue(SearchResult searchResult, String str) {
        String[] values = findValues(searchResult, str);
        return values == null ? null : values[0];
    }

    /**
     * Returns all values of the first field named {@code str} as a fixed-size list, or
     * {@code null} when there is no such field or it has no values.
     */
    private List<String> getAllValues(SearchResult searchResult, String str) {
        String[] values = findValues(searchResult, str);
        return values == null ? null : Arrays.asList(values);
    }

    /**
     * Looks up the first field named {@code fieldName}.
     *
     * @return its non-empty value array, or {@code null} when the field is missing or empty
     */
    private String[] findValues(SearchResult searchResult, String fieldName) {
        for (ResultField resultField : searchResult.getFields()) {
            if (resultField.getName().equals(fieldName)) {
                String[] values = resultField.getValues();
                // Only the first field with this name is considered, even when it is empty.
                return (values == null || values.length == 0) ? null : values;
            }
        }
        return null;
    }

    /**
     * Injects the strategy used to query the search index.
     *
     * @param searchStrategy strategy used by all matching methods
     */
    public void setSearchStrategy(SearchStrategy searchStrategy) {
        this.searchStrategy = searchStrategy;
    }
}
