package pl.edu.icm.yadda.tools.bibref;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.ceon.search.model.query.SearchOperator;
import pl.edu.icm.ceon.search.model.query.SearchQuery;
import pl.edu.icm.ceon.search.model.query.criteria.BooleanCriterion;
import pl.edu.icm.ceon.search.model.query.criteria.FieldCriterion;
import pl.edu.icm.ceon.search.model.query.criteria.FieldRangeCriterion;
import pl.edu.icm.ceon.search.model.searching.FieldRequest;
import pl.edu.icm.ceon.search.model.searching.ResultField;
import pl.edu.icm.ceon.search.model.searching.ResultsFormat;
import pl.edu.icm.ceon.search.model.searching.SearchResult;
import pl.edu.icm.yadda.service.search.query.additional.AdditionalSearchParameter;
import pl.edu.icm.yadda.service2.exception.ServiceException;
import pl.edu.icm.yadda.service2.search.ISearchFacade;
import pl.edu.icm.yadda.service2.search.SearchIndexRequest;
import pl.edu.icm.yadda.tools.abbr.AbbreviationDirectory;
import pl.edu.icm.yadda.tools.bibref.model.AuthorSimpleMetadata;
import pl.edu.icm.yadda.tools.bibref.model.BibReferenceTriple;
import pl.edu.icm.yadda.tools.bibref.model.SimpleMetadata;
import pl.edu.icm.yadda.tools.mdi.MetadataIndexConstants;

/* loaded from: input_file:WEB-INF/lib/yadda-content-4.1.3-polindex-SNAPSHOT.jar:pl/edu/icm/yadda/tools/bibref/MetadataBibReferenceMatcher.class */
/**
 * {@link BibReferenceMatcher} that resolves a parsed bibliographic reference by querying a
 * search index and comparing the returned fields with the reference metadata.
 *
 * <p>Two strategies are combined: a search by authors, journal hash and year, and a broader
 * search by authors and year whose results must additionally pass a title comparison. Only
 * index entries whose {@code bibrefPosition} equals {@code 0} are accepted as matches, and an
 * entry is never matched against itself.
 *
 * <p>The index name and the search facade must be injected through the setters before use.
 */
public class MetadataBibReferenceMatcher implements BibReferenceMatcher {
    private static final Logger log = LoggerFactory.getLogger(MetadataBibReferenceMatcher.class);

    private static final String FIELD_JOURNAL_HASH = "journalHash";
    private static final String FIELD_YEAR = "year";
    private static final String FIELD_TITLE = "title";
    private static final String FIELD_VOLUME = "volume";
    private static final String FIELD_BIBREF_POSITION = "bibrefPosition";
    private static final String FIELD_BIBREF_SOURCE = "bibrefSource";

    /** Everything that is neither a Unicode letter nor an ASCII digit; stripped before fuzzy comparison. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{L}0-9]++");

    /** Both titles must be longer than this before the edit-distance comparison is tried. */
    private static final int MIN_LENGTH_FOR_EDIT_DISTANCE = 20;

    /** Largest Levenshtein distance at which two sufficiently long titles still count as equal. */
    private static final int MAX_TITLE_EDIT_DISTANCE = 5;

    private String indexName;
    private ISearchFacade searchFacade;

    /**
     * Finds identifiers of indexed documents that the given reference appears to cite.
     *
     * @param simpleMetadata metadata of the reference to resolve
     * @return identifiers of matching documents, without duplicates; empty when nothing matched
     */
    @Override
    public List<String> matchBibReferenes(SimpleMetadata simpleMetadata) {
        Set<BibReferenceTriple> matches = matchReferencesByAuthorJournalYear(simpleMetadata);
        matches.addAll(matchReferencesByAuthorYear(simpleMetadata));
        log.debug("For reference {} found {} matches.", simpleMetadata.getDocId(), matches.size());

        List<String> referencedIds = new ArrayList<String>(matches.size());
        for (BibReferenceTriple match : matches) {
            referencedIds.add(match.getBibReferenceId());
        }
        return referencedIds;
    }

    /** Matches using the authors + journal + year search; candidates are checked without the title. */
    private Set<BibReferenceTriple> matchReferencesByAuthorJournalYear(SimpleMetadata simpleMetadata) {
        List<SearchResult> candidates = searchByAuthorJournalYear(simpleMetadata, true, false);
        log.debug("Reference search by authors-journal-year, found {} potential results", candidates.size());
        return collectMatches(simpleMetadata, candidates, false,
                "MetadataBibReferenceMatcher: referenced document found by authors, journal, year: {}");
    }

    /** Matches using the authors + year search; candidates must also pass the title comparison. */
    private Set<BibReferenceTriple> matchReferencesByAuthorYear(SimpleMetadata simpleMetadata) {
        List<SearchResult> candidates = searchByAuthorYear(simpleMetadata, true, false);
        log.debug("Reference search by authors-year, found {} potential results", candidates.size());
        return collectMatches(simpleMetadata, candidates, true,
                "MetadataBibReferenceMatcher: referenced document found by authors, year: {}");
    }

    /**
     * Filters search candidates down to accepted matches.
     *
     * @param simpleMetadata    the reference being resolved
     * @param candidates        raw search results
     * @param requireTitleMatch whether the title comparison is applied on top of the basic checks
     * @param foundLogFormat    debug message (with one placeholder) logged for every accepted match
     * @return a new mutable set of accepted matches
     */
    private Set<BibReferenceTriple> collectMatches(SimpleMetadata simpleMetadata, List<SearchResult> candidates,
            boolean requireTitleMatch, String foundLogFormat) {
        Set<BibReferenceTriple> matches = new HashSet<BibReferenceTriple>();
        for (SearchResult candidate : candidates) {
            if (!isDocResult(candidate)) {
                continue;
            }
            boolean metadataMatches = requireTitleMatch
                    ? matchesMetadataWithTitle(simpleMetadata, candidate)
                    : matchesMetadata(simpleMetadata, candidate);
            if (!metadataMatches) {
                continue;
            }
            BibReferenceTriple triple = new BibReferenceTriple(
                    simpleMetadata.getDocId(), simpleMetadata.getPosition(), candidate.getDocId());
            // A document must not be reported as citing itself.
            if (triple.getDocumentId().equals(triple.getBibReferenceId())) {
                continue;
            }
            matches.add(triple);
            log.debug(foundLogFormat, triple);
        }
        return matches;
    }

    /**
     * Searches the index by journal hash, year and all author surnames.
     *
     * @return search results, or an empty list when journal, year or authors are missing
     * @throws IllegalArgumentException if both {@code includeDocuments} and
     *                                  {@code includeReferences} are {@code false}
     */
    private List<SearchResult> searchByAuthorJournalYear(SimpleMetadata simpleMetadata, boolean includeDocuments,
            boolean includeReferences) {
        if (simpleMetadata.getJournal() == null || lacksYearOrAuthors(simpleMetadata)) {
            return new ArrayList<SearchResult>();
        }
        BooleanCriterion conjunction = new BooleanCriterion();
        conjunction.setOperator(SearchOperator.AND);
        conjunction.addCriterion(new FieldCriterion(FIELD_JOURNAL_HASH, simpleMetadata.getJournalHash()));
        addYearPositionAndAuthors(conjunction, simpleMetadata, includeDocuments, includeReferences);
        return searchByQuery(asQuery(conjunction));
    }

    /**
     * Searches the index by year and all author surnames.
     *
     * @return search results, or an empty list when year or authors are missing
     * @throws IllegalArgumentException if both {@code includeDocuments} and
     *                                  {@code includeReferences} are {@code false}
     */
    private List<SearchResult> searchByAuthorYear(SimpleMetadata simpleMetadata, boolean includeDocuments,
            boolean includeReferences) {
        if (lacksYearOrAuthors(simpleMetadata)) {
            return new ArrayList<SearchResult>();
        }
        BooleanCriterion conjunction = new BooleanCriterion();
        conjunction.setOperator(SearchOperator.AND);
        addYearPositionAndAuthors(conjunction, simpleMetadata, includeDocuments, includeReferences);
        return searchByQuery(asQuery(conjunction));
    }

    /** Tells whether the metadata is missing the year or has no authors, making a search impossible. */
    private static boolean lacksYearOrAuthors(SimpleMetadata simpleMetadata) {
        return simpleMetadata.getYear() == null
                || simpleMetadata.getAuthors() == null
                || simpleMetadata.getAuthors().isEmpty();
    }

    /** Appends the criteria shared by both searches: year, position filter and author surnames. */
    private static void addYearPositionAndAuthors(BooleanCriterion conjunction, SimpleMetadata simpleMetadata,
            boolean includeDocuments, boolean includeReferences) {
        conjunction.addCriterion(new FieldCriterion(FIELD_YEAR, simpleMetadata.getYear()));
        addPositionCriterion(conjunction, includeDocuments, includeReferences);
        for (AuthorSimpleMetadata author : simpleMetadata.getAuthors()) {
            if (author.getSurname() != null) {
                conjunction.addCriterion(
                        new FieldCriterion(MetadataIndexConstants.F_AUTHOR_SURNAME, author.getSurname()));
            }
        }
    }

    /**
     * Restricts the query by {@code bibrefPosition}: exactly {@code 0} for documents only, a range
     * starting at {@code 1} for references only, and no restriction when both kinds are wanted.
     *
     * @throws IllegalArgumentException if neither kind is requested
     */
    private static void addPositionCriterion(BooleanCriterion conjunction, boolean includeDocuments,
            boolean includeReferences) {
        if (includeDocuments && includeReferences) {
            return;
        }
        if (includeDocuments) {
            conjunction.addCriterion(new FieldCriterion(FIELD_BIBREF_POSITION, "0"));
        } else if (includeReferences) {
            conjunction.addCriterion(new FieldRangeCriterion(FIELD_BIBREF_POSITION, "1", null));
        } else {
            throw new IllegalArgumentException("Requested to search mdi index but excluding both: documents and references. Such search is pointless.");
        }
    }

    /** Wraps a single criterion in a new query. */
    private static SearchQuery asQuery(BooleanCriterion criterion) {
        SearchQuery query = new SearchQuery();
        query.addCriterion(criterion);
        return query;
    }

    /**
     * Runs the query against the configured index, requesting the fields needed for matching.
     *
     * @return the search results, or an empty list if the search service failed (the failure is logged)
     */
    private List<SearchResult> searchByQuery(SearchQuery searchQuery) {
        ResultsFormat resultsFormat = new ResultsFormat(
                new FieldRequest(FIELD_BIBREF_POSITION),
                new FieldRequest(FIELD_BIBREF_SOURCE),
                new FieldRequest(MetadataIndexConstants.F_AUTHOR_NORM),
                new FieldRequest(FIELD_TITLE),
                new FieldRequest(MetadataIndexConstants.F_JOURNAL_TITLE),
                new FieldRequest(FIELD_VOLUME),
                new FieldRequest(MetadataIndexConstants.F_ISSUE),
                new FieldRequest(FIELD_YEAR));
        try {
            return this.searchFacade
                    .search(this.indexName, searchQuery, resultsFormat, new AdditionalSearchParameter[0])
                    .getResults();
        } catch (ServiceException e) {
            // Matching is best-effort: report the failure with its cause and carry on with no candidates.
            log.error("Error searching index {}.", this.indexName, e);
            return new ArrayList<SearchResult>();
        }
    }

    /**
     * Checks a candidate against the reference: the indexed normalized authors must be the same in
     * number and contain every reference author, and journal, volume, issue and year must each
     * agree whenever both sides provide a value.
     */
    private boolean matchesMetadata(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        List<String> indexedAuthors = getAllValues(searchResult, MetadataIndexConstants.F_AUTHOR_NORM);
        if (indexedAuthors == null || indexedAuthors.size() != simpleMetadata.getAuthors().size()) {
            return false;
        }
        for (AuthorSimpleMetadata author : simpleMetadata.getAuthors()) {
            if (!indexedAuthors.contains(author.getNormalized())) {
                return false;
            }
        }
        return passesSubsequence(simpleMetadata.getJournal(), getAllValues(searchResult, MetadataIndexConstants.F_JOURNAL_TITLE))
                && passesExact(simpleMetadata.getVolume(), getAllValues(searchResult, FIELD_VOLUME))
                && passesExact(simpleMetadata.getIssue(), getAllValues(searchResult, MetadataIndexConstants.F_ISSUE))
                && passesExact(simpleMetadata.getYear(), getAllValues(searchResult, FIELD_YEAR));
    }

    /** Same as {@link #matchesMetadata} with an additional fuzzy comparison of the title. */
    private boolean matchesMetadataWithTitle(SimpleMetadata simpleMetadata, SearchResult searchResult) {
        return matchesMetadata(simpleMetadata, searchResult)
                && passesSimilarity(simpleMetadata.getTitle(), getAllValues(searchResult, FIELD_TITLE));
    }

    /**
     * Case-insensitive, whitespace-trimmed equality against any of the indexed values.
     *
     * @return {@code true} if a value is equal, or if either side has nothing to compare
     */
    private boolean passesExact(String str, List<String> list) {
        if (nothingToCompare(str, list)) {
            return true;
        }
        String expected = str.toLowerCase(Locale.ENGLISH).trim();
        for (String value : list) {
            if (value != null && expected.equals(value.toLowerCase(Locale.ENGLISH).trim())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Accepts when, after stripping non-alphanumerics and lower-casing, the expected text and an
     * indexed value pass {@link AbbreviationDirectory#checkIfSubsequence} in either direction.
     *
     * @return {@code true} on such a match, or if either side has nothing to compare
     */
    private boolean passesSubsequence(String str, List<String> list) {
        if (nothingToCompare(str, list)) {
            return true;
        }
        String expected = squash(str);
        for (String value : list) {
            if (value != null && subsequenceEitherWay(expected, squash(value))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Like {@link #passesSubsequence}, but additionally accepts two texts that are both longer than
     * {@value #MIN_LENGTH_FOR_EDIT_DISTANCE} characters and whose Levenshtein distance is at most
     * {@value #MAX_TITLE_EDIT_DISTANCE}.
     *
     * @return {@code true} on such a match, or if either side has nothing to compare
     */
    private boolean passesSimilarity(String str, List<String> list) {
        if (nothingToCompare(str, list)) {
            return true;
        }
        String expected = squash(str);
        for (String value : list) {
            if (value == null) {
                continue;
            }
            String actual = squash(value);
            if (subsequenceEitherWay(expected, actual)) {
                return true;
            }
            if (expected.length() > MIN_LENGTH_FOR_EDIT_DISTANCE
                    && actual.length() > MIN_LENGTH_FOR_EDIT_DISTANCE
                    && StringUtils.getLevenshteinDistance(actual, expected) <= MAX_TITLE_EDIT_DISTANCE) {
                return true;
            }
        }
        return false;
    }

    /** A missing value on either side is treated as "no constraint" by all {@code passes*} checks. */
    private static boolean nothingToCompare(String expected, List<String> actualValues) {
        return expected == null || actualValues == null || actualValues.isEmpty();
    }

    /**
     * Removes every character that is not a letter or ASCII digit and lower-cases the rest.
     * No trimming is needed afterwards because whitespace is removed by the pattern itself.
     */
    private static String squash(String text) {
        return NON_ALPHANUMERIC.matcher(text).replaceAll("").toLowerCase(Locale.ENGLISH);
    }

    /** Applies the subsequence check in both directions, since either text may be the shorter form. */
    private static boolean subsequenceEitherWay(String first, String second) {
        return AbbreviationDirectory.checkIfSubsequence(first, second)
                || AbbreviationDirectory.checkIfSubsequence(second, first);
    }

    /**
     * Tells whether the result has a {@code bibrefPosition} of {@code 0}.
     * A missing or non-numeric position yields {@code false}; the latter is logged as a warning.
     */
    private boolean isDocResult(SearchResult searchResult) {
        String position = getOneValue(searchResult, FIELD_BIBREF_POSITION);
        if (position == null) {
            return false;
        }
        try {
            return Integer.parseInt(position) == 0;
        } catch (NumberFormatException e) {
            log.warn("MDI Index has invalid entry for id={}: position={}", searchResult.getDocId(), position);
            return false;
        }
    }

    /** Returns the first value of the named field, or {@code null} if the field is absent or empty. */
    private String getOneValue(SearchResult searchResult, String str) {
        List<String> values = getAllValues(searchResult, str);
        return values == null ? null : values.get(0);
    }

    /**
     * Returns all values of the first field with the given name.
     *
     * @return a fixed-size list view of the values, or {@code null} if the field is absent or has no values
     */
    private List<String> getAllValues(SearchResult searchResult, String str) {
        for (ResultField field : searchResult.getFields()) {
            // Compare from the known non-null side so a nameless field cannot cause a failure.
            if (str.equals(field.getName())) {
                String[] values = field.getValues();
                if (values == null || values.length == 0) {
                    return null;
                }
                return Arrays.asList(values);
            }
        }
        return null;
    }

    /** Sets the name of the search index to query. */
    public void setIndexName(String indexName) {
        this.indexName = indexName;
    }

    /** Sets the facade used to execute searches. */
    public void setSearchFacade(ISearchFacade searchFacade) {
        this.searchFacade = searchFacade;
    }
}
