package pl.edu.icm.synat.content.reference;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.util.Assert;
import pl.edu.icm.synat.api.services.index.IndexService;
import pl.edu.icm.synat.api.services.index.fulltext.document.FulltextIndexDocument;
import pl.edu.icm.synat.api.services.index.fulltext.query.FulltextSearchQuery;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.SearchCriterion;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.SearchOperator;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.BooleanCriterion;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.FieldCriterion;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.FieldRequest;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.ResultsFormat;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResult;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResults;
import pl.edu.icm.synat.api.services.index.fulltext.result.ResultField;
import pl.edu.icm.synat.api.services.index.fulltext.schema.FulltextIndexSchema;
import pl.edu.icm.synat.content.abbreviations.AbbreviationDirectory;
import pl.edu.icm.synat.content.authors.AuthorParser;
import pl.edu.icm.synat.content.bibmeta.model.PublicationAuthor;
import pl.edu.icm.synat.content.bibmeta.model.PublicationId;
import pl.edu.icm.synat.content.bibmeta.model.PublicationMetadata;
import pl.edu.icm.synat.content.reference.constants.MetadataIndexConstants;

/* loaded from: input_file:pl/edu/icm/synat/content/reference/MetadataMatcherImpl.class */
public class MetadataMatcherImpl implements MetadataMatcher, InitializingBean {
    /** Index service that is searched for candidate publications. */
    private IndexService<FulltextIndexDocument, FulltextSearchQuery, FulltextSearchResults, FulltextIndexSchema> fulltextIndex;
    /** Turns a search result back into the object placed in the returned match list (via {@code restore}). */
    private MetadataIndexDocumentBuilder documentBuilder;
    /** Provides journal acronyms ({@code toAcronym}) and the {@code isSubsequence} test used when comparing values. */
    private AbbreviationDirectory abbreviationDirectory;
    /** Normalizes publication authors so they can be compared with the indexed normalized-author values. */
    private AuthorParser authorParser;

    /**
     * Verifies the normalized-author field of a search result against the authors of the
     * publication being matched.
     */
    class AuthorMatcher implements ResultMatcher {
        AuthorMatcher() {
        }

        /**
         * @param resultField field of a search result
         * @return {@code true} only for the normalized-author field
         */
        @Override
        public boolean isApplicable(ResultField resultField) {
            return MetadataIndexConstants.F_AUTHOR_NORM.equals(resultField.getName());
        }

        /**
         * Accepts the field when it holds exactly as many values as the publication has authors
         * and the normalized form of every publication author occurs among those values.
         * The comparison is containment-based, so the order of authors is irrelevant.
         *
         * @param publicationMetadata publication being matched; a {@code null} author list is
         *                            treated as an empty one instead of failing
         * @param resultField         normalized-author field of the search result
         * @return {@code false} when the field has no value array, the counts differ, or some
         *         author is missing from the indexed values; {@code true} otherwise
         */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String[] indexedValues = resultField.getValues();
            if (indexedValues == null) {
                return false;
            }
            List<PublicationAuthor> authors = publicationMetadata.getAuthors();
            if (authors == null) {
                // Guard against metadata without an author list rather than throwing an NPE.
                authors = Collections.emptyList();
            }
            if (indexedValues.length != authors.size()) {
                return false;
            }
            List<String> indexedAuthors = Arrays.asList(indexedValues);
            for (PublicationAuthor author : authors) {
                if (!indexedAuthors.contains(authorParser.normalize(author))) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Verifies the journal-title field of a search result against the journal of the
     * publication being matched, using the subsequence test of the enclosing matcher.
     */
    class JournalTitleMatcher implements ResultMatcher {

        /** Handles only the journal-title field. */
        @Override
        public boolean isApplicable(ResultField resultField) {
            final boolean isJournalTitleField = MetadataIndexConstants.F_JOURNAL_TITLE.equals(resultField.getName());
            return isJournalTitleField;
        }

        /** Delegates to {@code passesSubsequence} with the publication's journal and the field values. */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String journal = publicationMetadata.getJournal();
            String[] indexedTitles = resultField.getValues();
            return passesSubsequence(journal, indexedTitles);
        }
    }

    /**
     * Verifies the number field of a search result against the number of the publication
     * being matched, using the subsequence test of the enclosing matcher.
     */
    class NumberMatcher implements ResultMatcher {

        /** Handles only the number field. */
        @Override
        public boolean isApplicable(ResultField resultField) {
            final boolean isNumberField = MetadataIndexConstants.F_NUMBER.equals(resultField.getName());
            return isNumberField;
        }

        /** Delegates to {@code passesSubsequence} with the publication's number and the field values. */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String number = publicationMetadata.getNumber();
            String[] indexedNumbers = resultField.getValues();
            return passesSubsequence(number, indexedNumbers);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Check applied to a single field of a search result when deciding whether the result
     * corresponds to the publication being matched.
     */
    public interface ResultMatcher {
        /**
         * Tells whether this matcher is responsible for the given result field.
         *
         * @param resultField field of a search result
         * @return {@code true} if {@link #matches} should be consulted for this field
         */
        boolean isApplicable(ResultField resultField);

        /**
         * Compares the values of a result field with the given publication metadata.
         *
         * @param publicationMetadata publication being matched
         * @param resultField         field of a search result
         * @return {@code true} if the field does not contradict the publication metadata
         */
        boolean matches(PublicationMetadata publicationMetadata, ResultField resultField);
    }

    /**
     * Verifies the title field of a search result against the title of the publication
     * being matched, using the similarity test of the enclosing matcher.
     */
    class TitleMatcher implements ResultMatcher {

        /** Handles only the title field. */
        @Override
        public boolean isApplicable(ResultField resultField) {
            final boolean isTitleField = MetadataIndexConstants.F_TITLE.equals(resultField.getName());
            return isTitleField;
        }

        /** Delegates to {@code passesSimilarity} with the publication's title and the field values. */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String title = publicationMetadata.getTitle();
            String[] indexedTitles = resultField.getValues();
            return passesSimilarity(title, indexedTitles);
        }
    }

    /**
     * Verifies the volume field of a search result against the volume of the publication
     * being matched, using the subsequence test of the enclosing matcher.
     */
    class VolumeMatcher implements ResultMatcher {

        /** Handles only the volume field. */
        @Override
        public boolean isApplicable(ResultField resultField) {
            final boolean isVolumeField = MetadataIndexConstants.F_VOLUME.equals(resultField.getName());
            return isVolumeField;
        }

        /** Delegates to {@code passesSubsequence} with the publication's volume and the field values. */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String volume = publicationMetadata.getVolume();
            String[] indexedVolumes = resultField.getValues();
            return passesSubsequence(volume, indexedVolumes);
        }
    }

    /**
     * Verifies the year field of a search result against the year of the publication
     * being matched, using the subsequence test of the enclosing matcher.
     */
    class YearMatcher implements ResultMatcher {

        /** Handles only the year field. */
        @Override
        public boolean isApplicable(ResultField resultField) {
            final boolean isYearField = MetadataIndexConstants.F_YEAR.equals(resultField.getName());
            return isYearField;
        }

        /** Delegates to {@code passesSubsequence} with the publication's year and the field values. */
        @Override
        public boolean matches(PublicationMetadata publicationMetadata, ResultField resultField) {
            String year = publicationMetadata.getYear();
            String[] indexedYears = resultField.getValues();
            return passesSubsequence(year, indexedYears);
        }
    }

    /**
     * Verifies that every collaborator of this bean has been injected.
     *
     * <p>The checks are made against the fields themselves; asserting on the property-name
     * string literals would always succeed and never detect a missing dependency.
     *
     * @throws IllegalArgumentException if any of the required properties is {@code null}
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(this.documentBuilder, "documentBuilder property not set");
        Assert.notNull(this.abbreviationDirectory, "abbreviationDirectory property not set");
        Assert.notNull(this.authorParser, "authorParser property not set");
        Assert.notNull(this.fulltextIndex, "fulltextIndex property not set");
    }

    /**
     * Looks up indexed publications corresponding to the given metadata.
     *
     * <p>Three strategies are tried in order and the first non-empty result wins:
     * identifier lookup, then the author/journal criterion, then the author/year criterion
     * (the last one additionally verifies the title).
     *
     * @param publicationMetadata publication to find in the index
     * @return matched publications, or an empty list when no strategy finds anything
     */
    @Override
    public List<PublicationMetadata> match(PublicationMetadata publicationMetadata) {
        List<PublicationMetadata> byIdentifier = matchIds(publicationMetadata);
        if (!byIdentifier.isEmpty()) {
            return byIdentifier;
        }

        SearchCriterion authorJournalCriterion = buildAuthorJournalCriterion(publicationMetadata);
        ResultMatcher[] authorJournalMatchers = {
            new AuthorMatcher(),
            new JournalTitleMatcher(),
            new VolumeMatcher(),
            new NumberMatcher(),
            new YearMatcher()
        };
        List<PublicationMetadata> byAuthorJournal = matchMetadata(authorJournalCriterion, authorJournalMatchers, publicationMetadata);
        if (!byAuthorJournal.isEmpty()) {
            return byAuthorJournal;
        }

        SearchCriterion authorYearCriterion = buildAuthorYearCriterion(publicationMetadata);
        ResultMatcher[] authorYearMatchers = {
            new AuthorMatcher(),
            new TitleMatcher(),
            new JournalTitleMatcher(),
            new VolumeMatcher(),
            new NumberMatcher(),
            new YearMatcher()
        };
        List<PublicationMetadata> byAuthorYear = matchMetadata(authorYearCriterion, authorYearMatchers, publicationMetadata);
        if (byAuthorYear.isEmpty()) {
            return Collections.emptyList();
        }
        return byAuthorYear;
    }

    /**
     * Builds a conjunctive query on year, journal acronym and the surname of every author.
     *
     * @param publicationMetadata publication to build the query for
     * @return the AND criterion, or {@code null} when the year, the journal or the author list
     *         is missing (or the author list is empty)
     */
    protected SearchCriterion buildAuthorJournalCriterion(PublicationMetadata publicationMetadata) {
        if (publicationMetadata.getYear() == null) {
            return null;
        }
        if (publicationMetadata.getJournal() == null) {
            return null;
        }
        List<PublicationAuthor> authors = publicationMetadata.getAuthors();
        if (authors == null || authors.isEmpty()) {
            return null;
        }

        BooleanCriterion conjunction = new BooleanCriterion();
        conjunction.setOperator(SearchOperator.AND);
        conjunction.addCriterion(new FieldCriterion(MetadataIndexConstants.F_YEAR, publicationMetadata.getYear()));
        // The journal is searched by its acronym, not by its full title.
        conjunction.addCriterion(new FieldCriterion(MetadataIndexConstants.F_JOURNAL_HASH, this.abbreviationDirectory.toAcronym(publicationMetadata.getJournal())));
        for (PublicationAuthor author : authors) {
            if (author.getSurname() == null) {
                // Authors without a surname contribute nothing to the query.
                continue;
            }
            conjunction.addCriterion(new FieldCriterion(MetadataIndexConstants.F_AUTHOR_SURNAME, author.getSurname()));
        }
        return conjunction;
    }

    /**
     * Builds a conjunctive query on year and the surname of every author.
     *
     * @param publicationMetadata publication to build the query for
     * @return the AND criterion, or {@code null} when the year or the author list is missing
     *         (or the author list is empty)
     */
    protected SearchCriterion buildAuthorYearCriterion(PublicationMetadata publicationMetadata) {
        if (publicationMetadata.getYear() == null) {
            return null;
        }
        List<PublicationAuthor> authors = publicationMetadata.getAuthors();
        if (authors == null || authors.isEmpty()) {
            return null;
        }

        BooleanCriterion conjunction = new BooleanCriterion();
        conjunction.setOperator(SearchOperator.AND);
        conjunction.addCriterion(new FieldCriterion(MetadataIndexConstants.F_YEAR, publicationMetadata.getYear()));
        for (PublicationAuthor author : authors) {
            if (author.getSurname() == null) {
                // Authors without a surname contribute nothing to the query.
                continue;
            }
            conjunction.addCriterion(new FieldCriterion(MetadataIndexConstants.F_AUTHOR_SURNAME, author.getSurname()));
        }
        return conjunction;
    }

    /**
     * Runs the given criterion against the index and keeps the hits accepted by every
     * applicable matcher.
     *
     * <p>Only the first 100 hits are requested. Duplicates are removed, and the remaining
     * publications are returned in the order in which the index delivered them.
     *
     * @param searchCriterion     query to execute; {@code null} yields an empty result
     * @param resultMatcherArr    matchers used to verify each hit (see {@code itemMatches})
     * @param publicationMetadata publication the hits are verified against
     * @return restored publications of all accepted hits, possibly empty
     */
    public List<PublicationMetadata> matchMetadata(SearchCriterion searchCriterion, ResultMatcher[] resultMatcherArr, PublicationMetadata publicationMetadata) {
        if (searchCriterion == null) {
            return Collections.emptyList();
        }
        ResultsFormat resultsFormat = new ResultsFormat(new FieldRequest[]{new FieldRequest(MetadataIndexConstants.F_AUTHOR_NORM), new FieldRequest(MetadataIndexConstants.F_JOURNAL_TITLE), new FieldRequest(MetadataIndexConstants.F_NUMBER), new FieldRequest(MetadataIndexConstants.F_PAGE_FROM), new FieldRequest(MetadataIndexConstants.F_TITLE), new FieldRequest(MetadataIndexConstants.F_VOLUME), new FieldRequest(MetadataIndexConstants.F_YEAR)});
        FulltextSearchQuery query = new FulltextSearchQuery(0, 100, resultsFormat, new SearchCriterion[]{searchCriterion});
        // LinkedHashSet de-duplicates while keeping the order in which hits were returned.
        Set<PublicationMetadata> matched = new LinkedHashSet<PublicationMetadata>();
        for (FulltextSearchResult hit : this.fulltextIndex.performSearch(query).getResults()) {
            if (itemMatches(hit, publicationMetadata, resultMatcherArr)) {
                matched.add(this.documentBuilder.restore(hit));
            }
        }
        return new ArrayList<PublicationMetadata>(matched);
    }

    /**
     * Decides whether a search result is consistent with the publication being matched.
     *
     * @param fulltextSearchResult search result whose fields are inspected
     * @param publicationMetadata  publication being matched
     * @param resultMatcherArr     matchers to consult for each field
     * @return {@code false} as soon as an applicable matcher rejects a field, {@code true}
     *         otherwise (including when no matcher applies to any field)
     */
    protected boolean itemMatches(FulltextSearchResult fulltextSearchResult, PublicationMetadata publicationMetadata, ResultMatcher[] resultMatcherArr) {
        for (ResultField field : fulltextSearchResult.getFields()) {
            for (ResultMatcher matcher : resultMatcherArr) {
                if (!matcher.isApplicable(field)) {
                    continue;
                }
                if (!matcher.matches(publicationMetadata, field)) {
                    return false;
                }
            }
        }
        return true;
    }

    /** Matches every run of characters that are neither Unicode letters nor ASCII digits. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{L}0-9]++");

    /**
     * Reduces a text to its letters and ASCII digits, lower-cased with the English locale.
     *
     * <p>The pattern is compiled once instead of on every call. No trimming is needed because
     * all whitespace is already removed by the pattern.
     *
     * @param str text to normalize; must not be {@code null}
     * @return the normalized text, possibly empty
     */
    private String normalizeText(String str) {
        return NON_ALPHANUMERIC.matcher(str).replaceAll("").toLowerCase(Locale.ENGLISH);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tests whether a value is compatible with at least one of the indexed values, where
     * compatible means that, after normalization, one is a subsequence of the other
     * (as decided by the abbreviation directory).
     *
     * @param str    value from the publication being matched
     * @param strArr values of the corresponding result field
     * @return {@code true} when there is nothing to compare ({@code str} is {@code null} or
     *         {@code strArr} is {@code null} or empty) or some indexed value is compatible;
     *         {@code false} otherwise
     */
    public boolean passesSubsequence(String str, String[] strArr) {
        boolean nothingToCompare = str == null || strArr == null || strArr.length == 0;
        if (nothingToCompare) {
            return true;
        }
        String expected = normalizeText(str);
        for (int i = 0; i < strArr.length; i++) {
            String candidate = normalizeText(strArr[i]);
            if (this.abbreviationDirectory.isSubsequence(expected, candidate)) {
                return true;
            }
            if (this.abbreviationDirectory.isSubsequence(candidate, expected)) {
                return true;
            }
        }
        return false;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tests whether a value is similar to at least one of the indexed values. Two normalized
     * values are similar when one is a subsequence of the other (as decided by the
     * abbreviation directory), or when both are longer than 20 characters and their
     * Levenshtein distance is at most 5.
     *
     * @param str    value from the publication being matched
     * @param strArr values of the corresponding result field
     * @return {@code true} when there is nothing to compare ({@code str} is {@code null} or
     *         {@code strArr} is {@code null} or empty) or some indexed value is similar;
     *         {@code false} otherwise
     */
    public boolean passesSimilarity(String str, String[] strArr) {
        boolean nothingToCompare = str == null || strArr == null || strArr.length == 0;
        if (nothingToCompare) {
            return true;
        }
        final int minFuzzyLength = 20;
        final int maxEditDistance = 5;
        String expected = normalizeText(str);
        for (int i = 0; i < strArr.length; i++) {
            String candidate = normalizeText(strArr[i]);
            if (this.abbreviationDirectory.isSubsequence(expected, candidate)) {
                return true;
            }
            if (this.abbreviationDirectory.isSubsequence(candidate, expected)) {
                return true;
            }
            // The edit-distance fallback is applied only when both texts are long enough.
            boolean bothLongEnough = expected.length() > minFuzzyLength && candidate.length() > minFuzzyLength;
            if (bothLongEnough && StringUtils.getLevenshteinDistance(candidate, expected) <= maxEditDistance) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks up indexed publications that share at least one identifier with the given one.
     *
     * <p>Only the first 100 hits are requested. Duplicates are removed, and the remaining
     * publications are returned in the order in which the index delivered them. Hits are not
     * verified by any field matcher.
     *
     * @param publicationMetadata publication whose identifiers are searched for
     * @return restored publications of all hits; empty when the publication has no identifiers
     *         or nothing is found
     */
    public List<PublicationMetadata> matchIds(PublicationMetadata publicationMetadata) {
        List<PublicationId> ids = publicationMetadata.getIds();
        if (ids == null || ids.isEmpty()) {
            return Collections.emptyList();
        }
        ResultsFormat resultsFormat = new ResultsFormat(new FieldRequest[]{new FieldRequest(MetadataIndexConstants.F_AUTHOR_NORM), new FieldRequest(MetadataIndexConstants.F_JOURNAL_TITLE), new FieldRequest(MetadataIndexConstants.F_NUMBER), new FieldRequest(MetadataIndexConstants.F_PAGE_FROM), new FieldRequest(MetadataIndexConstants.F_TITLE), new FieldRequest(MetadataIndexConstants.F_VOLUME), new FieldRequest(MetadataIndexConstants.F_YEAR)});
        // Declared as BooleanCriterion because setOperator/addCriterion are called on it.
        BooleanCriterion anyIdentifier = new BooleanCriterion();
        anyIdentifier.setOperator(SearchOperator.OR);
        for (PublicationId id : ids) {
            anyIdentifier.addCriterion(new FieldCriterion(MetadataIndexConstants.F_IDENTIFIER, idToString(id)), SearchOperator.OR);
        }
        FulltextSearchQuery query = new FulltextSearchQuery(0, 100, resultsFormat, new SearchCriterion[]{anyIdentifier});
        // LinkedHashSet de-duplicates while keeping the order in which hits were returned.
        Set<PublicationMetadata> matched = new LinkedHashSet<PublicationMetadata>();
        for (FulltextSearchResult hit : this.fulltextIndex.performSearch(query).getResults()) {
            matched.add(this.documentBuilder.restore(hit));
        }
        return new ArrayList<PublicationMetadata>(matched);
    }

    /**
     * Renders an identifier as {@code domain:id}; a {@code null} domain is rendered as an
     * empty string, so the result then starts with the colon.
     *
     * @param publicationId identifier to render
     * @return the textual form used when querying the identifier field
     */
    protected static String idToString(PublicationId publicationId) {
        StringBuilder text = new StringBuilder();
        if (publicationId.getDomain() != null) {
            text.append(publicationId.getDomain());
        }
        text.append(":");
        text.append(publicationId.getId());
        return text.toString();
    }

    /**
     * Sets the index service that is searched for candidate publications.
     *
     * @param indexService index service to use
     */
    public void setFulltextIndex(IndexService<FulltextIndexDocument, FulltextSearchQuery, FulltextSearchResults, FulltextIndexSchema> indexService) {
        this.fulltextIndex = indexService;
    }

    /**
     * Sets the builder used to restore search results into the returned matches.
     *
     * @param metadataIndexDocumentBuilder builder to use
     */
    public void setDocumentBuilder(MetadataIndexDocumentBuilder metadataIndexDocumentBuilder) {
        this.documentBuilder = metadataIndexDocumentBuilder;
    }

    /**
     * Sets the directory that provides journal acronyms and the subsequence test.
     *
     * @param abbreviationDirectory directory to use
     */
    public void setAbbreviationDirectory(AbbreviationDirectory abbreviationDirectory) {
        this.abbreviationDirectory = abbreviationDirectory;
    }

    /**
     * Sets the parser used to normalize authors before they are compared with indexed values.
     *
     * @param authorParser parser to use
     */
    public void setAuthorParser(AuthorParser authorParser) {
        this.authorParser = authorParser;
    }
}
