package pl.edu.icm.coansys.citations;

import com.google.common.collect.Lists;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import pl.edu.icm.coansys.citations.data.HeuristicHashMatchingResult;
import pl.edu.icm.coansys.citations.data.IdWithSimilarity;
import pl.edu.icm.coansys.citations.data.MatchableEntity;

/* loaded from: input_file:pl/edu/icm/coansys/citations/CoreCitationMatchingService.class */
/**
 * Matches citations against documents by chaining several collaborators:
 * heuristic hash matching, document attaching, citation attaching (with
 * limiting) and best-match picking.
 *
 * <p>The Spark context and the list of hasher pairs have no defaults and must
 * be supplied through the setters before {@link #matchCitations} is called.
 * The remaining collaborators are initialized with default instances and may
 * be replaced through their setters.
 */
public class CoreCitationMatchingService {
    private DocumentAttacher documentAttacher = new DocumentAttacher();
    private CitationAttacherWithMatchedLimiter citationAttacher = new CitationAttacherWithMatchedLimiter();
    private BestMatchedCitationPicker bestMatchedCitationPicker = new BestMatchedCitationPicker();
    private HeuristicHashCitationMatcherFactory heuristicHashCitationMatcherFactory = new HeuristicHashCitationMatcherFactory();
    private JavaSparkContext sparkContext;
    private List<Pair<MatchableEntityHasher, MatchableEntityHasher>> matchableEntityHashers;
    private long maxHashBucketSize;

    /**
     * Runs the whole matching pipeline.
     *
     * <p>Steps, in order: hash-based id pairing of the two inputs, attaching
     * documents (using {@code javaPairRDD2}), attaching citations and limiting
     * documents (using {@code javaPairRDD}), and finally picking the best match.
     *
     * @param javaPairRDD  keyed entities used as the citation side of the matching
     * @param javaPairRDD2 keyed entities used as the document side of the matching
     * @return the result produced by the configured {@code BestMatchedCitationPicker}
     */
    public JavaPairRDD<MatchableEntity, IdWithSimilarity> matchCitations(JavaPairRDD<String, MatchableEntity> javaPairRDD, JavaPairRDD<String, MatchableEntity> javaPairRDD2) {
        JavaPairRDD<String, String> citDocIdPairs = matchCitDocByHashes(javaPairRDD, javaPairRDD2);
        return this.bestMatchedCitationPicker.pickBest(
                this.citationAttacher.attachCitationsAndLimitDocs(
                        this.documentAttacher.attachDocuments(citDocIdPairs, javaPairRDD2),
                        javaPairRDD));
    }

    /**
     * Applies every configured hasher pair in list order and accumulates the
     * id pairs each round produces.
     *
     * <p>Each round only receives the citations the previous round reported
     * as unmatched; the documents are passed unchanged to every round.
     *
     * @param citations citation-side entities fed into the first round
     * @param documents document-side entities used in every round
     * @return union of the id pairs returned by all rounds (empty when no
     *         hasher pairs are configured)
     */
    private JavaPairRDD<String, String> matchCitDocByHashes(JavaPairRDD<String, MatchableEntity> citations, JavaPairRDD<String, MatchableEntity> documents) {
        // Start from an empty RDD so every round can simply be unioned in.
        JavaPairRDD<String, String> allCitDocIdPairs = this.sparkContext.parallelizePairs(Lists.newArrayList());
        JavaPairRDD<String, MatchableEntity> remainingCitations = citations;

        for (Iterator<Pair<MatchableEntityHasher, MatchableEntityHasher>> hashers = this.matchableEntityHashers.iterator(); hashers.hasNext(); ) {
            Pair<MatchableEntityHasher, MatchableEntityHasher> hasherPair = hashers.next();
            // The last argument is true while further hasher pairs remain to be
            // applied; how the matcher uses that flag is not visible from here.
            HeuristicHashMatchingResult roundResult = this.heuristicHashCitationMatcherFactory
                    .create(hasherPair.getLeft(), hasherPair.getRight(), this.maxHashBucketSize)
                    .matchCitations(remainingCitations, documents, hashers.hasNext());
            allCitDocIdPairs = allCitDocIdPairs.union(roundResult.getCitDocIdPairs());
            remainingCitations = roundResult.getUnmatchedCitations();
        }
        return allCitDocIdPairs;
    }

    /**
     * Replaces the collaborator used for the document-attaching step.
     *
     * @param documentAttacher the attacher to use from now on
     */
    public void setDocumentAttacher(DocumentAttacher documentAttacher) {
        this.documentAttacher = documentAttacher;
    }

    /**
     * Replaces the collaborator used for the citation-attaching step.
     *
     * @param citationAttacherWithMatchedLimiter the attacher to use from now on
     */
    public void setCitationAttacher(CitationAttacherWithMatchedLimiter citationAttacherWithMatchedLimiter) {
        this.citationAttacher = citationAttacherWithMatchedLimiter;
    }

    /**
     * Replaces the collaborator used for the final best-match selection.
     *
     * @param bestMatchedCitationPicker the picker to use from now on
     */
    public void setBestMatchedCitationPicker(BestMatchedCitationPicker bestMatchedCitationPicker) {
        this.bestMatchedCitationPicker = bestMatchedCitationPicker;
    }

    /**
     * Sets the Spark context used to create the initial empty pair RDD.
     *
     * @param javaSparkContext the Spark context to use
     */
    public void setSparkContext(JavaSparkContext javaSparkContext) {
        this.sparkContext = javaSparkContext;
    }

    /**
     * Sets the hasher pairs applied, in list order, during hash matching.
     *
     * @param list hasher pairs; the left and right elements are handed to the
     *             matcher factory in that order
     */
    public void setMatchableEntityHashers(List<Pair<MatchableEntityHasher, MatchableEntityHasher>> list) {
        this.matchableEntityHashers = list;
    }

    /**
     * Sets the bucket-size value passed to the matcher factory for every round.
     *
     * @param j the maximum hash bucket size
     */
    public void setMaxHashBucketSize(long j) {
        this.maxHashBucketSize = j;
    }

    /**
     * Replaces the factory that creates a matcher for each hasher pair.
     *
     * @param heuristicHashCitationMatcherFactory the factory to use from now on
     */
    public void setHeuristicHashCitationMatcherFactory(HeuristicHashCitationMatcherFactory heuristicHashCitationMatcherFactory) {
        this.heuristicHashCitationMatcherFactory = heuristicHashCitationMatcherFactory;
    }
}
