package ws.palladian.retrieval.cooccurrence;

import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.extraction.token.Tokenizer;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.retrieval.DocumentRetriever;
import ws.palladian.retrieval.resources.WebContent;
import ws.palladian.retrieval.search.Searcher;
import ws.palladian.retrieval.search.SearcherException;

/* loaded from: input_file:ws/palladian/retrieval/cooccurrence/CoOccurrenceRetriever.class */
/**
 * Collects co-occurrences of two terms in the web content returned by a set of searchers.
 *
 * <p>For every search result the text is scanned for places where both terms appear together;
 * what counts as "together" is determined by the configured {@link CoOccurrenceContext}. Every
 * hit is recorded in a {@link CoOccurrenceStatistics} instance under the name of the searcher
 * that produced the result.
 */
public class CoOccurrenceRetriever {
    private static final Logger LOGGER = LoggerFactory.getLogger(CoOccurrenceRetriever.class);

    /** Regex fragment placed between the two terms for {@link CoOccurrenceContext#CONTEXT_200_CHARS}. */
    private static final String CONTEXT_GAP_REGEX = ".{0,200}";

    private final CoOccurrenceContext coOccurrenceContext;
    private final int numberOfResults;
    private final Language language;

    /** The scope within which both terms must appear to be counted as a co-occurrence. */
    public enum CoOccurrenceContext {
        /** Both terms appear anywhere in the same text. */
        DOCUMENT,
        /** Both terms appear in the same sentence (as split by {@code Tokenizer.getSentences}). */
        SENTENCE,
        /** One term is followed by the other with at most 200 characters in between. */
        CONTEXT_200_CHARS
    }

    /**
     * Creates a retriever.
     *
     * @param coOccurrenceContext scope in which both terms must appear
     * @param numberOfResults number of results requested from each searcher
     * @param language language passed to each searcher
     */
    public CoOccurrenceRetriever(CoOccurrenceContext coOccurrenceContext, int numberOfResults, Language language) {
        this.coOccurrenceContext = coOccurrenceContext;
        this.numberOfResults = numberOfResults;
        this.language = language;
    }

    /**
     * Collects co-occurrence statistics for two terms without any additional query terms.
     *
     * @param term1 first term
     * @param term2 second term
     * @param searchers searchers to query
     * @param ignoreCase whether text and terms are lower-cased before matching
     * @return the statistics gathered over all searchers
     */
    public CoOccurrenceStatistics getCoOccurrenceStatistics(String term1, String term2, Collection<Searcher<WebContent>> searchers, boolean ignoreCase) {
        return getCoOccurrenceStatistics(term1, term2, new HashSet<String>(), searchers, ignoreCase);
    }

    /**
     * Collects co-occurrence statistics for two terms.
     *
     * <p>All searchers are queried with the same quoted query. For each result the text obtained
     * from {@code DocumentRetriever.getText} is used when the result has a URL, otherwise the
     * result's summary; results without text are skipped. A {@link SearcherException} is logged
     * and the remaining searchers are still queried.
     *
     * @param term1 first term
     * @param term2 second term
     * @param additionalTerms further terms appended (quoted) to the search query; they do not
     *        take part in the co-occurrence matching itself
     * @param searchers searchers to query
     * @param ignoreCase whether text and terms are lower-cased before matching
     * @return the statistics gathered over all searchers
     */
    public CoOccurrenceStatistics getCoOccurrenceStatistics(String term1, String term2, Collection<String> additionalTerms, Collection<Searcher<WebContent>> searchers, boolean ignoreCase) {
        CoOccurrenceStatistics statistics = new CoOccurrenceStatistics(term1, term2);
        DocumentRetriever documentRetriever = new DocumentRetriever();
        String query = buildQuery(term1, term2, additionalTerms);
        for (Searcher<WebContent> searcher : searchers) {
            try {
                for (WebContent result : searcher.search(query, this.numberOfResults, this.language)) {
                    String text = result.getSummary();
                    if (result.getUrl() != null) {
                        // Prefer the text behind the URL over the summary.
                        text = documentRetriever.getText(result.getUrl());
                    }
                    if (text != null) {
                        findCoOccurrences(text, statistics, searcher, ignoreCase);
                    }
                }
            } catch (SearcherException e) {
                LOGGER.error("Searcher exception while searching {}", query, e);
            }
        }
        return statistics;
    }

    /**
     * Builds the search query: both terms and every additional term, each wrapped in double
     * quotes and separated by single spaces.
     */
    private String buildQuery(String term1, String term2, Collection<String> additionalTerms) {
        StringBuilder query = new StringBuilder();
        query.append('"').append(term1).append("\" \"").append(term2).append('"');
        for (String additionalTerm : additionalTerms) {
            query.append(" \"").append(additionalTerm).append('"');
        }
        return query.toString();
    }

    /**
     * Scans one text for co-occurrences of the statistics' two terms and records every hit under
     * the searcher's name.
     *
     * <p>When {@code ignoreCase} is set, the text and both terms are lower-cased first, so the
     * recorded snippets are lower-cased as well.
     */
    private void findCoOccurrences(String text, CoOccurrenceStatistics statistics, Searcher<WebContent> searcher, boolean ignoreCase) {
        String term1 = statistics.getTerm1();
        String term2 = statistics.getTerm2();
        if (ignoreCase) {
            // Locale.ROOT keeps matching independent of the JVM's default locale.
            text = text.toLowerCase(Locale.ROOT);
            term1 = term1.toLowerCase(Locale.ROOT);
            term2 = term2.toLowerCase(Locale.ROOT);
        }
        switch (this.coOccurrenceContext) {
            case DOCUMENT:
                if (containsBoth(text, term1, term2)) {
                    statistics.addCoOccurrence(searcher.getName(), text);
                }
                break;
            case SENTENCE:
                for (String sentence : Tokenizer.getSentences(StringHelper.clean(text))) {
                    if (containsBoth(sentence, term1, term2)) {
                        statistics.addCoOccurrence(searcher.getName(), sentence);
                    }
                }
                break;
            case CONTEXT_200_CHARS:
                String cleanedText = StringHelper.clean(text);
                addWindowMatches(term1, term2, cleanedText, statistics, searcher);
                // Compare the normalized terms: if they are equal, the reversed pattern would be
                // identical to the first one and every match would be counted twice.
                if (!term1.equals(term2)) {
                    addWindowMatches(term2, term1, cleanedText, statistics, searcher);
                }
                break;
            default:
                break;
        }
    }

    /** Returns whether {@code text} contains both terms as substrings. */
    private static boolean containsBoth(String text, String term1, String term2) {
        return text.contains(term1) && text.contains(term2);
    }

    /**
     * Records every match of {@code first}, followed by up to 200 characters, followed by
     * {@code second}. The terms are quoted so that regex metacharacters in them are matched
     * literally.
     */
    private static void addWindowMatches(String first, String second, String text, CoOccurrenceStatistics statistics, Searcher<WebContent> searcher) {
        String regex = Pattern.quote(first) + CONTEXT_GAP_REGEX + Pattern.quote(second);
        List<String> matches = StringHelper.getRegexpMatches(regex, text);
        for (String match : matches) {
            statistics.addCoOccurrence(searcher.getName(), match);
        }
    }

    /** Small demo: runs a retrieval with an empty set of searchers and prints the statistics. */
    public static void main(String[] strArr) {
        CoOccurrenceRetriever retriever = new CoOccurrenceRetriever(CoOccurrenceContext.CONTEXT_200_CHARS, 10, Language.GERMAN);
        System.out.println(retriever.getCoOccurrenceStatistics("financial meltdown", "2008", new HashSet<Searcher<WebContent>>(), true));
    }
}
