package com.s24.search.solr.analyzers;

import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/s24/search/solr/analyzers/AnalyzingSentenceTokenizerFactory.class */
/**
 * Factory that creates {@link AnalyzingSentenceTokenizer} instances from a string-keyed
 * configuration map.
 *
 * <p>Recognized configuration keys (all optional):
 * <ul>
 *   <li>{@code filter} - boolean, defaults to {@code false}</li>
 *   <li>{@code commaWordThreshold} - float, defaults to {@link #DEFAULT_COMMA_WORD_THRESHOLD}</li>
 *   <li>{@code maxStopwordRatio} - float, defaults to {@link #DEFAULT_MAX_STOPWORD_RATIO}</li>
 *   <li>{@code minSentenceLength} - int, defaults to {@link #DEFAULT_MIN_SENTENCE_LENGTH}</li>
 *   <li>{@code stopwordfile} - resource name of the stop word list; when absent an empty stop
 *       word set is used and a warning is logged</li>
 * </ul>
 *
 * <p>The values are handed unchanged to the tokenizer; their exact meaning is defined by
 * {@link AnalyzingSentenceTokenizer}.
 *
 * <p>The stop word set is only populated by {@link #inform(ResourceLoader)}; until that method
 * has run, {@link #create(AttributeFactory)} passes {@code null} as the stop word set.
 */
public class AnalyzingSentenceTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
    private static final Logger logger = LoggerFactory.getLogger(AnalyzingSentenceTokenizerFactory.class);

    /** Configuration key for the boolean {@code filter} flag. */
    private static final String FILTER_ARG = "filter";
    /** Configuration key naming the stop word resource. */
    private static final String STOP_WORD_FILE = "stopwordfile";
    /** Configuration key for the comma/word threshold. */
    private static final String COMMA_WORD_THRESHOLD_ARG = "commaWordThreshold";
    /** Configuration key for the maximum stop word ratio. */
    private static final String MAX_STOPWORD_RATIO_ARG = "maxStopwordRatio";
    /** Configuration key for the minimum sentence length. */
    private static final String MIN_SENTENCE_LENGTH_ARG = "minSentenceLength";

    /** Value used when {@code commaWordThreshold} is not configured. */
    @VisibleForTesting
    static final float DEFAULT_COMMA_WORD_THRESHOLD = 0.2f;
    /** Value used when {@code maxStopwordRatio} is not configured. */
    static final float DEFAULT_MAX_STOPWORD_RATIO = 0.21f;
    /** Value used when {@code minSentenceLength} is not configured. */
    static final int DEFAULT_MIN_SENTENCE_LENGTH = 5;

    // Configuration is fixed at construction time, hence final.
    private final boolean filter;
    private final float commaWordThreshold;
    private final float maxStopwordRatio;
    private final int minSentenceLength;
    /** Configured stop word resource, or {@code null} when none was given. */
    private final String stopWordFilePath;

    /** Loaded lazily by {@link #inform(ResourceLoader)}; {@code null} before that call. */
    private CharArraySet stopWords;

    /**
     * Creates the factory and reads its settings from {@code map}.
     *
     * @param map configuration arguments; see the class documentation for the recognized keys
     * @throws NumberFormatException if a numeric argument is present but cannot be parsed
     */
    public AnalyzingSentenceTokenizerFactory(Map<String, String> map) {
        super(map);
        // Boolean.parseBoolean(null) is false, so an absent key yields the default of false.
        this.filter = Boolean.parseBoolean(map.get(FILTER_ARG));
        this.commaWordThreshold = map.containsKey(COMMA_WORD_THRESHOLD_ARG)
                ? Float.parseFloat(map.get(COMMA_WORD_THRESHOLD_ARG))
                : DEFAULT_COMMA_WORD_THRESHOLD;
        this.maxStopwordRatio = map.containsKey(MAX_STOPWORD_RATIO_ARG)
                ? Float.parseFloat(map.get(MAX_STOPWORD_RATIO_ARG))
                : DEFAULT_MAX_STOPWORD_RATIO;
        this.minSentenceLength = map.containsKey(MIN_SENTENCE_LENGTH_ARG)
                ? Integer.parseInt(map.get(MIN_SENTENCE_LENGTH_ARG))
                : DEFAULT_MIN_SENTENCE_LENGTH;
        this.stopWordFilePath = map.get(STOP_WORD_FILE);
        if (this.stopWordFilePath == null) {
            // Without a path inform() falls back to an empty stop word set.
            logger.warn("The {} param is not set. The sentences could not be analyzed (due to wrong calculation of the information gain).", STOP_WORD_FILE);
        }
    }

    /**
     * Loads the stop word set through {@code resourceLoader}. When no stop word file was
     * configured, an empty set is installed instead and the loader is not used.
     *
     * @param resourceLoader loader used to resolve the configured stop word resource
     * @throws RuntimeException if reading the stop word resource fails; the original
     *         {@link IOException} is preserved as the cause
     */
    public void inform(ResourceLoader resourceLoader) throws IOException {
        if (this.stopWordFilePath == null) {
            this.stopWords = new CharArraySet(0, false);
            return;
        }
        try {
            this.stopWords = getWordSet(resourceLoader, this.stopWordFilePath, true);
        } catch (IOException e) {
            // Keep the historical unchecked exception type, but say which resource failed.
            throw new RuntimeException(
                    "Could not load stop words from '" + this.stopWordFilePath + "'", e);
        }
    }

    /**
     * Builds a new tokenizer using this factory's current settings.
     *
     * @param attributeFactory attribute factory passed through to the tokenizer
     * @return a new {@link AnalyzingSentenceTokenizer}
     */
    public Tokenizer create(AttributeFactory attributeFactory) {
        return new AnalyzingSentenceTokenizer(
                attributeFactory,
                this.filter,
                this.stopWords,
                this.commaWordThreshold,
                this.maxStopwordRatio,
                this.minSentenceLength);
    }
}
