package ws.palladian.extraction.location.scope;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.Validate;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import ws.palladian.classification.text.FeatureSetting;
import ws.palladian.classification.text.FeatureSettingBuilder;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:ws/palladian/extraction/location/scope/FeatureSettingAnalyzer.class */
public class FeatureSettingAnalyzer extends Analyzer {
    private final FeatureSetting featureSetting;
    private final Version luceneVersion;

    public FeatureSettingAnalyzer(FeatureSetting featureSetting) {
        this(featureSetting, Version.LUCENE_47);
    }

    public FeatureSettingAnalyzer(FeatureSetting featureSetting, Version version) {
        Validate.notNull(featureSetting, "featureSetting must not be null", new Object[0]);
        this.featureSetting = featureSetting;
        this.luceneVersion = version;
    }

    protected Analyzer.TokenStreamComponents createComponents(String str, Reader reader) {
        NGramTokenizer standardTokenizer;
        int minNGramLength = this.featureSetting.getMinNGramLength();
        int maxNGramLength = this.featureSetting.getMaxNGramLength();
        if (this.featureSetting.getTextFeatureType() == FeatureSetting.TextFeatureType.CHAR_NGRAMS) {
            standardTokenizer = new NGramTokenizer(this.luceneVersion, reader, minNGramLength, maxNGramLength);
        } else {
            if (this.featureSetting.getTextFeatureType() != FeatureSetting.TextFeatureType.WORD_NGRAMS) {
                throw new UnsupportedOperationException("Unsupported text feature type: " + this.featureSetting.getTextFeatureType());
            }
            standardTokenizer = new StandardTokenizer(this.luceneVersion, reader);
        }
        TokenStream lowerCaseFilter = new LowerCaseFilter(this.luceneVersion, standardTokenizer);
        if (this.featureSetting.getTextFeatureType() == FeatureSetting.TextFeatureType.WORD_NGRAMS && maxNGramLength > 1) {
            TokenStream shingleFilter = new ShingleFilter(lowerCaseFilter, Math.max(2, minNGramLength), maxNGramLength);
            if (minNGramLength > 1) {
                shingleFilter.setOutputUnigrams(false);
            }
            lowerCaseFilter = shingleFilter;
        }
        TokenStream limitTokenCountFilter = new LimitTokenCountFilter(lowerCaseFilter, this.featureSetting.getMaxTerms());
        if (this.featureSetting.isWordUnigrams()) {
            limitTokenCountFilter = new LengthFilter(this.luceneVersion, limitTokenCountFilter, this.featureSetting.getMinimumTermLength(), this.featureSetting.getMaximumTermLength());
        }
        return new Analyzer.TokenStreamComponents(standardTokenizer, limitTokenCountFilter);
    }

    public List<String> analyze(String str) {
        ArrayList arrayList = new ArrayList();
        Closeable closeable = null;
        try {
            try {
                closeable = tokenStream(null, new StringReader(str));
                closeable.reset();
                while (closeable.incrementToken()) {
                    arrayList.add(closeable.getAttribute(CharTermAttribute.class).toString());
                }
                FileHelper.close(new Closeable[]{closeable});
                return arrayList;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } catch (Throwable th) {
            FileHelper.close(new Closeable[]{closeable});
            throw th;
        }
    }

    public String toString() {
        return "FeatureSettingAnalyzer " + this.featureSetting;
    }

    public static void main(String[] strArr) {
        FeatureSettingAnalyzer featureSettingAnalyzer = new FeatureSettingAnalyzer(FeatureSettingBuilder.words(1).termLength(4, 10).maxTerms(10).create(), Version.LUCENE_47);
        List<String> analyze = featureSettingAnalyzer.analyze("The quick brown fox jumps over the lazy dog.");
        System.out.println(featureSettingAnalyzer);
        CollectionHelper.print(analyze);
        featureSettingAnalyzer.close();
    }
}
