package com.quasiris.qsf.commons.text.normalizer;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Normalizes free text into tokens using a Lucene {@link CustomAnalyzer} that is
 * assembled once, in the constructor, from a {@link NormalizerConfig}.
 *
 * <p>The analyzer applies, in this order: a chain of character filters (line-break
 * replacement, HTML stripping, URL removal, character whitelisting, optional number
 * removal, dash clean-up, optional umlaut mapping), a whitespace tokenizer, and token
 * filters (lowercase, optional stop words, optional German light stemming, optional
 * synonyms, optional duplicate removal, trim).
 *
 * @deprecated This class is marked deprecated; the replacement is not named in this file.
 */
@Deprecated
public class TextNormalizerService {
    /** Name of the Lucene char filter factory used for all regex replacements. */
    private static final String PATTERN_REPLACE = "patternReplace";

    /** Replacement used whenever a matched span should simply be blanked out. */
    private static final String SPACE = " ";

    private final Analyzer analyzer;

    /**
     * Builds the analyzer described by {@code normalizerConfig}.
     *
     * @param normalizerConfig switches and resource paths that select the optional filters
     * @throws RuntimeException wrapping the {@link IOException} raised while the analyzer
     *         (or one of the resources it references) is being set up
     */
    public TextNormalizerService(NormalizerConfig normalizerConfig) {
        try {
            this.analyzer = buildAnalyzer(normalizerConfig);
        } catch (IOException e) {
            throw new RuntimeException("Failed to build the text normalizer analyzer", e);
        }
    }

    /** Assembles the complete analyzer; the order of the filters is significant. */
    private static Analyzer buildAnalyzer(NormalizerConfig config) throws IOException {
        // Resources (mapping, stop word and synonym files) are resolved relative to ".".
        CustomAnalyzer.Builder builder = CustomAnalyzer.builder(Paths.get("."))
                .withTokenizer("whitespace");
        addCharFilters(builder, config);
        addTokenFilters(builder, config);
        return builder.build();
    }

    /** Registers the character filters that clean the raw text before tokenization. */
    private static void addCharFilters(CustomAnalyzer.Builder builder, NormalizerConfig config)
            throws IOException {
        // Escaped line-break sequences (backslash + n / r) and real line breaks become a space.
        replacePattern(builder, "\\\\n|\\\\r\\\\n|\\n|\\r\\n", SPACE);
        // A '.', '|' or '!' directly between two words is turned into a hyphen.
        replacePattern(builder, "(\\w+)[.|!]{1}(\\w+)", "$1-$2");
        builder.addCharFilter(HTMLStripCharFilterFactory.class);
        // http(s)/ftp/file URLs are dropped.
        replacePattern(builder,
                "\\b(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]",
                SPACE);
        // Everything outside the whitelist is blanked; punctuation is whitelisted on request.
        String disallowedChars = config.isKeepPunctuation()
                ? "[^a-zA-Z0-9ÄÖÜäöüß.,!?:-]+"
                : "[^a-zA-Z0-9ÄÖÜäöüß-]+";
        replacePattern(builder, disallowedChars, SPACE);
        if (config.isRemoveNumbers()) {
            replacePattern(builder, "\\b(\\d+[.,-/]?\\d*?)\\b", SPACE);
        }
        // Hyphens that touch whitespace on at least one side are not word joiners.
        replacePattern(builder,
                "(?<=[\\s])-(?=[\\s])|(?<=[\\S])-(?=[\\s])|(?<=[\\s])-(?=[\\S])",
                SPACE);
        // Runs of single characters joined by '-', '!', '.' or '_' (e.g. "a.b.c") are removed.
        replacePattern(builder,
                "\\b((?:[\\w|ÄÖÜäöüß]{1}[-!._]{1})+[\\w|ÄÖÜäöüß]{1})\\b",
                SPACE);
        if (config.isNormalizeUmlaut()) {
            builder.addCharFilter(MappingCharFilterFactory.class,
                    "mapping", "normalizer/umlaute-mapping.txt");
        }
    }

    /** Registers the token filters that run after the whitespace tokenizer. */
    private static void addTokenFilters(CustomAnalyzer.Builder builder, NormalizerConfig config)
            throws IOException {
        builder.addTokenFilter("lowercase");
        String stopwordPath = config.getStopwordFilepath();
        if (StringUtils.isNotEmpty(stopwordPath)) {
            builder.addTokenFilter("stop", "ignoreCase", "false", "words", stopwordPath);
        }
        if (config.isStem()) {
            builder.addTokenFilter("germanLightStem");
        }
        String synonymsPath = config.getSynonymsFilepath();
        if (StringUtils.isNotEmpty(synonymsPath)) {
            builder.addTokenFilter(SynonymGraphFilterFactory.class, "synonyms", synonymsPath);
        }
        if (config.isRemoveDuplicates()) {
            builder.addTokenFilter("removeDuplicates");
        }
        builder.addTokenFilter("trim");
    }

    /** Adds one {@code patternReplace} char filter for the given regex and replacement. */
    private static void replacePattern(CustomAnalyzer.Builder builder, String pattern,
            String replacement) throws IOException {
        builder.addCharFilter(PATTERN_REPLACE, "pattern", pattern, "replacement", replacement);
    }

    /**
     * Runs the configured analyzer over {@code str} and returns the produced tokens.
     *
     * @param str text to normalize; may be {@code null}
     * @return the tokens in stream order; an empty list when {@code str} is {@code null}
     *         or empty, or when the analysis fails with an {@link IOException}
     */
    public List<String> normalizeToken(String str) {
        if (StringUtils.isEmpty(str)) {
            return Collections.emptyList();
        }
        try {
            return analyze(str, this.analyzer);
        } catch (IOException ignored) {
            // Best effort, as before: callers receive "no tokens" instead of a failure.
            return Collections.emptyList();
        }
    }

    /**
     * Normalizes {@code str} and joins the resulting tokens with single spaces.
     *
     * @param str text to normalize; may be {@code null}
     * @return the space-joined tokens, or an empty string when there are none
     */
    public String normalize(String str) {
        return String.join(" ", normalizeToken(str));
    }

    /**
     * Trims {@code str} and collapses every run of space characters into one space.
     * Only the space character is collapsed; tabs and line breaks inside the text
     * are left untouched.
     *
     * @param str text to clean up; may be {@code null}
     * @return the cleaned text, or {@code str} itself when it is {@code null} or empty
     */
    public static String normalizeWhitespace(String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        return str.trim().replaceAll(" +", " ");
    }

    /**
     * Collects the terms that {@code analyzer} emits for {@code str}.
     *
     * @param str text to analyze
     * @param analyzer analyzer used to create the token stream
     * @return the emitted terms in stream order
     * @throws IOException if the token stream fails while being consumed or closed
     */
    public static List<String> analyze(String str, Analyzer analyzer) throws IOException {
        List<String> tokens = new ArrayList<>();
        // The field name is irrelevant for this analyzer, hence the empty string.
        try (TokenStream tokenStream = analyzer.tokenStream("", str)) {
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                tokens.add(term.toString());
            }
            // Completes the reset/incrementToken/end/close consumer workflow of TokenStream.
            tokenStream.end();
        }
        return tokens;
    }
}
