package org.apache.lucene.analysis.jate;

import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

/* loaded from: input_file:org/apache/lucene/analysis/jate/MWEFilter.class */
/**
 * Abstract {@link TokenFilter} base class that carries shared configuration
 * (token-count bounds, character-length bounds, stop-word and symbol handling
 * flags) plus helpers for building and attaching {@link MWEMetadata} payloads.
 *
 * <p>This class only stores the configuration; apart from
 * {@link #stripSymbolChars(String)}, nothing here reads it. Applying the bounds
 * and stop-word settings is left to subclasses.
 */
public abstract class MWEFilter extends TokenFilter implements SentenceContextAware {
    private static final Logger log = Logger.getLogger(MWEFilter.class.getName());

    // Published default values. NOTE: no constructor in this class applies them;
    // callers that want these defaults must pass them in explicitly.
    public static final int DEFAULT_MAX_TOKENS = 2;
    public static final int DEFAULT_MIN_TOKENS = 2;
    public static final int DEFAULT_MAX_CHAR_LENGTH = 50;
    public static final int DEFAULT_MIN_CHAR_LENGTH = 1;
    public static final boolean DEFAULT_REMOVE_LEADING_STOPWORDS = false;
    public static final boolean DEFAULT_REMOVE_TRAILING_STOPWORDS = false;
    public static final boolean DEFAULT_REMOVE_LEADING_SYMBOLS = false;
    public static final boolean DEFAULT_REMOVE_TRAILING_SYMBOLS = false;
    public static final boolean DEFAULT_STOP_WORDS_IGNORE_CASE = false;
    public static final boolean DEFAULT_STRIP_LEADING_SYMBOL_CHARS = false;
    public static final boolean DEFAULT_STRIP_TRAILING_SYMBOL_CHARS = false;
    public static final boolean DEFAULT_STRIP_ANY_SYMBOL_CHARS = false;

    // Token-count and character-length bounds.
    protected int maxTokens;
    protected int minTokens;
    protected int maxCharLength;
    protected int minCharLength;

    // Whole-token removal flags for stop words and symbolic tokens.
    protected boolean removeLeadingStopwords;
    protected boolean removeTrailingStopwords;
    protected boolean removeLeadingSymbolicTokens;
    protected boolean removeTrailingSymbolicTokens;

    // Character-level stripping flags, forwarded to PunctuationRemover.
    protected boolean stripLeadingSymbolChars;
    protected boolean stripTrailingSymbolChars;
    protected boolean stripAllSymbolChars;

    // Stop-word list and whether it should be matched case-insensitively.
    protected Set<String> stopWords;
    protected boolean stopWordsIgnoreCase;

    /** Payload attribute registered on this stream at construction time. */
    protected final PayloadAttribute metadataAttr;

    /**
     * Creates a filter over {@code tokenStream} and registers the payload attribute.
     *
     * <p>All configuration fields keep their Java zero values ({@code 0},
     * {@code false}, {@code null}); the {@code DEFAULT_*} constants are not applied.
     *
     * <p>Decompiler note: this constructor was originally declared {@code protected}.
     *
     * @param tokenStream the upstream token stream to wrap
     */
    public MWEFilter(TokenStream tokenStream) {
        super(tokenStream);
        this.metadataAttr = addAttribute(PayloadAttribute.class);
    }

    /**
     * Creates a filter over {@code tokenStream} with every configuration field set
     * from the arguments. Values are stored as given, with no validation.
     *
     * @param tokenStream                  the upstream token stream to wrap
     * @param minTokens                    stored in {@link #minTokens}
     * @param maxTokens                    stored in {@link #maxTokens}
     * @param minCharLength                stored in {@link #minCharLength}
     * @param maxCharLength                stored in {@link #maxCharLength}
     * @param removeLeadingStopwords       stored in {@link #removeLeadingStopwords}
     * @param removeTrailingStopwords      stored in {@link #removeTrailingStopwords}
     * @param removeLeadingSymbolicTokens  stored in {@link #removeLeadingSymbolicTokens}
     * @param removeTrailingSymbolicTokens stored in {@link #removeTrailingSymbolicTokens}
     * @param stripLeadingSymbolChars      stored in {@link #stripLeadingSymbolChars}
     * @param stripTrailingSymbolChars     stored in {@link #stripTrailingSymbolChars}
     * @param stripAllSymbolChars          stored in {@link #stripAllSymbolChars}
     * @param stopWords                    stored in {@link #stopWords} (by reference, not copied)
     * @param stopWordsIgnoreCase          stored in {@link #stopWordsIgnoreCase}
     */
    public MWEFilter(TokenStream tokenStream,
                     int minTokens,
                     int maxTokens,
                     int minCharLength,
                     int maxCharLength,
                     boolean removeLeadingStopwords,
                     boolean removeTrailingStopwords,
                     boolean removeLeadingSymbolicTokens,
                     boolean removeTrailingSymbolicTokens,
                     boolean stripLeadingSymbolChars,
                     boolean stripTrailingSymbolChars,
                     boolean stripAllSymbolChars,
                     Set<String> stopWords,
                     boolean stopWordsIgnoreCase) {
        // Reuse the single-argument constructor for the super call and attribute registration.
        this(tokenStream);

        this.minTokens = minTokens;
        this.maxTokens = maxTokens;
        this.minCharLength = minCharLength;
        this.maxCharLength = maxCharLength;

        this.removeLeadingStopwords = removeLeadingStopwords;
        this.removeTrailingStopwords = removeTrailingStopwords;
        this.removeLeadingSymbolicTokens = removeLeadingSymbolicTokens;
        this.removeTrailingSymbolicTokens = removeTrailingSymbolicTokens;

        this.stripLeadingSymbolChars = stripLeadingSymbolChars;
        this.stripTrailingSymbolChars = stripTrailingSymbolChars;
        this.stripAllSymbolChars = stripAllSymbolChars;

        this.stopWords = stopWords;
        this.stopWordsIgnoreCase = stopWordsIgnoreCase;
    }

    /**
     * Records sentence-level context on {@code metadata} and returns the same instance.
     *
     * @param metadata        the metadata object to add entries to
     * @param firstTokenIndex stored as a string under {@code FIRST_COMPOSING_TOKEN_ID_IN_SENT}
     * @param lastTokenIndex  stored as a string under {@code LAST_COMPOSING_TOKEN_ID_IN_SENT}
     * @param posTag          stored as-is under {@code POS}
     * @param sentenceIndex   stored as a string under {@code SOURCE_SENTENCE_ID_IN_DOC}
     * @return {@code metadata}, after the four entries have been added
     */
    @Override // org.apache.lucene.analysis.jate.SentenceContextAware
    public MWEMetadata addSentenceContext(MWEMetadata metadata,
                                          int firstTokenIndex,
                                          int lastTokenIndex,
                                          String posTag,
                                          int sentenceIndex) {
        final String firstId = String.valueOf(firstTokenIndex);
        final String lastId = String.valueOf(lastTokenIndex);
        final String sentenceId = String.valueOf(sentenceIndex);

        metadata.addMetaData(MWEMetadataType.FIRST_COMPOSING_TOKEN_ID_IN_SENT, firstId);
        metadata.addMetaData(MWEMetadataType.LAST_COMPOSING_TOKEN_ID_IN_SENT, lastId);
        metadata.addMetaData(MWEMetadataType.POS, posTag);
        metadata.addMetaData(MWEMetadataType.SOURCE_SENTENCE_ID_IN_DOC, sentenceId);
        return metadata;
    }

    /**
     * Copies into {@code target} every entry of {@code source} whose key
     * {@code target} does not already contain. Existing entries in
     * {@code target} are never overwritten.
     *
     * <p>Decompiler note: this method was originally declared {@code protected}.
     *
     * @param target the metadata receiving the missing entries
     * @param source the metadata whose entries are offered
     * @return {@code target}
     */
    public MWEMetadata inheritOtherMetadata(MWEMetadata target, MWEMetadata source) {
        for (Map.Entry<MWEMetadataType, String> candidate : source.metadata.entrySet()) {
            final MWEMetadataType type = candidate.getKey();
            if (target.metadata.containsKey(type)) {
                // The target's own value takes precedence.
                continue;
            }
            target.addMetaData(type, candidate.getValue());
        }
        return target;
    }

    /**
     * Serializes {@code metadata} with {@code MWEMetadata.serialize} and sets the
     * result, wrapped in a {@link BytesRef}, as the payload of {@code payloadAttribute}.
     *
     * @param payloadAttribute the attribute whose payload is replaced
     * @param metadata         the metadata to serialize
     */
    public void addPayloadAttribute(PayloadAttribute payloadAttribute, MWEMetadata metadata) {
        final BytesRef payload = new BytesRef(MWEMetadata.serialize(metadata));
        payloadAttribute.setPayload(payload);
    }

    /**
     * Delegates to {@code PunctuationRemover.stripPunctuations}, passing this
     * filter's strip-all, strip-leading and strip-trailing flags in that order.
     *
     * <p>Decompiler note: this method was originally declared {@code protected}.
     *
     * @param text the string to process
     * @return whatever {@code PunctuationRemover.stripPunctuations} returns for {@code text}
     */
    public String stripSymbolChars(String text) {
        return PunctuationRemover.stripPunctuations(
                text,
                this.stripAllSymbolChars,
                this.stripLeadingSymbolChars,
                this.stripTrailingSymbolChars);
    }
}
