package ws.palladian.extraction.token;

import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import ws.palladian.core.ImmutableToken;
import ws.palladian.core.Instance;
import ws.palladian.core.TextTokenizer;
import ws.palladian.core.Token;
import ws.palladian.helper.collection.AbstractIterator2;

/* loaded from: input_file:ws/palladian/extraction/token/CharacterNGramTokenizer.class */
/**
 * A {@link TextTokenizer} which splits a text into character n-grams. For every start position in
 * the text, all n-grams with lengths from {@code minLength} up to {@code maxLength} (or up to the
 * end of the text, whichever comes first) are produced, ordered by start position and then by
 * ascending length.
 *
 * <p>
 * When padding is enabled, the text is surrounded by {@code maxLength - 1} padding characters
 * ({@value #PADDING_CHARACTER}) on each side before n-grams are created. In that case the start
 * positions of the produced tokens refer to the padded text, not to the text that was passed in.
 */
public final class CharacterNGramTokenizer implements TextTokenizer {
    /** The character used to pad the text at its beginning and end. */
    private static final String PADDING_CHARACTER = "#";

    /** Length of the shortest n-grams to create (always greater than zero). */
    private final int minLength;

    /** Length of the longest n-grams to create (always greater than or equal to {@link #minLength}). */
    private final int maxLength;

    /** Whether the text is padded before n-grams are created. */
    private final boolean padding;

    /**
     * Creates a tokenizer without padding.
     *
     * @param minLength The minimum n-gram length, must be greater than zero.
     * @param maxLength The maximum n-gram length, must be greater than or equal to {@code minLength}.
     * @throws IllegalArgumentException If the given lengths are invalid.
     */
    public CharacterNGramTokenizer(int minLength, int maxLength) {
        this(minLength, maxLength, false);
    }

    /**
     * Creates a tokenizer.
     *
     * @param minLength The minimum n-gram length, must be greater than zero.
     * @param maxLength The maximum n-gram length, must be greater than or equal to {@code minLength}.
     * @param padding {@code true} to surround the text with {@code maxLength - 1} padding characters
     *            on each side before creating n-grams.
     * @throws IllegalArgumentException If the given lengths are invalid.
     */
    public CharacterNGramTokenizer(int minLength, int maxLength, boolean padding) {
        Validate.isTrue(minLength > 0, "minLength must be greater zero");
        // The check compares against minLength, so the message must say so (it used to say "zero").
        Validate.isTrue(maxLength >= minLength, "maxLength must be greater/equal minLength");
        this.minLength = minLength;
        this.maxLength = maxLength;
        this.padding = padding;
    }

    /**
     * Lazily iterates over all character n-grams of the given text.
     *
     * @param text The text to tokenize, not {@code null}.
     * @return An iterator over the n-gram tokens; each token carries the index at which its n-gram
     *         starts (relative to the padded text when padding is enabled).
     * @throws NullPointerException If {@code text} is {@code null}.
     */
    @Override // ws.palladian.core.TextTokenizer
    public Iterator<Token> iterateTokens(String text) {
        Validate.notNull(text, "text must not be null");
        final String paddedText = this.padding ? createPadding(text) : text;
        return new AbstractIterator2<Token>() { // from class: ws.palladian.extraction.token.CharacterNGramTokenizer.1
            /** Start index of the n-gram which is produced next. */
            private int offset = 0;

            /** Length of the n-gram which is produced next. */
            private int length = CharacterNGramTokenizer.this.minLength;

            // NOTE(review): the decompiler had renamed this method (from "getNext") and widened its
            // visibility (from "protected"); both are restored here so that the method overrides the
            // superclass hook again -- confirm against AbstractIterator2.
            @Override
            protected Token getNext() {
                while (this.offset + CharacterNGramTokenizer.this.minLength <= paddedText.length()) {
                    // Remember where this n-gram starts *before* advancing the cursor; the token
                    // must report its own start index, not the one of the following n-gram.
                    final int start = this.offset;
                    final String nGram = paddedText.substring(start, start + this.length);
                    if (start + this.length == paddedText.length() || this.length == CharacterNGramTokenizer.this.maxLength) {
                        // Longest possible n-gram at this position reached: move on to the next
                        // start position and begin with the shortest length again.
                        this.offset++;
                        this.length = CharacterNGramTokenizer.this.minLength;
                    } else {
                        this.length++;
                    }
                    // NOTE(review): presumably Instance.NO_CATEGORY_DUMMY is the empty string, so that
                    // this skips n-grams which consist of padding characters only -- confirm.
                    if (nGram.replace(CharacterNGramTokenizer.PADDING_CHARACTER, Instance.NO_CATEGORY_DUMMY).length() != 0) {
                        return new ImmutableToken(start, nGram);
                    }
                }
                return (Token) finished();
            }
        };
    }

    /**
     * Surrounds the given text with {@code maxLength - 1} padding characters on each side.
     *
     * @param text The text to pad.
     * @return The padded text.
     */
    private String createPadding(String text) {
        final String pad = StringUtils.repeat(PADDING_CHARACTER, this.maxLength - 1);
        return pad + text + pad;
    }

    @Override
    public String toString() {
        return "CharacterNGramTokenizer [minLength=" + this.minLength + ", maxLength=" + this.maxLength + ", padding=" + this.padding + "]";
    }
}
