package edu.umass.cs.mallet.base.extract;

import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.Sequence;
import java.io.Serializable;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/base/extract/DefaultTokenizationFilter.class */
/**
 * Default {@link TokenizationFilter} that converts a per-token tag sequence into
 * a {@link LabeledSpans} collection.
 *
 * <p>Consecutive positions of the tag sequence that resolve to the same
 * {@link Label} instance are merged into one span. Whenever the character start
 * of such a span lies beyond the end of the previous one (or beyond offset 0 for
 * the first span), an extra span covering the gap is emitted first, carrying the
 * caller-supplied label.
 */
public class DefaultTokenizationFilter implements TokenizationFilter, Serializable {
    /**
     * Builds the labeled spans for one tokenized document.
     *
     * @param labelAlphabet alphabet used to resolve each tag's string form to a {@link Label}
     * @param obj           value handed to the {@link LabeledSpans} constructor
     * @param label         label given to gap spans; a run whose label is this same
     *                      instance has {@code true} passed as the third
     *                      {@link LabeledSpan} constructor argument
     * @param tokenization  tokenization from which token-index sub-spans are taken
     * @param sequence      tags, one per position, read via {@code get(i).toString()}
     * @return a new {@link LabeledSpans} holding the run spans and any gap spans
     */
    @Override
    public LabeledSpans constructLabeledSpans(LabelAlphabet labelAlphabet, Object obj, Label label, Tokenization tokenization, Sequence sequence) {
        LabeledSpans result = new LabeledSpans(obj);
        addSpansFromTags(result, tokenization, sequence, labelAlphabet, label);
        return result;
    }

    /**
     * Walks the tag sequence, emitting one span per maximal run of identical
     * labels, preceded by a gap span where the document offsets are not contiguous.
     */
    private void addSpansFromTags(LabeledSpans spans, Tokenization tokens, Sequence tags, LabelAlphabet alphabet, Label backgroundLabel) {
        // Character offset at which the previously emitted run ended.
        int previousEnd = 0;
        int cursor = 0;
        while (cursor < tags.size()) {
            final Label runLabel = alphabet.lookupLabel(tags.get(cursor).toString());
            final int runStart = cursor;

            // Advance past every position whose label is the very same instance.
            for (; cursor < tags.size(); cursor++) {
                Label candidate = alphabet.lookupLabel(tags.get(cursor).toString());
                if (runLabel != candidate) {
                    break;
                }
            }

            // Token range [runStart, cursor) forms the run.
            Span runSpan = tokens.subspan(runStart, cursor);
            StringSpan runText = (StringSpan) runSpan;

            addBackgroundIfNecessary(spans, runText, previousEnd, backgroundLabel);
            previousEnd = runText.getEndIdx();

            boolean isBackgroundRun = runLabel == backgroundLabel;
            spans.add(new LabeledSpan(runSpan, runLabel, isBackgroundRun));
        }
    }

    /**
     * Emits a span over {@code [previousEnd, start of next)} in the document of
     * {@code next}, labeled with {@code backgroundLabel}, when that range is non-empty.
     */
    private void addBackgroundIfNecessary(LabeledSpans spans, StringSpan next, int previousEnd, Label backgroundLabel) {
        int nextStart = next.getStartIdx();
        if (previousEnd >= nextStart) {
            // No gap between the previous span and this one.
            return;
        }
        CharSequence document = (CharSequence) next.getDocument();
        StringSpan gap = new StringSpan(document, previousEnd, nextStart);
        spans.add(new LabeledSpan(gap, backgroundLabel, true));
    }
}
