package edu.umass.cs.mallet.base.extract;

import edu.umass.cs.mallet.base.types.Label;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.Sequence;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/base/extract/BIOTokenizationFilter.class */
/**
 * A {@link TokenizationFilter} that turns a per-token tag sequence into labeled spans,
 * treating tags that start with {@code "B-"} or {@code "I-"} as position-prefixed
 * variants of the tag name that follows the prefix.
 *
 * <p>Consecutive tokens are merged into a single span for as long as the following tag
 * does not start with {@code "B-"} and names the same tag as the first token of the run.
 * Any gap between the end of one span and the start of the next is emitted as an extra
 * span carrying the caller-supplied background label.
 */
public class BIOTokenizationFilter implements TokenizationFilter, Serializable {
    private static final long serialVersionUID = -8726127297313150023L;
    private static final int CURRENT_SERIAL_VERSION = 1;

    /** Prefix of a tag that always opens a new span. */
    private static final String BEGIN_PREFIX = "B-";
    /** Prefix of a tag that may continue the span opened before it. */
    private static final String INSIDE_PREFIX = "I-";
    /** Number of leading characters removed to strip either prefix. */
    private static final int PREFIX_LENGTH = 2;

    /**
     * Builds the labeled spans for one tokenized document.
     *
     * @param labelAlphabet alphabet used to look up a {@link Label} for each tag's string form
     * @param obj           passed unchanged to the {@link LabeledSpans} constructor
     * @param label         label attached to gap spans; a span whose (trimmed) label is this
     *                      same instance is added with its boolean flag set to {@code true}
     *                      (presumably the "is background" flag - confirm against LabeledSpan)
     * @param tokenization  tokens from which sub-spans are cut
     * @param sequence      one tag per token; each element is converted with {@code toString()}
     * @return a new {@link LabeledSpans} holding the spans in sequence order
     */
    @Override
    public LabeledSpans constructLabeledSpans(LabelAlphabet labelAlphabet, Object obj, Label label, Tokenization tokenization, Sequence sequence) {
        LabeledSpans result = new LabeledSpans(obj);
        addSpansFromTags(result, tokenization, sequence, labelAlphabet, label);
        return result;
    }

    /**
     * Walks the tag sequence, grouping runs of compatible tags into spans and appending
     * them (plus any background gap spans) to {@code target}.
     */
    private void addSpansFromTags(LabeledSpans target, Tokenization tokens, Sequence tags, LabelAlphabet alphabet, Label backgroundLabel) {
        int previousEnd = 0;
        int start = 0;
        while (start < tags.size()) {
            Label runLabel = alphabet.lookupLabel(tags.get(start).toString());

            // Extend the run until the sequence ends, a "B-" tag appears, or the tag name changes.
            int end = start + 1;
            while (end < tags.size()) {
                Label candidate = alphabet.lookupLabel(tags.get(end).toString());
                if (isBeginTag(candidate) || !tagsMatch(runLabel, candidate)) {
                    break;
                }
                end++;
            }

            Span span = createSpan(tokens, start, end);
            // The gap bookkeeping below needs StringSpan's start/end indices.
            StringSpan stringSpan = (StringSpan) span;
            addBackgroundIfNecessary(target, stringSpan, previousEnd, backgroundLabel);
            previousEnd = stringSpan.getEndIdx();

            Label spanLabel = hasPositionPrefix(runLabel) ? trimTag(alphabet, runLabel) : runLabel;
            // Identity comparison, exactly as before: the flag is true only for the same Label instance.
            boolean sameAsBackground = spanLabel == backgroundLabel;
            target.add(new LabeledSpan(span, spanLabel, sameAsBackground));

            start = end;
        }
    }

    /**
     * Cuts the span covering tokens {@code i} to {@code i2} by delegating to
     * {@link Tokenization#subspan}. Subclasses may override to build a different span;
     * note that the caller casts the result to {@link StringSpan}.
     */
    protected Span createSpan(Tokenization tokenization, int i, int i2) {
        return tokenization.subspan(i, i2);
    }

    /** Looks up the label whose name is {@code tag}'s entry without its two-character prefix. */
    private Label trimTag(LabelAlphabet alphabet, Label tag) {
        String name = entryOf(tag);
        return alphabet.lookupLabel(name.substring(PREFIX_LENGTH));
    }

    /**
     * Tells whether {@code next} names the same tag as {@code first}. A "B-" or "I-" prefix is
     * ignored on {@code first}; only an "I-" prefix is ignored on {@code next}.
     */
    private boolean tagsMatch(Label first, Label next) {
        String firstName = entryOf(first);
        String nextName = entryOf(next);
        if (hasPositionPrefix(first)) {
            firstName = firstName.substring(PREFIX_LENGTH);
        }
        if (isInsideTag(next)) {
            nextName = nextName.substring(PREFIX_LENGTH);
        }
        return firstName.equals(nextName);
    }

    /** True when the tag's entry starts with "B-". */
    private boolean isBeginTag(Label tag) {
        return entryOf(tag).startsWith(BEGIN_PREFIX);
    }

    /** True when the tag's entry starts with "I-". */
    private boolean isInsideTag(Label tag) {
        return entryOf(tag).startsWith(INSIDE_PREFIX);
    }

    /** True when the tag's entry starts with either "B-" or "I-". */
    private boolean hasPositionPrefix(Label tag) {
        return isBeginTag(tag) || isInsideTag(tag);
    }

    /** Returns the label's entry, which this filter requires to be a String. */
    private static String entryOf(Label tag) {
        return (String) tag.getEntry();
    }

    /**
     * Adds a span labeled {@code backgroundLabel} covering the document range between
     * {@code previousEnd} and the start of {@code next}, if that range is non-empty.
     */
    private void addBackgroundIfNecessary(LabeledSpans target, StringSpan next, int previousEnd, Label backgroundLabel) {
        int nextStart = next.getStartIdx();
        if (previousEnd >= nextStart) {
            return;
        }
        StringSpan gap = new StringSpan((CharSequence) next.getDocument(), previousEnd, nextStart);
        target.add(new LabeledSpan(gap, backgroundLabel, true));
    }

    /** Writes the default serialized form followed by the serial version number. */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
        out.writeInt(CURRENT_SERIAL_VERSION);
    }

    /** Reads the default serialized form, then consumes the version number written after it. */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        // The version value is read to keep the stream aligned; it is not otherwise used.
        in.readInt();
    }
}
