package edu.umass.cs.mallet.base.extract;

import edu.umass.cs.mallet.base.fst.CRF4;
import edu.umass.cs.mallet.base.pipe.Noop;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.iterator.InstanceListIterator;
import edu.umass.cs.mallet.base.pipe.iterator.PipeInputIterator;
import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.Sequence;
import edu.umass.cs.mallet.projects.seg_plus_coref.coreference.Citation;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/base/extract/CRFExtractor.class */
public class CRFExtractor implements Extractor {
    /** CRF used to transduce feature sequences; also the source of the input and output alphabets. */
    private CRF4 crf;
    /** Pipe applied to raw input objects; its output data is cast to a Tokenization. */
    private Pipe tokenizationPipe;
    /** Pipe applied to a Tokenization to obtain the Sequence handed to the CRF; initialized from crf.getInputPipe(). */
    private Pipe featurePipe;
    /** Background tag passed through to every DocumentExtraction this extractor builds. */
    private String backgroundTag;
    /** Tokenization filter passed through to every DocumentExtraction this extractor builds. */
    private TokenizationFilter filter;
    /** Serialization format version; equals the trailing int that writeObject appends to the stream. */
    private static final int CURRENT_SERIAL_VERSION = 2;
    /** Class version identifier used by Java serialization. */
    private static final long serialVersionUID = 1;

    /**
     * Creates an extractor for {@code crf4}, delegating to the two-argument
     * constructor with a freshly created {@link Noop} as the tokenization pipe.
     *
     * @param crf4 the CRF used for extraction
     */
    public CRFExtractor(CRF4 crf4) {
        this(crf4, new Noop());
    }

    /**
     * Creates an extractor from a CRF that is deserialized from {@code file},
     * using a freshly created {@link Noop} as the tokenization pipe.
     *
     * @param file file containing a serialized {@link CRF4}
     * @throws IOException if reading the file fails
     * @throws RuntimeException if the serialized class cannot be resolved while loading
     */
    public CRFExtractor(File file) throws IOException {
        this(loadCrf(file), new Noop());
    }

    /**
     * Creates an extractor with an explicit tokenization pipe and a new
     * {@link BIOTokenizationFilter} as the tokenization filter.
     *
     * @param crf4 the CRF used for extraction
     * @param pipe the tokenization pipe applied to raw inputs
     */
    public CRFExtractor(CRF4 crf4, Pipe pipe) {
        this(crf4, pipe, new BIOTokenizationFilter());
    }

    /**
     * Creates an extractor with an explicit tokenization pipe and filter, using
     * {@link Citation#other} as the background tag.
     *
     * @param crf4 the CRF used for extraction
     * @param pipe the tokenization pipe applied to raw inputs
     * @param tokenizationFilter filter handed to each {@link DocumentExtraction}
     */
    public CRFExtractor(CRF4 crf4, Pipe pipe, TokenizationFilter tokenizationFilter) {
        this(crf4, pipe, tokenizationFilter, Citation.other);
    }

    /**
     * Primary constructor: stores every collaborator and takes the feature pipe
     * from the CRF's own input pipe.
     *
     * @param crf4 the CRF used for extraction
     * @param pipe the tokenization pipe applied to raw inputs
     * @param tokenizationFilter filter handed to each {@link DocumentExtraction}
     * @param str background tag handed to each {@link DocumentExtraction}
     */
    public CRFExtractor(CRF4 crf4, Pipe pipe, TokenizationFilter tokenizationFilter, String str) {
        // CRF and the pipe derived from it.
        crf = crf4;
        featurePipe = crf4.getInputPipe();

        // Caller-supplied configuration.
        tokenizationPipe = pipe;
        filter = tokenizationFilter;
        backgroundTag = str;
    }

    /**
     * Deserializes a {@link CRF4} from {@code file} using Java object serialization.
     *
     * <p>The underlying file stream is closed on every exit path, including when
     * the stream header or the object graph cannot be read.
     *
     * <p>NOTE(review): this performs native Java deserialization of whatever the
     * file contains; only pass files from a trusted source.
     *
     * @param file file containing a serialized {@link CRF4}
     * @return the deserialized CRF
     * @throws IOException if the file cannot be opened or read
     * @throws RuntimeException wrapping the {@link ClassNotFoundException} when a
     *         serialized class cannot be resolved
     */
    private static CRF4 loadCrf(File file) throws IOException {
        // Open the file first so it is released even if the ObjectInputStream
        // constructor fails while reading the stream header.
        FileInputStream fileStream = new FileInputStream(file);
        try {
            ObjectInputStream objectStream = new ObjectInputStream(fileStream);
            return (CRF4) objectStream.readObject();
        } catch (ClassNotFoundException e) {
            System.err.println("Internal MALLET error: Could not read CRF from file " + file + "\n" + e);
            e.printStackTrace();
            throw new RuntimeException(e);
        } finally {
            // Closing the file stream releases the only OS resource held by the wrapper.
            fileStream.close();
        }
    }

    /**
     * Dispatches on the runtime type of {@code obj}: a {@link Tokenization} or an
     * {@link InstanceList} is extracted directly; anything else is first run
     * through the tokenization pipe.
     *
     * @param obj the input to extract from
     * @return the resulting extraction
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Extraction extract(Object obj) {
        if (obj instanceof Tokenization) {
            return extract((Tokenization) obj);
        }
        if (obj instanceof InstanceList) {
            return extract((InstanceList) obj);
        }
        // Raw input: tokenize it, then extract from the tokenization.
        return extract(doTokenize(obj));
    }

    /**
     * Wraps {@code obj} in a new {@link Instance}, runs the tokenization pipe on
     * it, and returns the instance's data cast to {@link Tokenization}.
     *
     * @param obj raw input object
     * @return the data of the piped instance
     */
    private Tokenization doTokenize(Object obj) {
        Instance carrier = new Instance(obj, null, null, null);
        tokenizationPipe.pipe(carrier);
        Object tokenized = carrier.getData();
        return (Tokenization) tokenized;
    }

    /**
     * Extracts from a single tokenization: runs it through the feature pipe,
     * transduces the resulting sequence with the CRF, and wraps the output in an
     * {@link Extraction} holding one {@link DocumentExtraction} named
     * {@code "Extraction"} with no target sequence.
     *
     * @param tokenization the tokenized document
     * @return an extraction containing a single document extraction
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Extraction extract(Tokenization tokenization) {
        // Constructing the Instance with a pipe yields the featurized data.
        Instance featurized = new Instance(tokenization, null, null, null, this.featurePipe);
        Sequence input = (Sequence) featurized.getData();
        Sequence predicted = this.crf.transduce(input);

        Extraction result = new Extraction(this, getTargetAlphabet());
        DocumentExtraction document = new DocumentExtraction(
                "Extraction", getTargetAlphabet(), tokenization, predicted, null, this.backgroundTag, this.filter);
        result.addDocumentExtraction(document);
        return result;
    }

    /**
     * Runs the instances from {@code pipeInputIterator} through the tokenization
     * pipe and then through the feature pipe.
     *
     * @param pipeInputIterator source of raw instances
     * @return the instance list produced by the feature pipe
     */
    public InstanceList pipeInstances(PipeInputIterator pipeInputIterator) {
        // Stage 1: tokenization.
        InstanceList tokenized = new InstanceList(this.tokenizationPipe);
        tokenized.add(pipeInputIterator);

        // Stage 2: feature extraction over the tokenized instances.
        InstanceList featurized = new InstanceList(getFeaturePipe());
        featurized.add(new InstanceListIterator(tokenized));
        return featurized;
    }

    /**
     * Extracts from every instance of an already-piped instance list. For each
     * instance the source is taken as the {@link Tokenization}, the name as the
     * document name, the data as the CRF input and the target as the reference
     * sequence.
     *
     * @param instanceList instances whose data are sequences for the CRF
     * @return an extraction with one document extraction per instance
     */
    public Extraction extract(InstanceList instanceList) {
        Extraction result = new Extraction(this, getTargetAlphabet());
        for (int idx = 0; idx < instanceList.size(); idx++) {
            Instance inst = instanceList.getInstance(idx);
            Tokenization tokens = (Tokenization) inst.getSource();
            String docName = inst.getName().toString();
            Sequence features = (Sequence) inst.getData();
            DocumentExtraction document = new DocumentExtraction(
                    docName,
                    getTargetAlphabet(),
                    tokens,
                    this.crf.transduce(features),
                    (Sequence) inst.getTarget(),
                    this.backgroundTag,
                    this.filter);
            result.addDocumentExtraction(document);
        }
        return result;
    }

    /**
     * Extracts from raw instances: pipes them through the tokenization pipe,
     * then the feature pipe, and walks both lists in parallel so each document
     * extraction gets the tokenization from the first list and the name, input
     * sequence and target from the second.
     *
     * @param pipeInputIterator source of raw instances
     * @return an extraction with one document extraction per instance
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Extraction extract(PipeInputIterator pipeInputIterator) {
        Extraction result = new Extraction(this, getTargetAlphabet());

        InstanceList tokenized = new InstanceList(this.tokenizationPipe);
        tokenized.add(pipeInputIterator);
        InstanceList featurized = new InstanceList(getFeaturePipe());
        featurized.add(new InstanceListIterator(tokenized));

        InstanceList.Iterator tokenIter = tokenized.iterator();
        InstanceList.Iterator featureIter = featurized.iterator();
        // The tokenized list drives the loop; the featurized list is advanced in lockstep.
        while (tokenIter.hasNext()) {
            Instance tokenInstance = tokenIter.nextInstance();
            Instance featureInstance = featureIter.nextInstance();

            Tokenization tokens = (Tokenization) tokenInstance.getData();
            String docName = featureInstance.getName().toString();
            Sequence features = (Sequence) featureInstance.getData();
            Sequence target = (Sequence) featureInstance.getTarget();

            DocumentExtraction document = new DocumentExtraction(
                    docName,
                    getTargetAlphabet(),
                    tokens,
                    this.crf.transduce(features),
                    target,
                    this.backgroundTag,
                    this.filter);
            result.addDocumentExtraction(document);
        }
        return result;
    }

    /**
     * @return the tokenization filter handed to each document extraction
     */
    public TokenizationFilter getTokenizationFilter() {
        return filter;
    }

    /**
     * @return the background tag handed to each document extraction
     */
    public String getBackgroundTag() {
        return backgroundTag;
    }

    /**
     * @return the pipe currently used to tokenize raw inputs
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Pipe getTokenizationPipe() {
        return tokenizationPipe;
    }

    /**
     * Replaces the pipe used to tokenize raw inputs.
     *
     * @param pipe the new tokenization pipe
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public void setTokenizationPipe(Pipe pipe) {
        tokenizationPipe = pipe;
    }

    /**
     * @return the pipe currently used to turn tokenizations into CRF input
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Pipe getFeaturePipe() {
        return featurePipe;
    }

    /**
     * Replaces the pipe used to turn tokenizations into CRF input.
     *
     * @param pipe the new feature pipe
     */
    public void setFeaturePipe(Pipe pipe) {
        featurePipe = pipe;
    }

    /**
     * @return the input alphabet reported by the underlying CRF
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public Alphabet getInputAlphabet() {
        return crf.getInputAlphabet();
    }

    /**
     * @return the underlying CRF's output alphabet, cast to {@link LabelAlphabet}
     */
    @Override // edu.umass.cs.mallet.base.extract.Extractor
    public LabelAlphabet getTargetAlphabet() {
        return (LabelAlphabet) crf.getOutputAlphabet();
    }

    /**
     * @return the CRF this extractor wraps
     */
    public CRF4 getCrf() {
        return crf;
    }

    /**
     * Removes the first {@code i} pipes from the feature pipe, which must be a
     * {@link SerialPipes}, and then installs that same (now shortened)
     * {@code SerialPipes} as the tokenization pipe.
     *
     * <p>NOTE(review): the removed pipes are collected into a local list that is
     * never read afterwards, and the remainder (not the removed prefix) becomes
     * the tokenization pipe. This looks unfinished; confirm the intended
     * behaviour before relying on this method.
     *
     * @param i number of leading pipes to remove
     * @throws IllegalArgumentException if the feature pipe is not a {@link SerialPipes}
     */
    public void slicePipes(int i) {
        Pipe current = getFeaturePipe();
        if (!(current instanceof SerialPipes)) {
            throw new IllegalArgumentException("slicePipes: FeaturePipe must be a SerialPipes.");
        }
        SerialPipes serial = (SerialPipes) current;
        ArrayList removed = new ArrayList();
        // Repeatedly peel off the head pipe, i times in total.
        for (int remaining = i; remaining > 0; remaining--) {
            removed.add(serial.getPipe(0));
            serial.removePipe(0);
        }
        setTokenizationPipe(serial);
    }

    /**
     * Custom deserialization hook: reads the default fields followed by the
     * trailing format-version int, then fills in fields that may be missing.
     *
     * @param objectInputStream stream positioned at this object's data
     * @throws IOException if reading from the stream fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        int version = objectInputStream.readInt();

        // Version 0 streams, or any stream without a feature pipe, fall back to the CRF's input pipe.
        boolean needsFeaturePipe = version == 0 || featurePipe == null;
        if (needsFeaturePipe) {
            featurePipe = crf.getInputPipe();
        }

        // Streams older than version 2 get a BIO filter
        // (presumably they predate the filter field; verify against older releases).
        boolean needsDefaultFilter = version < 2;
        if (needsDefaultFilter) {
            filter = new BIOTokenizationFilter();
        }
    }

    /**
     * Custom serialization hook: writes the default fields followed by the
     * format version, which {@code readObject} reads back to decide which
     * fields need defaults.
     *
     * @param objectOutputStream stream to write this object's data to
     * @throws IOException if writing to the stream fails
     */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        objectOutputStream.defaultWriteObject();
        // Use the named constant so the written version cannot drift from the declared one.
        objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
    }

    /**
     * Runs a single object through the feature pipe and returns the resulting
     * instance data cast to {@link Sequence}.
     *
     * @param obj the object to pipe
     * @return the data of the single piped instance
     */
    public Sequence pipeInput(Object obj) {
        InstanceList singleton = new InstanceList(getFeaturePipe());
        singleton.add(obj, null, null, null);
        Instance piped = singleton.getInstance(0);
        return (Sequence) piped.getData();
    }
}
