package edu.umass.cs.mallet.base.classify.tui;

import edu.umass.cs.mallet.base.pipe.CharSequence2TokenSequence;
import edu.umass.cs.mallet.base.pipe.FeatureSequence2AugmentableFeatureVector;
import edu.umass.cs.mallet.base.pipe.Noop;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.Target2Label;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureSequence;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureSequenceWithBigrams;
import edu.umass.cs.mallet.base.pipe.TokenSequenceLowercase;
import edu.umass.cs.mallet.base.pipe.TokenSequenceRemoveNonAlpha;
import edu.umass.cs.mallet.base.pipe.TokenSequenceRemoveStopwords;
import edu.umass.cs.mallet.base.pipe.iterator.CsvIterator;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.util.CharSequenceLexer;
import edu.umass.cs.mallet.base.util.CommandOption;
import edu.umass.cs.mallet.base.util.MalletLogger;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.cli.HelpFormatter;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/base/classify/tui/Csv2Vectors.class */
/**
 * Command-line tool that reads instances from a text source, one per line, and writes
 * them as a serialized {@link InstanceList}.
 *
 * <p>Each input line is matched against the {@code --line-regex} pattern; the
 * {@code --data}, {@code --label} and {@code --name} options give the regex group
 * indices handed to {@link CsvIterator}. A file name of {@code -} for {@code --input}
 * or {@code --output} selects standard input or standard output respectively.
 *
 * <p>When {@code --use-pipe-from} is given, the pipe of that previously saved instance
 * list is reused instead of building a new one, and that instance list is written back
 * to its file after the new instances have been processed.
 */
public class Csv2Vectors {
    /** File-name argument that selects stdin/stdout instead of a real file. */
    private static final String STDIO_MARKER = "-";

    private static final Logger logger = MalletLogger.getLogger(Csv2Vectors.class.getName());

    // The options are created in declaration order, which is the same order the
    // original static initializer used.
    static CommandOption.File inputFile = new CommandOption.File(
            Csv2Vectors.class, "input", "FILE", true, null,
            "The file containing data to be classified, one instance per line", null);

    static CommandOption.File outputFile = new CommandOption.File(
            Csv2Vectors.class, "output", "FILE", true, new File("text.vectors"),
            "Write the instance list to this file; Using - indicates stdout.", null);

    static CommandOption.String lineRegex = new CommandOption.String(
            Csv2Vectors.class, "line-regex", "REGEX", true, "^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$",
            "Regular expression containing regex-groups for label, name and data.", null);

    static CommandOption.Integer labelOption = new CommandOption.Integer(
            Csv2Vectors.class, "label", "INTEGER", true, 2,
            "The index of the group containing the label string.", null);

    static CommandOption.Integer nameOption = new CommandOption.Integer(
            Csv2Vectors.class, "name", "INTEGER", true, 1,
            "The index of the group containing the instance name.", null);

    static CommandOption.Integer dataOption = new CommandOption.Integer(
            Csv2Vectors.class, "data", "INTEGER", true, 3,
            "The index of the group containing the data.", null);

    static CommandOption.File usePipeFromVectorsFile = new CommandOption.File(
            Csv2Vectors.class, "use-pipe-from", "FILE", true, new File("text.vectors"),
            "Use the pipe and alphabets from a previously created vectors file. Allows the creation, "
                    + "for example, of a test set of vectors that are compatible with a previously "
                    + "created set of training vectors", null);

    static CommandOption.Boolean keepSequence = new CommandOption.Boolean(
            Csv2Vectors.class, "keep-sequence", "[TRUE|FALSE]", false, false,
            "If true, final data will be a FeatureSequence rather than a FeatureVector.", null);

    static CommandOption.Boolean keepSequenceBigrams = new CommandOption.Boolean(
            Csv2Vectors.class, "keep-sequence-bigrams", "[TRUE|FALSE]", false, false,
            "If true, final data will be a FeatureSequenceWithBigrams rather than a FeatureVector.", null);

    static CommandOption.Boolean removeStopWords = new CommandOption.Boolean(
            Csv2Vectors.class, "remove-stopwords", "[TRUE|FALSE]", false, false,
            "If true, remove common \"stop words\" from the text.", null);

    static CommandOption.Boolean preserveCase = new CommandOption.Boolean(
            Csv2Vectors.class, "preserve-case", "[TRUE|FALSE]", false, false,
            "If true, do not force all strings to lowercase.", null);

    /**
     * Cache slot that old compilers generated for the {@code Csv2Vectors.class} literal.
     * Nothing in this class reads it any more; it is kept (and pre-populated) only so the
     * set of package-visible members is unchanged.
     */
    static Class class$edu$umass$cs$mallet$base$classify$tui$Csv2Vectors = Csv2Vectors.class;

    /**
     * Runs the conversion.
     *
     * <p>Prints the usage text and exits with status -1 when called without arguments,
     * and prints an error and exits with status -1 when no input file was supplied.
     *
     * @param strArr the command-line arguments, parsed by {@link CommandOption#process}
     * @throws FileNotFoundException if the input file cannot be opened for reading or an
     *         output file cannot be opened for writing
     * @throws IOException if reading the input or writing an instance list fails
     */
    public static void main(String[] strArr) throws FileNotFoundException, IOException {
        CommandOption.setSummary(Csv2Vectors.class,
                "A tool for creating instance lists of feature vectors from comma-separated-values");
        CommandOption.process(Csv2Vectors.class, strArr);

        if (strArr.length == 0) {
            CommandOption.getList(Csv2Vectors.class).printUsage(false);
            System.exit(-1);
        }
        // The option object itself always exists; what can be missing is its value
        // (the option's default is null), so that is what has to be tested.
        if (inputFile.value == null) {
            System.err.println("You must include `--input FILE ...' in order to specify a file containing the instances, one per line.");
            System.exit(-1);
        }

        // Either borrow the pipe of a previously saved instance list or build a new one.
        InstanceList pipeSource = null;
        Pipe pipe;
        if (usePipeFromVectorsFile.wasInvoked()) {
            pipeSource = InstanceList.load(usePipeFromVectorsFile.value);
            pipe = pipeSource.getPipe();
        } else {
            pipe = buildDefaultPipe();
        }

        InstanceList instances = new InstanceList(pipe);
        // Compile the pattern before opening the input so a bad regex cannot leak a reader.
        Pattern linePattern = Pattern.compile(lineRegex.value);
        boolean fromStdin = isStdio(inputFile.value);
        InputStreamReader reader =
                fromStdin ? new InputStreamReader(System.in) : new FileReader(inputFile.value);
        try {
            instances.add(new CsvIterator(
                    reader, linePattern, dataOption.value, labelOption.value, nameOption.value));
        } finally {
            // Only a reader we opened on a file is ours to close; stdin is left alone,
            // as in the original code.
            if (!fromStdin) {
                reader.close();
            }
        }

        ObjectOutputStream sink = isStdio(outputFile.value)
                ? new ObjectOutputStream(System.out)
                : new ObjectOutputStream(new FileOutputStream(outputFile.value));
        writeAndClose(sink, instances);

        if (pipeSource != null) {
            // NOTE(review): if --output was "-", System.out has already been closed by the
            // write above and this message is silently dropped (unchanged behavior).
            System.out.println(" output usepipe ilist pipe instance id ="
                    + pipeSource.getPipe().getInstanceId());
            // Re-save the instance list the pipe came from; presumably because pushing new
            // data through the shared pipe can change its state -- confirm with InstanceList.
            writeAndClose(
                    new ObjectOutputStream(new FileOutputStream(usePipeFromVectorsFile.value)),
                    pipeSource);
        }
    }

    /** Builds the seven-stage pipe selected by the boolean command-line options. */
    private static Pipe buildDefaultPipe() {
        boolean bigrams = keepSequenceBigrams.value;
        Pipe[] stages = new Pipe[7];
        stages[0] = new Target2Label();
        stages[1] = bigrams
                ? new CharSequence2TokenSequence(CharSequenceLexer.LEX_NONWHITESPACE_CLASSES)
                : new CharSequence2TokenSequence();
        stages[2] = preserveCase.value ? new Noop() : new TokenSequenceLowercase();
        stages[3] = bigrams ? new TokenSequenceRemoveNonAlpha(true) : new Noop();
        stages[4] = removeStopWords.value
                ? new TokenSequenceRemoveStopwords(false, bigrams)
                : new Noop();
        stages[5] = bigrams
                ? new TokenSequence2FeatureSequenceWithBigrams()
                : new TokenSequence2FeatureSequence();
        // Sequences are only collapsed into a feature vector when neither keep-* flag is set.
        stages[6] = (keepSequence.value || bigrams)
                ? new Noop()
                : new FeatureSequence2AugmentableFeatureVector();
        return new SerialPipes(stages);
    }

    /** Returns true when the given file argument is the "-" stdin/stdout marker. */
    private static boolean isStdio(File file) {
        return STDIO_MARKER.equals(file.toString());
    }

    /** Serializes {@code payload} to {@code sink} and closes the stream even if writing fails. */
    private static void writeAndClose(ObjectOutputStream sink, Object payload) throws IOException {
        try {
            sink.writeObject(payload);
        } finally {
            sink.close();
        }
    }

    /**
     * Looks up a class by name, converting a failed lookup into a
     * {@link NoClassDefFoundError} that carries the original exception as its cause.
     * Retained from the compiler-generated class-literal support code.
     *
     * @param str the fully qualified class name
     * @return the class returned by {@link Class#forName(String)}
     * @throws NoClassDefFoundError if the class cannot be found
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, so the error must be thrown through its own
            // variable rather than as the result of the call chain.
            NoClassDefFoundError error = new NoClassDefFoundError();
            error.initCause(e);
            throw error;
        }
    }
}
