package edu.emory.mathcs.nlp.bin;

import edu.emory.mathcs.nlp.common.constant.StringConst;
import edu.emory.mathcs.nlp.common.util.BinUtils;
import edu.emory.mathcs.nlp.common.util.FileUtils;
import edu.emory.mathcs.nlp.common.util.IOUtils;
import edu.emory.mathcs.nlp.common.util.Joiner;
import edu.emory.mathcs.nlp.common.util.Language;
import edu.emory.mathcs.nlp.component.template.node.NLPNode;
import edu.emory.mathcs.nlp.tokenization.Tokenizer;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.kohsuke.args4j.Option;

/* loaded from: input_file:edu/emory/mathcs/nlp/bin/Tokenize.class */
/**
 * Command-line driver that tokenizes every matching file under an input path and writes the
 * result next to each input file as {@code <input>.<output extension>}.
 *
 * <p>Files are processed by a fixed-size thread pool; one {@link NLPTask} is submitted per file
 * and all tasks share the single {@link Tokenizer} created in the constructor.
 */
public class Tokenize {
    /** Input format: the whole file is handed to the tokenizer for sentence segmentation. */
    public static final String RAW = "raw";
    /** Input format: each input line is tokenized on its own. Also the default output format. */
    public static final String LINE = "line";
    /** Output format: tokens are joined with new lines instead of spaces. */
    public static final String TSV = "tsv";

    @Option(name = "-i", usage = "input path (required)", required = true, metaVar = "<filepath>")
    private String input_path;

    @Option(name = "-l", usage = "language (default: english)", required = false, metaVar = "<language>")
    private String language = Language.ENGLISH.toString();

    @Option(name = "-ie", usage = "input file extension (default: *)", required = false, metaVar = "<regex>")
    private String input_ext = "*";

    @Option(name = "-oe", usage = "output file extension (default: tok)", required = false, metaVar = "<string>")
    private String output_ext = "tok";

    @Option(name = "-input_format", usage = "format of the input data (raw|line; default: raw)", required = false, metaVar = "<string>")
    private String input_format = RAW;

    @Option(name = "-output_format", usage = "format of the output data (line|tsv; default: line)", required = false, metaVar = "<string>")
    private String output_format = LINE;

    @Option(name = "-threads", usage = "number of threads (default: 2)", required = false, metaVar = "<integer>")
    protected int thread_size = 2;

    /**
     * Unit of work that tokenizes one input file into one output file, dispatching on the
     * enclosing instance's {@code input_format}.
     */
    class NLPTask implements Runnable {
        private final Tokenizer tokenizer;
        private final String input_file;
        private final String output_file;

        /**
         * @param tokenizer tokenizer used to process the file
         * @param str path of the file to read
         * @param str2 path of the file to write
         */
        public NLPTask(Tokenizer tokenizer, String str, String str2) {
            this.tokenizer = tokenizer;
            this.input_file = str;
            this.output_file = str2;
        }

        /**
         * Logs the base name of the input file and tokenizes it according to the configured
         * input format. Exactly one of the two tokenization routines runs per invocation.
         * Failures are printed and do not propagate, so one bad file does not stop the
         * remaining tasks.
         */
        @Override
        public void run() {
            try {
                BinUtils.LOG.info(FileUtils.getBaseName(this.input_file) + "\n");

                switch (Tokenize.this.input_format) {
                    case RAW:
                        Tokenize.this.tokenizeRaw(this.tokenizer, this.input_file, this.output_file);
                        break;
                    case LINE:
                        Tokenize.this.tokenizeLine(this.tokenizer, this.input_file, this.output_file);
                        break;
                    default:
                        // Nothing is written for an unknown format; report it instead of
                        // skipping the file silently.
                        BinUtils.LOG.info("Unsupported input format: " + Tokenize.this.input_format + "\n");
                        break;
                }
            } catch (Exception e) {
                // Best effort: report the failure for this file and let other tasks continue.
                e.printStackTrace();
            }
        }
    }

    /** Creates an instance with default option values and performs no work. */
    public Tokenize() {
    }

    /**
     * Parses the command-line options, creates a tokenizer for the requested language and
     * submits one task per input file to a fixed-size thread pool. The pool is shut down after
     * submission; this constructor does not wait for the submitted tasks to finish.
     *
     * @param strArr command-line arguments
     */
    public Tokenize(String[] strArr) {
        BinUtils.initArgs(strArr, this);
        Tokenizer tokenizer = Tokenizer.create(Language.getType(this.language));
        ExecutorService executor = Executors.newFixedThreadPool(this.thread_size);
        for (String inputFile : FileUtils.getFileList(this.input_path, this.input_ext, false)) {
            String outputFile = inputFile + StringConst.PERIOD + this.output_ext;
            executor.submit(new NLPTask(tokenizer, inputFile, outputFile));
        }
        // Stops accepting new tasks; already-submitted tasks still run to completion.
        executor.shutdown();
    }

    /**
     * Segments the whole input file into sentences and prints one joined sentence per
     * {@code println} call. Both streams are closed even when tokenization or I/O fails.
     *
     * @param tokenizer tokenizer used to segment the input
     * @param str path of the file to read
     * @param str2 path of the file to write
     * @throws IOException if the input or output file cannot be opened, read or closed
     */
    public void tokenizeRaw(Tokenizer tokenizer, String str, String str2) throws IOException {
        String delimiter = tokenDelimiter();
        String terminator = sentenceTerminator();

        try (FileInputStream in = IOUtils.createFileInputStream(str);
             PrintStream out = IOUtils.createBufferedPrintStream(str2)) {
            for (NLPNode[] sentence : tokenizer.segmentize(in)) {
                // The third argument is passed through unchanged from the original call;
                // presumably a begin index that skips the leading node -- TODO confirm.
                out.println(Joiner.join(sentence, delimiter, 1) + terminator);
            }
        }
    }

    /**
     * Tokenizes the input file line by line and prints the joined tokens of each line with one
     * {@code println} call. Both streams are closed even when tokenization or I/O fails.
     *
     * @param tokenizer tokenizer used to tokenize each line
     * @param str path of the file to read
     * @param str2 path of the file to write
     * @throws IOException if the input or output file cannot be opened, read or closed
     */
    public void tokenizeLine(Tokenizer tokenizer, String str, String str2) throws IOException {
        String delimiter = tokenDelimiter();
        String terminator = sentenceTerminator();

        try (BufferedReader reader = IOUtils.createBufferedReader(str);
             PrintStream out = IOUtils.createBufferedPrintStream(str2)) {
            String line;
            while ((line = reader.readLine()) != null) {
                out.println(Joiner.join(tokenizer.tokenize(line), delimiter) + terminator);
            }
        }
    }

    /**
     * Entry point; all work happens in {@link #Tokenize(String[])}.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        new Tokenize(strArr);
    }

    /** Returns the string placed between tokens: a space for "line" output, a new line otherwise. */
    private String tokenDelimiter() {
        return this.output_format.equals(LINE) ? StringConst.SPACE : StringConst.NEW_LINE;
    }

    /** Returns the string appended after each sentence: empty for "line" output, a new line otherwise. */
    private String sentenceTerminator() {
        return this.output_format.equals(LINE) ? "" : StringConst.NEW_LINE;
    }
}
