package org.apache.mahout.text;

import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.FileLineIterable;
import org.apache.mahout.utils.clustering.ClusterDumper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromDirectory.class */
public final class SequenceFilesFromDirectory {
    private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectory.class);

    /* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromDirectory$ChunkedWriter.class */
    /**
     * Writes key/value text pairs into a numbered series of sequence files
     * ({@code <outputDir>/chunk-0}, {@code chunk-1}, ...), starting a new file once the
     * bytes written to the current one exceed the configured chunk size.
     *
     * <p>The file system is obtained from a default {@link Configuration}.
     */
    public static class ChunkedWriter implements Closeable {
        /** Upper bound on the chunk size in megabytes; 1984 MB expressed in bytes still fits in an int. */
        private static final int MAX_CHUNK_SIZE_MB = 1984;

        private final int maxChunkSizeInBytes;
        private final String outputDir;
        private SequenceFile.Writer writer;
        private int currentChunkID;
        // Kept as a long so that adding a large record to an almost-full chunk cannot wrap negative
        // and silently disable the rollover check.
        private long currentChunkSize;
        private final Configuration conf = new Configuration();
        private final FileSystem fs;

        /**
         * Opens the first chunk ({@code chunk-0}) for writing.
         *
         * @param i   requested chunk size in megabytes; values above 1984 are capped at 1984
         * @param str output directory under which the chunk files are created
         * @throws IOException if the file system or the first chunk file cannot be opened
         */
        public ChunkedWriter(int i, String str) throws IOException {
            this.maxChunkSizeInBytes = Math.min(i, MAX_CHUNK_SIZE_MB) * 1024 * 1024;
            this.outputDir = str;
            this.fs = FileSystem.get(this.conf);
            this.currentChunkID = 0;
            this.writer = new SequenceFile.Writer(this.fs, this.conf, getPath(this.currentChunkID), Text.class, Text.class);
        }

        /** Returns the path of the chunk file with the given id. */
        private Path getPath(int i) {
            return new Path(this.outputDir + "/chunk-" + i);
        }

        /**
         * Appends one record, first rolling over to a new chunk file if the current chunk has
         * already exceeded the size limit.
         *
         * @param str  record key
         * @param str2 record value
         * @throws IOException if closing the old chunk, opening the new one, or appending fails
         */
        public void write(String str, String str2) throws IOException {
            if (this.currentChunkSize > this.maxChunkSizeInBytes) {
                this.writer.close();
                // Advance BEFORE computing the path: the constructor already created chunk-0, so
                // reusing the current id here would reopen (and overwrite) the chunk just closed.
                this.currentChunkID++;
                this.writer = new SequenceFile.Writer(this.fs, this.conf, getPath(this.currentChunkID), Text.class, Text.class);
                this.currentChunkSize = 0;
            }
            Text key = new Text(str);
            Text value = new Text(str2);
            // getLength() is the number of valid bytes; getBytes() exposes the backing array,
            // whose length may be larger than the encoded data.
            this.currentChunkSize += (long) key.getLength() + value.getLength();
            this.writer.append(key, value);
        }

        /**
         * Closes the chunk file that is currently open.
         *
         * @throws IOException if the underlying writer fails to close
         */
        @Override // java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            this.writer.close();
        }
    }

    /* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromDirectory$PrefixAdditionFilter.class */
    /**
     * A {@link FileFilter} used purely for its side effects while walking a directory tree via
     * {@link File#listFiles(FileFilter)}: every regular file visited is read in full and written
     * to the {@link ChunkedWriter} under the key {@code prefix + File.separator + fileName};
     * every directory visited is traversed recursively with its name appended to the prefix.
     * {@link #accept(File)} always returns {@code false}, so the listings themselves are empty.
     */
    public class PrefixAdditionFilter implements FileFilter {
        private final String prefix;
        private final ChunkedWriter writer;
        private final Charset charset;

        /**
         * @param str           key prefix for entries directly under the directory being listed
         * @param chunkedWriter destination for the key/content records
         * @param charset       character encoding used to read the files
         */
        public PrefixAdditionFilter(String str, ChunkedWriter chunkedWriter, Charset charset) {
            this.prefix = str;
            this.writer = chunkedWriter;
            this.charset = charset;
        }

        /**
         * Processes one directory entry as described on the class.
         *
         * @param file the entry being visited
         * @return always {@code false}
         * @throws IllegalStateException wrapping any {@link IOException} other than
         *         {@link FileNotFoundException} raised while reading or writing
         */
        @Override // java.io.FileFilter
        public boolean accept(File file) {
            String key = this.prefix + File.separator + file.getName();
            if (file.isDirectory()) {
                // Recurse: the nested filter does the work, the returned array is irrelevant.
                file.listFiles(new PrefixAdditionFilter(key, this.writer, this.charset));
                return false;
            }
            try {
                StringBuilder contents = new StringBuilder();
                for (String line : new FileLineIterable(file, this.charset, false)) {
                    contents.append(line).append('\n');
                }
                this.writer.write(key, contents.toString());
            } catch (FileNotFoundException e) {
                // Best effort: a file that cannot be opened is skipped, but leave a trace of it
                // instead of dropping it silently.
                log.warn("Skipping file that could not be opened: " + file, e);
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
            return false;
        }
    }

    /**
     * Factory for the {@link ChunkedWriter} used by this tool.
     *
     * @param i   chunk size in megabytes, passed through to the writer
     * @param str output directory, passed through to the writer
     * @return a newly constructed writer
     * @throws IOException if the writer cannot be created
     */
    private static ChunkedWriter createNewChunkedWriter(int i, String str) throws IOException {
        ChunkedWriter chunkedWriter = new ChunkedWriter(i, str);
        return chunkedWriter;
    }

    /**
     * Walks the entries of {@code file} and writes each regular file found (recursively) as a
     * key/content record into chunked sequence files under {@code str}.
     *
     * @param file    input directory whose entries are traversed
     * @param str     output directory for the chunk files
     * @param str2    prefix prepended to every record key
     * @param i       chunk size in megabytes
     * @param charset character encoding used to read the input files
     * @throws IOException if the writer cannot be created or closed
     */
    public void createSequenceFiles(File file, String str, String str2, int i, Charset charset) throws IOException {
        ChunkedWriter chunkedWriter = createNewChunkedWriter(i, str);
        try {
            // listFiles is used only to drive the filter over the tree; its result is ignored.
            file.listFiles(new PrefixAdditionFilter(str2, chunkedWriter, charset));
        } finally {
            // The filter can throw IllegalStateException mid-traversal; always release the
            // currently open chunk file.
            chunkedWriter.close();
        }
    }

    /**
     * Command-line entry point. Parses the options below and runs
     * {@link #createSequenceFiles(File, String, String, int, Charset)}:
     * input directory (required), output directory (required), charset (required),
     * chunkSize in megabytes (optional, 64 when absent), keyPrefix (optional, empty when absent)
     * and help. On an option parsing error the exception is logged and the help text is printed.
     *
     * @param strArr raw command-line arguments
     * @throws Exception on any failure other than an {@link OptionException}
     */
    public static void main(String[] strArr) throws Exception {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();

        DefaultOption inputOpt = optionBuilder
            .withLongName("input")
            .withRequired(true)
            .withArgument(argBuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input dir containing the documents")
            .withShortName("i")
            .create();

        DefaultOption outputOpt = optionBuilder
            .withLongName(ClusterDumper.OUTPUT_OPTION)
            .withRequired(true)
            .withArgument(argBuilder.withName(ClusterDumper.OUTPUT_OPTION).withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory")
            .withShortName("o")
            .create();

        DefaultOption chunkSizeOpt = optionBuilder
            .withLongName("chunkSize")
            .withArgument(argBuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in MegaBytes. Defaults to 64")
            .withShortName("chunk")
            .create();

        DefaultOption keyPrefixOpt = optionBuilder
            .withLongName("keyPrefix")
            .withArgument(argBuilder.withName("keyPrefix").withMinimum(1).withMaximum(1).create())
            .withDescription("The prefix to be prepended to the key")
            .withShortName("prefix")
            .create();

        DefaultOption charsetOpt = optionBuilder
            .withLongName("charset")
            .withRequired(true)
            .withArgument(argBuilder.withName("charset").withMinimum(1).withMaximum(1).create())
            .withDescription("The name of the character encoding of the input files")
            .withShortName("c")
            .create();

        DefaultOption helpOpt = optionBuilder
            .withLongName("help")
            .withDescription("Print out help")
            .withShortName("h")
            .create();

        Group options = groupBuilder
            .withName("Options")
            .withOption(keyPrefixOpt)
            .withOption(chunkSizeOpt)
            .withOption(charsetOpt)
            .withOption(outputOpt)
            .withOption(helpOpt)
            .withOption(inputOpt)
            .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(options);
            parser.setHelpOption(helpOpt);
            CommandLine cmdLine = parser.parse(strArr);

            // Help requested: show usage and do nothing else.
            if (cmdLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(options);
                return;
            }

            File inputDir = new File((String) cmdLine.getValue(inputOpt));
            String outputDir = (String) cmdLine.getValue(outputOpt);

            int chunkSizeMb = cmdLine.hasOption(chunkSizeOpt)
                ? Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt))
                : 64;

            String keyPrefix = "";
            if (cmdLine.hasOption(keyPrefixOpt)) {
                keyPrefix = (String) cmdLine.getValue(keyPrefixOpt);
            }

            Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));

            SequenceFilesFromDirectory tool = new SequenceFilesFromDirectory();
            tool.createSequenceFiles(inputDir, outputDir, keyPrefix, chunkSizeMb, charset);
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(options);
        }
    }
}
