package pl.edu.icm.coansys.commons.hbase;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* loaded from: input_file:WEB-INF/lib/commons-1.9-CDH-4.3.0-SNAPSHOT.jar:pl/edu/icm/coansys/commons/hbase/SequenceFileKeysSampler.class */
/**
 * Command-line tool that samples the keys of a sequence-file input and writes
 * a partition file for {@link TotalOrderPartitioner}.
 *
 * <p>Arguments: {@code <sequence-file-input> <number-of-regions>}. The second
 * argument is used as the number of reduce tasks of the sampling job.
 *
 * <p>Recognised configuration properties:
 * <ul>
 *   <li>{@code output.keys.file.name} - path of the partition file to write
 *       (default {@code "keys"});</li>
 *   <li>{@code sampler.frequency} - first argument passed to
 *       {@link InputSampler.RandomSampler} (default {@code 0.1});</li>
 *   <li>{@code sampler.num.samples} - second argument passed to
 *       {@link InputSampler.RandomSampler} (default {@code 1000}).</li>
 * </ul>
 */
public class SequenceFileKeysSampler implements Tool {
    private static final String OUTPUT_KEYS_FILE_NAME_KEY = "output.keys.file.name";
    private static final String OUTPUT_KEYS_FILE_NAME_DEFAULT_VALUE = "keys";
    private static final String SAMPLER_FREQUENCY_KEY = "sampler.frequency";
    private static final float SAMPLER_FREQUENCY_DEFAULT_VALUE = 0.1f;
    private static final String SAMPLER_NUM_SAMPLES_KEY = "sampler.num.samples";
    private static final int SAMPLER_NUM_SAMPLES_DEFAULT_VALUE = 1000;

    /** Exit code returned when the command-line arguments are unusable. */
    private static final int EXIT_BAD_ARGUMENTS = -1;

    private Configuration conf;

    /** Stores the configuration that the sampling job will be created from. */
    @Override // org.apache.hadoop.conf.Configurable
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /** Returns the configuration previously passed to {@link #setConf(Configuration)}. */
    @Override // org.apache.hadoop.conf.Configurable
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Validates the arguments and writes the partition file.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the number
     *             of regions (a positive integer); any further arguments are ignored
     * @return {@code 0} on success, {@code -1} when the arguments are missing or invalid
     * @throws IOException            propagated from the Hadoop job / sampler calls
     * @throws ClassNotFoundException propagated from the Hadoop job / sampler calls
     * @throws InterruptedException   propagated from the Hadoop job / sampler calls
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            usage("Wrong number of arguments: " + args.length);
            return EXIT_BAD_ARGUMENTS;
        }

        final int numberOfRegions;
        try {
            numberOfRegions = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            // Report a malformed count the same way as a missing one instead of
            // letting the exception escape with a bare stack trace.
            usage("Number of regions is not an integer: " + args[1]);
            return EXIT_BAD_ARGUMENTS;
        }
        if (numberOfRegions <= 0) {
            usage("Number of regions must be positive: " + numberOfRegions);
            return EXIT_BAD_ARGUMENTS;
        }

        return createPartitionFile(args[0], numberOfRegions);
    }

    /**
     * Configures a job over the given sequence-file input and lets
     * {@link InputSampler} write the partition file.
     *
     * @param inputLocation   path of the sequence-file input to sample
     * @param numberOfRegions number of reduce tasks to configure on the job
     * @return always {@code 0}
     */
    private int createPartitionFile(String inputLocation, int numberOfRegions)
            throws IOException, ClassNotFoundException, InterruptedException {
        Path inputPath = new Path(inputLocation);

        Job job = new Job(getConf());
        job.setNumReduceTasks(numberOfRegions);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(BytesWritable.class);
        SequenceFileInputFormat.addInputPath(job, inputPath);

        // Read the settings from the job's own configuration, which is the one
        // the partitioner and the sampler will see.
        Configuration jobConf = job.getConfiguration();
        float frequency = jobConf.getFloat(SAMPLER_FREQUENCY_KEY, SAMPLER_FREQUENCY_DEFAULT_VALUE);
        int numSamples = jobConf.getInt(SAMPLER_NUM_SAMPLES_KEY, SAMPLER_NUM_SAMPLES_DEFAULT_VALUE);
        String keysFileName = jobConf.get(OUTPUT_KEYS_FILE_NAME_KEY, OUTPUT_KEYS_FILE_NAME_DEFAULT_VALUE);

        // The value type of the input is not known in this class; Object is
        // used only as a placeholder to avoid a raw type (generics are erased).
        InputSampler.Sampler<BytesWritable, Object> sampler =
                new InputSampler.RandomSampler<BytesWritable, Object>(frequency, numSamples);

        TotalOrderPartitioner.setPartitionFile(jobConf, new Path(keysFileName));
        InputSampler.writePartitionFile(job, sampler);
        return 0;
    }

    /** Runs the tool through {@link ToolRunner} and exits with its return code. */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new SequenceFileKeysSampler(), args));
    }

    /**
     * Prints the given problem description followed by a short usage note.
     *
     * @param message description of what was wrong with the arguments
     */
    private static void usage(String message) {
        System.out.println(message);
        System.out.println("Two parameters needed: <sequence-file-input, number-of-regions>");
        System.out.println("Example: hadoop jar target/commons-1.0-SNAPSHOT.jar " + SequenceFileKeysSampler.class.getName() + " bazekon-20120228.sf 20");
    }
}
