package pl.edu.icm.coansys.commons.hbase;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* loaded from: input_file:WEB-INF/lib/commons-1.8-SNAPSHOT.jar:pl/edu/icm/coansys/commons/hbase/SequenceFileKeysSamplerMR.class */
public class SequenceFileKeysSamplerMR implements Tool {
    /** Hadoop configuration stored by {@link #setConf} and handed to the job built in this tool. */
    private Configuration conf;
    /** Configuration key read by the reducer: upper bound used when picking the final samples. */
    private static final String SAMPLE_SAMPLES_TOTAL_COUNT = "sampler.samples.region.count";
    /** Default value (20) for {@link #SAMPLE_SAMPLES_TOTAL_COUNT}. */
    private static final int SAMPLE_SAMPLES_TOTAL_COUNT_DV = 20;
    /** Configuration key read by the mapper: maximum number of keys forwarded per mapper run. */
    private static final String SAMPLE_SAMPLES_PER_SPLIT = "sampler.samples.per.split";
    /** Default value (100) for {@link #SAMPLE_SAMPLES_PER_SPLIT}. */
    private static final int SAMPLE_SAMPLES_PER_SPLIT_DV = 100;
    /** Input path and output path used by {@link #main} when it is started without any arguments. */
    private static final String[] DEFAULT_ARGS = {"/home/akawa/bwndata/sf/", "output-keys"};

    /* loaded from: input_file:WEB-INF/lib/commons-1.8-SNAPSHOT.jar:pl/edu/icm/coansys/commons/hbase/SequenceFileKeysSamplerMR$Map.class */
    public static class Map extends Mapper<BytesWritable, BytesWritable, BooleanWritable, BytesWritable> {
        private int count = 0;
        private int limit = 100;
        private static final BooleanWritable TRUE = new BooleanWritable(true);

        protected void setup(Mapper<BytesWritable, BytesWritable, BooleanWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            this.limit = context.getConfiguration().getInt(SequenceFileKeysSamplerMR.SAMPLE_SAMPLES_PER_SPLIT, 100);
        }

        public void run(Mapper<BytesWritable, BytesWritable, BooleanWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            setup(context);
            while (this.count < this.limit && context.nextKeyValue()) {
                map((BytesWritable) context.getCurrentKey(), (BytesWritable) context.getCurrentValue(), context);
                this.count++;
            }
            cleanup(context);
        }

        public void map(BytesWritable bytesWritable, BytesWritable bytesWritable2, Mapper<BytesWritable, BytesWritable, BooleanWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            context.write(TRUE, bytesWritable);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((BytesWritable) obj, (BytesWritable) obj2, (Mapper<BytesWritable, BytesWritable, BooleanWritable, BytesWritable>.Context) context);
        }
    }

    /* loaded from: input_file:WEB-INF/lib/commons-1.8-SNAPSHOT.jar:pl/edu/icm/coansys/commons/hbase/SequenceFileKeysSamplerMR$Reduce.class */
    public static class Reduce extends Reducer<BooleanWritable, BytesWritable, Text, NullWritable> {
        private int samplesLimit = 20;
        private Text rangeKey = new Text();
        private static final NullWritable NULL = NullWritable.get();

        protected void setup(Reducer<BooleanWritable, BytesWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {
            this.samplesLimit = context.getConfiguration().getInt(SequenceFileKeysSamplerMR.SAMPLE_SAMPLES_TOTAL_COUNT, 20);
        }

        public void reduce(BooleanWritable booleanWritable, Iterable<BytesWritable> iterable, Reducer<BooleanWritable, BytesWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {
            List<String> stringList = getStringList(iterable);
            Collections.sort(stringList);
            Iterator<String> it = getIntervalSamples(stringList, stringList.size() / this.samplesLimit, this.samplesLimit).iterator();
            while (it.hasNext()) {
                this.rangeKey.set(it.next());
                context.write(this.rangeKey, NULL);
            }
        }

        public List<String> getStringList(Iterable<BytesWritable> iterable) {
            ArrayList arrayList = new ArrayList();
            Iterator<BytesWritable> it = iterable.iterator();
            while (it.hasNext()) {
                arrayList.add(Bytes.toString(it.next().copyBytes()));
            }
            return arrayList;
        }

        public List<String> getIntervalSamples(List<String> list, float f, int i) {
            ArrayList arrayList = new ArrayList();
            int i2 = -1;
            for (int i3 = 1; i3 < Math.min(i, list.size()); i3++) {
                int round = Math.round(f * i3);
                while (i2 >= round && list.get(i2).equals(list.get(round))) {
                    round++;
                }
                arrayList.add(list.get(round));
                i2 = round;
            }
            return arrayList;
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((BooleanWritable) obj, (Iterable<BytesWritable>) iterable, (Reducer<BooleanWritable, BytesWritable, Text, NullWritable>.Context) context);
        }
    }

    /**
     * Stores the configuration this tool will run with.
     *
     * @param configuration configuration to keep
     */
    @Override // org.apache.hadoop.conf.Configurable
    public void setConf(Configuration configuration) {
        conf = configuration;
    }

    /**
     * @return the configuration previously passed to {@link #setConf}, or {@code null} if none was set
     */
    @Override // org.apache.hadoop.conf.Configurable
    public Configuration getConf() {
        return conf;
    }

    /**
     * Tool entry point: expects the input path and the output path as the first two arguments.
     *
     * @param strArr tool arguments
     * @return the result of the sampling job, or {@code -1} when fewer than two arguments are given
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws IOException, ClassNotFoundException, InterruptedException {
        final int given = strArr.length;
        if (given < 2) {
            // Too few arguments: explain the expected invocation and report failure.
            usage("Wrong number of arguments: " + given);
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        final String inputPath = strArr[0];
        final String outputPath = strArr[1];
        return createParitionFile(inputPath, outputPath);
    }

    /**
     * Configures and runs the sampling job: sequence-file input, a single reducer and text output.
     *
     * @param str  input path containing the sequence file(s)
     * @param str2 output path for the selected keys
     * @return {@code 0} when the job completes successfully, {@code 1} otherwise
     */
    private int createParitionFile(String str, String str2) throws IOException, ClassNotFoundException, InterruptedException {
        Path inputPath = new Path(str);
        Path outputPath = new Path(str2);

        Job job = new Job(getConf());
        // Tell Hadoop which jar holds the mapper/reducer classes so the tasks can load them.
        job.setJarByClass(SequenceFileKeysSamplerMR.class);
        // One reducer, so all samples are sorted and selected in a single place.
        job.setNumReduceTasks(1);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(BooleanWritable.class);
        job.setMapOutputValueClass(BytesWritable.class);

        job.setReducerClass(Reduce.class);
        // The reducer emits Text keys; declare the same type here.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        SequenceFileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Propagate job failure to the caller instead of always reporting success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point.
     *
     * <p>Without arguments the built-in default paths are used and the default output
     * directory is deleted from the local file system first. Otherwise the arguments are
     * handed to {@link ToolRunner} unchanged, so generic Hadoop options are parsed as usual
     * and a wrong number of arguments is reported by {@link #run}.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) throws Exception {
        String[] toolArgs;
        if (strArr.length == 0) {
            toolArgs = DEFAULT_ARGS.clone();
            // Remove a previous default output so the job does not fail on an existing directory.
            FileUtils.deleteDirectory(new File(toolArgs[1]));
        } else {
            // Pass everything through: copying only the first two entries turned a single
            // argument into {null, null} and broke generic options such as "-D key=value".
            toolArgs = strArr;
        }
        System.exit(ToolRunner.run(new SequenceFileKeysSamplerMR(), toolArgs));
    }

    /**
     * Prints the given message followed by a short usage description to standard error.
     *
     * @param str message describing what was wrong with the invocation
     */
    private static void usage(String str) {
        final String toolClass = SequenceFileKeysSamplerMR.class.getName();
        final String[] lines = {
            str,
            "Two parameters needed: <sequence-file-input> <output-path>",
            "Example: hadoop jar target/commons-1.0-SNAPSHOT.jar " + toolClass + " bazekon-20120228.sf sf-split",
        };
        for (String line : lines) {
            System.err.println(line);
        }
    }
}
