package pl.edu.icm.coansys.io.writers.hbase;

import com.google.protobuf.InvalidProtocolBufferException;
import java.io.IOException;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.pig.backend.executionengine.ExecException;
import pl.edu.icm.coansys.importers.constants.HBaseConstant;
import pl.edu.icm.coansys.importers.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/io/writers/hbase/HBaseToDocumentProtoSequenceFile.class */
public class HBaseToDocumentProtoSequenceFile implements Tool {
    // Log4j logger for this class; run() switches it to DEBUG when the "job.logging" setting equals "DEBUG".
    private static Logger logger = Logger.getLogger(HBaseToDocumentProtoSequenceFile.class);
    // Configuration stored by setConf(), returned by getConf() and used by run() to build the job.
    private Configuration conf;

    /* loaded from: input_file:pl/edu/icm/coansys/io/writers/hbase/HBaseToDocumentProtoSequenceFile$Counters.class */
    /**
     * Counters reported through the mapper context while rows are exported.
     * The declaration order is kept stable so ordinals do not change.
     */
    public enum Counters {
        /** Incremented each time a serialized document wrapper is written to the "dproto" output. */
        DPROTO,

        /** Declared but not incremented anywhere in this file. */
        CPROTO,

        /** Incremented each time a raw metadata cell is written to the metadata output. */
        MPROTO,

        /** Incremented when no serialized document wrapper bytes are available for a row. */
        DPROTO_SKIPPED,

        /** Incremented when a row carries no metadata cell. */
        MPROTO_SKIPPED;
    }

    /* loaded from: input_file:pl/edu/icm/coansys/io/writers/hbase/HBaseToDocumentProtoSequenceFile$ResultToProtoBytesConverter.class */
    /**
     * Reads the protobuf cells of a single HBase {@link Result} and assembles them into a
     * {@link DocumentProtos.DocumentWrapper} using a caller-supplied builder.
     *
     * <p>The converter holds mutable state (the current row and builder) and can be re-pointed at a
     * new row with {@link #set}. The builder is never reset here: fields that are not overwritten by
     * a conversion keep whatever value the builder already carried, so callers that reuse one builder
     * across rows are responsible for clearing it.
     */
    public static class ResultToProtoBytesConverter {
        private Result row;
        private DocumentProtos.DocumentWrapper.Builder wrapperBuilder;

        /** Creates an unbound converter; {@link #set} must be called before any other method. */
        public ResultToProtoBytesConverter() {
        }

        /**
         * Creates a converter bound to the given row and builder.
         *
         * @param result  row to read cells from
         * @param builder builder that receives the converted fields
         */
        public ResultToProtoBytesConverter(Result result, DocumentProtos.DocumentWrapper.Builder builder) {
            this.row = result;
            this.wrapperBuilder = builder;
        }

        /**
         * Re-binds this converter to another row and builder.
         *
         * @param result  row to read cells from
         * @param builder builder that receives the converted fields
         */
        public void set(Result result, DocumentProtos.DocumentWrapper.Builder builder) {
            this.row = result;
            this.wrapperBuilder = builder;
        }

        /**
         * @return the row key of the currently bound row
         */
        public byte[] getRowId() {
            return this.row.getRow();
        }

        /**
         * Builds a document wrapper from the row key, metadata cell and media cell of the bound row.
         *
         * @return the wrapper produced by the bound builder
         * @throws ExecException                  declared for callers; not raised by the visible code
         * @throws InvalidProtocolBufferException if a stored cell cannot be parsed
         */
        public DocumentProtos.DocumentWrapper toDocumentWrapper() throws ExecException, InvalidProtocolBufferException {
            byte[] rowKey = getRowId();
            byte[] metadataBytes = getDocumentMetadata();
            byte[] mediaBytes = getDocumentMedia();
            return toDocumentWrapper(rowKey, metadataBytes, mediaBytes);
        }

        /**
         * Populates the bound builder from raw bytes and builds the wrapper.
         *
         * @param bArr  row key bytes; stored as the wrapper's row id via {@code Bytes.toString}
         * @param bArr2 serialized {@code DocumentMetadata}, or {@code null} to leave the field untouched
         * @param bArr3 serialized {@code MediaContainer}, or {@code null} to leave the field untouched
         * @return the wrapper produced by the bound builder
         * @throws ExecException                  declared for callers; not raised by the visible code
         * @throws InvalidProtocolBufferException if either serialized message cannot be parsed
         */
        public DocumentProtos.DocumentWrapper toDocumentWrapper(byte[] bArr, byte[] bArr2, byte[] bArr3) throws ExecException, InvalidProtocolBufferException {
            DocumentProtos.DocumentWrapper.Builder target = this.wrapperBuilder;
            target.setRowId(Bytes.toString(bArr));
            if (null != bArr2) {
                DocumentProtos.DocumentMetadata metadata = DocumentProtos.DocumentMetadata.parseFrom(bArr2);
                target.setDocumentMetadata(metadata);
            }
            if (null != bArr3) {
                DocumentProtos.MediaContainer media = DocumentProtos.MediaContainer.parseFrom(bArr3);
                target.setMediaContainer(media);
            }
            return target.build();
        }

        /**
         * @return the raw bytes of the metadata proto cell of the bound row, as returned by
         *         {@code Result.getValue}
         * @throws ExecException                  declared for callers; not raised by the visible code
         * @throws InvalidProtocolBufferException declared for callers; not raised by the visible code
         */
        public byte[] getDocumentMetadata() throws ExecException, InvalidProtocolBufferException {
            return readCell(HBaseConstant.FAMILY_METADATA, HBaseConstant.FAMILY_METADATA_QUALIFIER_PROTO);
        }

        /**
         * @return the raw bytes of the content proto cell of the bound row, as returned by
         *         {@code Result.getValue}
         * @throws ExecException                  declared for callers; not raised by the visible code
         * @throws InvalidProtocolBufferException declared for callers; not raised by the visible code
         */
        public byte[] getDocumentMedia() throws ExecException, InvalidProtocolBufferException {
            return readCell(HBaseConstant.FAMILY_CONTENT, HBaseConstant.FAMILY_CONTENT_QUALIFIER_PROTO);
        }

        /** Looks up one cell of the bound row by column family and qualifier name. */
        private byte[] readCell(String family, String qualifier) {
            byte[] familyBytes = Bytes.toBytes(family);
            byte[] qualifierBytes = Bytes.toBytes(qualifier);
            return this.row.getValue(familyBytes, qualifierBytes);
        }
    }

    /* loaded from: input_file:pl/edu/icm/coansys/io/writers/hbase/HBaseToDocumentProtoSequenceFile$RowToDocumentProtoMapper.class */
    /**
     * Mapper that exports each scanned HBase row to named sequence-file outputs.
     *
     * <p>For every row it writes the serialized {@code DocumentWrapper} (row id plus whichever of the
     * metadata / media cells are present) to the "dproto" output and, when the row has a metadata
     * cell, the raw metadata bytes to the output named by
     * {@code HBaseConstant.FAMILY_METADATA_QUALIFIER_PROTO}. Both records are keyed by the row key.
     *
     * <p>The writable and builder instances are reused across {@code map} calls; the builder is
     * cleared at the start of every call so that a row without metadata or media does not inherit
     * those fields from the previously processed row.
     */
    public static class RowToDocumentProtoMapper extends TableMapper<BytesWritable, BytesWritable> {
        /** Named output for serialized document wrappers; must match the name registered on the job. */
        private static final String DOCUMENT_OUTPUT = "dproto";

        private final BytesWritable key = new BytesWritable();
        private final BytesWritable documentProto = new BytesWritable();
        private final BytesWritable metadataProto = new BytesWritable();
        private final ResultToProtoBytesConverter converter = new ResultToProtoBytesConverter();
        private final DocumentProtos.DocumentWrapper.Builder dw = DocumentProtos.DocumentWrapper.newBuilder();
        private MultipleOutputs<BytesWritable, BytesWritable> mos = null;

        /**
         * Creates the {@link MultipleOutputs} helper bound to this task's context.
         *
         * @param context task context supplied by the framework
         */
        @Override
        public void setup(Mapper<ImmutableBytesWritable, Result, BytesWritable, BytesWritable>.Context context) {
            this.mos = new MultipleOutputs<>(context);
        }

        /**
         * Converts one row and writes it to the named outputs, updating the job counters.
         *
         * @param immutableBytesWritable row key as delivered by the table input format (unused; the
         *                               key is taken from {@code result})
         * @param result                 the scanned row
         * @param context                task context used for counters
         * @throws IOException          if a stored proto cannot be parsed or an output cannot be written
         * @throws InterruptedException if writing to an output is interrupted
         */
        @Override
        public void map(ImmutableBytesWritable immutableBytesWritable, Result result, Mapper<ImmutableBytesWritable, Result, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            // The builder is shared between rows; without this reset, optional fields set for an
            // earlier row would leak into the wrapper built for a row that lacks them.
            this.dw.clear();
            this.converter.set(result, this.dw);

            byte[] rowId = this.converter.getRowId();
            byte[] documentMetadata = this.converter.getDocumentMetadata();
            byte[] documentMedia = this.converter.getDocumentMedia();
            byte[] wrapperBytes = this.converter.toDocumentWrapper(rowId, documentMetadata, documentMedia).toByteArray();

            this.key.set(rowId, 0, rowId.length);

            // Defensive guard kept from the original; serialization is not expected to yield null.
            if (wrapperBytes != null) {
                this.documentProto.set(wrapperBytes, 0, wrapperBytes.length);
                this.mos.write(DOCUMENT_OUTPUT, this.key, this.documentProto);
                context.getCounter(Counters.DPROTO).increment(1L);
            } else {
                context.getCounter(Counters.DPROTO_SKIPPED).increment(1L);
            }

            if (documentMetadata == null) {
                context.getCounter(Counters.MPROTO_SKIPPED).increment(1L);
                return;
            }
            this.metadataProto.set(documentMetadata, 0, documentMetadata.length);
            this.mos.write(HBaseConstant.FAMILY_METADATA_QUALIFIER_PROTO, this.key, this.metadataProto);
            context.getCounter(Counters.MPROTO).increment(1L);
        }

        /**
         * Closes the named outputs so that buffered records are flushed.
         *
         * @param context task context supplied by the framework
         * @throws IOException          if closing an output fails
         * @throws InterruptedException if closing an output is interrupted
         */
        @Override
        public void cleanup(Mapper<ImmutableBytesWritable, Result, BytesWritable, BytesWritable>.Context context) throws IOException, InterruptedException {
            this.mos.close();
        }
    }

    /**
     * Stores the configuration that {@link #run} later uses to build the job.
     *
     * @param configuration configuration to keep; stored as-is without copying
     */
    public void setConf(Configuration configuration) {
        conf = configuration;
    }

    /**
     * @return the configuration most recently passed to {@code setConf}, or {@code null} if none
     *         has been set
     */
    public Configuration getConf() {
        return conf;
    }

    /**
     * Configures and runs the map-only job that dumps an HBase table into sequence files.
     *
     * <p>Expects at least two arguments: the source table name and the output directory. The job
     * scans the table with {@link RowToDocumentProtoMapper}, uses no reducers and registers three
     * named sequence-file outputs (the metadata qualifier, the content qualifier and "dproto").
     *
     * @param strArr command-line arguments: {@code <table> <directory>}
     * @return {@code 0} when the job completes successfully, {@code -1} when too few arguments are
     *         given (usage is logged instead of terminating the JVM, so the tool stays embeddable)
     * @throws IOException if the job finishes unsuccessfully
     * @throws Exception   if job setup or submission fails
     */
    public int run(String[] strArr) throws Exception {
        if ("DEBUG".equals(this.conf.get("job.logging"))) {
            logger.setLevel(Level.DEBUG);
            logger.debug("** Log Level set to DEBUG **");
        }
        if (strArr.length < 2) {
            usage("Wrong number of arguments: " + strArr.length);
            // Report the failure through the exit code; the launcher decides whether to exit.
            return -1;
        }
        final String tableName = strArr[0];
        final String outputDir = strArr[1];

        // Applies the hard-coded memory / sort-buffer settings before the job copies the configuration.
        getOptimizedConfiguration(this.conf);
        Job job = new Job(this.conf, HBaseToDocumentProtoSequenceFile.class.getSimpleName() + "_" + tableName + "_" + outputDir);
        job.setJarByClass(HBaseToDocumentProtoSequenceFile.class);

        Scan scan = new Scan();
        scan.setCaching(100);
        // Block cache is switched off for this full-table scan.
        scan.setCacheBlocks(false);
        TableMapReduceUtil.initTableMapperJob(tableName, scan, RowToDocumentProtoMapper.class, BytesWritable.class, BytesWritable.class, job);

        job.setNumReduceTasks(0);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(BytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));

        // The content-qualifier output is registered here although the mapper in this file does not write to it.
        MultipleOutputs.addNamedOutput(job, HBaseConstant.FAMILY_METADATA_QUALIFIER_PROTO, SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class);
        MultipleOutputs.addNamedOutput(job, HBaseConstant.FAMILY_CONTENT_QUALIFIER_PROTO, SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class);
        MultipleOutputs.addNamedOutput(job, "dproto", SequenceFileOutputFormat.class, BytesWritable.class, BytesWritable.class);

        if (job.waitForCompletion(true)) {
            return 0;
        }
        throw new IOException("Error with job!");
    }

    /**
     * Overwrites a fixed set of task memory and sort-buffer properties on the given configuration.
     * Despite its name the method returns nothing; it mutates {@code configuration} in place.
     *
     * @param configuration configuration to modify
     */
    private void getOptimizedConfiguration(Configuration configuration) {
        final String[][] overrides = {
            {"mapred.child.java.opts", "-Xmx8000m"},
            {"io.sort.mb", "1024"},
            {"io.sort.spill.percent", "0.90"},
            {"io.sort.record.percent", "0.15"},
        };
        for (String[] override : overrides) {
            configuration.set(override[0], override[1]);
        }
    }

    /**
     * Command-line entry point: runs the export through {@link ToolRunner} and exits with its status.
     *
     * <p>When no arguments are supplied, the table defaults to "grotoap10" and the output directory
     * to "grotoap10_dump_&lt;current time in milliseconds&gt;".
     *
     * @param strArr command-line arguments ({@code <table> <directory>}); may be null or empty
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        String[] effectiveArgs = strArr;
        if (effectiveArgs == null || effectiveArgs.length == 0) {
            // Build the default directory from the default table name; the incoming array is null
            // or empty here, so it cannot be indexed.
            final String defaultTable = "grotoap10";
            effectiveArgs = new String[]{defaultTable, defaultTable + "_dump_" + System.currentTimeMillis()};
        }
        System.exit(ToolRunner.run(HBaseConfiguration.create(), new HBaseToDocumentProtoSequenceFile(), effectiveArgs));
    }

    /**
     * Logs a problem description followed by an example invocation, all at WARN level.
     *
     * @param str message describing what was wrong with the invocation
     */
    private static void usage(String str) {
        final String toolClass = HBaseToDocumentProtoSequenceFile.class.getName();
        final String exampleCommand = "hadoop jar target/importers-1.0-SNAPSHOT-jar-with-dependencies.jar " + toolClass + " <table> <directory>";
        logger.warn(str);
        logger.warn("Exemplary command: ");
        logger.warn(exampleCommand);
    }
}
