package org.kitesdk.tools;

import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.io.Closeables;
import edu.umd.cs.findbugs.annotations.SuppressWarnings;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.reflect.ReflectData;
import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.avro.Avros;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hive.conf.HiveConf;
import org.codehaus.jackson.JsonNode;
import org.kitesdk.compat.DynMethods;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.ValidationException;
import org.kitesdk.data.View;
import org.kitesdk.data.crunch.CrunchDatasets;
import org.kitesdk.data.spi.FieldPartitioner;
import org.kitesdk.data.spi.PartitionStrategyParser;
import org.kitesdk.data.spi.SchemaUtil;
import org.kitesdk.data.spi.partition.IdentityFieldPartitioner;

/* loaded from: input_file:org/kitesdk/tools/CopyTask.class */
public class CopyTask<E> extends Configured {
    /**
     * Reflective handle on a static {@code valueOf(String)} method, bound to the first
     * of the two listed Hadoop counter enum classes that can be resolved.
     * NOTE(review): {@code defaultNoop()} presumably makes {@code invoke} return
     * {@code null} when neither class is available - confirm against DynMethods.
     */
    private static final DynMethods.StaticMethod getEnumByName = new DynMethods.Builder("valueOf")
            .impl("org.apache.hadoop.mapred.Task$Counter", String.class)
            .impl("org.apache.hadoop.mapreduce.TaskCounter", String.class)
            .defaultNoop()
            .buildStatic();

    /** Counter used to report the number of copied records; {@code run()} tolerates it being {@code null}. */
    private static final Enum<?> MAP_INPUT_RECORDS = (Enum<?>) getEnumByName.invoke("MAP_INPUT_RECORDS");

    /** URI scheme that marks a dataset location as being on the local file system. */
    private static final String LOCAL_FS_SCHEME = "file";

    /** View the records are read from. */
    private final View<E> from;

    /** View the records are written to. */
    private final View<E> to;

    /** Entity class handed to the Crunch source when reading {@link #from}. */
    private final Class<E> entityClass;

    /** When {@code true}, records are shuffled through a group-by before being written. */
    private boolean compact = true;

    /** Requested reducer count for the group-by; values that are not positive use the default group-by. */
    private int numWriters = -1;

    /** Number of records counted by {@code run()}. */
    private long count = 0;

    /**
     * Turns every input element into a table entry whose key is the element itself
     * and whose value is {@code null}, so a collection can be grouped by its own
     * elements.
     *
     * @param <E> the element type
     */
    @SuppressWarnings(value = {"SE_NO_SERIALVERSIONID"}, justification = "Purposely not supported across versions")
    public static class AsKeyTable<E> extends DoFn<E, Pair<E, Void>> {
        private AsKeyTable() {
        }

        /**
         * Emits {@code (entity, null)} for the given element.
         *
         * @param entity the element to use as the key
         * @param out receives the resulting pair
         */
        @Override
        public void process(E entity, Emitter<Pair<E, Void>> out) {
            Pair<E, Void> keyed = Pair.<E, Void>of(entity, null);
            out.emit(keyed);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    @SuppressWarnings(value = {"EQ_DOESNT_OVERRIDE_EQUALS"}, justification = "StorageKey equals is correct, compares the values")
    /* loaded from: input_file:org/kitesdk/tools/CopyTask$AvroStorageKey.class */
    public static class AvroStorageKey extends GenericData.Record {
        private final PartitionStrategy strategy;
        private final Schema schema;

        private AvroStorageKey(PartitionStrategy partitionStrategy, Schema schema) {
            super(keySchema(partitionStrategy, schema));
            this.strategy = partitionStrategy;
            this.schema = schema;
        }

        public AvroStorageKey reuseFor(Object obj) {
            List<FieldPartitioner> fieldPartitioners = this.strategy.getFieldPartitioners();
            for (int i = 0; i < fieldPartitioners.size(); i++) {
                FieldPartitioner fieldPartitioner = fieldPartitioners.get(i);
                Schema.Field field = this.schema.getField(fieldPartitioner.getSourceName());
                put(i, fieldPartitioner.apply(ReflectData.get().getField(obj, field.name(), field.pos())));
            }
            return this;
        }

        private static Schema keySchema(PartitionStrategy partitionStrategy, Schema schema) {
            Schema create;
            ArrayList arrayList = new ArrayList();
            for (FieldPartitioner fieldPartitioner : partitionStrategy.getFieldPartitioners()) {
                if (fieldPartitioner instanceof IdentityFieldPartitioner) {
                    create = schema.getField(fieldPartitioner.getSourceName()).schema();
                } else {
                    Class partitionType = SchemaUtil.getPartitionType(fieldPartitioner, schema);
                    if (partitionType == Integer.class) {
                        create = Schema.create(Schema.Type.INT);
                    } else if (partitionType == Long.class) {
                        create = Schema.create(Schema.Type.LONG);
                    } else {
                        if (partitionType != String.class) {
                            throw new ValidationException("Cannot encode partition " + fieldPartitioner.getName() + " with type " + fieldPartitioner.getSourceType());
                        }
                        create = Schema.create(Schema.Type.STRING);
                    }
                }
                arrayList.add(new Schema.Field(fieldPartitioner.getName(), create, (String) null, (JsonNode) null));
            }
            Schema createRecord = Schema.createRecord(schema.getName() + "AvroKeySchema", (String) null, (String) null, false);
            createRecord.setFields(arrayList);
            return createRecord;
        }
    }

    /**
     * Maps an entity to its {@link AvroStorageKey}.
     *
     * <p>The partition strategy and schema are kept as strings, and the key object is
     * {@code transient}; it is rebuilt from those strings by {@link #initialize()}.
     * A single key instance is reused for every call to {@link #map(Object)}.
     *
     * @param <E> the entity type
     */
    @SuppressWarnings(value = {"SE_NO_SERIALVERSIONID"}, justification = "Purposely not supported across versions")
    public static class GetStorageKey<E> extends MapFn<E, GenericData.Record> {
        private final String strategyString;
        private final String schemaString;
        // Built lazily by initialize(); null until then.
        private transient AvroStorageKey key;

        /**
         * @param partitionStrategy strategy to derive keys with, stored in string form
         * @param schema entity schema, stored in string form
         */
        private GetStorageKey(PartitionStrategy partitionStrategy, Schema schema) {
            this.strategyString = partitionStrategy.toString(false);
            this.schemaString = schema.toString(false);
        }

        /**
         * @return the Avro schema of the keys this function produces; builds the key
         *         first if it has not been built yet
         */
        public Schema schema() {
            initialize();
            return this.key.getSchema();
        }

        /** Parses the stored strategy and schema strings and builds the reusable key, once. */
        @Override
        public void initialize() {
            if (this.key == null) {
                PartitionStrategy parsedStrategy = PartitionStrategyParser.parse(this.strategyString);
                Schema parsedSchema = new Schema.Parser().parse(this.schemaString);
                this.key = new AvroStorageKey(parsedStrategy, parsedSchema);
            }
        }

        /**
         * Fills the shared key from {@code e} and returns it. The returned record is the
         * same instance on every call; {@link #initialize()} must have run beforehand.
         *
         * @param e the entity to compute the key for
         * @return the shared key record, updated for {@code e}
         */
        @Override
        public GenericData.Record map(E e) {
            return this.key.reuseFor(e);
        }
    }

    /**
     * Creates a task that copies records from one view to another.
     *
     * @param from the view to read records from
     * @param to the view to write records to
     * @param entityClass the entity class used when reading {@code from}
     */
    public CopyTask(View<E> from, View<E> to, Class<E> entityClass) {
        this.entityClass = entityClass;
        this.to = to;
        this.from = from;
    }

    /**
     * @return the record count stored by {@code run()}; {@code 0} before any run
     */
    public long getCount() {
        return count;
    }

    /**
     * Disables compaction: records are written without the intermediate group-by,
     * and the writer count is reset to {@code 0}.
     *
     * @return this task, for chaining
     */
    public CopyTask<E> noCompaction() {
        this.compact = false;
        this.numWriters = 0;
        return this;
    }

    /**
     * Sets the number of reducers used for the group-by when compacting.
     * Passing {@code 0} is equivalent to calling {@link #noCompaction()}.
     *
     * @param numWriters the reducer count; must not be negative
     * @return this task, for chaining
     * @throws IllegalArgumentException if {@code numWriters} is negative
     */
    public CopyTask<E> setNumWriters(int numWriters) {
        // Template form: the message is only built when the check fails.
        Preconditions.checkArgument(numWriters >= 0, "Invalid number of reducers: %s", numWriters);
        if (numWriters == 0) {
            noCompaction();
        } else {
            this.numWriters = numWriters;
        }
        return this;
    }

    /**
     * Copies all records from the source view to the target view.
     *
     * <p>If either dataset is located on the local file system the copy runs through
     * the in-memory pipeline; otherwise it runs as a MapReduce pipeline.
     *
     * @return the result of the pipeline that performed the copy
     * @throws IOException if writing to, or closing, the target writer fails
     */
    public PipelineResult run() throws IOException {
        if (isLocal(this.from.getDataset()) || isLocal(this.to.getDataset())) {
            return runInMemory();
        }
        return runMapReduce();
    }

    /** Runs the copy as a MapReduce pipeline and takes the record count from the first stage's counter. */
    private PipelineResult runMapReduce() throws IOException {
        TaskUtil.configure(getConf()).addJarPathForClass(HiveConf.class).addJarForClass(AvroKeyInputFormat.class);
        Pipeline pipeline = new MRPipeline(getClass(), getConf());
        PCollection<E> records = pipeline.read(CrunchDatasets.asSource(this.from, this.entityClass));
        if (this.compact) {
            records = partition(records, this.to.getDataset().getDescriptor(), this.numWriters);
        }
        pipeline.write(records, CrunchDatasets.asTarget(this.to));
        PipelineResult result = pipeline.done();
        PipelineResult.StageResult firstStage = Iterables.getFirst(result.getStageResults(), null);
        // The counter enum is resolved reflectively and may be absent; the count is then left unchanged.
        if (firstStage != null && MAP_INPUT_RECORDS != null) {
            this.count = firstStage.getCounterValue(MAP_INPUT_RECORDS);
        }
        return result;
    }

    /** Runs the copy through the in-memory pipeline, incrementing the count once per written record. */
    private PipelineResult runInMemory() throws IOException {
        Pipeline pipeline = MemPipeline.getInstance();
        PCollection<E> records = pipeline.read(CrunchDatasets.asSource(this.from, this.entityClass));
        boolean threw = true;
        DatasetWriter<E> writer = null;
        try {
            writer = this.to.newWriter();
            for (E entity : records.materialize()) {
                writer.write(entity);
                this.count++;
            }
            threw = false;
        } finally {
            // Closed exactly once: a close failure propagates only when the copy itself
            // succeeded, and is swallowed when another exception is already in flight.
            Closeables.close(writer, threw);
        }
        return pipeline.done();
    }

    /**
     * Tells whether a dataset's location uses the local file system scheme.
     *
     * @param dataset the dataset whose descriptor location is inspected
     * @return {@code true} if the location is non-null and its scheme is {@code "file"}
     */
    private static boolean isLocal(Dataset<?> dataset) {
        URI uri = dataset.getDescriptor().getLocation();
        if (uri == null) {
            return false;
        }
        return LOCAL_FS_SCHEME.equals(uri.getScheme());
    }

    /**
     * Shuffles a collection for writing to a dataset described by {@code descriptor}:
     * grouped by partition key when the dataset is partitioned, grouped by the
     * records themselves otherwise.
     *
     * @param records the collection to shuffle
     * @param descriptor descriptor of the target dataset
     * @param numReducers reducer count passed on to the chosen overload
     * @return the shuffled collection
     */
    private static <E> PCollection<E> partition(PCollection<E> records, DatasetDescriptor descriptor, int numReducers) {
        if (!descriptor.isPartitioned()) {
            return partition(records, numReducers);
        }
        return partition(records, descriptor.getPartitionStrategy(), descriptor.getSchema(), numReducers);
    }

    /**
     * Groups a collection by the storage key derived from {@code partitionStrategy}
     * and returns the regrouped values.
     *
     * @param pCollection the collection to shuffle
     * @param partitionStrategy strategy used to compute each record's key
     * @param schema entity schema used to compute each record's key
     * @param i reducer count; when not positive the default group-by is used
     * @return the values of the grouped table, ungrouped again
     */
    private static <E> PCollection<E> partition(PCollection<E> pCollection, PartitionStrategy partitionStrategy, Schema schema, int i) {
        GetStorageKey<E> getKey = new GetStorageKey<E>(partitionStrategy, schema);
        PTable<GenericData.Record, E> keyed = pCollection.by(getKey, Avros.generics(getKey.schema()));
        return (i > 0 ? keyed.groupByKey(i) : keyed.groupByKey()).ungroup().values();
    }

    /**
     * Groups a collection using each record as its own key (with a null value) and
     * returns the regrouped keys.
     *
     * @param pCollection the collection to shuffle
     * @param i reducer count; when not positive the default group-by is used
     * @return the keys of the grouped table, ungrouped again
     */
    private static <E> PCollection<E> partition(PCollection<E> pCollection, int i) {
        PTable<E, Void> keyed = pCollection.parallelDo(new AsKeyTable<E>(), Avros.tableOf(pCollection.getPType(), Avros.nulls()));
        return (i > 0 ? keyed.groupByKey(i) : keyed.groupByKey()).ungroup().keys();
    }
}
