package za.co.absa.cobrix.spark.cobol.source.index;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SQLContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import za.co.absa.cobrix.spark.cobol.reader.Constants$;
import za.co.absa.cobrix.spark.cobol.reader.Reader;
import za.co.absa.cobrix.spark.cobol.reader.index.entry.SparseIndexEntry;
import za.co.absa.cobrix.spark.cobol.reader.varlen.VarLenReader;
import za.co.absa.cobrix.spark.cobol.source.SerializableConfiguration;
import za.co.absa.cobrix.spark.cobol.source.parameters.LocalityParameters;
import za.co.absa.cobrix.spark.cobol.source.types.FileWithOrder;
import za.co.absa.cobrix.spark.cobol.utils.SparkUtils$;

/* compiled from: IndexBuilder.scala */
/* loaded from: input_file:za/co/absa/cobrix/spark/cobol/source/index/IndexBuilder$.class */
/**
 * Singleton that builds an RDD of {@link SparseIndexEntry} for a set of input files.
 *
 * <p>This is the Java view of a Scala {@code object}: the single instance is exposed through
 * {@link #MODULE$}, and the members with mangled {@code za$co$absa$...} names are public only so
 * that the companion closure classes ({@code IndexBuilder$$anonfun$...}) can reach them.
 */
public final class IndexBuilder$ {
    /**
     * The one and only instance. Initialised directly here: a {@code static final} field cannot
     * be assigned from inside the constructor in Java.
     */
    public static final IndexBuilder$ MODULE$ = new IndexBuilder$();

    private final Logger logger;

    private IndexBuilder$() {
        this.logger = LoggerFactory.getLogger(getClass());
    }

    /**
     * Returns the logger of this singleton. The mangled name is kept because it is part of the
     * class' public surface.
     */
    public Logger za$co$absa$cobrix$spark$cobol$source$index$IndexBuilder$$logger() {
        return this.logger;
    }

    /**
     * Builds the sparse index for the given files.
     *
     * @param fileWithOrderArr   files to index
     * @param reader             reader used to generate the index; only {@link VarLenReader} is supported
     * @param sQLContext         SQL context whose Spark context is used to create the RDDs
     * @param localityParameters flags selecting the locality-aware path and, on that path, whether
     *                           the collected entries are rebalanced across executors
     * @return the index RDD, or {@code null} when {@code reader} is not a {@link VarLenReader}
     *         (callers must handle the {@code null} case)
     */
    public RDD<SparseIndexEntry> buildIndex(FileWithOrder[] fileWithOrderArr, Reader reader, SQLContext sQLContext, LocalityParameters localityParameters) {
        if (!(reader instanceof VarLenReader)) {
            // No index can be built for other reader kinds; null is the established contract.
            return null;
        }
        VarLenReader varLenReader = (VarLenReader) reader;
        if (varLenReader.isIndexGenerationNeeded() && localityParameters.improveLocality()) {
            return buildIndexForVarLenReaderWithFullLocality(fileWithOrderArr, varLenReader, sQLContext, localityParameters.optimizeAllocation());
        }
        return buildIndexForVarLenReader(fileWithOrderArr, varLenReader, sQLContext);
    }

    /**
     * Builds the index on the executors, collects the (entry, preferred locations) pairs into the
     * driver and re-creates an RDD from them so that each entry carries its preferred locations.
     *
     * @param z when {@code true}, the collected pairs are passed through
     *          {@link #optimizeDistribution} before the RDD is created
     */
    // Raw Scala collection/RDD types come from the Scala API surface; the element types are
    // fixed by the ClassTags passed alongside.
    @SuppressWarnings({"unchecked", "rawtypes"})
    private RDD<SparseIndexEntry> buildIndexForVarLenReaderWithFullLocality(FileWithOrder[] fileWithOrderArr, VarLenReader varLenReader, SQLContext sQLContext, boolean z) {
        Logger log = za$co$absa$cobrix$spark$cobol$source$index$IndexBuilder$$logger();
        Configuration hadoopConf = sQLContext.sparkContext().hadoopConfiguration();
        RDD<FileWithOrder> filesWithLocality = toRDDWithLocality(fileWithOrderArr, hadoopConf, sQLContext);
        RDD locatedIndexes = filesWithLocality.mapPartitions(new IndexBuilder$$anonfun$1(varLenReader, new SerializableConfiguration(hadoopConf)), filesWithLocality.mapPartitions$default$2(), ClassTag$.MODULE$.apply(Tuple2.class));

        log.info("Going to collect located indexes into driver.");
        Seq<Tuple2<SparseIndexEntry, Seq<String>>> indexesWithLocations = (Seq) Predef$.MODULE$.wrapRefArray((Object[]) locatedIndexes.collect());
        if (z) {
            indexesWithLocations = optimizeDistribution(indexesWithLocations, sQLContext.sparkContext());
        }

        log.info("Creating RDD for {} located indexes.", indexesWithLocations.length());
        if (log.isDebugEnabled()) {
            log.debug("Preferred locations per index entry");
            indexesWithLocations.foreach(new IndexBuilder$$anonfun$buildIndexForVarLenReaderWithFullLocality$1());
        }
        return sQLContext.sparkContext().makeRDD(indexesWithLocations, ClassTag$.MODULE$.apply(SparseIndexEntry.class));
    }

    /**
     * Derives a length value from an index entry.
     *
     * <p>A non-positive {@code offsetTo} is replaced by {@link Long#MAX_VALUE}. Values below
     * ten megabytes are returned unchanged; larger values are reduced by one megabyte.
     *
     * <p>NOTE(review): only {@code offsetTo} is consulted, so the result is an end offset rather
     * than a span unless entries start at zero - confirm this is intended.
     *
     * @param sparseIndexEntry the entry to inspect
     * @return the derived length
     */
    public long za$co$absa$cobrix$spark$cobol$source$index$IndexBuilder$$getBlockLengthByIndexEntry(SparseIndexEntry sparseIndexEntry) {
        long offsetTo = sparseIndexEntry.offsetTo();
        long length = offsetTo > 0 ? offsetTo : Long.MAX_VALUE;
        long megabyte = Constants$.MODULE$.megabyte();
        if (length < 10 * megabyte) {
            return length;
        }
        return length - megabyte;
    }

    /**
     * Rebalances the (entry, preferred locations) pairs over the currently active executors by
     * delegating to {@code LocationBalancer$}.
     */
    private Seq<Tuple2<SparseIndexEntry, Seq<String>>> optimizeDistribution(Seq<Tuple2<SparseIndexEntry, Seq<String>>> seq, SparkContext sparkContext) {
        Seq<String> activeExecutors = SparkUtils$.MODULE$.currentActiveExecutors(sparkContext);
        za$co$absa$cobrix$spark$cobol$source$index$IndexBuilder$$logger().info("Trying to balance {} partitions among all available executors ({})", seq.size(), activeExecutors);
        return LocationBalancer$.MODULE$.balance(seq, activeExecutors);
    }

    /**
     * Maps every file to a pair via {@code IndexBuilder$$anonfun$2} (which receives the Hadoop
     * {@link FileSystem} for {@code configuration}) and creates an RDD from those pairs.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private RDD<FileWithOrder> toRDDWithLocality(FileWithOrder[] fileWithOrderArr, Configuration configuration, SQLContext sQLContext) {
        FileSystem fileSystem = FileSystem.get(configuration);
        Object mapped = Predef$.MODULE$.refArrayOps(fileWithOrderArr).map(new IndexBuilder$$anonfun$2(fileSystem), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        Seq filesWithLocations = Predef$.MODULE$.refArrayOps((Object[]) mapped).toSeq();
        filesWithLocations.foreach(new IndexBuilder$$anonfun$toRDDWithLocality$1());
        return sQLContext.sparkContext().makeRDD(filesWithLocations, ClassTag$.MODULE$.apply(FileWithOrder.class));
    }

    /**
     * Builds the index with one input partition per file, then repartitions the resulting entries
     * into {@code min(count, Constants.maxNumPartitions)} partitions.
     *
     * <p>Both partition counts are clamped to at least 1: Spark rejects a non-positive number of
     * slices/partitions, which would otherwise happen for an empty file array or an empty index.
     *
     * @return the cached, repartitioned index RDD
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    public RDD<SparseIndexEntry> buildIndexForVarLenReader(FileWithOrder[] fileWithOrderArr, VarLenReader varLenReader, SQLContext sQLContext) {
        SparkContext sparkContext = sQLContext.sparkContext();
        int numSlices = Math.max(1, fileWithOrderArr.length);
        RDD files = sparkContext.parallelize(Predef$.MODULE$.wrapRefArray(fileWithOrderArr), numSlices, ClassTag$.MODULE$.apply(FileWithOrder.class));
        // Cached because the RDD is evaluated by count() below and again by the repartitioning.
        RDD indexes = files.mapPartitions(new IndexBuilder$$anonfun$3(varLenReader, new SerializableConfiguration(sparkContext.hadoopConfiguration())), files.mapPartitions$default$2(), ClassTag$.MODULE$.apply(SparseIndexEntry.class)).cache();

        long indexCount = indexes.count();
        int numPartitions = (int) Math.max(1L, Math.min(indexCount, Constants$.MODULE$.maxNumPartitions()));
        za$co$absa$cobrix$spark$cobol$source$index$IndexBuilder$$logger().warn("Index elements count: {}, number of partitions = {}", indexCount, numPartitions);
        return indexes.repartition(numPartitions, indexes.repartition$default$2(numPartitions)).cache();
    }
}
