package ai.starlake.job.ingest;

import ai.starlake.config.CometColumns$;
import ai.starlake.schema.model.Attribute;
import ai.starlake.schema.model.Position;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.MatchError;
import scala.Option$;
import scala.Predef$;
import scala.collection.Seq;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.reflect.ClassTag$;

/* compiled from: PositionIngestionJob.scala */
/* loaded from: input_file:ai/starlake/job/ingest/PositionIngestionUtil$.class */
/**
 * Singleton holder (Scala {@code object}) of helpers used to ingest
 * fixed-width ("position") text files with Spark.
 *
 * <p>The single instance is exposed through {@link #MODULE$}.
 */
public final class PositionIngestionUtil$ {
    /** The unique instance of this utility object. */
    public static final PositionIngestionUtil$ MODULE$ = new PositionIngestionUtil$();

    private PositionIngestionUtil$() {
    }

    /**
     * Reads every given path as a text file and decodes each record with the
     * requested character set.
     *
     * <p>Each path is loaded through {@code hadoopFile} using
     * {@link TextInputFormat}; the resulting per-path RDDs are unioned
     * together, starting from an empty RDD, so an empty {@code list} yields
     * an empty RDD.
     *
     * @param sparkSession session whose Spark context is used to read the files
     * @param list         paths of the files to load
     * @param str          name of the charset used to decode each record
     * @return an RDD holding one decoded string per input record
     */
    public RDD<String> loadDfWithEncoding(SparkSession sparkSession, List<Path> list, String str) {
        return (RDD) ((TraversableOnce) list.map(path ->
                sparkSession.sparkContext().hadoopFile(path.toString(), ClassTag$.MODULE$.apply(LongWritable.class), ClassTag$.MODULE$.apply(Text.class), ClassTag$.MODULE$.apply(TextInputFormat.class)).map(tuple2 -> {
                    if (tuple2 == null) {
                        throw new MatchError(tuple2);
                    }
                    Text text = (Text) tuple2._2();
                    // Decode only the first getLength() bytes of the array returned by getBytes().
                    return new String(text.getBytes(), 0, text.getLength(), str);
                }, ClassTag$.MODULE$.apply(String.class)),
                List$.MODULE$.canBuildFrom())).fold(sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(String.class)), (rdd, rdd2) -> {
            return rdd.union(rdd2);
        });
    }

    /**
     * Splits the single string column of {@code dataset} into one string
     * column per positioned attribute and appends the input file name column.
     *
     * <p>Only attributes that define a position produce a column. The schema,
     * the extracted values and the column names are all derived from that same
     * filtered list so they always have the same width. (Previously the schema
     * array was sized from the positioned attributes but filled and named from
     * all attributes, which overflowed the array as soon as one attribute had
     * no position.)
     *
     * @param sparkSession session used to build the resulting DataFrame
     * @param dataset      input whose column 0 holds the raw text line
     * @param list         attributes describing the fields to extract
     * @return a DataFrame with one string column per positioned attribute,
     *         named after the attribute, plus the column named by
     *         {@code CometColumns.cometInputFileNameColumn} filled with
     *         {@code input_file_name()}
     */
    public Dataset<Row> prepare(SparkSession sparkSession, Dataset<Row> dataset, List<Attribute> list) {
        // Keep only the attributes that actually carry a position.
        List<Attribute> positioned = (List<Attribute>) list.filter(attribute -> attribute.position().isDefined());
        List positions = (List) positioned.map(attribute -> attribute.position().get(), List$.MODULE$.canBuildFrom());
        Seq columnNames = (Seq) positioned.map(attribute -> attribute.name(), List$.MODULE$.canBuildFrom());

        // Temporary schema col0..colN-1; the columns are renamed by toDF below.
        StructField[] schemaFields = new StructField[positions.length()];
        for (int i = 0; i < schemaFields.length; i++) {
            schemaFields[i] = new StructField("col" + i, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4());
        }

        return sparkSession.createDataFrame(dataset.rdd().map(row -> {
            return getRow$1(row.getString(0), positions);
        }, ClassTag$.MODULE$.apply(Row.class)), new StructType(schemaFields)).toDF(columnNames).withColumn(CometColumns$.MODULE$.cometInputFileNameColumn(), functions$.MODULE$.input_file_name());
    }

    /**
     * Cuts one text line into fields according to the given positions.
     *
     * <p>For each position the substring {@code [first, last + 1)} is taken,
     * i.e. {@code last} is treated as an inclusive index. When that end index
     * lies beyond the end of the line, the field is the empty string.
     *
     * @param str  the raw input line
     * @param list the {@link Position} of every field, in output order
     * @return a row holding one string per position
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static final Row getRow$1(String str, List list) {
        int fieldCount = list.length();
        String[] fields = new String[fieldCount];
        int lineLength = str.length();
        for (int i = 0; i < fieldCount; i++) {
            // Look the position up once per field.
            Position position = (Position) list.apply(i);
            int first = position.first();
            int end = position.last() + 1;
            fields[i] = end <= lineLength ? str.substring(first, end) : "";
        }
        return Row$.MODULE$.fromSeq(Predef$.MODULE$.wrapRefArray(fields));
    }
}
