package info.vizierdb.spark.load;

import info.vizierdb.Vizier$;
import info.vizierdb.commands.FileArgument;
import info.vizierdb.commands.FileArgument$;
import info.vizierdb.spark.DataFrameConstructor;
import info.vizierdb.spark.DataFrameConstructorCodec;
import info.vizierdb.spark.SparkSchema$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.csv.CSVOptions;
import org.apache.spark.sql.catalyst.expressions.BoundReference;
import org.apache.spark.sql.catalyst.expressions.Cast;
import org.apache.spark.sql.catalyst.expressions.Cast$;
import org.apache.spark.sql.catalyst.expressions.CsvToStructs;
import org.apache.spark.sql.catalyst.expressions.CsvToStructs$;
import org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType$;
import play.api.libs.functional.FunctionalCanBuild$;
import play.api.libs.functional.syntax.package$;
import play.api.libs.json.Format;
import play.api.libs.json.Format$;
import play.api.libs.json.JsError$;
import play.api.libs.json.JsObject;
import play.api.libs.json.JsPath$;
import play.api.libs.json.JsResult$;
import play.api.libs.json.JsValue;
import play.api.libs.json.Json$MacroOptions$Default$macroOptionsDefault$;
import play.api.libs.json.JsonConfiguration$;
import play.api.libs.json.OFormat;
import play.api.libs.json.OFormat$;
import play.api.libs.json.OWrites$;
import play.api.libs.json.Reads$;
import play.api.libs.json.Writes$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.Tuple6;
import scala.collection.GenTraversableOnce;
import scala.collection.Iterable$;
import scala.collection.IterableLike;
import scala.collection.MapLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Map$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: LoadSparkCSV.scala */
/* loaded from: input_file:info/vizierdb/spark/load/LoadSparkCSV$.class */
/**
 * Singleton companion of {@link LoadSparkCSV} (published through {@link #MODULE$}).
 *
 * <p>Provides:
 * <ul>
 *   <li>the base CSV reader options ({@link #OPTIONS()}) and the name of the
 *       corrupt-record column ({@link #ERROR_COL()});</li>
 *   <li>the Play-JSON {@link #format()} used to (de)serialize {@link LoadSparkCSV};</li>
 *   <li>schema helpers ({@link #cleanSchema(Seq)}, {@link #applyProposedSchema(Seq, Seq)});</li>
 *   <li>{@link #infer}, which inspects a file through Spark and builds a
 *       {@link LoadSparkCSV} with an inferred schema.</li>
 * </ul>
 */
public final class LoadSparkCSV$ implements DataFrameConstructorCodec, Serializable {
    /** The single instance; assigned by the private constructor. */
    public static LoadSparkCSV$ MODULE$;
    private final String ERROR_COL;
    private final Map<String, String> OPTIONS;
    private final Format<LoadSparkCSV> format;

    static {
        // The constructor publishes the instance into MODULE$.
        new LoadSparkCSV$();
    }

    /** Default for the 5th constructor argument (the position {@link #unapply} reports as {@code skipHeader}). */
    public boolean $lessinit$greater$default$5() {
        return true;
    }

    /** Default for the 6th constructor argument (the position {@link #unapply} reports as {@code sparkOptions}): an empty map. */
    public Map<String, String> $lessinit$greater$default$6() {
        return Predef$.MODULE$.Map().empty();
    }

    /** @return the column name registered under {@code columnNameOfCorruptRecord} in {@link #OPTIONS()}. */
    public String ERROR_COL() {
        return this.ERROR_COL;
    }

    /** @return the base CSV options ({@code mode=PERMISSIVE} plus the corrupt-record column name). */
    public Map<String, String> OPTIONS() {
        return this.OPTIONS;
    }

    /** @return the JSON format for {@link LoadSparkCSV}; reading anything other than a JSON object yields an error. */
    public Format<LoadSparkCSV> format() {
        return this.format;
    }

    /**
     * Decodes a {@link LoadSparkCSV} from JSON using {@link #format()}.
     *
     * @param jsValue the serialized constructor
     * @return the decoded constructor
     */
    @Override // info.vizierdb.spark.DataFrameConstructorCodec
    public DataFrameConstructor apply(JsValue jsValue) {
        return (DataFrameConstructor) jsValue.as(format());
    }

    /** Default for the 5th argument of {@link #apply(FileArgument, Seq, long, Option, boolean, Map)}. */
    public boolean apply$default$5() {
        return true;
    }

    /** Default for the 6th argument of {@link #apply(FileArgument, Seq, long, Option, boolean, Map)}: an empty map. */
    public Map<String, String> apply$default$6() {
        return Predef$.MODULE$.Map().empty();
    }

    /**
     * Overlays a caller-proposed schema on top of an inferred one.
     *
     * <p>If {@code seq2} is empty, {@code seq} is returned untouched. Otherwise the result is
     * the first {@code seq.size()} proposed fields followed by the cleaned inferred fields
     * beyond position {@code seq2.size()}, so the result never has more fields than {@code seq}.
     *
     * @param seq  the inferred schema
     * @param seq2 the proposed schema (may be shorter or longer than {@code seq})
     * @return the merged schema
     */
    public Seq<StructField> applyProposedSchema(Seq<StructField> seq, Seq<StructField> seq2) {
        return seq2.isEmpty() ? seq : (Seq) ((TraversableLike) seq2.take(seq.size())).$plus$plus((GenTraversableOnce) cleanSchema(seq).drop(seq2.size()), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Normalizes field names and disambiguates duplicates.
     *
     * <p>Each name is first passed through {@code LoadSparkDataset$.cleanColumnName}. Names
     * that then collide case-insensitively are suffixed with {@code _1}, {@code _2}, ... in
     * order of appearance; names that are unique are left as cleaned.
     *
     * @param seq the schema to clean
     * @return a schema of the same length with cleaned, de-duplicated names
     */
    public Seq<StructField> cleanSchema(Seq<StructField> seq) {
        Seq seq2 = (Seq) seq.map(structField -> {
            return structField.copy(LoadSparkDataset$.MODULE$.cleanColumnName(structField.name()), structField.copy$default$2(), structField.copy$default$3(), structField.copy$default$4());
        }, Seq$.MODULE$.canBuildFrom());
        // Mutable map: lower-cased duplicated name -> next suffix to hand out (starting at 1).
        scala.collection.mutable.Map apply = Map$.MODULE$.apply(((TraversableOnce) ((MapLike) seq2.groupBy(structField2 -> {
            return structField2.name().toLowerCase();
        }).filter(tuple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$cleanSchema$3(tuple2));
        })).keys().map(str -> {
            return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str), BoxesRunTime.boxToInteger(1));
        }, Iterable$.MODULE$.canBuildFrom())).toSeq());
        return (Seq) seq2.map(structField3 -> {
            String lowerCase = structField3.name().toLowerCase();
            if (!apply.contains(lowerCase)) {
                // Not a duplicated name: keep as-is.
                return structField3;
            }
            // Take the current suffix and bump the counter for the next occurrence.
            int unboxToInt = BoxesRunTime.unboxToInt(apply.apply(lowerCase));
            apply.update(lowerCase, BoxesRunTime.boxToInteger(BoxesRunTime.unboxToInt(apply.apply(lowerCase)) + 1));
            return structField3.copy(new StringBuilder(1).append(structField3.name()).append("_").append(unboxToInt).toString(), structField3.copy$default$2(), structField3.copy$default$3(), structField3.copy$default$4());
        }, Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Builds a {@link LoadSparkCSV} for a file by inferring its schema with Spark.
     *
     * <p>Steps: read the file as text, infer a CSV schema (with {@code inferSchema=true} and
     * {@code header} taken from {@code option}, defaulting to {@code true}), clean the schema,
     * optionally force every field to string, then overlay the proposed schema.
     *
     * @param fileArgument the file to load; resolved with {@code getPath(j, true)}
     * @param j            the project identifier
     * @param str          context text, stored wrapped in {@code Some}
     * @param option       optional explicit header flag
     * @param seq          proposed schema, applied via {@link #applyProposedSchema(Seq, Seq)}
     * @param map          extra CSV options layered over {@link #OPTIONS()}
     * @param z            when {@code false}, all inferred field types are replaced by string
     * @return the configured constructor
     */
    public LoadSparkCSV infer(FileArgument fileArgument, long j, String str, Option<Object> option, Seq<StructField> seq, Map<String, String> map, boolean z) {
        SparkSession sparkSession = Vizier$.MODULE$.sparkSession();
        Dataset load = sparkSession.read().format("text").load((String) fileArgument.getPath(j, true)._1());
        // First line of the file, if any.
        Option map2 = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) load.take(1))).headOption().map(row -> {
            return (String) row.getAs(0);
        });
        ObjectRef create = ObjectRef.create(Predef$.MODULE$.wrapRefArray(TextInputCSVDataSource$.MODULE$.inferFromDataset(sparkSession, load.map(row2 -> {
            return (String) row2.getAs(0);
        }, sparkSession.implicits().newStringEncoder()), map2, new CSVOptions(OPTIONS().$plus$plus(map).$plus$plus(Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("header"), option.getOrElse(() -> {
            return true;
        }).toString()), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("inferSchema"), Boolean.toString(true))}))), sparkSession.sessionState().conf().csvColumnPruning(), sparkSession.sessionState().conf().sessionLocalTimeZone())).fields()));
        create.elem = cleanSchema((Seq) create.elem);
        if (!z) {
            // Type guessing disabled: treat every column as a string.
            create.elem = (Seq) ((Seq) create.elem).map(structField -> {
                return structField.copy(structField.copy$default$1(), StringType$.MODULE$, structField.copy$default$3(), structField.copy$default$4());
            }, Seq$.MODULE$.canBuildFrom());
        }
        create.elem = applyProposedSchema((Seq) create.elem, seq);
        // Header detection: false for an empty file; otherwise the explicit flag, or else a
        // heuristic that parses the first line as all-string columns and reports true if any
        // column fails to cast to its inferred type (see $anonfun$infer$7).
        // NOTE(review): this result is never read. The constructor below receives
        // apply$default$5() (always true) in the skipHeader position, so an explicit
        // header flag passed in `option` does not reach the returned object. Confirm whether
        // skipHeader should be `unboxToBoolean` before changing; left as-is to preserve
        // current behaviour (including any exception the evaluation may raise).
        boolean unboxToBoolean = map2.isEmpty() ? false : BoxesRunTime.unboxToBoolean(option.getOrElse(() -> {
            InternalRow internalRow = (InternalRow) new CsvToStructs(StructType$.MODULE$.apply((Seq) ((Seq) create.elem).map(structField2 -> {
                return structField2.copy(structField2.copy$default$1(), StringType$.MODULE$, structField2.copy$default$3(), structField2.copy$default$4());
            }, Seq$.MODULE$.canBuildFrom())), MODULE$.OPTIONS().$plus$plus(map), functions$.MODULE$.lit(map2.get()).expr(), new Some(sparkSession.sessionState().conf().sessionLocalTimeZone()), CsvToStructs$.MODULE$.apply$default$5()).eval((InternalRow) null);
            return ((IterableLike) ((Seq) create.elem).zipWithIndex(Seq$.MODULE$.canBuildFrom())).exists(tuple2 -> {
                return BoxesRunTime.boxToBoolean($anonfun$infer$7(internalRow, tuple2));
            });
        }));
        return new LoadSparkCSV(fileArgument, (Seq) create.elem, j, new Some(str), apply$default$5(), map);
    }

    /** Default for the 5th argument of {@link #infer}: no proposed schema. */
    public Seq<StructField> infer$default$5() {
        return Nil$.MODULE$;
    }

    /** Default for the 6th argument of {@link #infer}: no extra options. */
    public Map<String, String> infer$default$6() {
        return Predef$.MODULE$.Map().empty();
    }

    /** Default for the 7th argument of {@link #infer}: type guessing enabled. */
    public boolean infer$default$7() {
        return true;
    }

    /**
     * Factory for {@link LoadSparkCSV}; forwards all arguments to its constructor unchanged.
     */
    public LoadSparkCSV apply(FileArgument fileArgument, Seq<StructField> seq, long j, Option<String> option, boolean z, Map<String, String> map) {
        return new LoadSparkCSV(fileArgument, seq, j, option, z, map);
    }

    /**
     * Extractor for {@link LoadSparkCSV}.
     *
     * @param loadSparkCSV the instance to take apart (may be {@code null})
     * @return {@code None} for {@code null}; otherwise
     *         {@code Some((url, schema, projectId, contextText, skipHeader, sparkOptions))}
     */
    public Option<Tuple6<FileArgument, Seq<StructField>, Object, Option<String>, Object, Map<String, String>>> unapply(LoadSparkCSV loadSparkCSV) {
        return loadSparkCSV == null ? None$.MODULE$ : new Some(new Tuple6(loadSparkCSV.url(), loadSparkCSV.mo560schema(), BoxesRunTime.boxToLong(loadSparkCSV.projectId()), loadSparkCSV.contextText(), BoxesRunTime.boxToBoolean(loadSparkCSV.skipHeader()), loadSparkCSV.sparkOptions()));
    }

    /** Keeps the singleton property across Java deserialization. */
    private Object readResolve() {
        return MODULE$;
    }

    /** Constructor function handed to the JSON format builder. */
    public static final /* synthetic */ LoadSparkCSV $anonfun$format$1(FileArgument fileArgument, Seq seq, long j, Option option, boolean z, Map map) {
        return new LoadSparkCSV(fileArgument, seq, j, option, z, map);
    }

    /**
     * Predicate used by {@link #cleanSchema(Seq)} to select duplicated names.
     *
     * @param tuple2 a (lower-cased name, fields sharing that name) group
     * @return {@code true} when more than one field shares the name
     */
    public static final /* synthetic */ boolean $anonfun$cleanSchema$3(Tuple2 tuple2) {
        // A name is a duplicate as soon as it occurs twice; the previous "> 2" left
        // pairs of identically named columns without a disambiguating suffix.
        return ((SeqLike) tuple2._2()).size() > 1;
    }

    /**
     * Header heuristic for a single column: casts the string value at the field's index in
     * {@code internalRow} to the field's data type and reports whether the result is null.
     *
     * @param internalRow the first line parsed as all-string columns
     * @param tuple2      a (field, column index) pair
     * @return {@code true} when the cast evaluates to {@code null}
     * @throws MatchError if {@code tuple2} is {@code null}
     */
    public static final /* synthetic */ boolean $anonfun$infer$7(InternalRow internalRow, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        StructField structField = (StructField) tuple2._1();
        return new Cast(new BoundReference(tuple2._2$mcI$sp(), StringType$.MODULE$, structField.nullable()), structField.dataType(), Cast$.MODULE$.apply$default$3(), Cast$.MODULE$.apply$default$4()).eval(internalRow) == null;
    }

    /**
     * Initializes the singleton: publishes {@link #MODULE$}, then sets the constants and
     * builds the JSON format. {@code ERROR_COL} must be assigned before {@code OPTIONS},
     * which reads it.
     */
    private LoadSparkCSV$() {
        MODULE$ = this;
        this.ERROR_COL = "__MIMIR_CSV_LOAD_ERROR";
        this.OPTIONS = Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("mode"), "PERMISSIVE"), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("columnNameOfCorruptRecord"), ERROR_COL())}));
        // Field-by-field object format: url, schema, projectId, contextText (optional),
        // skipHeader, sparkOptions — in the same order as the constructor arguments.
        OFormat oFormat = (OFormat) package$.MODULE$.toFunctionalBuilderOps(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("url")).format(FileArgument$.MODULE$.format()), OFormat$.MODULE$.functionalCanBuildFormats(FunctionalCanBuild$.MODULE$.functionalCanBuildApplicative(Reads$.MODULE$.applicative(JsResult$.MODULE$.applicativeJsResult())), OWrites$.MODULE$.functionalCanBuildOWrites()))
            .and(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("schema")).format(Format$.MODULE$.GenericFormat(Reads$.MODULE$.traversableReads(Predef$.MODULE$.fallbackStringCanBuildFrom(), SparkSchema$.MODULE$.fieldFormat()), Writes$.MODULE$.iterableWrites2(Predef$.MODULE$.$conforms(), SparkSchema$.MODULE$.fieldFormat()))))
            .and(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("projectId")).format(Format$.MODULE$.GenericFormat(Reads$.MODULE$.LongReads(), Writes$.MODULE$.LongWrites())))
            .and(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).optionHandlers().formatHandler(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("contextText")), Format$.MODULE$.GenericFormat(Reads$.MODULE$.StringReads(), Writes$.MODULE$.StringWrites())))
            .and(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("skipHeader")).format(Format$.MODULE$.GenericFormat(Reads$.MODULE$.BooleanReads(), Writes$.MODULE$.BooleanWrites())))
            .and(JsPath$.MODULE$.$bslash(JsonConfiguration$.MODULE$.default(Json$MacroOptions$Default$macroOptionsDefault$.MODULE$).naming().apply("sparkOptions")).format(Format$.MODULE$.GenericFormat(Reads$.MODULE$.mapReads(Reads$.MODULE$.StringReads()), Writes$.MODULE$.genericMapWrites(Writes$.MODULE$.StringWrites()))))
            .apply((fileArgument, seq, obj, option, obj2, map) -> {
            return $anonfun$format$1(fileArgument, seq, BoxesRunTime.unboxToLong(obj), option, BoxesRunTime.unboxToBoolean(obj2), map);
        }, package$.MODULE$.unlift(loadSparkCSV -> {
            return MODULE$.unapply(loadSparkCSV);
        }), OFormat$.MODULE$.invariantFunctorOFormat());
        // Public format: only JSON objects are accepted on read; writes delegate directly.
        this.format = OFormat$.MODULE$.apply(jsValue -> {
            return jsValue instanceof JsObject ? oFormat.reads((JsObject) jsValue) : JsError$.MODULE$.apply("error.expected.jsobject");
        }, loadSparkCSV2 -> {
            return oFormat.writes(loadSparkCSV2);
        });
    }
}
