package ai.tripl.arc.extract;

import ai.tripl.arc.api.API;
import ai.tripl.arc.util.CloudUtils$;
import ai.tripl.arc.util.ExtractUtils$;
import ai.tripl.arc.util.MetadataUtils$;
import ai.tripl.arc.util.log.logger.Logger;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple15;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.mutable.ArrayOps;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Either;
import scala.util.Left;
import scala.util.Right;

/* compiled from: TextExtract.scala */
/* loaded from: input_file:ai/tripl/arc/extract/TextExtractStage$.class */
/**
 * Singleton module object accompanying {@link TextExtractStage}.
 *
 * <p>Holds the stage's {@link #execute} logic plus the factory/extractor pair
 * ({@link #apply}, {@link #unapply}) for {@code TextExtractStage} instances. The single
 * instance is published through {@link #MODULE$}.
 */
public final class TextExtractStage$ implements Serializable {
    /** The single instance; assigned by the private constructor during static initialisation. */
    public static TextExtractStage$ MODULE$;

    static {
        // The constructor stores the new instance in MODULE$.
        new TextExtractStage$();
    }

    /**
     * Reads text input into a DataFrame and registers it as a view named by the stage's
     * {@code outputView}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Resolve the optional schema via {@code ExtractUtils.getSchema} and apply the stage's
     *       authentication via {@code CloudUtils.setHadoopConfiguration}.</li>
     *   <li>Resolve the path string: a {@code Left} input names a table whose string-typed
     *       {@code value} column is collected and joined with {@code ","}; a {@code Right} input
     *       is used as the path directly.</li>
     *   <li>Read the path, either as a stream (schema required) or as a batch read whose options
     *       depend on {@code multiLine} and {@code basePath}.</li>
     *   <li>Substitute an empty DataFrame built from the schema when the path does not exist or
     *       the loaded DataFrame has no columns; fail if no schema is available.</li>
     *   <li>Add internal columns, apply schema metadata, optionally repartition, register the
     *       view, and record statistics in the stage detail map.</li>
     * </ol>
     *
     * <p>Any {@link Exception} raised inside a guarded region is rethrown wrapped in one of the
     * {@code TextExtractStage$$anon$N} types, each constructed with the cause and the stage.
     *
     * @param textExtractStage the stage configuration to execute
     * @param sparkSession     session used for reading, view registration and caching
     * @param logger           logger passed through to the utility calls
     * @param aRCContext       context supplying the streaming flag, view mutability and storage level
     * @return the registered DataFrame wrapped via {@code Option.apply}
     */
    public Option<Dataset<Row>> execute(TextExtractStage textExtractStage, SparkSession sparkSession, Logger logger, API.ARCContext aRCContext) {
        try {
            // Wildcard type: only Some/None discrimination and a StructType cast are needed below.
            Option<?> optionSchema = ExtractUtils$.MODULE$.getSchema(textExtractStage.schema(), sparkSession, logger);
            CloudUtils$.MODULE$.setHadoopConfiguration(textExtractStage.authentication(), sparkSession, logger, aRCContext);
            try {
                // ---- Resolve the path string -------------------------------------------------
                String path;
                Either<String, String> input = textExtractStage.input();
                if (input instanceof Left) {
                    // Left carries a table/view name; its 'value' column supplies the paths.
                    final String signatureError = "TextExtract requires 'inputView' to be dataset with [value: string] signature.";
                    Dataset<Row> inputView = sparkSession.table(((Left<String, String>) input).value());
                    StructType inputSchema = inputView.schema();
                    // Throws a stage-specific exception when there is no 'value' column.
                    int valueIndex = liftedTree1$1(inputSchema, signatureError, inputView, textExtractStage);
                    if (!(inputSchema.fields()[valueIndex].dataType() instanceof StringType)) {
                        throw new TextExtractStage$$anon$3(signatureError, inputSchema, valueIndex, textExtractStage);
                    }
                    // Collect every 'value' to the driver and join into one comma-separated string.
                    path = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) inputView.select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("value")})).collect())).map(row -> {
                        return ((Row) row).getString(0);
                    }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))).mkString(",");
                } else {
                    if (!(input instanceof Right)) {
                        throw new MatchError(input);
                    }
                    // Right carries the path string itself.
                    path = ((Right<String, String>) input).value();
                }

                // ---- Read the path -----------------------------------------------------------
                // Right = loaded DataFrame; Left = "path does not exist" marker handled further down.
                Either<API.PathNotExistsExtractError, Dataset<Row>> readResult;
                if (!aRCContext.isStreaming()) {
                    try {
                        Option<String> basePath = textExtractStage.basePath();
                        if (textExtractStage.multiLine()) {
                            if (basePath instanceof Some) {
                                // NOTE(review): 'wholetext' is not set on this branch, so with a basePath the
                                // multiLine flag only changes the 'mergeSchema' value — confirm this is intended.
                                readResult = package$.MODULE$.Right().apply(sparkSession.read().option("mergeSchema", "true").option("basePath", ((Some<String>) basePath).value()).textFile(path).toDF());
                            } else {
                                if (!None$.MODULE$.equals(basePath)) {
                                    throw new MatchError(basePath);
                                }
                                readResult = package$.MODULE$.Right().apply(sparkSession.read().option("wholetext", "true").textFile(path).toDF());
                            }
                        } else {
                            if (basePath instanceof Some) {
                                readResult = package$.MODULE$.Right().apply(sparkSession.read().option("mergeSchema", "false").option("basePath", ((Some<String>) basePath).value()).textFile(path).toDF());
                            } else {
                                if (!None$.MODULE$.equals(basePath)) {
                                    throw new MatchError(basePath);
                                }
                                readResult = package$.MODULE$.Right().apply(sparkSession.read().option("wholetext", "false").textFile(path).toDF());
                            }
                        }
                    } catch (Throwable th) {
                        if ((th instanceof AnalysisException) && th.getMessage().contains("Path does not exist")) {
                            // A missing path is recoverable: record it as a Left so the empty-DataFrame
                            // fallback below can run. Previously this assignment fell through to the
                            // rethrow, which made the Left handling unreachable.
                            readResult = package$.MODULE$.Left().apply(new API.PathNotExistsExtractError(Option$.MODULE$.apply(path)));
                        } else if (th instanceof Exception) {
                            throw ((Exception) th);
                        } else {
                            throw th;
                        }
                    }
                } else {
                    // Streaming reads need an explicit schema.
                    if (!(optionSchema instanceof Some)) {
                        if (None$.MODULE$.equals(optionSchema)) {
                            throw new Exception("TextExtract requires 'schemaURI' to be set if Arc is running in streaming mode.");
                        }
                        throw new MatchError(optionSchema);
                    }
                    StructType streamSchema = (StructType) ((Some<?>) optionSchema).value();
                    Option<API.Watermark> watermark = textExtractStage.watermark();
                    if (watermark instanceof Some) {
                        API.Watermark wm = ((Some<API.Watermark>) watermark).value();
                        readResult = package$.MODULE$.Right().apply(sparkSession.readStream().option("mergeSchema", "true").schema(streamSchema).text(path).withWatermark(wm.eventTime(), wm.delayThreshold()));
                    } else {
                        if (!None$.MODULE$.equals(watermark)) {
                            throw new MatchError(watermark);
                        }
                        readResult = package$.MODULE$.Right().apply(sparkSession.readStream().option("mergeSchema", "true").schema(streamSchema).text(path));
                    }
                }

                // ---- Post-process and register ----------------------------------------------
                try {
                    Dataset<Row> frame;
                    if (readResult instanceof Right) {
                        Dataset<Row> loaded = ((Right<API.PathNotExistsExtractError, Dataset<Row>>) readResult).value();
                        if (loaded.schema().length() != 0) {
                            frame = loaded;
                        } else {
                            // Loaded DataFrame has no columns: fall back to an empty one built from the schema.
                            if (!(optionSchema instanceof Some)) {
                                if (None$.MODULE$.equals(optionSchema)) {
                                    throw new Exception(new API.EmptySchemaExtractError(None$.MODULE$).getMessage());
                                }
                                throw new MatchError(optionSchema);
                            }
                            frame = sparkSession.createDataFrame(sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), (StructType) ((Some<?>) optionSchema).value());
                        }
                    } else {
                        if (!(readResult instanceof Left)) {
                            throw new MatchError(readResult);
                        }
                        // Path did not exist: report zero records, then use an empty DataFrame if a
                        // schema is available, otherwise fail with the path error's message.
                        API.PathNotExistsExtractError pathError = ((Left<API.PathNotExistsExtractError, Dataset<Row>>) readResult).value();
                        textExtractStage.stageDetail().put("records", 0);
                        if (!(optionSchema instanceof Some)) {
                            if (None$.MODULE$.equals(optionSchema)) {
                                throw new Exception(pathError.getMessage());
                            }
                            throw new MatchError(optionSchema);
                        }
                        frame = sparkSession.createDataFrame(sparkSession.sparkContext().emptyRDD(ClassTag$.MODULE$.apply(Row.class)), (StructType) ((Some<?>) optionSchema).value());
                    }

                    Dataset<Row> withInternalColumns = ExtractUtils$.MODULE$.addInternalColumns(frame, textExtractStage.contiguousIndex(), sparkSession, aRCContext);

                    // Attach schema metadata only when a schema was supplied.
                    Dataset<Row> enriched;
                    if (optionSchema instanceof Some) {
                        enriched = MetadataUtils$.MODULE$.setMetadata(withInternalColumns, (StructType) ((Some<?>) optionSchema).value(), sparkSession, logger);
                    } else {
                        if (!None$.MODULE$.equals(optionSchema)) {
                            throw new MatchError(optionSchema);
                        }
                        enriched = withInternalColumns;
                    }

                    // Repartition only when the stage requests a partition count.
                    Dataset<Row> output;
                    Option<Object> numPartitions = textExtractStage.numPartitions();
                    if (numPartitions instanceof Some) {
                        output = enriched.repartition(BoxesRunTime.unboxToInt(((Some<Object>) numPartitions).value()));
                    } else {
                        if (!None$.MODULE$.equals(numPartitions)) {
                            throw new MatchError(numPartitions);
                        }
                        output = enriched;
                    }

                    // With immutable views an existing view name is not replaced.
                    if (aRCContext.immutableViews()) {
                        output.createTempView(textExtractStage.outputView());
                    } else {
                        output.createOrReplaceTempView(textExtractStage.outputView());
                    }

                    // Statistics are recorded for batch DataFrames only.
                    if (!output.isStreaming()) {
                        textExtractStage.stageDetail().put("inputFiles", Integer.valueOf(output.inputFiles().length));
                        textExtractStage.stageDetail().put("outputColumns", Integer.valueOf(output.schema().length()));
                        textExtractStage.stageDetail().put("numPartitions", Integer.valueOf(output.rdd().partitions().length));
                        if (textExtractStage.persist()) {
                            sparkSession.catalog().cacheTable(textExtractStage.outputView(), aRCContext.storageLevel());
                            // count() runs a job over the cached view to obtain the record count.
                            textExtractStage.stageDetail().put("records", Long.valueOf(output.count()));
                        }
                    }
                    return Option$.MODULE$.apply(output);
                } catch (Exception e) {
                    // NOTE(review): if the anon wrapper types are themselves Exceptions, a failure here is
                    // wrapped again by each enclosing handler below — confirm this nesting is intended.
                    throw new TextExtractStage$$anon$5(e, textExtractStage);
                }
            } catch (Exception e2) {
                throw new TextExtractStage$$anon$4(e2, textExtractStage);
            }
        } catch (Exception e3) {
            throw new TextExtractStage$$anon$1(e3, textExtractStage);
        }
    }

    /**
     * Factory: builds a {@link TextExtractStage} by forwarding all fifteen arguments, in order,
     * to its constructor. Argument order matches the tuple produced by {@link #unapply}.
     */
    public TextExtractStage apply(TextExtract textExtract, Option<String> option, String str, Option<String> option2, Either<String, List<API.ExtractColumn>> either, String str2, Either<String, String> either2, Option<API.Authentication> option3, Map<String, String> map, boolean z, Option<Object> option4, boolean z2, boolean z3, Option<String> option5, Option<API.Watermark> option6) {
        return new TextExtractStage(textExtract, option, str, option2, either, str2, either2, option3, map, z, option4, z2, z3, option5, option6);
    }

    /**
     * Extractor: decomposes a stage into a 15-tuple of (plugin, id, name, description, schema,
     * outputView, input, authentication, params, persist, numPartitions, contiguousIndex,
     * multiLine, basePath, watermark), with the boolean fields boxed.
     *
     * @param textExtractStage the stage to decompose; may be {@code null}
     * @return {@code None} for a {@code null} stage, otherwise {@code Some} of the tuple
     */
    public Option<Tuple15<TextExtract, Option<String>, String, Option<String>, Either<String, List<API.ExtractColumn>>, String, Either<String, String>, Option<API.Authentication>, Map<String, String>, Object, Option<Object>, Object, Object, Option<String>, Option<API.Watermark>>> unapply(TextExtractStage textExtractStage) {
        return textExtractStage == null ? None$.MODULE$ : new Some(new Tuple15(textExtractStage.plugin(), textExtractStage.id(), textExtractStage.name(), textExtractStage.description(), textExtractStage.schema(), textExtractStage.outputView(), textExtractStage.input(), textExtractStage.authentication(), textExtractStage.params(), BoxesRunTime.boxToBoolean(textExtractStage.persist()), textExtractStage.numPartitions(), BoxesRunTime.boxToBoolean(textExtractStage.contiguousIndex()), BoxesRunTime.boxToBoolean(textExtractStage.multiLine()), textExtractStage.basePath(), textExtractStage.watermark()));
    }

    /** Serialization hook: resolves any deserialized instance back to the {@link #MODULE$} singleton. */
    private Object readResolve() {
        return MODULE$;
    }

    /**
     * Looks up the index of the {@code value} field in {@code structType}.
     *
     * @param structType       schema to search
     * @param str              error message passed to the exception raised on failure
     * @param dataset          dataset passed to the exception raised on failure
     * @param textExtractStage stage passed to the exception raised on failure
     * @return the index returned by {@code structType.fieldIndex("value")}
     * @throws TextExtractStage$$anon$2 when the lookup throws any {@link Exception}; the
     *         original exception is not passed on to the replacement
     */
    private static final /* synthetic */ int liftedTree1$1(StructType structType, String str, Dataset dataset, TextExtractStage textExtractStage) {
        try {
            return structType.fieldIndex("value");
        } catch (Exception e) {
            throw new TextExtractStage$$anon$2(str, dataset, textExtractStage);
        }
    }

    /** Private: the only instance is created by the static initialiser and stored in {@link #MODULE$}. */
    private TextExtractStage$() {
        MODULE$ = this;
    }
}
