package com.intel.analytics.bigdl.dllib.feature.text;

import com.intel.analytics.bigdl.dllib.feature.common.Relation;
import com.intel.analytics.bigdl.dllib.feature.common.RelationPair;
import com.intel.analytics.bigdl.dllib.feature.common.Relations$;
import com.intel.analytics.bigdl.dllib.utils.Log4Error$;
import java.io.File;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.SQLContext;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.Map;
import scala.collection.mutable.Map$;
import scala.collection.mutable.StringBuilder;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Ordering$;
import scala.math.Ordering$Int$;
import scala.math.Ordering$String$;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ScalaRunTime$;

/* compiled from: TextSet.scala */
/* loaded from: input_file:com/intel/analytics/bigdl/dllib/feature/text/TextSet$.class */
/**
 * JVM singleton backing the Scala {@code TextSet} companion object.
 *
 * <p>It exposes factory methods that build {@link LocalTextSet} / {@link DistributedTextSet}
 * instances from arrays, RDDs, directories, CSV files, Parquet files and relation data, plus a
 * word-index helper. Most of the per-element logic lives in compiled closure classes
 * ({@code TextSet$$anonfun$...}) that are not part of this file; comments below describe only
 * what this class itself does with them.
 */
public final class TextSet$ {
    /** The single shared instance, created eagerly when this class is initialised. */
    public static final TextSet$ MODULE$ = new TextSet$();

    private final Logger logger;

    private TextSet$() {
        this.logger = LogManager.getLogger(getClass());
    }

    /** @return the logger created for this class */
    public Logger logger() {
        return this.logger;
    }

    /**
     * Wraps an array of features in a {@link LocalTextSet}.
     *
     * @param textFeatureArr features to wrap; passed to the constructor as-is
     * @return a new local text set
     */
    public LocalTextSet array(TextFeature[] textFeatureArr) {
        return new LocalTextSet(textFeatureArr);
    }

    /**
     * Wraps an RDD of features in a {@link DistributedTextSet}.
     *
     * @param rdd features to wrap; passed to the constructor as-is
     * @return a new distributed text set
     */
    public DistributedTextSet rdd(RDD<TextFeature> rdd) {
        return new DistributedTextSet(rdd);
    }

    /**
     * Reads text features from the entries found under {@code str}.
     *
     * @param str          path whose child entries are listed
     * @param sparkContext when {@code null} the local file system is read and a local set is
     *                     built; otherwise the path is read through Hadoop/Spark
     * @param i            forwarded to {@code SparkContext.wholeTextFiles} as its second argument;
     *                     unused in the local case
     * @return the text set that was read
     */
    public TextSet read(String str, SparkContext sparkContext, int i) {
        if (sparkContext == null) {
            return readLocalDirectory(str);
        }
        return readDistributedDirectory(str, sparkContext, i);
    }

    /** Local-file-system branch of {@link #read}: walks {@code str} with {@link File}. */
    private TextSet readLocalDirectory(String str) {
        ArrayBuffer features = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        HashMap classes = new HashMap();
        File root = new File(str);
        Log4Error$.MODULE$.invalidInputError(
            root.exists(),
            new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", " doesn't exist. Please check your input path"}))
                .s(Predef$.MODULE$.genericWrapArray(new Object[]{str})),
            Log4Error$.MODULE$.invalidInputError$default$3());

        // NOTE(review): File.listFiles() returns null when the path is not a directory; that
        // case is not checked here.
        Object[] keptEntries = (Object[]) Predef$.MODULE$.refArrayOps(root.listFiles()).filter(new TextSet$$anonfun$4());
        List sortedEntries = (List) Predef$.MODULE$.refArrayOps(keptEntries).toList()
            .sorted(Ordering$.MODULE$.ordered(Predef$.MODULE$.$conforms()));
        // The closure receives both collections; presumably it fills them while visiting each
        // entry -- its body is not visible in this file.
        sortedEntries.foreach(new TextSet$$anonfun$5(features, classes));

        logger().info(
            new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Found ", " classes"}))
                .s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(classes.size())})));
        return array((TextFeature[]) features.toArray(ClassTag$.MODULE$.apply(TextFeature.class)));
    }

    /** Spark branch of {@link #read}: lists {@code str} via Hadoop and loads {@code str + "/*"}. */
    private TextSet readDistributedDirectory(String str, SparkContext sparkContext, int i) {
        FileSystem fileSystem = FileSystem.get(StringUtils.stringToURI(new String[]{str})[0], new Configuration());
        Object[] mappedEntries = (Object[]) Predef$.MODULE$.refArrayOps(fileSystem.listStatus(new Path(str)))
            .map(new TextSet$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        String[] sortedNames = (String[]) Predef$.MODULE$.refArrayOps(mappedEntries).sorted(Ordering$String$.MODULE$);

        logger().info(
            new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Found ", " classes."}))
                .s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(sortedNames.length)})));

        // Pair every sorted name with its position; the resulting map is handed to the closure
        // that converts each (path, content) record into a TextFeature.
        Object[] namesWithIndex = (Object[]) Predef$.MODULE$.refArrayOps(sortedNames).zip(
            Predef$.MODULE$.refArrayOps(sortedNames).indices(),
            Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        return rdd(
            sparkContext.wholeTextFiles(str + "/*", i).map(
                new TextSet$$anonfun$3(Predef$.MODULE$.refArrayOps(namesWithIndex).toMap(Predef$.MODULE$.$conforms())),
                ClassTag$.MODULE$.apply(TextFeature.class)));
    }

    /** Scala-generated default for the second parameter of {@link #read}: no Spark context. */
    public SparkContext read$default$2() {
        return null;
    }

    /** Scala-generated default for the third parameter of {@link #read}. */
    public int read$default$3() {
        return 1;
    }

    /**
     * Reads a text file line by line and converts every line into a {@link TextFeature}.
     *
     * @param str          path of the file to read
     * @param sparkContext when non-null the file is read with {@code SparkContext.textFile};
     *                     when {@code null} it is read locally with the fallback system codec
     * @param i            forwarded to {@code textFile} as its second argument; unused locally
     * @return a distributed set in the Spark case, otherwise a local set
     */
    public TextSet readCSV(String str, SparkContext sparkContext, int i) {
        if (sparkContext != null) {
            return rdd(sparkContext.textFile(str, i).map(new TextSet$$anonfun$8(), ClassTag$.MODULE$.apply(TextFeature.class)));
        }
        scala.io.BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        try {
            // toArray drains the line iterator, so the source can be closed once it returns.
            Object[] lines = (Object[]) source.getLines().toArray(ClassTag$.MODULE$.apply(String.class));
            return array((TextFeature[]) Predef$.MODULE$.refArrayOps(lines)
                .map(new TextSet$$anonfun$9(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(TextFeature.class))));
        } finally {
            // Release the underlying file handle even if reading or mapping fails.
            source.close();
        }
    }

    /** Scala-generated default for the second parameter of {@link #readCSV}: no Spark context. */
    public SparkContext readCSV$default$2() {
        return null;
    }

    /** Scala-generated default for the third parameter of {@link #readCSV}. */
    public int readCSV$default$3() {
        return 1;
    }

    /**
     * Loads a Parquet data set and maps each row to a {@link TextFeature}.
     *
     * @param str        path handed to {@code sQLContext.read().parquet}
     * @param sQLContext SQL context used for reading
     * @return a distributed text set over the mapped rows
     */
    public DistributedTextSet readParquet(String str, SQLContext sQLContext) {
        return rdd(sQLContext.read().parquet(str).rdd().map(new TextSet$$anonfun$10(), ClassTag$.MODULE$.apply(TextFeature.class)));
    }

    /**
     * Builds the distributed "Pairwise Training Set" from relations and two corpora.
     *
     * <p>Relation pairs are generated first; then both corpora are required to be distributed.
     * The result is produced by three nested string-keyed joins: {@code textSet} with the
     * relation pairs, then {@code textSet2} with that result, then {@code textSet2} again.
     * The key and mapping functions are compiled closures not visible in this file.
     *
     * @param rdd      relations to expand into pairs
     * @param textSet  first corpus; must be distributed
     * @param textSet2 second corpus; must be distributed
     * @return a distributed text set whose RDD is named "Pairwise Training Set"
     */
    public DistributedTextSet fromRelationPairs(RDD<Relation> rdd, TextSet textSet, TextSet textSet2) {
        RDD<RelationPair> generateRelationPairs = Relations$.MODULE$.generateRelationPairs(rdd);
        Log4Error$.MODULE$.invalidOperationError(
            textSet.isDistributed(),
            "corpus1 must be a DistributedTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        Log4Error$.MODULE$.invalidOperationError(
            textSet2.isDistributed(),
            "corpus2 must be a DistributedTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        return rdd(
            // Outermost join: second corpus (keyed) against the result of the middle join.
            RDD$.MODULE$.rddToPairRDDFunctions(
                    textSet2.toDistributed(textSet2.toDistributed$default$1(), textSet2.toDistributed$default$2()).rdd().keyBy(new TextSet$$anonfun$17()),
                    ClassTag$.MODULE$.apply(String.class),
                    ClassTag$.MODULE$.apply(TextFeature.class),
                    Ordering$String$.MODULE$)
                .join(
                    // Middle join: second corpus (keyed) against the result of the innermost join.
                    RDD$.MODULE$.rddToPairRDDFunctions(
                            textSet2.toDistributed(textSet2.toDistributed$default$1(), textSet2.toDistributed$default$2()).rdd().keyBy(new TextSet$$anonfun$14()),
                            ClassTag$.MODULE$.apply(String.class),
                            ClassTag$.MODULE$.apply(TextFeature.class),
                            Ordering$String$.MODULE$)
                        .join(
                            // Innermost join: first corpus (keyed) against the keyed relation pairs.
                            RDD$.MODULE$.rddToPairRDDFunctions(
                                    textSet.toDistributed(textSet.toDistributed$default$1(), textSet.toDistributed$default$2()).rdd().keyBy(new TextSet$$anonfun$11()),
                                    ClassTag$.MODULE$.apply(String.class),
                                    ClassTag$.MODULE$.apply(TextFeature.class),
                                    Ordering$String$.MODULE$)
                                .join(generateRelationPairs.keyBy(new TextSet$$anonfun$12()))
                                .map(new TextSet$$anonfun$13(), ClassTag$.MODULE$.apply(Tuple2.class))
                                .keyBy(new TextSet$$anonfun$15()))
                        .map(new TextSet$$anonfun$16(), ClassTag$.MODULE$.apply(Tuple3.class))
                        .keyBy(new TextSet$$anonfun$18()))
                .map(new TextSet$$anonfun$19(), ClassTag$.MODULE$.apply(Tuple3.class))
                .map(new TextSet$$anonfun$20(), ClassTag$.MODULE$.apply(TextFeature.class))
                .setName("Pairwise Training Set"));
    }

    /**
     * Builds a local pairwise set from relations and two local corpora.
     *
     * <p>Relation pairs are generated first; then both corpora are required to be local. Two
     * mutable maps are created and passed to closures that visit each corpus (presumably to
     * index its features -- the closure bodies are not visible here); every relation pair is
     * then mapped to a {@link TextFeature} with access to both maps.
     *
     * @param relationArr relations to expand into pairs
     * @param textSet     first corpus; must be local
     * @param textSet2    second corpus; must be local
     * @return a local text set with one feature per generated relation pair
     */
    public LocalTextSet fromRelationPairs(Relation[] relationArr, TextSet textSet, TextSet textSet2) {
        RelationPair[] generateRelationPairs = Relations$.MODULE$.generateRelationPairs(relationArr);
        Log4Error$.MODULE$.invalidInputError(textSet.isLocal(), "corpus1 must be a LocalTextSet", Log4Error$.MODULE$.invalidInputError$default$3());
        Log4Error$.MODULE$.invalidInputError(textSet2.isLocal(), "corpus2 must be a LocalTextSet", Log4Error$.MODULE$.invalidInputError$default$3());
        Map corpus1Index = Map$.MODULE$.apply(Nil$.MODULE$);
        Map corpus2Index = Map$.MODULE$.apply(Nil$.MODULE$);
        TextFeature[] corpus1Features = textSet.toLocal().array();
        TextFeature[] corpus2Features = textSet2.toLocal().array();
        Predef$.MODULE$.refArrayOps(corpus1Features).foreach(new TextSet$$anonfun$fromRelationPairs$1(corpus1Index));
        Predef$.MODULE$.refArrayOps(corpus2Features).foreach(new TextSet$$anonfun$fromRelationPairs$2(corpus2Index));
        return array((TextFeature[]) Predef$.MODULE$.refArrayOps(generateRelationPairs)
            .map(new TextSet$$anonfun$21(corpus1Index, corpus2Index), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(TextFeature.class))));
    }

    /**
     * Builds the distributed "Listwise Evaluation Set" from relations and two corpora.
     *
     * <p>Both corpora are required to be distributed. {@code textSet} is joined with the keyed
     * relations, that result is joined with {@code textSet2}, and the joined records are then
     * grouped by a string key before being mapped to features. The key, grouping and mapping
     * functions are compiled closures not visible in this file.
     *
     * @param rdd      relations to join against
     * @param textSet  first corpus; must be distributed
     * @param textSet2 second corpus; must be distributed
     * @return a distributed text set whose RDD is named "Listwise Evaluation Set"
     */
    public DistributedTextSet fromRelationLists(RDD<Relation> rdd, TextSet textSet, TextSet textSet2) {
        Log4Error$.MODULE$.invalidOperationError(
            textSet.isDistributed(),
            "corpus1 must be a DistributedTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        Log4Error$.MODULE$.invalidOperationError(
            textSet2.isDistributed(),
            "corpus2 must be a DistributedTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        return rdd(
            // Outer join: second corpus (keyed) against the result of the inner join.
            RDD$.MODULE$.rddToPairRDDFunctions(
                    textSet2.toDistributed(textSet2.toDistributed$default$1(), textSet2.toDistributed$default$2()).rdd().keyBy(new TextSet$$anonfun$25()),
                    ClassTag$.MODULE$.apply(String.class),
                    ClassTag$.MODULE$.apply(TextFeature.class),
                    Ordering$String$.MODULE$)
                .join(
                    // Inner join: first corpus (keyed) against the keyed relations.
                    RDD$.MODULE$.rddToPairRDDFunctions(
                            textSet.toDistributed(textSet.toDistributed$default$1(), textSet.toDistributed$default$2()).rdd().keyBy(new TextSet$$anonfun$22()),
                            ClassTag$.MODULE$.apply(String.class),
                            ClassTag$.MODULE$.apply(TextFeature.class),
                            Ordering$String$.MODULE$)
                        .join(rdd.keyBy(new TextSet$$anonfun$23()))
                        .map(new TextSet$$anonfun$24(), ClassTag$.MODULE$.apply(Tuple2.class))
                        .keyBy(new TextSet$$anonfun$26()))
                .map(new TextSet$$anonfun$27(), ClassTag$.MODULE$.apply(Tuple3.class))
                .groupBy(new TextSet$$anonfun$28(), ClassTag$.MODULE$.apply(String.class))
                .map(new TextSet$$anonfun$29(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Tuple3.class)))
                .map(new TextSet$$anonfun$30(), ClassTag$.MODULE$.apply(TextFeature.class))
                .setName("Listwise Evaluation Set"));
    }

    /**
     * Builds a local listwise set from relations and two local corpora.
     *
     * <p>Both corpora are required to be local. Three mutable maps are created: one per corpus
     * and one for the relations, each passed to a closure that visits the corresponding array
     * (closure bodies are not visible here). The relation map is then filtered and traversed by
     * a closure that receives both corpus maps and an output buffer; the buffer's contents form
     * the returned set.
     *
     * @param relationArr relations to process
     * @param textSet     first corpus; must be local
     * @param textSet2    second corpus; must be local
     * @return a local text set built from the output buffer
     */
    public LocalTextSet fromRelationLists(Relation[] relationArr, TextSet textSet, TextSet textSet2) {
        Log4Error$.MODULE$.invalidOperationError(
            textSet.isLocal(),
            "corpus1 must be a LocalTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        Log4Error$.MODULE$.invalidOperationError(
            textSet2.isLocal(),
            "corpus2 must be a LocalTextSet",
            Log4Error$.MODULE$.invalidOperationError$default$3(),
            Log4Error$.MODULE$.invalidOperationError$default$4());
        Map corpus1Index = Map$.MODULE$.apply(Nil$.MODULE$);
        Map corpus2Index = Map$.MODULE$.apply(Nil$.MODULE$);
        TextFeature[] corpus1Features = textSet.toLocal().array();
        TextFeature[] corpus2Features = textSet2.toLocal().array();
        Predef$.MODULE$.refArrayOps(corpus1Features).foreach(new TextSet$$anonfun$fromRelationLists$1(corpus1Index));
        Predef$.MODULE$.refArrayOps(corpus2Features).foreach(new TextSet$$anonfun$fromRelationLists$2(corpus2Index));
        Map relationIndex = Map$.MODULE$.apply(Nil$.MODULE$);
        Predef$.MODULE$.refArrayOps(relationArr).foreach(new TextSet$$anonfun$fromRelationLists$3(relationIndex));
        ArrayBuffer collected = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        relationIndex.withFilter(new TextSet$$anonfun$fromRelationLists$4())
            .foreach(new TextSet$$anonfun$fromRelationLists$5(corpus1Index, corpus2Index, collected));
        return array((TextFeature[]) collected.toArray(ClassTag$.MODULE$.apply(TextFeature.class)));
    }

    /**
     * Builds a word-to-index map, optionally extending an existing one.
     *
     * <p>Without an existing map, the words are zipped with the range {@code 1..strArr.length},
     * passed through a mapping closure and collected into an immutable map. With an existing
     * map, its entries are copied into a mutable map and a closure visits every word with a
     * counter that starts just above the largest existing value (or at 1 when the existing map
     * is empty, matching the 1-based numbering of the no-map case).
     *
     * @param strArr words to index
     * @param map    existing word index to extend, or {@code null} to start from scratch
     * @return an immutable word-to-index map
     */
    public scala.collection.immutable.Map<String, Object> wordsToMap(String[] strArr, scala.collection.immutable.Map<String, Object> map) {
        if (map == null) {
            Object[] wordsWithIndex = (Object[]) Predef$.MODULE$.refArrayOps(strArr).zip(
                package$.MODULE$.Range().apply(1, strArr.length + 1),
                Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            Object[] entries = (Object[]) Predef$.MODULE$.refArrayOps(wordsWithIndex)
                .map(new TextSet$$anonfun$wordsToMap$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
            return Predef$.MODULE$.refArrayOps(entries).toMap(Predef$.MODULE$.$conforms());
        }
        Map merged = Map$.MODULE$.apply(map.toSeq());
        // max() on an empty collection throws, so an empty existing map starts numbering at 1.
        int nextIndex = map.isEmpty()
            ? 1
            : BoxesRunTime.unboxToInt(map.values().max(Ordering$Int$.MODULE$)) + 1;
        Predef$.MODULE$.refArrayOps(strArr).foreach(new TextSet$$anonfun$wordsToMap$2(map, merged, IntRef.create(nextIndex)));
        return merged.toMap(Predef$.MODULE$.$conforms());
    }

    /** Scala-generated default for the second parameter of {@link #wordsToMap}: no existing map. */
    public scala.collection.immutable.Map<String, Object> wordsToMap$default$2() {
        return null;
    }
}
