package com.intel.analytics.bigdl.dllib.feature.text;

import com.intel.analytics.bigdl.dllib.feature.FeatureSet$;
import com.intel.analytics.bigdl.dllib.feature.common.Preprocessing;
import com.intel.analytics.bigdl.dllib.feature.dataset.AbstractDataSet;
import com.intel.analytics.bigdl.dllib.feature.dataset.Sample;
import com.intel.analytics.bigdl.dllib.utils.Log4Error$;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.Map$;
import scala.math.Ordering$Int$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: TextSet.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\u001de\u0001B\u0001\u0003\u0001E\u0011!\u0003R5tiJL'-\u001e;fIR+\u0007\u0010^*fi*\u00111\u0001B\u0001\u0005i\u0016DHO\u0003\u0002\u0006\r\u00059a-Z1ukJ,'BA\u0004\t\u0003\u0015!G\u000e\\5c\u0015\tI!\"A\u0003cS\u001e$GN\u0003\u0002\f\u0019\u0005I\u0011M\\1msRL7m\u001d\u0006\u0003\u001b9\tQ!\u001b8uK2T\u0011aD\u0001\u0004G>l7\u0001A\n\u0003\u0001I\u0001\"a\u0005\u000b\u000e\u0003\tI!!\u0006\u0002\u0003\u000fQ+\u0007\u0010^*fi\"Aq\u0003\u0001BA\u0002\u0013\u0005\u0001$A\u0002sI\u0012,\u0012!\u0007\t\u00045\t\"S\"A\u000e\u000b\u0005]a\"BA\u000f\u001f\u0003\u0015\u0019\b/\u0019:l\u0015\ty\u0002%\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002C\u0005\u0019qN]4\n\u0005\rZ\"a\u0001*E\tB\u00111#J\u0005\u0003M\t\u00111\u0002V3yi\u001a+\u0017\r^;sK\"A\u0001\u0006\u0001BA\u0002\u0013\u0005\u0011&A\u0004sI\u0012|F%Z9\u0015\u0005)\u0002\u0004CA\u0016/\u001b\u0005a#\"A\u0017\u0002\u000bM\u001c\u0017\r\\1\n\u0005=b#\u0001B+oSRDq!M\u0014\u0002\u0002\u0003\u0007\u0011$A\u0002yIEB\u0001b\r\u0001\u0003\u0002\u0003\u0006K!G\u0001\u0005e\u0012$\u0007\u0005C\u00036\u0001\u0011\u0005a'\u0001\u0004=S:LGO\u0010\u000b\u0003oa\u0002\"a\u0005\u0001\t\u000b]!\u0004\u0019A\r\t\u000bi\u0002A\u0011I\u001e\u0002\u0013Q\u0014\u0018M\\:g_JlGC\u0001\n=\u0011\u0015i\u0014\b1\u0001?\u0003-!(/\u00198tM>\u0014X.\u001a:\u0011\t}\u0012E\u0005J\u0007\u0002\u0001*\u0011\u0011\tB\u0001\u0007G>lWn\u001c8\n\u0005\r\u0003%!\u0004)sKB\u0014xnY3tg&tw\rC\u0003F\u0001\u0011\u0005c)A\u0004jg2{7-\u00197\u0016\u0003\u001d\u0003\"a\u000b%\n\u0005%c#a\u0002\"p_2,\u0017M\u001c\u0005\u0006\u0017\u0002!\tER\u0001\u000eSN$\u0015n\u001d;sS\n,H/\u001a3\t\u000b5\u0003A\u0011\t(\u0002\u000fQ|Gj\\2bYR\tq\n\u0005\u0002\u0014!&\u0011\u0011K\u0001\u0002\r\u0019>\u001c\u0017\r\u001c+fqR\u001cV\r\u001e\u0005\u0006'\u0002!\t\u0005V\u0001\u000ei>$\u0015n\u001d;sS\n,H/\u001a3\u0015\u0007]*6\fC\u0004W%B\u0005\t\u0019A,\u0002\u0005M\u001c\u0007C\u0001-Z\u001b\u0005a\u0012B\u0001.\u001d\u00051\u00
19\u0006/\u0019:l\u0007>tG/\u001a=u\u0011\u001da&\u000b%AA\u0002u\u000bA\u0002]1si&$\u0018n\u001c8Ok6\u0004\"a\u000b0\n\u0005}c#aA%oi\")\u0011\r\u0001C!E\u0006IAo\u001c#bi\u0006\u001cV\r^\u000b\u0002GB\u0019AM];\u000f\u0005\u0015\u0004hB\u00014p\u001d\t9gN\u0004\u0002i[:\u0011\u0011\u000e\\\u0007\u0002U*\u00111\u000eE\u0001\u0007yI|w\u000e\u001e \n\u0003=I!!\u0004\b\n\u0005-a\u0011BA\u0005\u000b\u0013\t\t\b\"A\u0004qC\u000e\\\u0017mZ3\n\u0005M$(a\u0002#bi\u0006\u001cV\r\u001e\u0006\u0003c\"\u00012A^=|\u001b\u00059(B\u0001=\u0005\u0003\u001d!\u0017\r^1tKRL!A_<\u0003\rM\u000bW\u000e\u001d7f!\tYC0\u0003\u0002~Y\t)a\t\\8bi\"1q\u0010\u0001C!\u0003\u0003\t1B]1oI>l7\u000b\u001d7jiR!\u00111AA\u0005!\u0011Y\u0013Q\u0001\n\n\u0007\u0005\u001dAFA\u0003BeJ\f\u0017\u0010C\u0004\u0002\fy\u0004\r!!\u0004\u0002\u000f],\u0017n\u001a5ugB)1&!\u0002\u0002\u0010A\u00191&!\u0005\n\u0007\u0005MAF\u0001\u0004E_V\u0014G.\u001a\u0005\b\u0003/\u0001A\u0011IA\r\u0003Q9WM\\3sCR,wk\u001c:e\u0013:$W\r_'baRQ\u00111DA\u0018\u0003g\t9$a\u000f\u0011\u000f\u0005u\u00111EA\u0015;:\u00191&a\b\n\u0007\u0005\u0005B&\u0001\u0004Qe\u0016$WMZ\u0005\u0005\u0003K\t9CA\u0002NCBT1!!\t-!\u0011\ti\"a\u000b\n\t\u00055\u0012q\u0005\u0002\u0007'R\u0014\u0018N\\4\t\u0013\u0005E\u0012Q\u0003I\u0001\u0002\u0004i\u0016A\u0003:f[>4X\rV8q\u001d\"I\u0011QGA\u000b!\u0003\u0005\r!X\u0001\f[\u0006Dxk\u001c:eg:+X\u000eC\u0005\u0002:\u0005U\u0001\u0013!a\u0001;\u00069Q.\u001b8Ge\u0016\f\bBCA\u001f\u0003+\u0001\n\u00111\u0001\u0002\u001c\u0005YQ\r_5ti&tw-T1q\u0011\u001d\t\t\u0005\u0001C!\u0003\u0007\nQb]1wK^{'\u000fZ%oI\u0016DHc\u0001\u0016\u0002F!A\u0011qIA 
\u0001\u0004\tI#\u0001\u0003qCRD\u0007bBA&\u0001\u0011\u0005\u0013QJ\u0001\u000eY>\fGmV8sI&sG-\u001a=\u0015\u0007I\ty\u0005\u0003\u0005\u0002H\u0005%\u0003\u0019AA\u0015\u0011%\t\u0019\u0006AI\u0001\n\u0003\n)&\u0001\u0010hK:,'/\u0019;f/>\u0014H-\u00138eKbl\u0015\r\u001d\u0013eK\u001a\fW\u000f\u001c;%cU\u0011\u0011q\u000b\u0016\u0004;\u0006e3FAA.!\u0011\ti&a\u001a\u000e\u0005\u0005}#\u0002BA1\u0003G\n\u0011\"\u001e8dQ\u0016\u001c7.\u001a3\u000b\u0007\u0005\u0015D&\u0001\u0006b]:|G/\u0019;j_:LA!!\u001b\u0002`\t\tRO\\2iK\u000e\\W\r\u001a,be&\fgnY3\t\u0013\u00055\u0004!%A\u0005B\u0005U\u0013AH4f]\u0016\u0014\u0018\r^3X_J$\u0017J\u001c3fq6\u000b\u0007\u000f\n3fM\u0006,H\u000e\u001e\u00133\u0011%\t\t\bAI\u0001\n\u0003\n)&\u0001\u0010hK:,'/\u0019;f/>\u0014H-\u00138eKbl\u0015\r\u001d\u0013eK\u001a\fW\u000f\u001c;%g!I\u0011Q\u000f\u0001\u0012\u0002\u0013\u0005\u0013qO\u0001\u001fO\u0016tWM]1uK^{'\u000fZ%oI\u0016DX*\u00199%I\u00164\u0017-\u001e7uIQ*\"!!\u001f+\t\u0005m\u0011\u0011\f\u0005\n\u0003{\u0002\u0011\u0013!C!\u0003\u007f\nq\u0003^8ESN$(/\u001b2vi\u0016$G\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\u0005\u0005%fA,\u0002Z!I\u0011Q\u0011\u0001\u0012\u0002\u0013\u0005\u0013QK\u0001\u0018i>$\u0015n\u001d;sS\n,H/\u001a3%I\u00164\u0017-\u001e7uII\u0002")
/* loaded from: input_file:com/intel/analytics/bigdl/dllib/feature/text/DistributedTextSet.class */
/*
 * TextSet implementation whose TextFeatures live in a Spark RDD.
 *
 * The wrapped RDD is mutable: transform() replaces it in place. Methods named
 * xxx$default$N are the Scala-compiler-generated providers of default argument
 * values for parameter N of method xxx.
 */
public class DistributedTextSet extends TextSet {
    /* The RDD holding this set's TextFeatures; reassigned by transform(). */
    private RDD<TextFeature> rdd;

    /** Returns the RDD currently backing this TextSet. */
    public RDD<TextFeature> rdd() {
        return this.rdd;
    }

    /** Replaces the RDD backing this TextSet (Scala-style setter for {@code rdd}). */
    public void rdd_$eq(RDD<TextFeature> rdd) {
        this.rdd = rdd;
    }

    /**
     * Applies {@code preprocessing} to the backing RDD and stores the result as the new
     * backing RDD.
     *
     * @param preprocessing the transformation to apply to the RDD of TextFeatures
     * @return this instance (the set is updated in place, not copied)
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public TextSet transform(Preprocessing<TextFeature, TextFeature> preprocessing) {
        rdd_$eq(preprocessing.apply(rdd(), ClassTag$.MODULE$.apply(TextFeature.class)));
        return this;
    }

    /** Always {@code false}: this set is RDD-backed. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public boolean isLocal() {
        return false;
    }

    /** Always {@code true}: this set is RDD-backed. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public boolean isDistributed() {
        return true;
    }

    /**
     * Collects every TextFeature of the RDD into an array and wraps it in a
     * {@link LocalTextSet}.
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public LocalTextSet toLocal() {
        return new LocalTextSet((TextFeature[]) rdd().collect());
    }

    /**
     * Returns this instance unchanged; both arguments are ignored because the set is
     * already distributed.
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public DistributedTextSet toDistributed(SparkContext sparkContext, int i) {
        return this;
    }

    /** Default for the first argument of {@code toDistributed}: {@code null}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public SparkContext toDistributed$default$1() {
        return null;
    }

    /** Default for the second argument of {@code toDistributed}: {@code 4}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public int toDistributed$default$2() {
        return 4;
    }

    /**
     * Maps every TextFeature to a {@link Sample} (via a compiler-generated function whose
     * body is not visible here), names the resulting RDD {@code "Samples in <rdd name>"},
     * and wraps it with {@code FeatureSet.rdd} using that method's default arguments.
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public AbstractDataSet<Sample<Object>, ?> toDataSet() {
        return FeatureSet$.MODULE$.rdd(rdd().map(new DistributedTextSet$$anonfun$toDataSet$2(this), ClassTag$.MODULE$.apply(Sample.class)).setName(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Samples in ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{rdd().name()}))), FeatureSet$.MODULE$.rdd$default$2(), FeatureSet$.MODULE$.rdd$default$3(), FeatureSet$.MODULE$.rdd$default$4(), ClassTag$.MODULE$.apply(Sample.class));
    }

    /**
     * Splits the backing RDD with {@code RDD.randomSplit} (using its default seed) and
     * converts each resulting RDD into a TextSet.
     *
     * @param dArr the split weights, passed straight to {@code RDD.randomSplit}
     * @return one TextSet per split
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public TextSet[] randomSplit(double[] dArr) {
        Predef$ predef$ = Predef$.MODULE$;
        RDD<TextFeature> rdd = rdd();
        // NOTE(review): the mapping function presumably wraps each split RDD in a new
        // DistributedTextSet; its body is not visible in this file.
        return (TextSet[]) predef$.refArrayOps(rdd.randomSplit(dArr, rdd.randomSplit$default$2())).map(new DistributedTextSet$$anonfun$randomSplit$1(this), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(TextSet.class)));
    }

    /**
     * Builds a word-to-index map from the words found in the RDD, stores it on this set
     * via {@code setWordIndex}, and returns it.
     *
     * <p>Each argument is checked through {@code Log4Error.invalidOperationError}
     * (presumably raising an error when the condition is false; verify against Log4Error).
     *
     * @param i   removeTopN: number of leading words of the sorted word list to drop;
     *            must be {@code >= 0}
     * @param i2  maxWordsNum: maximum number of words to keep after dropping, or
     *            {@code -1} for no limit; must be {@code -1} or {@code > 0}
     * @param i3  minFreq: words whose reduced count fails the minFreq filter are removed
     *            when this is {@code > 1}; must be {@code >= 1}
     * @param map passed unchanged to {@code TextSet.wordsToMap} together with the selected
     *            words (presumably an existing word index to extend; may be {@code null},
     *            which is the declared default)
     * @return the map produced by {@code TextSet.wordsToMap}
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public Map<String, Object> generateWordIndexMap(int i, int i2, int i3, Map<String, Object> map) {
        String[] strArr;
        Log4Error$.MODULE$.invalidOperationError(i >= 0, "removeTopN should be a non-negative integer", Log4Error$.MODULE$.invalidOperationError$default$3(), Log4Error$.MODULE$.invalidOperationError$default$4());
        Log4Error$.MODULE$.invalidOperationError(i2 == -1 || i2 > 0, "maxWordsNum should be either -1 or a positive integer", Log4Error$.MODULE$.invalidOperationError$default$3(), Log4Error$.MODULE$.invalidOperationError$default$4());
        Log4Error$.MODULE$.invalidOperationError(i3 >= 1, "minFreq should be a positive integer", Log4Error$.MODULE$.invalidOperationError$default$3(), Log4Error$.MODULE$.invalidOperationError$default$4());
        if (i == 0 && i2 == -1 && i3 == 1) {
            // All arguments are at their defaults: no counting is needed, only the
            // distinct set of words.
            strArr = (String[]) rdd().flatMap(new DistributedTextSet$$anonfun$48(this), ClassTag$.MODULE$.apply(String.class)).distinct().collect();
        } else {
            // Pair every word with an Int and reduce by word.
            // NOTE(review): presumably (word, 1) summed into per-word frequencies; the
            // generated function bodies are not visible here.
            RDD reduceByKey = RDD$.MODULE$.rddToPairRDDFunctions(rdd().flatMap(new DistributedTextSet$$anonfun$49(this), ClassTag$.MODULE$.apply(String.class)).map(new DistributedTextSet$$anonfun$50(this), ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Int(), Ordering$String$.MODULE$).reduceByKey(new DistributedTextSet$$anonfun$1(this));
            if (i3 > 1) {
                reduceByKey = reduceByKey.filter(new DistributedTextSet$$anonfun$51(this, i3));
            }
            if (i > 0 || i2 > 0) {
                // Dropping/limiting depends on order, so sort by an Int key first, then
                // collect the words and trim the array locally.
                RDD rdd = reduceByKey;
                String[] strArr2 = (String[]) rdd.sortBy(new DistributedTextSet$$anonfun$52(this), rdd.sortBy$default$2(), rdd.sortBy$default$3(), Ordering$Int$.MODULE$, ClassTag$.MODULE$.Int()).map(new DistributedTextSet$$anonfun$53(this), ClassTag$.MODULE$.apply(String.class)).collect();
                if (i > 0) {
                    strArr2 = (String[]) Predef$.MODULE$.refArrayOps(strArr2).drop(i);
                }
                if (i2 > 0) {
                    strArr2 = (String[]) Predef$.MODULE$.refArrayOps(strArr2).take(i2);
                }
                strArr = strArr2;
            } else {
                // Only the minFreq filter applied: order is irrelevant, skip the sort.
                strArr = (String[]) reduceByKey.map(new DistributedTextSet$$anonfun$54(this), ClassTag$.MODULE$.apply(String.class)).collect();
            }
        }
        Map<String, Object> wordsToMap = TextSet$.MODULE$.wordsToMap(strArr, map);
        setWordIndex(wordsToMap);
        return wordsToMap;
    }

    /** Default for removeTopN (first argument of {@code generateWordIndexMap}): {@code 0}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public int generateWordIndexMap$default$1() {
        return 0;
    }

    /** Default for maxWordsNum (second argument of {@code generateWordIndexMap}): {@code -1}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public int generateWordIndexMap$default$2() {
        return -1;
    }

    /** Default for minFreq (third argument of {@code generateWordIndexMap}): {@code 1}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public int generateWordIndexMap$default$3() {
        return 1;
    }

    /** Default for the fourth argument of {@code generateWordIndexMap}: {@code null}. */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public Map<String, Object> generateWordIndexMap$default$4() {
        return null;
    }

    /**
     * Writes the current word index to {@code str} on the Hadoop file system resolved from
     * that path and the Spark context's Hadoop configuration.
     *
     * <p>{@code super.saveWordIndex(str)} runs first. The output stream is closed even
     * when writing an entry fails, so a failed save does not leak the file handle.
     *
     * @param str destination path (any URI the configured Hadoop FileSystem understands)
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public void saveWordIndex(String str) {
        super.saveWordIndex(str);
        FileSystem fileSystem = FileSystem.get(StringUtils.stringToURI(new String[]{str})[0], rdd().sparkContext().hadoopConfiguration());
        // try-with-resources: close() also flushes the buffer, and runs on failure too.
        try (BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileSystem.create(new Path(str)))) {
            getWordIndex().foreach(new DistributedTextSet$$anonfun$saveWordIndex$2(this, bufferedOutputStream));
        }
    }

    /**
     * Reads a word index from {@code str} and installs it on this set via
     * {@code setWordIndex}.
     *
     * <p>Each line is split on a single space: the first token is the word and the second
     * is parsed as its integer index. The reader is closed once all lines are consumed or
     * as soon as reading/parsing fails.
     *
     * @param str source path (any URI the configured Hadoop FileSystem understands)
     * @return the value returned by {@code setWordIndex}
     */
    @Override // com.intel.analytics.bigdl.dllib.feature.text.TextSet
    public TextSet loadWordIndex(String str) {
        FileSystem fileSystem = FileSystem.get(StringUtils.stringToURI(new String[]{str})[0], rdd().sparkContext().hadoopConfiguration());
        scala.collection.mutable.Map apply = Map$.MODULE$.apply(Nil$.MODULE$);
        // NOTE(review): InputStreamReader uses the platform default charset here; confirm
        // it matches the encoding used by the writer before pinning an explicit charset.
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(str))))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String[] split = line.split(" ");
                apply.put(split[0], BoxesRunTime.boxToInteger(new StringOps(Predef$.MODULE$.augmentString(split[1])).toInt()));
            }
        }
        return setWordIndex(apply.toMap(Predef$.MODULE$.$conforms()));
    }

    /**
     * Creates a distributed TextSet over the given RDD.
     *
     * @param rdd the RDD of TextFeatures to wrap
     */
    public DistributedTextSet(RDD<TextFeature> rdd) {
        this.rdd = rdd;
    }
}
