package pl.edu.icm.coansys.matching.organization;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.hadoop.io.BytesWritable;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.WritableFactory$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.rdd.SequenceFileRDDFunctions;
import pl.edu.icm.coansys.models.OrganizationProtos;
import scala.Predef$;
import scala.Tuple2;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.ObjectRef;
import scala.runtime.ScalaRunTime$;

/* compiled from: DoMatching.scala */
/* loaded from: input_file:pl/edu/icm/coansys/matching/organization/DoMatching$.class */
public final class DoMatching$ {
    public static final DoMatching$ MODULE$ = null;

    static {
        new DoMatching$();
    }

    public String simplify(String str) {
        return str.toLowerCase(Locale.ENGLISH).replaceAll("[^a-z0-9]", "");
    }

    public HashFunction hf() {
        return Hashing.md5();
    }

    public long longHash(String str) {
        return hf().newHasher().putString((CharSequence) str, Charset.forName("UTF-8")).hash().asLong();
    }

    public String getOrganizationName(OrganizationProtos.OrganizationWrapper organizationWrapper) {
        return organizationWrapper.getOrganizationMetadata().getOriginalNameCount() > 0 ? organizationWrapper.getOrganizationMetadata().getOriginalName(0) : organizationWrapper.getOrganizationMetadata().getEnglishNameCount() > 0 ? organizationWrapper.getOrganizationMetadata().getEnglishName(0) : "";
    }

    public String trimOrganizationNamesForHash(String str) {
        ObjectRef objectRef = new ObjectRef(str.toLowerCase());
        Predef$.MODULE$.refArrayOps(new String[]{"university", "institute", "of ", " of"}).foreach(new DoMatching$$anonfun$trimOrganizationNamesForHash$1(objectRef));
        return (String) objectRef.elem;
    }

    public RDD<Tuple2<String, byte[]>> doMatching(RDD<Tuple2<String, byte[]>> rdd, RDD<Tuple2<String, byte[]>> rdd2) {
        RDD map = rdd.flatMap(new DoMatching$$anonfun$1(), ClassTag$.MODULE$.apply(Tuple2.class)).map(new DoMatching$$anonfun$2(3), ClassTag$.MODULE$.apply(Tuple2.class));
        RDD flatMap = rdd2.flatMap(new DoMatching$$anonfun$3(3), ClassTag$.MODULE$.apply(Tuple2.class));
        flatMap.cache();
        RDD filter = RDD$.MODULE$.rddToPairRDDFunctions(flatMap, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), Ordering$String$.MODULE$).join(map).filter(new DoMatching$$anonfun$4());
        filter.cache();
        return RDD$.MODULE$.rddToPairRDDFunctions(filter.map(new DoMatching$$anonfun$5(), ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), Ordering$String$.MODULE$).groupByKey().map(new DoMatching$$anonfun$6(), ClassTag$.MODULE$.apply(Tuple2.class));
    }

    public void main(String[] strArr) {
        String str = strArr[0];
        String str2 = strArr[1];
        SparkContext sparkContext = new SparkContext(new SparkConf().setAppName("Organization matching"));
        SequenceFileRDDFunctions rddToSequenceFileRDDFunctions = RDD$.MODULE$.rddToSequenceFileRDDFunctions(doMatching(sparkContext.sequenceFile(str, sparkContext.sequenceFile$default$2(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(BytesWritable.class), new DoMatching$$anonfun$7(), new DoMatching$$anonfun$8()).map(new DoMatching$$anonfun$9(), ClassTag$.MODULE$.apply(Tuple2.class)), sparkContext.sequenceFile(str2, sparkContext.sequenceFile$default$2(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(BytesWritable.class), new DoMatching$$anonfun$10(), new DoMatching$$anonfun$11()).map(new DoMatching$$anonfun$12(), ClassTag$.MODULE$.apply(Tuple2.class))), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Byte.TYPE)), WritableFactory$.MODULE$.stringWritableFactory(), WritableFactory$.MODULE$.bytesWritableFactory());
        rddToSequenceFileRDDFunctions.saveAsSequenceFile(strArr[2], rddToSequenceFileRDDFunctions.saveAsSequenceFile$default$2());
    }

    private DoMatching$() {
        MODULE$ = this;
    }
}
