package pl.edu.icm.coansys.input;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.PairRDDFunctions;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.hbase2sfbw2.DocumentProtoUtil;
import pl.edu.icm.coansys.hbase2sfbw2.HBaseToProtosMapper;
import pl.edu.icm.coansys.hbase2sfbw2.HbToProtosMRKey;
import pl.edu.icm.coansys.input.ReadDataFromHbaseToSf;
import pl.edu.icm.coansys.input.filters.EmptyTitleFilter;
import pl.edu.icm.coansys.input.filters.HBaseToSfProtosFilter;
import pl.edu.icm.coansys.input.filters.NoAuthorFilter;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.models.OrganizationProtos;
import pl.edu.icm.coansys.models.PersonProtos;
import pl.edu.icm.coansys.models.ProjectProtos;
import pl.edu.icm.coansys.output.merge.doc.AdvancedDuplicatesMerger;
import pl.edu.icm.coansys.output.merge.organization.SimpleOrganizationMerger;
import pl.edu.icm.coansys.output.merge.person.SimplePersonMerger;
import pl.edu.icm.coansys.output.merge.project.SimpleProjectMerger;
import pl.edu.icm.coansys.transformers.ProtoMediaMetadataToMetadata;
import pl.edu.icm.coansys.transformers.bwmetaToBW2Proto.BwMeta2bwProtoTransformer;
import pl.edu.icm.coansys.transformers.crossref.CrossrefJsonMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.crossref.UnixrefMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.dataciteToBW2Proto.Datacite2bwProtoTransformer;
import pl.edu.icm.coansys.transformers.dcOaiToBW2Proto.OaiDcMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.gsprotoToBW2Proto.GsMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.nlmToBW2Proto.NlMMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.openaire.OafMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.pbnToBW2Proto.PBNToBW2ProtoTransformer;
import pl.edu.icm.coansys.transformers.umultirank.UMultiRankToBw2Metadata;
import pl.edu.icm.model.transformers.coansys.MultiTypeParseResult;
import scala.Array$;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.Buffer$;
import scala.collection.mutable.BufferLike;
import scala.collection.mutable.MutableList;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering;
import scala.math.Ordering$;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: ReadDataFromHbaseToSf.scala */
/* loaded from: input_file:pl/edu/icm/coansys/input/ReadDataFromHbaseToSf$.class */
public final class ReadDataFromHbaseToSf$ {
    // Singleton instance of this class. Declared as null here; the private constructor
    // assigns it (`MODULE$ = this`) when the static initialiser below runs.
    public static final ReadDataFromHbaseToSf$ MODULE$ = null;
    // SLF4J logger; assigned in the private constructor and exposed through LOGGER().
    private final Logger LOGGER;

    // Creates the one and only instance at class-load time. The constructed object is not
    // stored here because the constructor itself publishes it through MODULE$.
    static {
        new ReadDataFromHbaseToSf$();
    }

    /**
     * Builds an RDD over the rows of one HBase table.
     *
     * <p>The HBase configuration is created from the defaults plus the two site files under
     * {@code /etc/hbase/conf}. If the table is not available, {@code "missing table: <name>"}
     * is printed to standard output and an empty single-partition RDD is returned instead of
     * failing.
     *
     * @param str          name of the HBase table to read
     * @param sparkContext Spark context used to create the RDD
     * @return the table rows read through {@link TableInputFormat} and converted by
     *         {@code ReadDataFromHbaseToSf$$anonfun$readTable$1}, or an empty RDD when the
     *         table is not available
     */
    public RDD<Tuple2<String, byte[]>> readTable(String str, SparkContext sparkContext) {
        Configuration create = HBaseConfiguration.create();
        create.set("hbase.client.keyvalue.maxsize", "0");
        create.addResource(new Path("/etc/hbase/conf/core-site.xml"));
        create.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

        // The admin client is only needed for the availability check; close it right away
        // so that each call does not leave an open HBase client connection behind.
        boolean tableAvailable;
        try (HBaseAdmin admin = new HBaseAdmin(create)) {
            tableAvailable = admin.isTableAvailable(str);
        }
        if (!tableAvailable) {
            Predef$.MODULE$.print(new StringBuilder().append("missing table: ").append(str).toString());
            return sparkContext.parallelize(Nil$.MODULE$, 1, ClassTag$.MODULE$.apply(Tuple2.class));
        }

        create.set("hbase.mapreduce.inputtable", str);
        // The JobConf instance is discarded; the call is retained from the original code.
        // NOTE(review): it may exist only for class-initialisation side effects - confirm before removing.
        new JobConf(create, getClass());
        return sparkContext.newAPIHadoopRDD(create, TableInputFormat.class, ImmutableBytesWritable.class, Result.class).map(new ReadDataFromHbaseToSf$$anonfun$readTable$1(), ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Accessor for the logger held in the {@code LOGGER} field.
     *
     * @return the SLF4J logger assigned in the private constructor
     */
    public Logger LOGGER() {
        return this.LOGGER;
    }

    /**
     * Runs every transformer from {@link #initTransformers()} and reports the combined outcome.
     *
     * <p>Each transformer is mapped through
     * {@code ReadDataFromHbaseToSf$$anonfun$transformAndReturnSuccess$1} (constructed with the
     * first four arguments); the per-transformer results are then reduced with
     * {@code ReadDataFromHbaseToSf$$anonfun$transformAndReturnSuccess$2} and unboxed to a
     * boolean. Both function objects are defined outside this method.
     *
     * @param str                     row identifier forwarded to the mapping function
     * @param multiTypeParseResult    result holder forwarded to the mapping function
     * @param mediaContainerOrBuilder media container forwarded to the mapping function
     * @param str2                    identifier forwarded to the mapping function
     * @param z                       value OR-ed with the reduced result
     * @return {@code true} if the reduced result is {@code true} or {@code z} is {@code true}
     */
    public boolean transformAndReturnSuccess(String str, MultiTypeParseResult multiTypeParseResult, DocumentProtos.MediaContainerOrBuilder mediaContainerOrBuilder, String str2, boolean z) {
        List<ProtoMediaMetadataToMetadata> transformers = initTransformers();
        TraversableOnce outcomes = (TraversableOnce) transformers.map(
                new ReadDataFromHbaseToSf$$anonfun$transformAndReturnSuccess$1(str, multiTypeParseResult, mediaContainerOrBuilder, str2),
                List$.MODULE$.canBuildFrom());
        Object reduced = outcomes.reduce(new ReadDataFromHbaseToSf$$anonfun$transformAndReturnSuccess$2());
        if (BoxesRunTime.unboxToBoolean(reduced)) {
            return true;
        }
        return z;
    }

    /**
     * Derives the identifier used for emitted records from a source row id.
     * Currently the identity mapping: the argument is returned unchanged.
     *
     * @param str source row id
     * @return {@code str} itself
     */
    public String getNewId(String str) {
        return str;
    }

    /**
     * Parses one serialized {@code DocumentWrapper} and expands it into keyed output records.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Parse {@code bArr}; when the document metadata getter is non-null, copy the metadata
     *       (and the media container, if non-null) into a fresh builder, set its id via
     *       {@code DocumentProtoUtil.setIDFor} and add it to the parse result.</li>
     *   <li>When the wrapper has a media container or metadata, run
     *       {@link #transformAndReturnSuccess}; on {@code false}, log the row id at INFO and,
     *       if a media container is present, a DEBUG line followed by a per-media callback.</li>
     *   <li>Map the documents, organizations, persons and projects collected in the parse result
     *       through their respective function objects and concatenate them in that order.</li>
     * </ol>
     *
     * @param str  row id of the record being transformed
     * @param bArr serialized {@code DocumentProtos.DocumentWrapper}
     * @return the concatenated mapped documents, organizations, persons and projects
     */
    public TraversableOnce<Tuple2<HbToProtosMRKey, byte[]>> transformDocument(String str, byte[] bArr) {
        DocumentProtos.DocumentWrapper wrapper = DocumentProtos.DocumentWrapper.parseFrom(bArr);
        MultiTypeParseResult collected = new MultiTypeParseResult();
        DocumentProtoUtil protoUtil = new DocumentProtoUtil();
        String newId = getNewId(str);

        if (wrapper.getDocumentMetadata() != null) {
            DocumentProtos.DocumentWrapper.Builder copy = DocumentProtos.DocumentWrapper.newBuilder();
            copy.setDocumentMetadata(wrapper.getDocumentMetadata());
            if (wrapper.getMediaContainer() != null) {
                copy.setMediaContainer(wrapper.getMediaContainer());
            }
            protoUtil.setIDFor(copy, newId);
            // The key object is constructed and discarded, exactly as in the original code.
            // NOTE(review): looks like a leftover - confirm the constructor has no side effects before removing.
            new HbToProtosMRKey(HbToProtosMRKey.Type.DOCUMENT, newId);
            collected.add(copy);
        }

        boolean hasSomethingToTransform =
                HBaseToProtosMapper.hasMediaContainer(wrapper.getMediaContainer())
                        || wrapper.getDocumentMetadata() != null;
        if (hasSomethingToTransform
                && !transformAndReturnSuccess(str, collected, wrapper.getMediaContainer(), newId, false)) {
            LOGGER().info("row id: {}", str);
            if (wrapper.getMediaContainer() != null) {
                LOGGER().debug("There were no suitable transformation. Available media types are: ");
                ((IterableLike) JavaConverters$.MODULE$.asScalaBufferConverter(wrapper.getMediaContainer().getMediaList()).asScala())
                        .foreach(new ReadDataFromHbaseToSf$$anonfun$transformDocument$1());
            }
        }

        // Each group is mapped immediately before it is appended, preserving the evaluation
        // order of the single chained expression this replaces.
        BufferLike records = (BufferLike) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(collected.getDocuments()).asScala())
                .map(new ReadDataFromHbaseToSf$$anonfun$transformDocument$2(), Buffer$.MODULE$.canBuildFrom());

        GenTraversableOnce organizations = (GenTraversableOnce) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(collected.getOrganizations()).asScala())
                .map(new ReadDataFromHbaseToSf$$anonfun$transformDocument$3(), Buffer$.MODULE$.canBuildFrom());
        records = (BufferLike) records.$plus$plus(organizations);

        GenTraversableOnce persons = (GenTraversableOnce) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(collected.getPersons()).asScala())
                .map(new ReadDataFromHbaseToSf$$anonfun$transformDocument$4(), Buffer$.MODULE$.canBuildFrom());
        records = (BufferLike) records.$plus$plus(persons);

        GenTraversableOnce projects = (GenTraversableOnce) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(collected.getProjects()).asScala())
                .map(new ReadDataFromHbaseToSf$$anonfun$transformDocument$5(), Buffer$.MODULE$.canBuildFrom());
        records = (BufferLike) records.$plus$plus(projects);

        return (TraversableOnce) records;
    }

    /**
     * Job entry point: builds one RDD per input name, unions/reduces them, transforms and
     * merges the records by key, and writes the result as Hadoop files.
     *
     * <p>Pipeline: the per-input RDDs (built by {@code ReadDataFromHbaseToSf$$anonfun$1}) are
     * reduced into one RDD, repartitioned with shuffle to {@code defaultParallelism * 6}
     * partitions, flat-mapped and mapped into pairs keyed by {@link HbToProtosMRKey}, combined
     * with {@code reduceByKey}, mapped to {@code Text}/{@code BytesWritable} pairs and saved
     * using {@code ReadDataFromHbaseToSf.RDDMultipleOutputFormat}.
     *
     * @param strArr {@code strArr[0]} is a {@code ;}-separated list of inputs (presumably HBase
     *               table names - each element is handed to {@code ReadDataFromHbaseToSf$$anonfun$1});
     *               {@code strArr[1]} is the output path passed to {@code saveAsHadoopFile}
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     */
    public void main(String[] strArr) {
        // Fail with a usable message instead of a bare ArrayIndexOutOfBoundsException.
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException(
                    "Expected two arguments: <';'-separated input names> <output path>, got "
                            + (strArr == null ? 0 : strArr.length));
        }
        String str = strArr[1];
        String[] split = strArr[0].split(";");

        SparkConf appName = new SparkConf().setAppName("HBaseInput");
        appName.set("spark.app.id", "input-data-spark-hbase-to-sf");
        appName.set("spark.yarn.app.id", "input-data-spark-hbase-to-sf");
        SparkContext sparkContext = new SparkContext(appName);

        Configuration create = HBaseConfiguration.create();
        create.set("hbase.client.keyvalue.maxsize", "0");
        create.addResource(new Path("/etc/hbase/conf/core-site.xml"));
        create.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
        // The admin client is never used afterwards; constructing it is kept (it fails early
        // when HBase cannot be reached - presumably the original intent), but it is now closed
        // immediately instead of being leaked for the lifetime of the driver.
        new HBaseAdmin(create).close();

        RDD rdd = (RDD) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(split).map(new ReadDataFromHbaseToSf$$anonfun$1(sparkContext), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(RDD.class)))).reduce(new ReadDataFromHbaseToSf$$anonfun$2());
        int defaultParallelism = sparkContext.defaultParallelism() * 6;
        RDD map = RDD$.MODULE$.rddToPairRDDFunctions(rdd.coalesce(defaultParallelism, true, rdd.coalesce$default$3(defaultParallelism, true)).flatMap(new ReadDataFromHbaseToSf$$anonfun$3(), ClassTag$.MODULE$.apply(Tuple2.class)).map(new ReadDataFromHbaseToSf$$anonfun$4(), ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(HbToProtosMRKey.class), ClassTag$.MODULE$.apply(Tuple2.class), Ordering$.MODULE$.ordered(Predef$.MODULE$.conforms())).reduceByKey(new ReadDataFromHbaseToSf$$anonfun$5()).map(new ReadDataFromHbaseToSf$$anonfun$6(), ClassTag$.MODULE$.apply(Tuple2.class));

        ClassTag apply = ClassTag$.MODULE$.apply(Text.class);
        ClassTag apply2 = ClassTag$.MODULE$.apply(BytesWritable.class);
        // Evaluation of the default ordering argument, retained from the compiled Scala call site.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(map);
        PairRDDFunctions rddToPairRDDFunctions = RDD$.MODULE$.rddToPairRDDFunctions(map, apply, apply2, (Ordering) null);
        rddToPairRDDFunctions.saveAsHadoopFile(str, Text.class, BytesWritable.class, ReadDataFromHbaseToSf.RDDMultipleOutputFormat.class, rddToPairRDDFunctions.saveAsHadoopFile$default$5(), rddToPairRDDFunctions.saveAsHadoopFile$default$6());
    }

    /**
     * Converts an HBase row key into a Hadoop {@link Text}.
     *
     * @param immutableBytesWritable row key wrapper
     * @return a {@code Text} built from a copy of the key's bytes
     */
    public Text transformKey(ImmutableBytesWritable immutableBytesWritable) {
        byte[] keyBytes = immutableBytesWritable.copyBytes();
        return new Text(keyBytes);
    }

    /**
     * Creates a fresh list of all media-to-metadata transformers.
     *
     * <p>A new instance of every transformer is created on each call; the list order matches
     * the order of the array below.
     *
     * @return immutable Scala list with one new instance of each transformer
     */
    public List<ProtoMediaMetadataToMetadata> initTransformers() {
        ProtoMediaMetadataToMetadata[] ordered = {
            new GsMediaToBw2Metadata(),
            new OaiDcMediaToBw2Metadata(),
            new BwMeta2bwProtoTransformer(),
            new NlMMediaToBw2Metadata(),
            new UnixrefMediaToBw2Metadata(),
            new CrossrefJsonMediaToBw2Metadata(),
            new OafMediaToBw2Metadata(),
            new Datacite2bwProtoTransformer(),
            new UMultiRankToBw2Metadata(),
            new PBNToBW2ProtoTransformer(),
        };
        MutableList transformers = new MutableList();
        for (ProtoMediaMetadataToMetadata transformer : ordered) {
            transformers.$plus$eq(transformer);
        }
        return transformers.toList();
    }

    /**
     * Creates a fresh list of record filters: an {@link EmptyTitleFilter} followed by a
     * {@link NoAuthorFilter}.
     *
     * @return immutable Scala list with one new instance of each filter, in that order
     */
    public List<HBaseToSfProtosFilter> initFilters() {
        HBaseToSfProtosFilter[] ordered = {
            new EmptyTitleFilter(),
            new NoAuthorFilter(),
        };
        MutableList filters = new MutableList();
        for (HBaseToSfProtosFilter filter : ordered) {
            filters.$plus$eq(filter);
        }
        return filters.toList();
    }

    /**
     * Factory for the document merger used by {@code mergeDocuments}.
     *
     * @return a new {@link AdvancedDuplicatesMerger} on every call
     */
    public AdvancedDuplicatesMerger docDuplicatesMerger() {
        return new AdvancedDuplicatesMerger();
    }

    /**
     * Merges two serialized {@code DocumentWrapper} messages into one.
     *
     * @param bArr  first serialized {@code DocumentProtos.DocumentWrapper}
     * @param bArr2 second serialized {@code DocumentProtos.DocumentWrapper}
     * @return serialized result of {@link AdvancedDuplicatesMerger}{@code .merge} applied to
     *         the two parsed wrappers, in argument order
     */
    public byte[] mergeDocuments(byte[] bArr, byte[] bArr2) {
        AdvancedDuplicatesMerger merger = docDuplicatesMerger();
        DocumentProtos.DocumentWrapper[] pair = {
            DocumentProtos.DocumentWrapper.parseFrom(bArr),
            DocumentProtos.DocumentWrapper.parseFrom(bArr2),
        };
        // Scala list -> java.util.List, the collection type the merger accepts.
        java.util.List duplicates = (java.util.List) JavaConverters$.MODULE$
                .seqAsJavaListConverter(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(pair)))
                .asJava();
        return merger.merge(duplicates).toByteArray();
    }

    /**
     * Factory for the person merger used by {@code mergePersons}.
     *
     * @return a new {@link SimplePersonMerger} on every call
     */
    public SimplePersonMerger personMerger() {
        return new SimplePersonMerger();
    }

    /**
     * Merges two serialized {@code PersonWrapper} messages into one.
     *
     * @param bArr  first serialized {@code PersonProtos.PersonWrapper}
     * @param bArr2 second serialized {@code PersonProtos.PersonWrapper}
     * @return serialized result of {@link SimplePersonMerger}{@code .merge} applied to the
     *         two parsed wrappers, in argument order
     */
    public byte[] mergePersons(byte[] bArr, byte[] bArr2) {
        SimplePersonMerger merger = personMerger();
        PersonProtos.PersonWrapper[] pair = {
            PersonProtos.PersonWrapper.parseFrom(bArr),
            PersonProtos.PersonWrapper.parseFrom(bArr2),
        };
        // Scala list -> java.util.List, the collection type the merger accepts.
        java.util.List duplicates = (java.util.List) JavaConverters$.MODULE$
                .seqAsJavaListConverter(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(pair)))
                .asJava();
        return merger.merge(duplicates).toByteArray();
    }

    /**
     * Factory for the project merger used by {@code mergeProjects}.
     *
     * @return a new {@link SimpleProjectMerger} on every call
     */
    public SimpleProjectMerger projectMerger() {
        return new SimpleProjectMerger();
    }

    /**
     * Merges two serialized {@code ProjectWrapper} messages into one.
     *
     * @param bArr  first serialized {@code ProjectProtos.ProjectWrapper}
     * @param bArr2 second serialized {@code ProjectProtos.ProjectWrapper}
     * @return serialized result of {@link SimpleProjectMerger}{@code .merge} applied to the
     *         two parsed wrappers, in argument order
     */
    public byte[] mergeProjects(byte[] bArr, byte[] bArr2) {
        SimpleProjectMerger merger = projectMerger();
        ProjectProtos.ProjectWrapper[] pair = {
            ProjectProtos.ProjectWrapper.parseFrom(bArr),
            ProjectProtos.ProjectWrapper.parseFrom(bArr2),
        };
        // Scala list -> java.util.List, the collection type the merger accepts.
        java.util.List duplicates = (java.util.List) JavaConverters$.MODULE$
                .seqAsJavaListConverter(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(pair)))
                .asJava();
        return merger.merge(duplicates).toByteArray();
    }

    /**
     * Factory for the organization merger used by {@code mergeOrganizations}.
     *
     * @return a new {@link SimpleOrganizationMerger} on every call
     */
    public SimpleOrganizationMerger organizationMerger() {
        return new SimpleOrganizationMerger();
    }

    /**
     * Merges two serialized {@code OrganizationWrapper} messages into one.
     *
     * @param bArr  first serialized {@code OrganizationProtos.OrganizationWrapper}
     * @param bArr2 second serialized {@code OrganizationProtos.OrganizationWrapper}
     * @return serialized result of {@link SimpleOrganizationMerger}{@code .merge} applied to
     *         the two parsed wrappers, in argument order
     */
    public byte[] mergeOrganizations(byte[] bArr, byte[] bArr2) {
        SimpleOrganizationMerger merger = organizationMerger();
        OrganizationProtos.OrganizationWrapper[] pair = {
            OrganizationProtos.OrganizationWrapper.parseFrom(bArr),
            OrganizationProtos.OrganizationWrapper.parseFrom(bArr2),
        };
        // Scala list -> java.util.List, the collection type the merger accepts.
        java.util.List duplicates = (java.util.List) JavaConverters$.MODULE$
                .seqAsJavaListConverter(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(pair)))
                .asJava();
        return merger.merge(duplicates).toByteArray();
    }

    /**
     * Private constructor, invoked once from the static initialiser. Publishes the new
     * instance through {@code MODULE$} and creates the logger.
     */
    private ReadDataFromHbaseToSf$() {
        // Publish the singleton before the remaining fields are initialised.
        MODULE$ = this;
        // NOTE(review): the logger name uses the "output" package while this class is declared
        // in pl.edu.icm.coansys.input - possibly a copy/paste leftover; confirm before changing,
        // since logging configuration may key on this name.
        this.LOGGER = LoggerFactory.getLogger("pl.edu.icm.coansys.output.ReadDataFromHbaseToSf");
    }
}
