package eu.dnetlib.iis.wf.citationmatching;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import eu.dnetlib.iis.citationmatching.schemas.Citation;
import eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata;
import eu.dnetlib.iis.citationmatching.schemas.ReferenceMetadata;
import eu.dnetlib.iis.common.java.io.HdfsUtils;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import pl.edu.icm.coansys.citations.ConfigurableCitationMatchingService;
import pl.edu.icm.coansys.citations.CoreCitationMatchingSimpleFactory;

/* loaded from: input_file:eu/dnetlib/iis/wf/citationmatching/IisCitationMatchingJob.class */
/**
 * Command line entry point of the citation matching Spark job.
 *
 * <p>The job parses its command line with JCommander, creates a Kryo-enabled
 * {@link JavaSparkContext}, wires a {@link ConfigurableCitationMatchingService} with the
 * IIS-specific readers, converters, writer and reporter, and runs the matching.
 */
public class IisCitationMatchingJob {
    /**
     * Factory producing the core citation matching service used by
     * {@link #createConfigurableCitationMatchingService}.
     */
    private static CoreCitationMatchingSimpleFactory coreCitationMatchingFactory = new CoreCitationMatchingSimpleFactory();

    /**
     * Command line parameters of the job, populated by JCommander.
     * Option names and values are separated with {@code =}.
     */
    @Parameters(separators = "=")
    public static class IisCitationMatchingJobParameters {

        @Parameter(names = {"-fullDocumentPath"}, required = true, description = "path to directory/file with full documents (document with references")
        private String fullDocumentPath;

        @Parameter(names = {"-outputDirPath"}, required = true, description = "path to directory with results")
        private String outputDirPath;

        @Parameter(names = {"-outputReportPath"}, required = true, description = "path to directory with report")
        private String outputReportPath;

        /** Optional; keeps the value set in the constructor (10000) when not given on the command line. */
        @Parameter(names = {"-maxHashBucketSize"}, required = false, description = "max number of the citation-documents pairs for a given hash")
        private long maxHashBucketSize;

        /**
         * Optional; keeps the value set in the constructor (5) when not given on the command line.
         * NOTE(review): the option description says the value depends on the input format when
         * not set, but the constructor always assigns 5 - confirm which behavior is intended.
         */
        @Parameter(names = {"-numberOfPartitions"}, required = false, description = "number of partitions used for rdds with citations and documents read from input files, if not set it will depend on the input format")
        private Integer numberOfPartitions;

        /** Assigns the defaults of the optional parameters; instances are created only by the enclosing job. */
        private IisCitationMatchingJobParameters() {
            this.maxHashBucketSize = 10000L;
            this.numberOfPartitions = 5;
        }
    }

    /**
     * Runs the citation matching job.
     *
     * @param strArr command line arguments, see {@link IisCitationMatchingJobParameters}
     * @throws IOException propagated from the HDFS clean-up and the matching run
     */
    public static void main(String[] strArr) throws IOException {
        IisCitationMatchingJobParameters params = new IisCitationMatchingJobParameters();
        new JCommander(params).parse(strArr);

        SparkConf sparkConf = new SparkConf();
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.set("spark.kryo.registrator", "pl.edu.icm.coansys.citations.MatchableEntityKryoRegistrator");

        // try-with-resources closes the context exactly once, on success and on failure alike,
        // and attaches a failure of close() to the primary exception as suppressed.
        try (JavaSparkContext sparkContext = new JavaSparkContext(sparkConf)) {
            ConfigurableCitationMatchingService<String, ReferenceMetadata, String, DocumentMetadata, Citation, NullWritable> matchingService =
                    createConfigurableCitationMatchingService(sparkContext, params);

            // Both output locations are removed before the run starts
            // (presumably so that leftovers of a previous run do not get in the way).
            HdfsUtils.remove(sparkContext.hadoopConfiguration(), params.outputDirPath);
            HdfsUtils.remove(sparkContext.hadoopConfiguration(), params.outputReportPath);

            // The same input path is deliberately passed twice
            // (presumably once as the citation source and once as the document source).
            matchingService.matchCitations(sparkContext, params.fullDocumentPath, params.fullDocumentPath, params.outputDirPath);
        }
    }

    /**
     * Builds the matching service and wires all of its collaborators.
     *
     * @param javaSparkContext context handed to the core service factory and to the counter reporter
     * @param iisCitationMatchingJobParameters parsed job parameters supplying the hash bucket size,
     *        the number of partitions and the report path
     * @return the fully configured service
     */
    private static ConfigurableCitationMatchingService<String, ReferenceMetadata, String, DocumentMetadata, Citation, NullWritable> createConfigurableCitationMatchingService(JavaSparkContext javaSparkContext, IisCitationMatchingJobParameters iisCitationMatchingJobParameters) {
        ConfigurableCitationMatchingService<String, ReferenceMetadata, String, DocumentMetadata, Citation, NullWritable> service = new ConfigurableCitationMatchingService<>();

        service.setCoreCitationMatchingService(coreCitationMatchingFactory.createCoreCitationMatchingService(javaSparkContext, iisCitationMatchingJobParameters.maxHashBucketSize));
        service.setNumberOfPartitions(iisCitationMatchingJobParameters.numberOfPartitions);

        // Citation input side.
        service.setInputCitationReader(new ReferenceMetadataInputReader());
        service.setInputCitationConverter(new ReferenceMetadataInputConverter());

        // Document input side.
        service.setInputDocumentReader(new DocumentMetadataInputReader());
        service.setInputDocumentConverter(new DocumentMetadataInputConverter());

        // Output side: the writer is given a reporter configured with the report path.
        CitationMatchingCounterReporter counterReporter = new CitationMatchingCounterReporter();
        counterReporter.setSparkContext(javaSparkContext);
        counterReporter.setReportPath(iisCitationMatchingJobParameters.outputReportPath);

        CitationOutputWriter outputWriter = new CitationOutputWriter();
        outputWriter.setCitationMatchingReporter(counterReporter);

        service.setOutputConverter(new CitationOutputConverter());
        service.setOutputWriter(outputWriter);
        return service;
    }
}
