package pl.edu.icm.commoncrawl.crossref;

import com.google.protobuf.ByteString;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.protobuf.connector.DocumentWrapperHBaseConnector;
import pl.edu.icm.coansys.transformers.HBaseClient;
import pl.edu.icm.coansys.transformers.events.DocumentNotFoundException;
import pl.edu.icm.coansys.transformers.gsprotoToBW2Proto.GsMediaToBw2Metadata;
import pl.edu.icm.coansys.transformers.impl.HBaseClientThrift;
import pl.edu.icm.model.transformers.crossref.CrossrefDataDownloader;

/* loaded from: input_file:pl/edu/icm/commoncrawl/crossref/Augmenter.class */
/**
 * Command-line tool that walks the documents of an HBase table and, for each
 * document that carries a {@code scholar_record} media entry with a DOI but no
 * Crossref record yet, downloads the Crossref data for that DOI and stores it
 * back as an additional media entry.
 *
 * <p>Crossref unixref is tried first; the Crossref JSON record is only
 * requested when no unixref record was stored.
 */
public class Augmenter {
    /** Downloads Crossref records; configured in the constructor. */
    CrossrefDataDownloader downloader;
    private static final Logger log = LoggerFactory.getLogger(Augmenter.class);
    protected static final String O_HOST = "host";
    protected static final String O_PORT = "port";
    protected static final String O_TABLE = "table";
    protected static final String O_CR_USER = "cruser";
    protected static final String O_CR_PASS = "crpass";
    protected static final String O_HELP = "help";

    /** Number of documents requested from the connector per fetch. */
    private static final int PAGE_SIZE = 1024;
    /** Port used when the {@code port} option is not given. */
    private static final String DEFAULT_PORT = "9090";

    private static final String KEY_SCHOLAR = "scholar_record";
    private static final String KEY_UNIXREF = "cf_unixref_record";
    private static final String KEY_JSON = "cf_json_record";
    private static final String TYPE_UNIXREF = "pb/cfUnixrefRecord";
    private static final String TYPE_JSON = "pb/cfJsonRecord";

    /** Transforms a scholar media entry into document metadata (used to obtain the DOI). */
    GsMediaToBw2Metadata gsParser = new GsMediaToBw2Metadata();
    /** Reads and stores document wrappers in the configured HBase table. */
    DocumentWrapperHBaseConnector connector = new DocumentWrapperHBaseConnector();

    /**
     * Creates an augmenter bound to one HBase table.
     *
     * @param hBaseClient      client handed to the document connector
     * @param tableName        name of the HBase table handed to the connector
     * @param crossrefUser     second argument passed to the downloader's {@code setup};
     *                         {@link #main} supplies the {@code cruser} option here
     * @param crossrefPassword third argument passed to the downloader's {@code setup};
     *                         {@link #main} supplies the {@code crpass} option here
     */
    public Augmenter(HBaseClient hBaseClient, String tableName, String crossrefUser, String crossrefPassword) {
        this.connector.setHbaseClient(hBaseClient);
        this.connector.setHbaseTableName(tableName);
        this.downloader = new CrossrefDataDownloader();
        this.downloader.setup((String) null, crossrefUser, crossrefPassword);
    }

    /**
     * Augments a single document with a Crossref record when possible.
     *
     * <p>Nothing is done when the document already has a {@code cf_json_record} or
     * {@code cf_unixref_record} media entry, when it has no {@code scholar_record}
     * entry, or when no non-blank DOI can be read from the scholar record. Media keys
     * are compared case-insensitively; if several scholar records exist the last one
     * is used.
     *
     * @param documentWrapper the document to inspect and, if applicable, store back
     *                        with an added Crossref media entry
     */
    public void augmentSingleDocument(DocumentProtos.DocumentWrapper documentWrapper) {
        DocumentProtos.Media scholarRecord = findScholarRecordIfNotAugmented(documentWrapper);
        if (scholarRecord == null) {
            return;
        }
        String doi = extractDoi(scholarRecord);
        if (StringUtils.isBlank(doi)) {
            return;
        }
        // The same builder is shared by both attempts, exactly as before the refactoring.
        DocumentProtos.DocumentWrapper.Builder builder = documentWrapper.toBuilder();
        if (!augmentWithUnixref(builder, doi)) {
            augmentWithJson(builder, doi);
        }
    }

    /**
     * Returns the scholar record of the document, or {@code null} when the document
     * has none or already contains a Crossref media entry.
     */
    private static DocumentProtos.Media findScholarRecordIfNotAugmented(DocumentProtos.DocumentWrapper documentWrapper) {
        if (!documentWrapper.hasMediaContainer()) {
            return null;
        }
        DocumentProtos.Media scholarRecord = null;
        for (DocumentProtos.Media media : documentWrapper.getMediaContainer().getMediaList()) {
            String key = media.getKey();
            if (key.equalsIgnoreCase(KEY_JSON) || key.equalsIgnoreCase(KEY_UNIXREF)) {
                return null;
            }
            if (key.equalsIgnoreCase(KEY_SCHOLAR)) {
                scholarRecord = media;
            }
        }
        return scholarRecord;
    }

    /**
     * Runs the scholar-record transformer and reads the DOI from the resulting metadata.
     *
     * @return the DOI, or {@code null} when reading it failed
     */
    private String extractDoi(DocumentProtos.Media scholarRecord) {
        DocumentProtos.DocumentMetadata.Builder metadata = DocumentProtos.DocumentMetadata.newBuilder();
        this.gsParser.transform(scholarRecord, "ndi", metadata);
        try {
            return metadata.getBasicMetadataOrBuilder().getDoi();
        } catch (Exception e) {
            // Best effort: a document without a readable DOI is skipped, but no longer silently.
            log.warn("Could not read DOI from scholar record", e);
            return null;
        }
    }

    /**
     * Tries to download the unixref record for the DOI and store the document with it.
     *
     * @return {@code true} when a unixref record was downloaded and stored
     */
    private boolean augmentWithUnixref(DocumentProtos.DocumentWrapper.Builder builder, String doi) {
        try {
            log.info("Trying to augment with unixref");
            byte[] unixref = this.downloader.downloadUnixrefAndCheckCorrecteness(doi);
            if (unixref == null) {
                return false;
            }
            builder.getMediaContainerBuilder().addMedia(newMedia(KEY_UNIXREF, TYPE_UNIXREF, unixref));
            this.connector.store(builder.build());
            log.info("Unixref found");
            return true;
        } catch (FileNotFoundException e) {
            log.error(e.getMessage(), e);
            return false;
        }
    }

    /** Tries to download the Crossref JSON record for the DOI and store the document with it. */
    private void augmentWithJson(DocumentProtos.DocumentWrapper.Builder builder, String doi) {
        try {
            byte[] json = this.downloader.downloadCrossrefJson(doi);
            if (json != null) {
                builder.getMediaContainerBuilder().addMedia(newMedia(KEY_JSON, TYPE_JSON, json));
                this.connector.store(builder.build());
            }
        } catch (FileNotFoundException e) {
            log.error(e.getMessage(), e);
        }
    }

    /** Builds a media entry with the given key, media type and raw content. */
    private static DocumentProtos.Media.Builder newMedia(String key, String mediaType, byte[] content) {
        DocumentProtos.Media.Builder media = DocumentProtos.Media.newBuilder();
        media.setKey(key);
        media.setMediaType(mediaType);
        media.setContent(ByteString.copyFrom(content));
        return media;
    }

    /**
     * Augments every document the connector returns, fetching {@value #PAGE_SIZE}
     * documents at a time.
     *
     * <p>For a page with more than one document, the last document is not processed
     * right away; its row id is used as the start of the next fetch. This relies on
     * the connector returning that document again as part of the next page
     * (NOTE(review): start-inclusive paging is assumed here — confirm against the
     * connector). A page with exactly one document ends the walk.
     *
     * <p>A {@code DocumentNotFoundException} raised while fetching is logged and ends
     * the walk.
     */
    public void augment() {
        try {
            List<?> page = this.connector.get((String) null, PAGE_SIZE);
            while (page != null && !page.isEmpty()) {
                int lastIndex = page.size() - 1;
                if (lastIndex == 0) {
                    augmentSingleDocument((DocumentProtos.DocumentWrapper) page.get(0));
                    return;
                }
                for (int i = 0; i < lastIndex; i++) {
                    augmentSingleDocument((DocumentProtos.DocumentWrapper) page.get(i));
                }
                DocumentProtos.DocumentWrapper last = (DocumentProtos.DocumentWrapper) page.get(lastIndex);
                page = this.connector.get(last.getRowId(), PAGE_SIZE);
            }
        } catch (DocumentNotFoundException e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Declares the command-line options: required {@code host} and {@code table},
     * optional {@code port}, {@code cruser}, {@code crpass}, and the {@code help} flag.
     *
     * @return the option set understood by {@link #main}
     */
    protected static Options defineOptions() {
        Options options = new Options();
        options.addOption(newOption("h", O_HOST, "host with hbase", true));
        options.addOption(newOption("t", O_TABLE, "table ", true));
        options.addOption(newOption("po", O_PORT, "port of hbase thrift", false));
        options.addOption(newOption("u", O_CR_USER, "Crossref user", false));
        options.addOption(newOption("p", O_CR_PASS, "Crossref password", false));
        options.addOption(new Option("he", O_HELP, false, "this help message"));
        return options;
    }

    /** Creates an option that takes an argument, with the given required flag. */
    private static Option newOption(String shortName, String longName, String description, boolean required) {
        Option option = new Option(shortName, longName, true, description);
        option.setRequired(required);
        return option;
    }

    /**
     * Prints the usage line and the option descriptions to standard output.
     *
     * @param options the options to describe
     */
    protected static void usage(Options options) {
        HelpFormatter helpFormatter = new HelpFormatter();
        PrintWriter printWriter = new PrintWriter(System.out);
        helpFormatter.printUsage(printWriter, 80, Augmenter.class.getSimpleName(), options);
        helpFormatter.printOptions(printWriter, 80, options, 1, 2);
        printWriter.flush();
    }

    /**
     * Parses the port option value.
     *
     * @throws ParseException when the value is not a valid integer, so that the caller
     *                        reports it like any other command-line error
     */
    private static int parsePort(String value) throws ParseException {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            ParseException invalidPort = new ParseException("Invalid port: " + value);
            invalidPort.initCause(e);
            throw invalidPort;
        }
    }

    /**
     * Entry point: parses the command line, opens a Thrift connection to HBase,
     * augments the configured table and closes the connection again.
     *
     * <p>On a command-line error the problem (when the parser reports one) and the
     * usage text are printed; when {@code help} is requested only the usage text is
     * printed.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        Options options = defineOptions();
        try {
            CommandLine commandLine = new GnuParser().parse(options, strArr);
            if (commandLine.hasOption(O_HELP)) {
                usage(options);
                return;
            }
            String host = commandLine.getOptionValue(O_HOST);
            String table = commandLine.getOptionValue(O_TABLE);
            // Validate the port before any connection is opened.
            int port = parsePort(commandLine.getOptionValue(O_PORT, DEFAULT_PORT));
            String crossrefUser = commandLine.getOptionValue(O_CR_USER);
            String crossrefPassword = commandLine.getOptionValue(O_CR_PASS);

            HBaseClientThrift client = new HBaseClientThrift(host, port);
            client.openConnection();
            try {
                new Augmenter(client, table, crossrefUser, crossrefPassword).augment();
            } finally {
                // Release the connection even when augmentation fails.
                client.closeConnection();
            }
        } catch (ParseException e) {
            if (StringUtils.isNotBlank(e.getMessage())) {
                System.err.println(e.getMessage());
            }
            usage(options);
        }
    }
}
