package pl.edu.icm.coansys.harvest.oaipmh.importation;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.oclc.oai.harvester2.app.OpenarchivesListWrite;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import pl.edu.icm.coansys.harvest.data.impl.HarvestData;
import pl.edu.icm.coansys.harvest.oaipmh.check.correctness.CheckParamsCorrectness;
import pl.edu.icm.coansys.harvest.oaipmh.path.make.SequenceFilePathMaker;
import pl.edu.icm.coansys.harvest.oaipmh.write.SequenceFileWriter;

/* loaded from: input_file:pl/edu/icm/coansys/harvest/oaipmh/importation/Importer.class */
/**
 * Harvests records from one or more base URLs through {@link OpenarchivesListWrite}
 * and then hands the harvested files to a {@link SequenceFileWriter}.
 *
 * <p>The collaborators ({@link #setWriter writer}, {@link #setPathMaker pathMaker} and
 * {@link #setCheckParamsCorrectness checkParamsCorrectness}) must be injected before
 * either {@code run} method is called.
 *
 * <p>NOTE(review): {@code urls}, {@code dateFrom}, {@code dateUntil} and {@code pathTo}
 * are static and therefore shared by every instance in the JVM; concurrent runs will
 * interfere with each other. They are kept static here to preserve existing behaviour.
 */
public class Importer {
    public static final String BASE_URLS_LONG = "baseURLs";
    public static final String UNTIL_OPTION_LONG = "until";
    public static final String FROM_OPTION_LONG = "from";
    public static final String FROM_OPTION = "f";
    public static final String FROM_OPTION_DESCRIPTION = "Date from.";
    public static final String UNTIL_OPTION_DESCRIPTION = "Date to.";
    public static final String UNTIL_OPTION = "u";
    private static final String URLS_MESSAGE_POSTFIX = " URLs";
    private static final String FOUND_MESSAGE_PREFIX = "Found ";
    private static final String PROCESSING_MESSAGE_PREFIX = "Processing ";
    private static final String SEC_MESSAGE_POSTFIX = " sec";
    private static final String ELAPSED_TIME_MESSAGE_PREFIX = "Elapsed time = ";
    private static final String FINISHED_MESSAGE_POSTFIX = " finished.";
    private static final String THREAD_MESSAGE_PREFIX = "Thread ";
    private static final String PROCESSED_MESSAGE_POSTFIX = " processed.";
    private static final String INFO_MESSAGE_SEPARATOR = ": ";
    private static final String ITEM_MESSAGE_PREFIX = "Item ";
    private static final String PROCESSING_ITEM_MESSAGE_PREFIX = "Processing item ";
    private static final String STARTING_THREAD_MESSAGE_PREFIX = "Starting thread ";
    private static final String ERROR_MESSAGE_PREFIX = "Error: ";
    private static final String XPATH_EXPRESSION_STRING = "/BaseURLs/baseURL";
    private static final String BASE_URL_LONG = "baseURL";
    private static final String BASE_URL = "b";
    private static final String BASE_URL_DESCRIPTION = "Url to harvest.";
    private static final String SEPARATOR = "_";
    private static final String PREFIX = "HBaseImporter_";
    private static final String BASE_URLS_DESCRIPTION = "Urls to harvest.";
    private static final String BASE_URLS = "b";
    private static final String METADATA_PREFIX = "oai_dc";
    private static final int RESUMPTION_TOKENS_PER_FILE = 1;
    private static final String SET_SPEC = null;
    private static final Logger log = LoggerFactory.getLogger(Importer.class);

    private static String[] urls;
    private static String dateFrom;
    private static String dateUntil;
    private static String pathTo;

    private final Options options;
    private final CommandLineParser cmdLineParser;
    private SequenceFileWriter writer = null;
    private SequenceFilePathMaker pathMaker = null;
    private CheckParamsCorrectness checkParamsCorrectness = null;
    private List<HarvestData> harvestingDataList = null;

    /**
     * Creates an importer that understands the {@code until}, {@code from},
     * {@code baseURLs} and {@code baseURL} command-line options.
     */
    public Importer() {
        this.options = new Options();
        this.options.addOption(UNTIL_OPTION, UNTIL_OPTION_LONG, true, UNTIL_OPTION_DESCRIPTION);
        this.options.addOption(FROM_OPTION, FROM_OPTION_LONG, true, FROM_OPTION_DESCRIPTION);
        // NOTE(review): both options below share the short name "b"; presumably the later
        // registration wins for "-b" while both long names stay usable - confirm against
        // the commons-cli version in use.
        this.options.addOption(BASE_URLS, BASE_URLS_LONG, true, BASE_URLS_DESCRIPTION);
        this.options.addOption(BASE_URL, BASE_URL_LONG, true, BASE_URL_DESCRIPTION);
        this.cmdLineParser = new PosixParser();
    }

    /**
     * Harvests according to the command-line arguments and writes the result as a
     * sequence file.
     *
     * <p>If {@code --baseURLs} is present its values are harvested directly. Otherwise
     * {@code --baseURL} is treated as the address of an XML document listing the URLs;
     * if that is absent too, the arguments are passed to {@link OpenarchivesListWrite#main}.
     *
     * @param strArr command-line arguments; the last one is used as the output directory
     * @throws IllegalStateException if a collaborator has not been set, or if no output
     *         directory is known when the sequence file is written
     */
    public void run(String[] strArr) throws IOException, ParseException, NoSuchFieldException, ParserConfigurationException, TransformerException, org.apache.commons.cli.ParseException, XPathExpressionException, SAXException {
        requireCollaborators();
        this.checkParamsCorrectness.checkCorrectness(strArr);
        // A list left over from a previous run on this instance would otherwise be
        // re-harvested and the URLs given in strArr silently ignored.
        this.harvestingDataList = null;
        urls = parseParams(strArr, BASE_URLS_LONG);
        if (null != urls) {
            doRunByUrlsList(strArr);
        } else {
            doRunByArgsArray(strArr);
        }
        writeSequenceFile(pathTo, PREFIX + dateFrom + SEPARATOR + dateUntil);
    }

    /**
     * Harvests every entry of {@code list} and writes each entry's harvested directory
     * through the sequence-file writer.
     *
     * <p>NOTE(review): when {@code list} is non-empty the {@code from}/{@code until}
     * options in {@code strArr} are not parsed, so the output name is built from whatever
     * the static date fields currently hold (possibly {@code null}).
     *
     * @param list   harvesting descriptions; if {@code null} or empty the list is built
     *               from {@code strArr} and the previously parsed URLs instead
     * @param strArr command-line arguments, validated by the injected checker
     * @throws IllegalStateException if a collaborator has not been set
     */
    public void run(List<HarvestData> list, String[] strArr) throws IOException, ParseException, NoSuchFieldException, ParserConfigurationException, TransformerException, org.apache.commons.cli.ParseException, XPathExpressionException, SAXException {
        requireCollaborators();
        this.checkParamsCorrectness.checkCorrectness(strArr);
        this.harvestingDataList = list;
        doRunByUrlsList(strArr);
        writeSequenceFileViaHarvestedDataList(PREFIX + dateFrom + SEPARATOR + dateUntil);
    }

    /** Fails fast with a descriptive message instead of a late NullPointerException. */
    private void requireCollaborators() {
        if (this.checkParamsCorrectness == null || this.writer == null || this.pathMaker == null) {
            throw new IllegalStateException(
                    "Importer is not fully configured: writer, pathMaker and checkParamsCorrectness must be set before run()");
        }
    }

    /**
     * Returns all values given for option {@code str}, or {@code null} when the option
     * is absent from {@code strArr}.
     */
    private String[] parseParams(String[] strArr, String str) throws org.apache.commons.cli.ParseException {
        CommandLine commandLine = this.cmdLineParser.parse(this.options, strArr);
        return commandLine.hasOption(str) ? commandLine.getOptionValues(str) : null;
    }

    /**
     * Builds {@link #harvestingDataList} from the arguments and the static {@code urls}
     * when no non-empty list has been supplied. Does nothing otherwise.
     *
     * @throws IllegalArgumentException if no URLs are available or {@code strArr} is empty
     */
    private void prepareHarvestingDataListFromArgs(String[] strArr) throws org.apache.commons.cli.ParseException {
        if (null != this.harvestingDataList && !this.harvestingDataList.isEmpty()) {
            return;
        }
        if (urls == null) {
            throw new IllegalArgumentException("No harvesting data supplied and no base URLs available.");
        }
        if (strArr.length == 0) {
            throw new IllegalArgumentException("Arguments are empty; the last argument must be the output directory.");
        }
        CommandLine commandLine = this.cmdLineParser.parse(this.options, strArr);
        if (commandLine.hasOption(FROM_OPTION_LONG)) {
            dateFrom = commandLine.getOptionValue(FROM_OPTION_LONG);
        }
        if (commandLine.hasOption(UNTIL_OPTION_LONG)) {
            dateUntil = commandLine.getOptionValue(UNTIL_OPTION_LONG);
        }
        pathTo = strArr[strArr.length - 1];
        List<HarvestData> prepared = new ArrayList<HarvestData>(urls.length);
        for (String url : urls) {
            prepared.add(new HarvestData(url, dateFrom, dateUntil, pathTo));
        }
        this.harvestingDataList = prepared;
    }

    /**
     * Harvests every entry of the harvesting list, using one pool thread per entry, and
     * blocks until all workers have finished.
     *
     * <p>Workers pull indices from a shared counter, so each entry is processed once.
     * A failure on one entry is logged and does not stop the remaining entries.
     */
    private void doRunByUrlsList(String[] strArr) throws org.apache.commons.cli.ParseException {
        prepareHarvestingDataListFromArgs(strArr);
        final List<HarvestData> items = this.harvestingDataList;
        final int size = items.size();
        if (size == 0) {
            // Executors.newFixedThreadPool(0) would throw IllegalArgumentException.
            log.warn("No harvesting data to process.");
            return;
        }
        final AtomicInteger nextIndex = new AtomicInteger(0);
        List<Future<?>> futures = new ArrayList<Future<?>>(size);
        long startNanos = System.nanoTime();
        ExecutorService pool = Executors.newFixedThreadPool(size);
        try {
            for (int i = 0; i < size; i++) {
                final int workerId = i;
                futures.add(pool.submit(new Runnable() {
                    @Override
                    public void run() {
                        harvestPendingItems(items, nextIndex, workerId);
                    }
                }));
            }
            for (Future<?> future : futures) {
                try {
                    future.get();
                } catch (InterruptedException e) {
                    // Kept for backward compatibility: the process terminates here.
                    log.error(ERROR_MESSAGE_PREFIX + e, e);
                    System.exit(-1);
                } catch (ExecutionException e) {
                    log.error(ERROR_MESSAGE_PREFIX + e, e);
                    System.exit(-1);
                }
            }
        } finally {
            // Without this the pool's non-daemon threads keep the JVM alive.
            pool.shutdown();
        }
        log.info(ELAPSED_TIME_MESSAGE_PREFIX + ((System.nanoTime() - startNanos) / 1.0E9d) + SEC_MESSAGE_POSTFIX);
    }

    /**
     * Worker loop: repeatedly claims the next unprocessed index and harvests that entry
     * until the shared counter runs past the end of {@code items}.
     */
    private static void harvestPendingItems(List<HarvestData> items, AtomicInteger nextIndex, int workerId) {
        int index = nextIndex.getAndIncrement();
        log.info(STARTING_THREAD_MESSAGE_PREFIX + workerId);
        while (index < items.size()) {
            HarvestData item = items.get(index);
            log.info(PROCESSING_ITEM_MESSAGE_PREFIX + index + INFO_MESSAGE_SEPARATOR + item.getHarvestingUrl());
            try {
                OpenarchivesListWrite.runHarvester(item.getHarvestingUrl(), item.getDateFrom(), item.getDateUntil(), METADATA_PREFIX, SET_SPEC, RESUMPTION_TOKENS_PER_FILE, item.getHarvestedDataPath());
                log.info(ITEM_MESSAGE_PREFIX + index + INFO_MESSAGE_SEPARATOR + item.getHarvestingUrl() + PROCESSED_MESSAGE_POSTFIX);
            } catch (Exception e) {
                // One failing endpoint must not abort the others; keep the stack trace.
                log.error(ERROR_MESSAGE_PREFIX + e, e);
            }
            index = nextIndex.getAndIncrement();
        }
        log.info(THREAD_MESSAGE_PREFIX + workerId + FINISHED_MESSAGE_POSTFIX);
    }

    /**
     * Resolves the URL list from the document named by {@code --baseURL} and harvests it,
     * or delegates the raw arguments to {@link OpenarchivesListWrite#main} when that
     * option is absent.
     */
    private void doRunByArgsArray(String[] strArr) throws org.apache.commons.cli.ParseException, XPathExpressionException, IOException, SAXException, ParserConfigurationException {
        urls = parseUrlsFromParams(strArr, BASE_URL_LONG);
        if (null != urls) {
            doRunByUrlsList(strArr);
        } else {
            OpenarchivesListWrite.main(strArr);
        }
    }

    /**
     * Reads every file in directory {@code str} and writes the contents to the path
     * derived from {@code str2}.
     *
     * @throws IllegalStateException if {@code str} is {@code null}
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    private void writeSequenceFile(String str, String str2) throws IOException {
        if (str == null) {
            throw new IllegalStateException("Output directory is not set; cannot write sequence file " + str2);
        }
        this.writer.write(readDirectory(new File(str)), this.pathMaker.makePath(str2));
    }

    /**
     * Writes the harvested directory of every entry in the harvesting list to the path
     * derived from {@code str}.
     *
     * <p>NOTE(review): every entry is written to a path made from the same name; whether
     * later entries overwrite earlier ones depends on the path maker - confirm.
     */
    private void writeSequenceFileViaHarvestedDataList(String str) throws IOException {
        for (HarvestData item : this.harvestingDataList) {
            File directory = new File(item.getHarvestedDataPath());
            this.writer.write(readDirectory(directory), this.pathMaker.makePath(str));
        }
    }

    /**
     * Loads each entry of {@code directory} into a map keyed by file name.
     *
     * <p>The map is deliberately raw: the parameter type expected by the writer is not
     * visible from this class.
     *
     * @throws IOException if the directory cannot be listed or an entry cannot be read
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static TreeMap readDirectory(File directory) throws IOException {
        File[] entries = directory.listFiles();
        if (entries == null) {
            // listFiles() returns null for a non-directory or on an I/O error.
            throw new IOException("Cannot list files in '" + directory.getAbsolutePath() + "'");
        }
        TreeMap contents = new TreeMap();
        for (File entry : entries) {
            byte[] bytes = FileUtils.readFileToByteArray(entry);
            contents.put(FilenameUtils.getName(entry.getName()), bytes);
            log.debug("{}, {}", bytes.length, entry.getAbsolutePath());
        }
        return contents;
    }

    /**
     * Downloads the XML document whose address is the value of option {@code str} and
     * returns the text of every {@code /BaseURLs/baseURL} element.
     *
     * @return the URLs in document order, or {@code null} when the option is absent
     * @throws IllegalArgumentException if the document has no such element or one of
     *         them is empty
     */
    private String[] parseUrlsFromParams(String[] strArr, String str) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException, org.apache.commons.cli.ParseException {
        String location = this.cmdLineParser.parse(this.options, strArr).getOptionValue(str);
        if (null == location) {
            return null;
        }
        URLConnection connection = new URL(location).openConnection();
        DocumentBuilderFactory factory = newHardenedDocumentBuilderFactory();
        NodeList nodes;
        InputStream in = connection.getInputStream();
        try {
            nodes = (NodeList) XPathFactory.newInstance().newXPath().compile(XPATH_EXPRESSION_STRING).evaluate(factory.newDocumentBuilder().parse(in), XPathConstants.NODESET);
        } finally {
            try {
                in.close();
            } catch (IOException closeFailure) {
                log.warn("Failed to close stream from " + location, closeFailure);
            }
        }
        int length = nodes.getLength();
        if (length < 1) {
            throw new IllegalArgumentException("No " + XPATH_EXPRESSION_STRING + " elements found in " + location);
        }
        log.info(FOUND_MESSAGE_PREFIX + length + URLS_MESSAGE_POSTFIX);
        log.info(PROCESSING_MESSAGE_PREFIX + length + URLS_MESSAGE_POSTFIX);
        String[] result = new String[length];
        for (int i = 0; i < length; i++) {
            Node text = nodes.item(i).getFirstChild();
            if (text == null) {
                throw new IllegalArgumentException("Empty " + BASE_URL_LONG + " element at index " + i + " in " + location);
            }
            result[i] = text.getNodeValue();
        }
        return result;
    }

    /**
     * Creates a non-validating, namespace-unaware factory with external-entity
     * processing restricted, because the parsed document comes from a remote URL.
     * Features the parser does not support are skipped with a warning.
     */
    private static DocumentBuilderFactory newHardenedDocumentBuilderFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(false);
        factory.setValidating(false);
        trySetFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        trySetFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        trySetFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory;
    }

    /** Sets a parser feature, logging instead of failing when it is unsupported. */
    private static void trySetFeature(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException e) {
            log.warn("XML parser does not support feature " + feature + "; continuing without it", e);
        }
    }

    /** Sets the writer that receives the harvested file contents. */
    public void setWriter(SequenceFileWriter sequenceFileWriter) {
        this.writer = sequenceFileWriter;
    }

    /** Sets the component that turns an output name into the writer's target path. */
    public void setPathMaker(SequenceFilePathMaker sequenceFilePathMaker) {
        this.pathMaker = sequenceFilePathMaker;
    }

    /** Sets the validator applied to the command-line arguments before each run. */
    public void setCheckParamsCorrectness(CheckParamsCorrectness checkParamsCorrectness) {
        this.checkParamsCorrectness = checkParamsCorrectness;
    }
}
