package gate.cloud.io.csv;

import au.com.bytecode.opencsv.CSVReader;
import gate.Factory;
import gate.FeatureMap;
import gate.cloud.batch.Batch;
import gate.cloud.batch.DocumentID;
import gate.cloud.io.DocumentData;
import gate.cloud.io.StreamingInputHandler;
import gate.util.GateException;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.ProcessBuilder;
import java.util.Map;
import java.util.Set;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:gate/cloud/io/csv/CSVStreamingInputHandler.class */
/**
 * Streaming input handler that turns the rows of a single CSV file into
 * documents, one document per row.
 *
 * <p>One column of each row (selected by the {@code column} parameter) supplies
 * either the document text or, when {@code textIsURL} is true, the value passed
 * as the {@code sourceUrl} parameter of the created document. When
 * {@code labelledColumns} is true the first row is consumed as a header and the
 * remaining cells of each row are attached to the document as features keyed by
 * the corresponding header cell.
 *
 * <p>Document IDs have the form {@code <source file name>.<counter>}; the counter
 * only advances for rows whose selected column is present and non-blank.
 */
public class CSVStreamingInputHandler implements StreamingInputHandler {
    public static final String PARAM_SEPARATOR_CHARACTER = "separator";
    public static final String PARAM_QUOTE_CHARACTER = "quote";
    public static final String PARAM_LABELLED_COLUMNS = "labelledColumns";
    public static final String PARAM_COLUMN = "column";
    public static final String PARAM_TEXT_IS_URL = "textIsURL";

    /** Name of the optional configuration entry selecting the compression mode. */
    private static final String PARAM_COMPRESSION = "compression";
    /** Value of {@code compression} treated by this class as "file is not compressed". */
    private static final String COMPRESSION_NONE = "none";
    /** Character set used when no {@code encoding} is configured. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    private static final Logger logger = LoggerFactory.getLogger(CSVStreamingInputHandler.class);

    /** IDs already processed in a previous run of the batch; may be null. */
    protected Set<String> completedDocuments;
    /** Parent directory of the batch file, used to resolve relative paths; may be null. */
    protected File batchDir;
    protected File srcFile;
    protected CSVReader csvReader;
    protected String encoding;
    protected char separatorChar;
    protected char quoteChar;
    protected long idCounter;
    /** Zero-based index of the column holding the document text (or URL). */
    protected int column;
    /** Header row, populated by {@link #init()} only when {@link #colLabels} is true. */
    protected String[] features;
    protected boolean colLabels;
    protected boolean textIsURL;
    /** Compression mode; null means the source file is read as-is. */
    protected String compression;
    /** External decompressor started by {@link #init()}, if any. */
    protected Process decompressProcess = null;

    /**
     * Reads the handler configuration.
     *
     * <p>Required entries: {@code srcFile}, {@code separator}, {@code quote} and
     * {@code column}. Optional entries: {@code batchFileLocation},
     * {@code encoding} (defaults to UTF-8), {@code labelledColumns},
     * {@code textIsURL} and {@code compression}.
     *
     * @param map the configuration key/value pairs
     * @throws IllegalArgumentException if a required entry is missing or empty,
     *         the source file does not exist or is not a regular file, or the
     *         column index is negative
     * @throws NumberFormatException if {@code column} is not an integer
     */
    public void config(Map<String, String> map) throws IOException, GateException {
        String srcFileParam = requiredParam(map, "srcFile");

        String batchFileLocation = map.get("batchFileLocation");
        if (batchFileLocation != null) {
            this.batchDir = new File(batchFileLocation).getParentFile();
        }

        File source = new File(srcFileParam);
        if (!source.isAbsolute()) {
            // Relative paths are resolved against the batch file's directory.
            source = new File(this.batchDir, srcFileParam);
        }
        this.srcFile = source;
        if (!source.exists()) {
            throw new IllegalArgumentException("File \"" + source + "\", provided as value for required parameter \"srcFile\", does not exist!");
        }
        if (!source.isFile()) {
            throw new IllegalArgumentException("File \"" + source + "\", provided as value for required parameter \"srcFile\", is not a file!");
        }

        // A null charset name would make InputStreamReader throw in init().
        String configuredEncoding = map.get("encoding");
        this.encoding = configuredEncoding != null ? configuredEncoding : DEFAULT_ENCODING;

        this.separatorChar = requiredParam(map, PARAM_SEPARATOR_CHARACTER).charAt(0);
        this.quoteChar = requiredParam(map, PARAM_QUOTE_CHARACTER).charAt(0);
        this.colLabels = Boolean.parseBoolean(map.get(PARAM_LABELLED_COLUMNS));

        int columnIndex = Integer.parseInt(requiredParam(map, PARAM_COLUMN));
        if (columnIndex < 0) {
            // A negative index would pass the bounds check in nextDocument()
            // and then fail with ArrayIndexOutOfBoundsException on every row.
            throw new IllegalArgumentException("Parameter " + PARAM_COLUMN + " must not be negative, but was " + columnIndex);
        }
        this.column = columnIndex;
        this.textIsURL = Boolean.parseBoolean(map.get(PARAM_TEXT_IS_URL));

        // Without this the compression handling in init() can never be reached
        // from configuration. An absent entry leaves the field untouched so a
        // value assigned elsewhere (e.g. by a subclass) is not overwritten.
        String compressionParam = map.get(PARAM_COMPRESSION);
        if (compressionParam != null) {
            String trimmed = compressionParam.trim();
            this.compression = (trimmed.isEmpty() || COMPRESSION_NONE.equals(trimmed)) ? null : compressionParam;
        }
    }

    /**
     * Opens the source file (decompressing it if configured), creates the CSV
     * reader and, when columns are labelled, consumes the header row.
     *
     * @throws IOException if the file cannot be opened or read
     * @throws GateException if decompression fails for a non-I/O reason
     */
    public void init() throws IOException, GateException {
        String charsetName = this.encoding != null ? this.encoding : DEFAULT_ENCODING;
        InputStream source = openSourceStream();
        boolean initialised = false;
        try {
            this.csvReader = new CSVReader(new InputStreamReader(source, charsetName), this.separatorChar, this.quoteChar);
            this.features = this.colLabels ? this.csvReader.readNext() : null;
            initialised = true;
        } finally {
            if (!initialised) {
                // Unsupported encoding or a failed header read must not leak the stream.
                IOUtils.closeQuietly(source);
            }
        }
        this.idCounter = this.colLabels ? 1 : 0;
    }

    /**
     * Opens the source file according to {@link #compression}: null reads the
     * file directly, {@code "any"} auto-detects the format (falling back to
     * no compression), and any other value is first tried as a Commons Compress
     * format name ({@code "gzip"} is mapped to {@code "gz"}) and otherwise run
     * as an external command.
     */
    private InputStream openSourceStream() throws IOException, GateException {
        if (this.compression == null) {
            return new FileInputStream(this.srcFile);
        }
        boolean autoDetect = "any".equals(this.compression);
        if ("gzip".equals(this.compression)) {
            this.compression = "gz";
        }

        InputStream raw = new BufferedInputStream(new FileInputStream(this.srcFile));
        try {
            CompressorStreamFactory factory = new CompressorStreamFactory();
            if (autoDetect) {
                return factory.createCompressorInputStream(raw);
            }
            return factory.createCompressorInputStream(this.compression, raw);
        } catch (CompressorException e) {
            Throwable cause = e.getCause();
            if (cause != null) {
                // A genuine failure: release the file handle before propagating.
                IOUtils.closeQuietly(raw);
                if (cause instanceof IOException) {
                    throw (IOException) cause;
                }
                throw new GateException(cause);
            }
            if (autoDetect) {
                logger.info("Failed to detect compression format, assuming no compression");
                return raw;
            }
            logger.info("Unrecognised compression format, assuming external compressor");
            IOUtils.closeQuietly(raw);
            return startExternalDecompressor();
        } catch (RuntimeException e) {
            IOUtils.closeQuietly(raw);
            throw e;
        }
    }

    /**
     * Runs {@link #compression} as a whitespace-separated command line with the
     * source file as its standard input, and returns the command's standard
     * output. The process is remembered so {@link #close()} can wait for it.
     */
    private InputStream startExternalDecompressor() throws IOException {
        ProcessBuilder builder = new ProcessBuilder(this.compression.trim().split("\\s+"));
        builder.directory(this.batchDir);
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        builder.redirectOutput(ProcessBuilder.Redirect.PIPE);
        builder.redirectInput(this.srcFile);
        this.decompressProcess = builder.start();
        return this.decompressProcess.getInputStream();
    }

    /**
     * Not supported: this handler cannot load a document by ID.
     *
     * @throws UnsupportedOperationException always
     */
    public DocumentData getInputDocument(DocumentID documentID) throws IOException, GateException {
        throw new UnsupportedOperationException("CSVStreamingInputHandler can only operate in streaming mode");
    }

    /**
     * Records the set of documents the batch reports as already completed, so
     * that {@link #nextDocument()} can skip them.
     *
     * @param batch the batch about to be processed
     */
    public void startBatch(Batch batch) {
        this.completedDocuments = batch.getCompletedDocuments();
        if (this.completedDocuments != null && this.completedDocuments.size() > 0) {
            logger.info("Restarting failed batch - " + this.completedDocuments.size() + " documents already processed");
        }
    }

    /**
     * Returns the next document that still needs processing.
     *
     * <p>Rows whose selected column is missing or blank are skipped without
     * consuming an ID. Rows whose ID is in the completed set, and rows for which
     * document creation fails (logged as a warning), are skipped after their ID
     * has been consumed.
     *
     * @return the next document, or null when the input is exhausted
     * @throws IOException if reading the CSV fails
     */
    public DocumentData nextDocument() throws IOException, GateException {
        String[] row;
        while ((row = this.csvReader.readNext()) != null) {
            if (this.column >= row.length || row[this.column].trim().isEmpty()) {
                continue;
            }

            String id = this.srcFile.getName() + "." + (this.idCounter++);
            // completedDocuments is null when the batch supplied none (see startBatch).
            if (this.completedDocuments != null && this.completedDocuments.contains(id)) {
                continue;
            }

            FeatureMap docFeatures = Factory.newFeatureMap();
            docFeatures.put("throwExceptionOnFormatError", Boolean.TRUE);
            if (this.colLabels && this.features != null) {
                // Rows may be shorter than the header; only copy cells that exist.
                int limit = Math.min(this.features.length, row.length);
                for (int i = 0; i < limit; i++) {
                    if (i != this.column) {
                        docFeatures.put(this.features[i], row[i]);
                    }
                }
            }

            FeatureMap docParams = Factory.newFeatureMap();
            docParams.put(this.textIsURL ? "sourceUrl" : "stringContent", row[this.column]);
            try {
                return new DocumentData(Factory.createResource("gate.corpora.DocumentImpl", docParams, docFeatures, id), new DocumentID(id));
            } catch (Exception e) {
                // Best effort: one bad row must not abort the whole stream.
                logger.warn("Error encountered while parsing object with ID " + id + " - skipped", e);
            }
        }
        return null;
    }

    /**
     * Closes the CSV reader and waits for the external decompressor, if one
     * was started. Safe to call even if {@link #init()} never completed.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException, GateException {
        try {
            if (this.csvReader != null) {
                this.csvReader.close();
            }
        } finally {
            // Wait for the child process even if closing the reader failed.
            if (this.decompressProcess != null) {
                try {
                    this.decompressProcess.waitFor();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Fetches a configuration value that must be present and non-empty.
     *
     * @throws IllegalArgumentException if the entry is missing or empty
     */
    private static String requiredParam(Map<String, String> map, String name) {
        String value = map.get(name);
        if (value == null || value.isEmpty()) {
            throw new IllegalArgumentException("Parameter " + name + " is required");
        }
        return value;
    }
}
