package it.unimi.dsi.law.warc.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.law.warc.filters.Filter;
import it.unimi.dsi.law.warc.filters.parser.FilterParser;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.HttpResponseFilteredIterator;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.util.HttpResponse;
import it.unimi.dsi.law.warc.util.Util;
import it.unimi.dsi.law.warc.util.WarcHttpResponse;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.FastDateFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/warc/tool/Warc09Warc10Converter.class */
/**
 * Command-line tool that reads HTTP-response records from a WARC 0.9 file and
 * re-emits them on standard output with WARC/1.0-style headers.
 *
 * <p>As stated by the tool's own help text, this class is meant for testing
 * purposes only and gives no guarantees about the produced records.
 */
public class Warc09Warc10Converter {
    private static final Logger LOGGER = LoggerFactory.getLogger(Warc09Warc10Converter.class);

    /** Default I/O buffer size, in the syntax accepted by {@code JSAP.INTSIZE_PARSER}. */
    public static final String DEFAULT_BUFFER_SIZE = "64Ki";

    /**
     * Formatter for the {@code WARC-Date} header. The pattern ends with a literal
     * {@code Z}, so the formatter is pinned to UTC; otherwise the emitted timestamp
     * would be local time mislabelled as UTC.
     */
    private static final FastDateFormat WARC_DATE_FORMAT =
            FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss'Z'", TimeZone.getTimeZone("UTC"));

    /** Blank line written after the header block and after each record's content. */
    private static final String CRLF_CRLF = "\r\n\r\n";

    /**
     * Converts every record accepted by {@code filter} and writes the result to {@code out}.
     *
     * @param in the stream to read the input records from; its position is sampled
     *     before each record for diagnostics.
     * @param isGZipped whether the input records are gzip-compressed.
     * @param filter the filter selecting which HTTP responses are converted.
     * @param out the stream receiving the converted records; it is neither flushed
     *     nor closed by this method.
     * @throws IOException if reading or writing fails.
     */
    public static void run(FastBufferedInputStream in, boolean isGZipped, Filter<HttpResponse> filter, FastBufferedOutputStream out) throws IOException {
        final WarcRecord record = isGZipped ? new GZWarcRecord() : new WarcRecord();
        final ProgressLogger progressLogger = new ProgressLogger(LOGGER, 1L, TimeUnit.MINUTES, "records");
        final WarcHttpResponse response = new WarcHttpResponse();
        final HttpResponseFilteredIterator iterator = new HttpResponseFilteredIterator(in, record, response, filter);

        progressLogger.start("Listing...");
        long position = -1;
        WarcRecord.Header header = null;
        while (iterator.hasNext()) {
            try {
                position = in.position();
                iterator.next();
                header = record.header;

                // Fall back to the record id when the response carries no digest.
                final String digest = response.digest() != null ? Util.toHexString(response.digest()) : header.recordId.toString();
                final String date = WARC_DATE_FORMAT.format(header.creationDate);

                out.write(toByteArray("WARC/1.0\r\n"));
                out.write(toByteArray("WARC-Record-ID: <urn:uid:" + header.recordId + ">\r\n"));
                out.write(toByteArray("WARC-Date: " + date + "\r\n"));
                out.write(toByteArray("WARC-Target-URI: " + header.subjectUri + "\r\n"));
                out.write(toByteArray("WARC-Type: " + StringUtils.lowerCase(header.recordType.toString()) + "\r\n"));
                out.write(toByteArray("Content-Type: " + header.contentType + "\r\n"));
                out.write(toByteArray("WARC-Payload-Digest: bubing:" + digest + "\r\n"));
                if (response.isDuplicate()) {
                    out.write(toByteArray("BUbiNG-Is-Duplicate: true\r\n"));
                }
                out.write(toByteArray("BUbiNG-Guessed-Charset: ISO-8859-1\r\n"));

                final String httpHeader = response.statusLine() + "\r\nDate: " + date + "\r\nLocation: " + header.subjectUri + "\r\nContent-Type: text/html";
                final byte[] httpHeaderBytes = toByteArray(httpHeader + CRLF_CRLF);
                // Copy the payload as raw bytes: decoding it to a String with the platform
                // charset and truncating chars back to bytes corrupts non-ASCII content and
                // makes Content-Length disagree with what is actually written.
                final byte[] content = IOUtils.toByteArray(response.contentAsStream());

                out.write(toByteArray("Content-Length: " + (httpHeaderBytes.length + content.length) + CRLF_CRLF));
                out.write(httpHeaderBytes);
                out.write(content);
                // Record terminator; deliberately not counted in Content-Length.
                out.write(toByteArray(CRLF_CRLF));
                progressLogger.update();
            } catch (RuntimeException e) {
                System.err.println("Got " + e);
                // "header" is the last header read successfully (null if none was read yet).
                System.err.println("Position: " + position + ", last url header:\n" + header);
                throw e;
            }
        }
        progressLogger.done();
    }

    /**
     * Converts a string to bytes by narrowing each character to its low eight bits.
     *
     * <p>When assertions are enabled, a character with code 128 or above triggers
     * an {@link AssertionError}.
     *
     * @param str the string to convert.
     * @return a new array with one byte per character of {@code str}.
     */
    public static byte[] toByteArray(String str) {
        final byte[] bytes = new byte[str.length()];
        for (int i = str.length(); i-- != 0;) {
            assert str.charAt(i) < 128 : str.charAt(i);
            bytes[i] = (byte) str.charAt(i);
        }
        return bytes;
    }

    /**
     * Parses the command line, opens the input (a file named
     * {@code <basename>.warc[.gz]} or standard input when the basename is {@code -})
     * and runs the conversion towards standard output.
     *
     * @param strArr the command-line arguments.
     * @throws Exception if argument parsing, filter parsing or I/O fails.
     */
    public static void main(String[] strArr) throws Exception {
        final SimpleJSAP jsap = new SimpleJSAP(
                Warc09Warc10Converter.class.getName(),
                "Convert a WARC 0.9 in WARC 1.0: warning this class is only for testing purpose content-based (no guarantees about the warc record documentation)",
                new Parameter[]{
                    new Switch("gzip", 'z', "gzip", "Whether the Warc file is compressed."),
                    new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'f', "filter", "The filter."),
                    new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an I/O buffer."),
                    new UnflaggedOption("warcFile", JSAP.STRING_PARSER, "-", true, false, "The Warc input file basename (if not present, or -, stdin will be used).")
                });
        final JSAPResult result = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }

        final boolean isGZipped = result.getBoolean("gzip");
        // With no filter given, use the filter expression "TRUE".
        final String filterSpec = result.getString("filter") == null ? "TRUE" : result.getString("filter");
        final int bufferSize = result.getInt("bufferSize");
        final String basename = result.getString("warcFile");

        @SuppressWarnings({"unchecked", "rawtypes"})
        final Filter<HttpResponse> filter = (Filter<HttpResponse>) new FilterParser(HttpResponse.class).parse(filterSpec);

        final InputStream rawInput = basename.equals("-")
                ? System.in
                : new FileInputStream(new File(basename + ".warc" + (isGZipped ? ".gz" : "")));

        // Close both streams even when the conversion fails, so that the records
        // already buffered are flushed and the input file handle is released.
        final FastBufferedInputStream in = new FastBufferedInputStream(rawInput, bufferSize);
        try {
            final FastBufferedOutputStream out = new FastBufferedOutputStream(System.out, bufferSize);
            try {
                run(in, isGZipped, filter, out);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }
}
