package pl.edu.icm.coansys.io.writers.file;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import pl.edu.icm.coansys.harvest.configuration.impl.ImporterConfigurationImpl;
import pl.edu.icm.coansys.iterators.ZipDirToDocumentDTOIterator;
import pl.edu.icm.coansys.models.DocumentDTO;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.transformers.RowComposer;

/* loaded from: input_file:pl/edu/icm/coansys/io/writers/file/BwmetaToDocumentWraperSequenceFileWriter.class */
/**
 * Command-line tool that reads documents through a {@link ZipDirToDocumentDTOIterator}
 * and writes them to a Hadoop {@link SequenceFile}.
 *
 * <p>Each record's key is the bytes of the document row id and its value is the
 * serialized {@code DocumentProtos.DocumentWrapper}; both are stored as
 * {@link BytesWritable}. While running, the tool keeps simple counters (documents,
 * metadata, media containers, media) and a histogram of media sizes in whole
 * megabytes, which are logged periodically and at the end.
 *
 * <p>All state is held in static fields, so the class is meant to be run once per JVM
 * via {@link #main(String[])}.
 */
public class BwmetaToDocumentWraperSequenceFileWriter {
    private static final Logger LOGGER = Logger.getLogger(BwmetaToDocumentWraperSequenceFileWriter.class);

    /** Arguments used when the tool is started without any. */
    private static final String[] DEFAULT_ARGS = {"/home/akawa/bwndata/zips/", "cedram", "/home/akawa/bwndata/cedram.sf"};

    /**
     * Default value of the optional {@code <contentSizeLimit>} argument (536870912).
     * NOTE(review): presumably a byte count (512 MB); the unit is defined by
     * ZipDirToDocumentDTOIterator - confirm there.
     */
    private static final long DEFAULT_CONTENT_SIZE_LIMIT = 512L * 1024L * 1024L;

    /** Statistics are logged every time the document counter hits a multiple of this value. */
    private static final long STATS_INTERVAL = 10000L;

    private static long documentCount = 0;
    private static long metadataCount = 0;
    private static long mediaCount = 0;
    private static long mediaConteinerCount = 0;

    /** Histogram: media source file size in whole megabytes -> number of media of that size. */
    private static Map<Long, Long> sizeMap = new HashMap<Long, Long>();

    /**
     * Entry point.
     *
     * <p>Expected arguments: {@code <in_dir> <collectionName> <out_file>} followed by any
     * number of optional arguments in any order: the literal {@code snappyCompression},
     * the literal {@code metadataOnly} (both case-insensitive), a number (content size
     * limit), or anything else, which is handed to log4j's {@link PropertyConfigurator}
     * as a configuration file name. With no arguments at all, {@link #DEFAULT_ARGS} is used.
     *
     * @param strArr command-line arguments; may be {@code null} or empty
     * @throws IOException if reading the input or writing the sequence file fails
     */
    public static void main(String[] strArr) throws IOException {
        String[] args = (strArr == null || strArr.length == 0) ? DEFAULT_ARGS : strArr;
        if (args.length < 3) {
            usage();
            System.exit(1);
        }
        String inputDir = args[0];
        String collectionName = args[1];
        String outputFile = args[2];
        boolean snappyCompression = false;
        boolean metadataOnly = false;
        long contentSizeLimit = DEFAULT_CONTENT_SIZE_LIMIT;
        for (int i = 3; i < args.length; i++) {
            String option = args[i];
            if ("snappyCompression".equalsIgnoreCase(option)) {
                snappyCompression = true;
            } else if ("metadataOnly".equalsIgnoreCase(option)) {
                metadataOnly = true;
            } else {
                try {
                    contentSizeLimit = Long.parseLong(option);
                } catch (NumberFormatException notANumber) {
                    // Deliberate fallback: a non-numeric extra argument is a log4j properties file.
                    PropertyConfigurator.configure(option);
                }
            }
        }
        checkPaths(inputDir, collectionName, outputFile);
        generateSequenceFile(inputDir, collectionName, outputFile, snappyCompression, metadataOnly, contentSizeLimit);
        printStats();
    }

    /** Logs the current counters and the media size histogram at INFO level. */
    private static void printStats() {
        LOGGER.info(documentCount + " document count");
        LOGGER.info(metadataCount + " metadata records");
        LOGGER.info(mediaConteinerCount + " mediaContainer records");
        LOGGER.info(mediaCount + " media records");
        for (Map.Entry<Long, Long> entry : sizeMap.entrySet()) {
            LOGGER.info(entry.getKey() + "MB = " + entry.getValue());
        }
    }

    /**
     * Validates the command-line paths and terminates the JVM on invalid input.
     *
     * <p>Exit codes: 2 - input directory does not exist, 3 - input path is not a
     * directory, 4 - collection name contains a non-alphanumeric character. When the
     * output file has a parent directory that does not exist yet, it is created.
     *
     * @param inputDir       directory with the input archives
     * @param collectionName collection name; only {@code [a-zA-Z0-9]} characters are accepted
     * @param outputFile     path of the sequence file to be written
     * @throws IOException declared for callers; not thrown by the current implementation
     */
    private static void checkPaths(String inputDir, String collectionName, String outputFile) throws IOException {
        File input = new File(inputDir);
        if (!input.exists()) {
            System.err.println("<Input dir> does not exist: " + inputDir);
            System.exit(2);
        }
        if (!input.isDirectory()) {
            System.err.println("<Input dir> is not a directory:" + inputDir);
            System.exit(3);
        }
        if (!collectionName.matches("[a-zA-Z0-9]*")) {
            System.err.println("Only alphanumeric signs (a space sign is also excluded) are allowed for a collection name: " + collectionName);
            System.exit(4);
        }
        File outputParent = new File(outputFile).getParentFile();
        // getParentFile() is null for a bare file name such as "out.sf": nothing to create then.
        if (outputParent == null || outputParent.exists()) {
            return;
        }
        if (!outputParent.mkdirs() && !outputParent.isDirectory()) {
            // Not fatal here: the writer is opened through the Hadoop FileSystem API, which
            // reports its own error if the destination is really unusable.
            LOGGER.warn("Could not create output directory: " + outputParent);
        }
    }

    /**
     * Iterates over all documents of the input directory and appends one record per
     * document to the sequence file.
     *
     * @param inputDir          directory with the input archives
     * @param collectionName    collection name passed to the document iterator
     * @param outputFile        path of the sequence file to be written
     * @param snappyCompression whether the file is written with Snappy block compression
     * @param metadataOnly      passed through to the document iterator
     * @param contentSizeLimit  passed through to the document iterator
     * @throws IOException if the writer cannot be created, a record cannot be appended,
     *                     or the file cannot be closed cleanly
     */
    private static void generateSequenceFile(String inputDir, String collectionName, String outputFile, boolean snappyCompression, boolean metadataOnly, long contentSizeLimit) throws IOException {
        ZipDirToDocumentDTOIterator documents = new ZipDirToDocumentDTOIterator(inputDir, collectionName, metadataOnly, contentSizeLimit);
        SequenceFile.Writer writer = null;
        try {
            // Both writables are reused for every record to avoid per-document allocation.
            BytesWritable rowKey = new BytesWritable();
            BytesWritable rowValue = new BytesWritable();
            writer = createSequenceFileWriter(outputFile, rowKey, rowValue, snappyCompression);
            Iterator<DocumentDTO> it = documents.iterator();
            while (it.hasNext()) {
                DocumentProtos.DocumentWrapper wrapper = buildFrom(it.next());
                // NOTE(review): getBytes() uses the platform default charset, so keys depend on
                // the JVM's file.encoding. Left as is to keep the produced keys unchanged.
                byte[] keyBytes = wrapper.getRowId().getBytes();
                rowKey.set(keyBytes, 0, keyBytes.length);
                byte[] valueBytes = wrapper.toByteArray();
                if (rowValue.getCapacity() < valueBytes.length) {
                    // Grow by 25% over the needed size; Math.max guards the tiny-length case
                    // where the integer arithmetic rounds below the required length.
                    rowValue.setCapacity(Math.max(valueBytes.length, (valueBytes.length / 4) * 5));
                }
                rowValue.set(valueBytes, 0, valueBytes.length);
                writer.append(rowKey, rowValue);
                if (documentCount % STATS_INTERVAL == 0) {
                    printStats();
                }
            }
            // Close explicitly on the success path so that a failed final flush is reported
            // instead of being silently swallowed by IOUtils.closeStream.
            writer.close();
            writer = null;
        } finally {
            // Error path only (writer is null after a clean close); closeStream ignores null.
            IOUtils.closeStream(writer);
        }
    }

    /**
     * Builds the {@code DocumentWrapper} stored as the record value for one document and
     * updates the global counters and the media size histogram.
     *
     * <p>Metadata and media container are copied into the wrapper only when their
     * serialized size is greater than zero.
     *
     * @param documentDTO source document
     * @return wrapper with the row id and, when present, metadata and media container
     */
    private static DocumentProtos.DocumentWrapper buildFrom(DocumentDTO documentDTO) {
        DocumentProtos.DocumentWrapper.Builder wrapperBuilder = DocumentProtos.DocumentWrapper.newBuilder();
        wrapperBuilder.setRowId(RowComposer.composeRow(documentDTO));
        documentCount++;
        // Guarded so that the message strings are not built for every document when TRACE is off.
        boolean traceEnabled = LOGGER.isTraceEnabled();
        if (traceEnabled) {
            LOGGER.trace("Building: ");
            LOGGER.trace("\tKey = " + documentDTO.getKey());
            LOGGER.trace("\tCollection = " + documentDTO.getCollection());
        }

        DocumentProtos.DocumentMetadata documentMetadata = documentDTO.getDocumentMetadata();
        // Only the size is needed, so ask for it instead of serializing the whole message.
        int metadataSize = documentMetadata.getSerializedSize();
        if (metadataSize > 0) {
            wrapperBuilder.setDocumentMetadata(documentMetadata);
            if (traceEnabled) {
                LOGGER.trace("\tSourcePath = " + documentMetadata.getSourcePath());
                LOGGER.trace("\tDocumentMetadata size: " + metadataSize);
            }
            metadataCount++;
        }

        DocumentProtos.MediaContainer mediaConteiner = documentDTO.getMediaConteiner();
        int containerSize = mediaConteiner.getSerializedSize();
        if (containerSize > 0) {
            wrapperBuilder.setMediaContainer(mediaConteiner);
            LOGGER.info("\tMediaConteiner size: " + ((containerSize / 1024) / 1024) + "MB");
            for (DocumentProtos.Media media : mediaConteiner.getMediaList()) {
                long sizeInMb = (media.getSourceFilesize() / 1024) / 1024;
                LOGGER.info("\tSourcePath = " + media.getSourcePath());
                LOGGER.info("\tSourcePathFilesize = " + sizeInMb + "MB");
                mediaCount++;
                Long bucket = Long.valueOf(sizeInMb);
                Long seenSoFar = sizeMap.get(bucket);
                sizeMap.put(bucket, Long.valueOf(seenSoFar == null ? 1L : seenSoFar.longValue() + 1L));
                if (sizeInMb >= 10) {
                    LOGGER.info("\tBig media = " + sizeInMb + "MB");
                }
            }
            mediaConteinerCount++;
        }
        return wrapperBuilder.build();
    }

    /**
     * Opens a sequence file writer on the file system selected by the output URI.
     *
     * @param outputFile        output path or URI of the sequence file
     * @param key               instance whose runtime class is used as the key class
     * @param value             instance whose runtime class is used as the value class
     * @param snappyCompression {@code true} for Snappy block compression, {@code false}
     *                          for the writer's default settings
     * @return an open writer; the caller is responsible for closing it
     * @throws IOException if the file system or the writer cannot be created
     */
    private static SequenceFile.Writer createSequenceFileWriter(String outputFile, Writable key, Object value, boolean snappyCompression) throws IOException {
        Configuration configuration = new ImporterConfigurationImpl().createConfiguration();
        FileSystem fileSystem = FileSystem.get(URI.create(outputFile), configuration);
        Path outputPath = new Path(outputFile);
        if (!snappyCompression) {
            return SequenceFile.createWriter(fileSystem, configuration, outputPath, key.getClass(), value.getClass());
        }
        SnappyCodec codec = new SnappyCodec();
        // The codec is Configurable and reads its settings from the configuration; a codec
        // created with "new" has none until setConf is called.
        codec.setConf(configuration);
        return SequenceFile.createWriter(fileSystem, configuration, outputPath, key.getClass(), value.getClass(), SequenceFile.CompressionType.BLOCK, codec);
    }

    /** Prints the command-line synopsis to standard output. */
    private static void usage() {
        System.out.println("Usage: \njava -cp importers-*-with-deps.jar " + BwmetaToDocumentWraperSequenceFileWriter.class.getName() + " <in_dir> <collectionName> <out_file> [snappyCompression] [metadataOnly] [<contentSizeLimit>] [<log4j.properties>]");
    }
}
