package pl.edu.icm.cermine.pubmed.importer;

import com.google.protobuf.ByteString;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import pl.edu.icm.cermine.pubmed.importer.model.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/cermine/pubmed/importer/SequenceFileWriter.class */
/**
 * Command-line tool that packs a directory of PubMed entries into a single
 * Hadoop {@link SequenceFile}.
 *
 * <p>Each record's key is the entry key (as bytes) and its value is a serialized
 * {@link DocumentProtos.Document} holding the entry key together with the raw
 * contents of the entry's NLM and PDF files.
 */
public class SequenceFileWriter {

    /** Number of appended records after which the writer is synced to the file system. */
    private static final int SYNC_INTERVAL = 256;

    /** Buffer size handed to the sequence file writer (32 MiB). */
    private static final int WRITER_BUFFER_SIZE = 32 * 1024 * 1024;

    /**
     * Entry point. Expects exactly two arguments: the input directory and the
     * output sequence file path.
     *
     * @param strArr command-line arguments: {@code <in_dir> <out_file>}
     * @throws IOException if reading the input files or writing the sequence file fails
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length != 2) {
            System.out.println("Usage: <in_dir> <out_file>");
            System.exit(1);
        }
        String inputDir = strArr[0];
        String outputFile = strArr[1];
        checkPaths(inputDir, outputFile);
        generateSequenceFile(inputDir, outputFile);
    }

    /**
     * Validates the input directory and makes sure the output file's parent
     * directory exists. Terminates the JVM with exit code 1 when the input
     * path is missing or is not a directory.
     *
     * @param str  path of the input directory
     * @param str2 path of the output file
     * @throws IOException declared for callers; not thrown by the current implementation
     */
    private static void checkPaths(String str, String str2) throws IOException {
        File inputDir = new File(str);
        if (!inputDir.exists()) {
            System.err.println("<Input dir> does not exist: " + str);
            System.exit(1);
        }
        if (!inputDir.isDirectory()) {
            System.err.println("<Input dir> is not a directory:" + str);
            System.exit(1);
        }
        // Resolve against the working directory first: a bare file name such as
        // "out.seq" has no parent of its own and getParentFile() would return null.
        File parentDir = new File(str2).getAbsoluteFile().getParentFile();
        if (parentDir == null || parentDir.exists()) {
            return;
        }
        // Best effort: a failure here is only reported, the writer creation
        // that follows decides whether the output location is really unusable.
        if (!parentDir.mkdirs() && !parentDir.isDirectory()) {
            System.err.println("Could not create output directory: " + parentDir);
        }
    }

    /**
     * Iterates over all entries found under the input directory and appends one
     * record per entry to the sequence file. The writer is synced every
     * {@link #SYNC_INTERVAL} records and always closed, even on failure.
     *
     * @param str  path of the input directory
     * @param str2 path of the sequence file to create
     * @throws IOException if an input file cannot be read or the sequence file cannot be written
     */
    private static void generateSequenceFile(String str, String str2) throws IOException {
        SequenceFile.Writer writer = createSequenceFileWriter(str2, BytesWritable.class, BytesWritable.class);
        try {
            // Built inside the try block so the writer is released if the collection cannot be opened.
            PubmedCollectionIterator entries = new PubmedCollectionIterator(str);
            System.out.println(entries.size());

            int processed = 0;
            int sinceLastSync = 0;
            Iterator<PubmedEntry> it = entries.iterator();
            while (it.hasNext()) {
                PubmedEntry entry = it.next();
                File nlm = entry.getNlm();
                File pdf = entry.getPdf();

                // Fall back to the entry key in the progress line when there is no NLM file.
                String label = nlm != null ? nlm.getName() : entry.getKey();
                System.out.println(processed + ": " + label);

                DocumentProtos.Document.Builder builder = DocumentProtos.Document.newBuilder();
                builder.setKey(entry.getKey());
                builder.setNlm(readContents(nlm));
                builder.setPdf(readContents(pdf));
                DocumentProtos.Document document = builder.m28build();

                // NOTE(review): getBytes() uses the platform default charset; kept as-is so
                // existing keys stay byte-identical. Consider pinning an explicit charset.
                byte[] keyBytes = entry.getKey().getBytes();
                byte[] valueBytes = document.toByteArray();
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                key.set(keyBytes, 0, keyBytes.length);
                value.set(valueBytes, 0, valueBytes.length);
                writer.append(key, value);

                sinceLastSync++;
                if (sinceLastSync == SYNC_INTERVAL) {
                    writer.syncFs();
                    sinceLastSync = 0;
                }
                processed++;
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Reads the whole file into a {@link ByteString}, closing the stream even
     * when reading fails.
     *
     * @param file file to read, may be {@code null}
     * @return the file contents, or {@link ByteString#EMPTY} when {@code file} is {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    private static ByteString readContents(File file) throws IOException {
        if (file == null) {
            return ByteString.EMPTY;
        }
        FileInputStream in = new FileInputStream(file);
        try {
            return ByteString.copyFrom(IOUtils.toByteArray(in));
        } finally {
            in.close();
        }
    }

    /**
     * Creates a sequence file writer for the given path and key/value classes,
     * using a default {@link Configuration}.
     *
     * @param str  path of the sequence file to create
     * @param cls  key class
     * @param cls2 value class
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be created
     */
    private static <T1, T2> SequenceFile.Writer createSequenceFileWriter(String str, Class<T1> cls, Class<T2> cls2) throws IOException {
        return SequenceFile.createWriter(
                new Configuration(),
                SequenceFile.Writer.file(new Path(str)),
                SequenceFile.Writer.keyClass(cls),
                SequenceFile.Writer.valueClass(cls2),
                SequenceFile.Writer.bufferSize(WRITER_BUFFER_SIZE));
    }
}
