package com.bigdata.rdf.util;

import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rio.BasicRioLoader;
import com.bigdata.rdf.rio.IStatementBuffer;
import com.bigdata.rdf.rio.NQuadsParser;
import com.bigdata.rdf.rio.RDFParserOptions;
import com.bigdata.rdf.store.AbstractTripleStore;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.text.NumberFormat;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import net.jini.config.Configuration;
import net.jini.config.ConfigurationException;
import net.jini.config.ConfigurationProvider;
import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.ContextStatementImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFParserRegistry;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RDFWriterRegistry;
import org.openrdf.rio.Rio;

/* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter.class */
public class Splitter {
    /** Logger shared by the splitter and its nested task classes. */
    protected static final Logger log = Logger.getLogger(Splitter.class);
    /** Configuration component name; the fully qualified name of this class. */
    public static final String COMPONENT = Splitter.class.getName();
    /** Settings this splitter was constructed with. */
    private final Settings s;
    /** Executor running the parser tasks; assigned by {@link #start()} and cleared by {@link #terminate()}. */
    private volatile ExecutorService service;
    /** Counter advanced by {@link #getOutDir()} to choose the output subdirectory. */
    private final AtomicLong nextId = new AtomicLong();

    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$CompressEnum.class */
    /**
     * Compression schemes that can be applied to the generated output files,
     * each paired with the file-name suffix that identifies it.
     */
    public enum CompressEnum {
        /** No compression; contributes an empty suffix. */
        None(""),
        /** GZIP compression; contributes the {@code .gz} suffix. */
        GZip(".gz"),
        /** ZIP compression; contributes the {@code .zip} suffix. */
        Zip(".zip");

        /** File-name suffix (including the leading dot, if any). */
        private final String suffix;

        CompressEnum(String suffix) {
            this.suffix = suffix;
        }

        /**
         * @return the file-name suffix for this compression scheme; the empty
         *         string for {@link #None}.
         */
        public String getExt() {
            return suffix;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$ConfigurationOptions.class */
    /**
     * Names of the configuration entries read by {@link Settings} (all looked
     * up under the {@link Splitter#COMPONENT} component) together with their
     * default values. Members of an interface are implicitly
     * {@code public static final}.
     */
    public interface ConfigurationOptions {
        /** Entry naming the source file or directory. */
        String SRC_DIR = "srcDir";

        /** Entry naming the optional filter applied while scanning directories. */
        String SRC_FILTER = "srcFilter";

        /** Entry naming the RDF format assumed for source files. */
        String SRC_FORMAT = "srcFormat";
        /** Default for {@link #SRC_FORMAT}: the name of the RDF/XML format. */
        String DEFAULT_SRC_FORMAT = RDFFormat.RDFXML.getName();

        /** Entry naming the RDF parser options. */
        String PARSER_OPTIONS = "parserOptions";

        /** Entry naming the output directory. */
        String OUT_DIR = "outDir";

        /** Entry naming the RDF format of the output files. */
        String OUT_FORMAT = "outFormat";

        /** Entry naming the number of statements per output file. */
        String OUT_CHUNK_SIZE = "outChunkSize";
        /** Default for {@link #OUT_CHUNK_SIZE}. */
        int DEFAULT_OUT_CHUNK_SIZE = 10000;

        /** Entry naming the compression applied to output files. */
        String OUT_COMPRESS = "outCompress";
        /** Default for {@link #OUT_COMPRESS}: the name of {@link CompressEnum#None}. */
        String DEFAULT_OUT_COMPRESS = CompressEnum.None.toString();

        /** Entry naming the size of the worker thread pool. */
        String THREAD_POOL_SIZE = "threadPoolSize";
        /** Default for {@link #THREAD_POOL_SIZE}. */
        int DEFAULT_THREAD_POOL_SIZE = 10;

        /** Entry selecting whether output files are spread over subdirectories. */
        String SUBDIRS = "subdirs";
        /** Default for {@link #SUBDIRS}. */
        boolean DEFAULT_SUBDIRS = true;

        /** Entry naming the divisor used to assign output files to subdirectories. */
        String MAX_PER_SUB_DIR = "maxPerSubDir";
        /** Default for {@link #MAX_PER_SUB_DIR}. */
        int DEFAULT_MAX_PER_SUBDIR = 1000;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$MyLoader.class */
    /**
     * Loader that acts as its own {@link RDFHandler} and forwards every parsed
     * statement to an {@link IStatementBuffer}.
     */
    public static class MyLoader extends BasicRioLoader implements RDFHandler {
        /** Destination for parsed statements. */
        protected final IStatementBuffer<?> buffer;

        /**
         * @param iStatementBuffer buffer receiving the parsed statements
         */
        public MyLoader(IStatementBuffer<?> iStatementBuffer) {
            super(new ValueFactoryImpl());
            this.buffer = iStatementBuffer;
        }

        /** This loader handles the parser events itself. */
        @Override // com.bigdata.rdf.rio.BasicRioLoader
        public RDFHandler newRDFHandler() {
            return this;
        }

        /** Flushes the buffer (when there is one) after a successful load. */
        @Override // com.bigdata.rdf.rio.BasicRioLoader
        protected void success() {
            if (this.buffer == null) {
                return;
            }
            this.buffer.flush();
        }

        /** Resets the buffer (when there is one) and then delegates to the superclass. */
        @Override // com.bigdata.rdf.rio.BasicRioLoader
        public void error(Exception exc) {
            if (this.buffer != null) {
                this.buffer.reset();
            }
            super.error(exc);
        }

        /**
         * Adds the statement to the buffer, counts it, and notifies the
         * listeners each time the count reaches a multiple of 100000.
         */
        @Override // org.openrdf.rio.RDFHandler
        public void handleStatement(Statement statement) {
            if (log.isDebugEnabled()) {
                log.debug(statement);
            }
            final Resource subject = statement.getSubject();
            final URI predicate = statement.getPredicate();
            final Value object = statement.getObject();
            final Resource context = statement.getContext();
            this.buffer.add(subject, predicate, object, context);
            this.stmtsAdded++;
            final boolean reportProgress = this.stmtsAdded % 100000 == 0;
            if (reportProgress) {
                notifyListeners();
            }
        }

        /** No-op. */
        @Override // org.openrdf.rio.RDFHandler
        public void startRDF() throws RDFHandlerException {
        }

        /** No-op. */
        @Override // org.openrdf.rio.RDFHandler
        public void endRDF() throws RDFHandlerException {
        }

        /** No-op; namespace declarations are ignored. */
        @Override // org.openrdf.rio.RDFHandler
        public void handleNamespace(String str, String str2) throws RDFHandlerException {
        }

        /** No-op; comments are ignored. */
        @Override // org.openrdf.rio.RDFHandler
        public void handleComment(String str) throws RDFHandlerException {
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$MyStatementBuffer.class */
    /**
     * Fixed-capacity statement buffer for a single source file. Each time the
     * buffer fills up (or is flushed explicitly) its contents are written as
     * one output file ("chunk") named after the source file and the chunk
     * number.
     */
    public class MyStatementBuffer implements IStatementBuffer<Statement> {
        /** Source file whose statements are being buffered. */
        private final File srcFile;
        /** Buffered statements; only the first {@link #numStmts} slots are in use. */
        private final Statement[] stmts;
        /** Number of statements currently buffered. */
        private int numStmts = 0;
        /** Number of statements written since construction or the last {@link #reset()}. */
        private long nwritten = 0;
        /** Number of chunks written so far; used as the chunk number in file names. */
        private int nchunks = 0;
        /** Output format chosen by the most recent {@link #flush()}; {@code null} before the first flush. */
        private RDFFormat chunkFormat;

        /**
         * @param file source file; its name is the basis for the output file names
         */
        public MyStatementBuffer(File file) {
            this.stmts = new Statement[Splitter.this.s.outChunkSize];
            this.srcFile = file;
        }

        /** @return always {@code null}; this buffer is not backed by a database. */
        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public AbstractTripleStore getDatabase() {
            return null;
        }

        /** @return always {@code null}; this buffer is not backed by a statement store. */
        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public AbstractTripleStore getStatementStore() {
            return null;
        }

        /** Not supported. */
        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public void setBNodeMap(Map<String, BigdataBNode> map) {
            throw new UnsupportedOperationException();
        }

        @Override // com.bigdata.relation.accesspath.IBuffer
        public boolean isEmpty() {
            return this.numStmts == 0;
        }

        @Override // com.bigdata.relation.accesspath.IBuffer
        public int size() {
            return this.numStmts;
        }

        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public void add(Resource resource, URI uri, Value value) {
            add((Statement) new StatementImpl(resource, uri, value));
        }

        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public void add(Resource resource, URI uri, Value value, Resource resource2) {
            add((Statement) new ContextStatementImpl(resource, uri, value, resource2));
        }

        /** The statement type is ignored; only the four positions are buffered. */
        @Override // com.bigdata.rdf.rio.IStatementBuffer
        public void add(Resource resource, URI uri, Value value, Resource resource2, StatementEnum statementEnum) {
            add((Statement) new ContextStatementImpl(resource, uri, value, resource2));
        }

        /** Buffers the statement, first flushing a chunk if the buffer is full. */
        @Override // com.bigdata.relation.accesspath.IBuffer
        public void add(Statement statement) {
            if (this.numStmts == this.stmts.length) {
                flush();
            }
            this.stmts[this.numStmts++] = statement;
        }

        /** Discards all buffered statements and zeroes the written-statement counter. */
        @Override // com.bigdata.relation.accesspath.IBuffer
        public void reset() {
            for (int i = 0; i < this.stmts.length; i++) {
                this.stmts[i] = null;
            }
            this.numStmts = 0;
            this.nwritten = 0;
        }

        /**
         * Writes the buffered statements as the next chunk file and empties the
         * buffer.
         *
         * @return {@code 0} when the buffer was empty, otherwise the total
         *         number of statements written so far
         * @throws UnsupportedOperationException if the format can be determined
         *         neither from the source file name nor from the settings
         * @throws RuntimeException wrapping any {@link IOException} raised
         *         while writing the file
         */
        @Override // com.bigdata.relation.accesspath.IBuffer
        public long flush() {
            if (this.numStmts == 0) {
                return 0L;
            }
            final File outDir = Splitter.this.getOutDir();
            String name = stripCompressionExtension(this.srcFile.getName());
            RDFFormat srcFormat = RDFFormat.forFileName(name);
            if (srcFormat != null) {
                name = stripFormatExtension(name, srcFormat);
            } else {
                srcFormat = Splitter.this.s.srcFormat;
                if (srcFormat == null) {
                    throw new UnsupportedOperationException("Could not identify format: " + this.srcFile);
                }
            }
            // Fall back to the source format when no output format is configured.
            final RDFFormat outFormat = Splitter.this.s.outFormat != null ? Splitter.this.s.outFormat : srcFormat;
            // Remember the choice so writeFile() serializes in the format the file extension announces.
            this.chunkFormat = outFormat;
            final NumberFormat chunkNumber = NumberFormat.getIntegerInstance();
            chunkNumber.setMinimumIntegerDigits(6);
            chunkNumber.setGroupingUsed(false);
            final File outFile = new File(outDir, name + "_" + chunkNumber.format(this.nchunks) + "."
                    + outFormat.getDefaultFileExtension() + Splitter.this.s.outCompress.getExt());
            try {
                writeFile(outFile, this.numStmts, this.stmts);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            for (int i = 0; i < this.numStmts; i++) {
                this.stmts[i] = null;
            }
            this.nwritten += this.numStmts;
            this.nchunks++;
            this.numStmts = 0;
            return this.nwritten;
        }

        /** Removes a trailing {@code .zip} or {@code .gz} from the file name, if present. */
        private String stripCompressionExtension(String name) {
            if (name.endsWith(".zip")) {
                return name.substring(0, name.length() - 4);
            }
            if (name.endsWith(".gz")) {
                return name.substring(0, name.length() - 3);
            }
            return name;
        }

        /**
         * Removes the first of the format's file extensions that the name ends
         * with. The extension must be preceded by a dot, and at most one
         * extension is removed, so {@code barxml.rdf} yields {@code barxml}.
         */
        private String stripFormatExtension(String name, RDFFormat format) {
            for (String ext : format.getFileExtensions()) {
                final String suffix = "." + ext;
                if (name.endsWith(suffix)) {
                    return name.substring(0, name.length() - suffix.length());
                }
            }
            return name;
        }

        /**
         * Serializes the first {@code i} statements of the array to the file,
         * applying the configured compression.
         *
         * @param file         file to create
         * @param i            number of statements to write
         * @param statementArr statements; only indices {@code [0, i)} are read
         * @throws IOException if the file cannot be written; an
         *         {@link RDFHandlerException} from the writer is wrapped
         */
        protected void writeFile(File file, int i, Statement[] statementArr) throws IOException {
            if (Splitter.log.isInfoEnabled()) {
                Splitter.log.info("Writing " + i + " statements on " + file);
            }
            // Use the format flush() resolved; fall back to the configured one for direct calls.
            final RDFFormat format = this.chunkFormat != null ? this.chunkFormat : Splitter.this.s.outFormat;
            OutputStream out = new FileOutputStream(file);
            try {
                out = new BufferedOutputStream(out);
                switch (Splitter.this.s.outCompress) {
                    case None:
                        break;
                    case GZip:
                        out = new GZIPOutputStream(out);
                        break;
                    case Zip: {
                        // A zip stream rejects writes until an entry has been started.
                        String entryName = file.getName();
                        final String zipExt = CompressEnum.Zip.getExt();
                        if (entryName.endsWith(zipExt)) {
                            entryName = entryName.substring(0, entryName.length() - zipExt.length());
                        }
                        final ZipOutputStream zip = new ZipOutputStream(out);
                        zip.putNextEntry(new ZipEntry(entryName));
                        out = zip;
                        break;
                    }
                    default:
                        throw new AssertionError("Unknown value: outCompress=" + Splitter.this.s.outCompress);
                }
                final RDFWriter writer = Rio.createWriter(format, out);
                try {
                    writer.startRDF();
                    for (int n = 0; n < i; n++) {
                        writer.handleStatement(statementArr[n]);
                    }
                    writer.endRDF();
                } catch (RDFHandlerException e) {
                    throw new IOException(e);
                }
            } finally {
                out.close();
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$ParserTask.class */
    /**
     * Task that parses one source file and writes its statements out in
     * chunks through a {@link MyStatementBuffer}.
     */
    public class ParserTask implements Callable<Void> {
        /** File to parse. */
        private final File file;
        /** Base URL handed to the parser; the file's URI as a string. */
        private final String baseURL;
        /** Format used when the file name does not identify one; may be {@code null}. */
        private final RDFFormat defaultRDFFormat;

        /**
         * @param file      file to parse
         * @param rDFFormat format to assume when the file name does not
         *                  identify one; may be {@code null}
         * @throws IllegalArgumentException if {@code file} is {@code null}
         */
        public ParserTask(File file, RDFFormat rDFFormat) {
            if (file == null) {
                throw new IllegalArgumentException();
            }
            this.file = file;
            this.baseURL = file.toURI().toString();
            this.defaultRDFFormat = rDFFormat;
        }

        /**
         * Parses the file. A trailing {@code .zip} or {@code .gz} selects the
         * matching decompression; the RDF format is taken from the remaining
         * file name and falls back to the default format given at
         * construction.
         *
         * @return always {@code null}
         * @throws UnsupportedOperationException if no format can be determined
         * @throws IOException if a zip archive contains no file entry
         * @throws Exception any failure while reading or parsing; it is logged
         *         before being rethrown
         */
        @Override // java.util.concurrent.Callable
        public Void call() throws Exception {
            if (Splitter.log.isInfoEnabled()) {
                Splitter.log.info("file=" + this.file);
            }
            final MyStatementBuffer buffer = new MyStatementBuffer(this.file);
            try {
                InputStream in = new FileInputStream(this.file);
                try {
                    String name = this.file.getName();
                    if (name.endsWith(".zip")) {
                        name = name.substring(0, name.length() - 4);
                        final ZipInputStream zip = new ZipInputStream(in);
                        in = zip;
                        // A zip stream reports end-of-stream until it is positioned on an entry.
                        ZipEntry entry = zip.getNextEntry();
                        while (entry != null && entry.isDirectory()) {
                            entry = zip.getNextEntry();
                        }
                        if (entry == null) {
                            throw new IOException("No file entry in zip archive: " + this.file);
                        }
                    } else if (name.endsWith(".gz")) {
                        name = name.substring(0, name.length() - 3);
                        in = new GZIPInputStream(in);
                    }
                    // Prefer the format announced by the file name over the default.
                    RDFFormat format = RDFFormat.forFileName(name);
                    if (format == null) {
                        format = this.defaultRDFFormat;
                        if (format == null) {
                            throw new UnsupportedOperationException("Could not identify format: " + this.file);
                        }
                    }
                    final BufferedReader reader = new BufferedReader(new InputStreamReader(in));
                    try {
                        new MyLoader(buffer).loadRdf(reader, this.baseURL, format, (String) null, Splitter.this.s.parserOptions);
                    } finally {
                        reader.close();
                    }
                } finally {
                    in.close();
                }
                buffer.flush();
                return null;
            } catch (Exception e) {
                Splitter.log.error("file=" + this.file + " : " + e, e);
                throw e;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$RunnableFileSystemLoader.class */
    /**
     * Walks a file or directory tree and collects one {@link ParserTask} per
     * non-hidden file accepted by the filter.
     */
    public class RunnableFileSystemLoader implements Callable<List<Callable<Void>>> {
        /** Root file or directory of the scan. */
        final File fileOrDir;
        /** Filter applied when listing directories; {@code null} accepts everything. */
        final FilenameFilter filter;
        /** Default format handed to each created task. */
        final RDFFormat rdfFormat;
        /** Tasks collected so far. */
        final List<Callable<Void>> futures = new LinkedList<Callable<Void>>();

        /**
         * @param file           root file or directory
         * @param filenameFilter filter applied to directory listings, or {@code null}
         * @param rDFFormat      default format for the created tasks
         * @throws IllegalArgumentException if {@code file} is {@code null}
         */
        public RunnableFileSystemLoader(File file, FilenameFilter filenameFilter, RDFFormat rDFFormat) {
            if (file == null) {
                throw new IllegalArgumentException();
            }
            this.fileOrDir = file;
            this.filter = filenameFilter;
            this.rdfFormat = rDFFormat;
        }

        /**
         * Scans the tree.
         *
         * @return the collected tasks
         */
        @Override // java.util.concurrent.Callable
        public List<Callable<Void>> call() throws Exception {
            process2(this.fileOrDir);
            return this.futures;
        }

        /**
         * Recursively visits {@code file}: hidden entries are skipped, plain
         * files become tasks, and directories are descended into.
         */
        private void process2(File file) throws InterruptedException {
            if (file.isHidden()) {
                return;
            }
            if (!file.isDirectory()) {
                try {
                    if (Splitter.log.isInfoEnabled()) {
                        Splitter.log.info("Accepting file: " + file);
                    }
                    this.futures.add(new ParserTask(file, this.rdfFormat));
                } catch (Exception e) {
                    Splitter.log.error(file, e);
                }
                return;
            }
            if (Splitter.log.isInfoEnabled()) {
                Splitter.log.info("Scanning directory: " + file);
            }
            final File[] children = this.filter == null ? file.listFiles() : file.listFiles(this.filter);
            if (children == null) {
                // listFiles() returns null when the directory cannot be read.
                Splitter.log.warn("Could not list directory: " + file);
                return;
            }
            for (File child : children) {
                process2(child);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/bigdata-0.83.2.jar:com/bigdata/rdf/util/Splitter$Settings.class */
    /**
     * Immutable snapshot of the splitter configuration, read from the
     * {@link Splitter#COMPONENT} component of a {@link Configuration}.
     */
    public static class Settings {
        final File srcDir;
        final FilenameFilter srcFilter;
        final RDFFormat srcFormat;
        final File outDir;
        final RDFFormat outFormat;
        final CompressEnum outCompress;
        final int outChunkSize;
        final RDFParserOptions parserOptions;
        final boolean subdirs;
        final int maxPerSubdir;
        final int threadPoolSize;

        /**
         * Reads and validates the settings.
         *
         * @param configuration configuration to read from
         * @throws ConfigurationException if an entry cannot be read, if no
         *         parser (writer) is registered for the source (output)
         *         format, or if a numeric setting is not positive
         */
        public Settings(Configuration configuration) throws ConfigurationException {
            this.srcDir = (File) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.SRC_DIR, File.class);
            this.srcFilter = (FilenameFilter) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.SRC_FILTER, FilenameFilter.class, (Object) null);
            this.srcFormat = RDFFormat.valueOf((String) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.SRC_FORMAT, String.class, ConfigurationOptions.DEFAULT_SRC_FORMAT));
            if (this.srcFormat != null && RDFParserRegistry.getInstance().get(this.srcFormat) == null) {
                throw new ConfigurationException("srcFormat=" + this.srcFormat);
            }
            this.outDir = (File) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.OUT_DIR, File.class);
            this.outFormat = RDFFormat.valueOf((String) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.OUT_FORMAT, String.class));
            if (this.outFormat != null && RDFWriterRegistry.getInstance().get(this.outFormat) == null) {
                throw new ConfigurationException("outFormat=" + this.outFormat);
            }
            this.outCompress = (CompressEnum) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.OUT_COMPRESS, CompressEnum.class, CompressEnum.None);
            this.outChunkSize = ((Integer) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.OUT_CHUNK_SIZE, Integer.TYPE, ConfigurationOptions.DEFAULT_OUT_CHUNK_SIZE)).intValue();
            if (this.outChunkSize <= 0) {
                // The chunk size is the capacity of the statement buffer array.
                throw new ConfigurationException(ConfigurationOptions.OUT_CHUNK_SIZE + "=" + this.outChunkSize);
            }
            final RDFParserOptions defaultParserOptions = new RDFParserOptions();
            defaultParserOptions.setPreserveBNodeIDs(true);
            defaultParserOptions.setStopAtFirstError(false);
            defaultParserOptions.setVerifyData(false);
            defaultParserOptions.setDatatypeHandling(RDFParser.DatatypeHandling.IGNORE);
            this.parserOptions = (RDFParserOptions) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.PARSER_OPTIONS, RDFParserOptions.class, defaultParserOptions);
            this.subdirs = ((Boolean) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.SUBDIRS, Boolean.TYPE, ConfigurationOptions.DEFAULT_SUBDIRS)).booleanValue();
            this.maxPerSubdir = ((Integer) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.MAX_PER_SUB_DIR, Integer.TYPE, ConfigurationOptions.DEFAULT_MAX_PER_SUBDIR)).intValue();
            if (this.subdirs && this.maxPerSubdir <= 0) {
                // The value is used as a divisor when choosing the output subdirectory.
                throw new ConfigurationException(ConfigurationOptions.MAX_PER_SUB_DIR + "=" + this.maxPerSubdir);
            }
            this.threadPoolSize = ((Integer) configuration.getEntry(Splitter.COMPONENT, ConfigurationOptions.THREAD_POOL_SIZE, Integer.TYPE, ConfigurationOptions.DEFAULT_THREAD_POOL_SIZE)).intValue();
            if (this.threadPoolSize <= 0) {
                // The value is the size of the fixed thread pool created by start().
                throw new ConfigurationException(ConfigurationOptions.THREAD_POOL_SIZE + "=" + this.threadPoolSize);
            }
        }
    }

    /**
     * Creates a splitter; call {@link #start()} before submitting work.
     *
     * @param settings configuration of this splitter
     * @throws IllegalArgumentException if {@code settings} is {@code null}
     */
    protected Splitter(Settings settings) {
        if (settings == null) {
            // Fail here rather than with a NullPointerException on first use.
            throw new IllegalArgumentException();
        }
        this.s = settings;
    }

    /**
     * Creates the fixed-size thread pool that runs the parser tasks.
     *
     * @throws IllegalStateException if the splitter is already running
     */
    public synchronized void start() throws InterruptedException, Exception {
        final boolean alreadyRunning = this.service != null;
        if (alreadyRunning) {
            throw new IllegalStateException();
        }
        final int poolSize = this.s.threadPoolSize;
        this.service = Executors.newFixedThreadPool(poolSize);
    }

    /**
     * Shuts the thread pool down immediately and forgets it. Does nothing when
     * the splitter is not running.
     */
    public synchronized void terminate() {
        final ExecutorService running = this.service;
        if (running != null) {
            running.shutdownNow();
            this.service = null;
        }
    }

    /**
     * Collects a parser task for every accepted file under {@code file} and
     * runs them all on the thread pool, returning once every task has
     * completed. Failures of individual tasks are logged by the tasks
     * themselves and are not rethrown here.
     *
     * @param file           source file or directory to scan
     * @param filenameFilter filter applied to directory listings, or {@code null}
     * @param rDFFormat      format assumed when a file name does not identify one
     * @throws IllegalStateException if the splitter has not been started
     */
    public void submitAll(File file, FilenameFilter filenameFilter, RDFFormat rDFFormat) throws InterruptedException, Exception {
        // Read the volatile field once so a concurrent terminate() cannot null it mid-call.
        final ExecutorService executor = this.service;
        if (executor == null) {
            throw new IllegalStateException();
        }
        // Honor the arguments rather than silently substituting the configured settings.
        final List<Callable<Void>> tasks = acceptAll(file, filenameFilter, rDFFormat);
        if (log.isInfoEnabled()) {
            log.info("Running: " + tasks.size() + " tasks");
        }
        executor.invokeAll(tasks);
    }

    /**
     * Scans {@code file} on the calling thread and returns one parser task per
     * accepted file.
     */
    private List<Callable<Void>> acceptAll(File file, FilenameFilter filenameFilter, RDFFormat rDFFormat) throws Exception {
        final RunnableFileSystemLoader scanner = new RunnableFileSystemLoader(file, filenameFilter, rDFFormat);
        final List<Callable<Void>> tasks = scanner.call();
        return tasks;
    }

    /**
     * Schedules a single file for parsing on the thread pool.
     *
     * @param file      file to parse
     * @param rDFFormat format assumed when the file name does not identify one
     * @return future that completes when the file has been processed
     * @throws IllegalArgumentException if {@code file} is {@code null}
     * @throws IllegalStateException if the splitter has not been started
     */
    public Future<Void> submitOne(File file, RDFFormat rDFFormat) throws Exception {
        if (log.isInfoEnabled()) {
            log.info("file=" + file + ", rdfFormat=" + rDFFormat);
        }
        // Read the volatile field once so a concurrent terminate() cannot null it mid-call.
        final ExecutorService executor = this.service;
        if (executor == null) {
            throw new IllegalStateException();
        }
        final FutureTask<Void> task = new FutureTask<Void>(new ParserTask(file, rDFFormat));
        executor.execute(task);
        return task;
    }

    /**
     * Chooses the directory for the next output file. Without subdirectories
     * this is the configured output directory. Otherwise each call advances a
     * counter, and the counter divided by the configured maximum per
     * subdirectory names a subdirectory, which is created on demand.
     *
     * @return directory in which to create the next output file
     */
    protected File getOutDir() {
        if (this.s.subdirs) {
            final long bucket = this.nextId.incrementAndGet() / this.s.maxPerSubdir;
            final File subdir = new File(this.s.outDir, Long.toString(bucket));
            final boolean created = subdir.mkdirs();
            if (created && log.isInfoEnabled()) {
                log.info("new subdirectory: " + subdir);
            }
            return subdir;
        }
        return this.s.outDir;
    }

    /**
     * Command-line entry point: reads the settings from the configuration
     * named by the arguments, splits every accepted source file, and exits
     * with status 0.
     *
     * @param strArr arguments passed to {@link ConfigurationProvider#getInstance(String[])}
     * @throws IOException if the output directory cannot be created
     * @throws Exception if the configuration is invalid or the run fails
     */
    public static void main(String[] strArr) throws Exception {
        NQuadsParser.forceLoad();
        final Settings settings = new Settings(ConfigurationProvider.getInstance(strArr));
        // mkdirs() also returns false when the directory already exists, so check the outcome.
        if (!settings.outDir.mkdirs() && !settings.outDir.isDirectory()) {
            throw new IOException("Could not create output directory: " + settings.outDir);
        }
        final Splitter splitter = new Splitter(settings);
        // Stop the worker threads if the JVM is shut down while a run is in progress.
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override // java.lang.Thread, java.lang.Runnable
            public void run() {
                splitter.terminate();
            }
        });
        splitter.start();
        try {
            splitter.submitAll(settings.srcDir, settings.srcFilter, settings.srcFormat);
        } finally {
            splitter.terminate();
        }
        System.exit(0);
    }
}
