package it.unimi.dsi.sux4j.mph;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.FileStringParser;
import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.bits.TransformationStrategies;
import it.unimi.dsi.bits.TransformationStrategy;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.AbstractLongBigList;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrayBigList;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.longs.LongBigListIterator;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.sux4j.io.BucketedHashStore;
import it.unimi.dsi.sux4j.mph.GOV3Function;
import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/sux4j/mph/TwoStepsGOV3Function.class */
public class TwoStepsGOV3Function<T> extends AbstractHashFunction<T> implements Serializable, Size64 {
    /** Serialization version identifier. */
    public static final long serialVersionUID = 1;
    /** Logger used by the constructor and by {@code main} to report progress and statistics. */
    private static final Logger LOGGER = LoggerFactory.getLogger(TwoStepsGOV3Function.class);
    /** Presumably a switch for internal consistency checks; it is disabled here. */
    private static final boolean ASSERTS = false;
    /** The number of keys, as reported by the bucketed hash store at construction time. */
    protected final long n;
    /** The strategy used to turn keys into bit vectors before hashing. */
    protected final TransformationStrategy<? super T> transform;
    /**
     * The first function, whose output is either an index into {@link #remap} or {@link #escape};
     * {@code null} if there are no keys or the chosen threshold is zero.
     */
    protected final GOV3Function<T> firstFunction;
    /**
     * The second function, built on the keys kept by the store filter: all keys when there is no first
     * function, otherwise those mapped by the first function to {@link #escape}; {@code null} if there are no keys.
     */
    protected final GOV3Function<T> secondFunction;
    /**
     * The values returned directly through the first function, in the decreasing-count order computed by
     * the constructor; {@code null} if there are no keys.
     */
    protected final long[] remap;
    /** The first-function output that redirects a lookup to the second function; it is also the length of {@link #remap}. */
    protected final int escape;
    /** The seed of the bucketed hash store used at construction; {@link #getLong(Object)} uses it to hash keys. */
    protected long seed;
    /** The largest {@code Fast.length()} over all values; it is the output width given to the second function. */
    protected final int width;
    /** The mean rank of the values in the count ordering, as computed by the constructor. */
    protected final double rankMean;

    /* loaded from: input_file:it/unimi/dsi/sux4j/mph/TwoStepsGOV3Function$Builder.class */
    /**
     * A fluent, single-use builder for {@link TwoStepsGOV3Function}.
     *
     * <p>Every setter stores its argument and returns this builder, so calls can be chained;
     * {@link #build()} may be invoked only once.
     */
    public static class Builder<T> {
        /** The keys of the function, if any have been specified. */
        protected Iterable<? extends T> keys;
        /** The strategy turning keys into bit vectors; if unset, it is taken from the store at build time. */
        protected TransformationStrategy<? super T> transform;
        /** The temporary directory handed to the function constructor. */
        protected File tempDir;
        /** An optional, already prepared bucketed hash store. */
        protected BucketedHashStore<T> bucketedHashStore;
        /** The values to be associated with the keys. */
        protected LongBigList values;
        /** Whether {@link #build()} has already been called. */
        protected boolean built;

        /**
         * Sets the keys of the function.
         *
         * @param iterable the keys.
         * @return this builder.
         */
        public Builder<T> keys(Iterable<? extends T> iterable) {
            keys = iterable;
            return this;
        }

        /**
         * Sets the transformation strategy for the keys.
         *
         * @param transformationStrategy the strategy.
         * @return this builder.
         */
        public Builder<T> transform(TransformationStrategy<? super T> transformationStrategy) {
            transform = transformationStrategy;
            return this;
        }

        /**
         * Sets the temporary directory.
         *
         * @param file the temporary directory.
         * @return this builder.
         */
        public Builder<T> tempDir(File file) {
            tempDir = file;
            return this;
        }

        /**
         * Sets a bucketed hash store to be used in place of one created from the keys.
         *
         * @param bucketedHashStore the store.
         * @return this builder.
         */
        public Builder<T> store(BucketedHashStore<T> bucketedHashStore) {
            this.bucketedHashStore = bucketedHashStore;
            return this;
        }

        /**
         * Sets the values to be associated with the keys.
         *
         * @param longBigList the values.
         * @return this builder.
         */
        public Builder<T> values(LongBigList longBigList) {
            values = longBigList;
            return this;
        }

        /**
         * Builds the function from the collected settings.
         *
         * <p>The builder is marked as used before any validation takes place, so it cannot be
         * reused even if this method throws.
         *
         * @return a new two-steps GOV3 function.
         * @throws IllegalStateException if this builder has already been used.
         * @throws IllegalArgumentException if neither a transformation strategy nor a store was specified.
         * @throws IOException if the function constructor fails with an I/O error.
         */
        public TwoStepsGOV3Function<T> build() throws IOException {
            if (built) {
                throw new IllegalStateException("This builder has been already used");
            }
            built = true;

            final boolean strategyMissing = transform == null;
            if (strategyMissing && bucketedHashStore == null) {
                throw new IllegalArgumentException("You must specify a TransformationStrategy, either explicitly or via a given BucketedHashStore");
            }
            if (strategyMissing) {
                // Fall back to the strategy the store was created with.
                transform = bucketedHashStore.transform();
            }
            return new TwoStepsGOV3Function<>(keys, transform, values, tempDir, bucketedHashStore);
        }
    }

    /**
     * Creates a new two-steps GOV3 function for the given keys and values.
     *
     * <p>The values are counted and sorted by decreasing count. A threshold {@code best} is then chosen
     * by minimising an estimate of the space used: the first {@code 2^best - 1} values of that ordering
     * are stored in {@link #remap} and assigned through a first function of {@code best} output bits,
     * while keys whose value is not in {@link #remap} are mapped by the first function to {@link #escape}
     * and resolved by a second function of {@link #width} output bits.
     *
     * @param iterable the keys; may be {@code null} if {@code bucketedHashStore} is provided.
     * @param transformationStrategy the strategy turning keys into bit vectors.
     * @param longBigList the values to be associated with the keys (read both sequentially and by index).
     * @param file the temporary directory passed to the bucketed hash store created by this constructor; may be {@code null}.
     * @param bucketedHashStore a store to be used instead of creating one from the keys, or {@code null};
     *        a provided store is not closed by this constructor.
     * @throws IllegalArgumentException if both {@code iterable} and {@code bucketedHashStore} are {@code null}.
     * @throws IOException if the bucketed hash store or one of the underlying builders fails.
     */
    protected TwoStepsGOV3Function(Iterable<? extends T> iterable, TransformationStrategy<? super T> transformationStrategy, final LongBigList longBigList, File file, BucketedHashStore<T> bucketedHashStore) throws IOException {
        this.transform = transformationStrategy;
        final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
        progressLogger.displayLocalSpeed = true;
        progressLogger.displayFreeMemory = true;
        progressLogger.itemsName = "keys";

        final boolean givenBucketedHashStore = bucketedHashStore != null;
        if (!givenBucketedHashStore) {
            if (iterable == null) {
                throw new IllegalArgumentException("If you do not provide a bucketed hash store, you must provide the keys");
            }
            // Honour the requested temporary directory instead of silently ignoring it.
            bucketedHashStore = new BucketedHashStore<>(transformationStrategy, file, progressLogger);
        }

        try {
            if (!givenBucketedHashStore) {
                bucketedHashStore.reset(new XoRoShiRo128PlusRandomGenerator().nextLong());
                bucketedHashStore.addAll(iterable.iterator());
            }

            this.n = bucketedHashStore.size();
            this.defRetValue = -1L;

            if (this.n == 0) {
                // Empty function: no structure is built at all.
                this.width = 0;
                this.escape = 0;
                this.rankMean = 0;
                this.firstFunction = null;
                this.secondFunction = null;
                this.remap = null;
                return;
            }

            // Compute the distribution of the values and the maximum number of bits.
            // NOTE(review): with a default return value of -1 the first occurrence of a value stores 0,
            // so every stored count is one less than the number of occurrences. The sort order is not
            // affected, but rankMean and the cost estimate below use the shifted counts. Confirm whether
            // this is intended before changing it.
            int w = 0;
            final Long2LongOpenHashMap counts = new Long2LongOpenHashMap();
            counts.defaultReturnValue(-1L);
            for (final LongBigListIterator valueIterator = longBigList.iterator(); valueIterator.hasNext();) {
                final long value = valueIterator.nextLong();
                counts.put(value, counts.get(value) + 1);
                w = Math.max(w, Fast.length(value));
            }
            this.width = w;
            final int m = counts.size();

            LOGGER.debug("Generating two-steps GOV3 function with " + w + " output bits...");

            // Sort the distinct values by decreasing count.
            final long[] keysArray = counts.keySet().toArray(new long[m]);
            LongArrays.quickSort(keysArray, 0, keysArray.length, (a, b) -> Long.compare(counts.get(b), counts.get(a)));

            long mean = 0;
            for (int i = 0; i < keysArray.length; i++) {
                mean += i * counts.get(keysArray[i]);
            }
            // Floating-point division: an integer quotient would drop the fractional part of the mean.
            this.rankMean = (double) mean / this.n;

            // Examine every candidate threshold r and keep the one with the smallest estimated cost.
            long post = this.n;
            long bestCost = Long.MAX_VALUE;
            int pos = 0;
            int best = -1;
            for (int r = 0; r < w && pos < m; r++) {
                final long cost = (long) Math.min(GOV3Function.C * this.n * 1.126d + this.n * r, GOV3Function.C * this.n * r)
                        + (long) Math.min(GOV3Function.C * post * 1.126d + post * w, GOV3Function.C * post * w)
                        + (long) pos * Long.SIZE; // long arithmetic: pos * 64 can exceed the int range
                if (cost < bestCost) {
                    best = r;
                    bestCost = cost;
                }
                // Move the next 2^r values of the ordering out of the share left to the second function.
                for (int j = 0; j < (1 << r) && pos < m; j++) {
                    post -= counts.get(keysArray[pos++]);
                }
            }

            counts.clear();
            counts.trim();

            // The remap array is indexed by an int, so the threshold must stay below the int width.
            if (best >= Integer.SIZE) {
                best = Integer.SIZE - 1;
            }

            LOGGER.debug("Best threshold: " + best);
            this.escape = (1 << best) - 1;
            this.remap = new long[this.escape];
            System.arraycopy(keysArray, 0, this.remap, 0, this.remap.length);

            // Inverse of remap: from a value to its index, or -1 if the value is not remapped.
            final Long2LongOpenHashMap map = new Long2LongOpenHashMap();
            map.defaultReturnValue(-1L);
            for (int i = 0; i < this.escape; i++) {
                map.put(this.remap[i], i);
            }

            if (best != 0) {
                this.firstFunction = new GOV3Function.Builder<T>().keys(iterable).transform(transformationStrategy).store(bucketedHashStore).values(new AbstractLongBigList() {
                    @Override
                    public long getLong(long index) {
                        final long rank = map.get(longBigList.getLong(index));
                        return rank == -1 ? TwoStepsGOV3Function.this.escape : rank;
                    }

                    @Override
                    public long size64() {
                        return TwoStepsGOV3Function.this.n;
                    }
                }, best).indirect().build();
                LOGGER.debug("Actual bit cost per key of first function: " + ((double) this.firstFunction.numBits() / this.n));
            } else {
                this.firstFunction = null;
            }

            // Keep in the store only the keys that the first function cannot resolve.
            bucketedHashStore.filter(signature -> this.firstFunction == null || this.firstFunction.getLongBySignature(signature) == this.escape);

            this.secondFunction = new GOV3Function.Builder<T>().store(bucketedHashStore).values(longBigList, w).indirect().build();
            this.seed = bucketedHashStore.seed();

            LOGGER.debug("Actual bit cost per key of second function: " + ((double) this.secondFunction.numBits() / this.n));
            LOGGER.info("Actual bit cost per key: " + ((double) numBits() / this.n));
            LOGGER.info("Completed.");
        } finally {
            // A store created here is owned by this constructor and is closed on failure too.
            if (!givenBucketedHashStore) {
                bucketedHashStore.close();
            }
        }
    }

    /**
     * Returns the value associated with the given key.
     *
     * <p>The key is turned into a bit vector by {@link #transform}, hashed with {@link #seed}, and the
     * resulting signature is resolved by {@link #getLongBySignature(long[])}.
     *
     * @param obj a key; it is cast, unchecked, to the key type.
     * @return the default return value if this function has no keys; otherwise, the result of the
     *         signature-based lookup.
     */
    @Override
    @SuppressWarnings("unchecked")
    public long getLong(Object obj) {
        if (this.n == 0) {
            return this.defRetValue;
        }
        final long[] signature = new long[2];
        // The cast is required: an Object is not acceptable where a capture of "? super T" is expected.
        Hashes.spooky4(this.transform.toBitVector((T) obj), this.seed, signature);
        return getLongBySignature(signature);
    }

    /**
     * Returns the value associated with a key, given the key's signature.
     *
     * <p>If a first function exists, its output is used first: {@code -1} yields the default return
     * value, any output other than {@link #escape} is used as an index into {@link #remap}, and
     * {@link #escape} sends the lookup to the second function.
     *
     * @param jArr the signature of a key.
     * @return the default return value if this function has no keys or the first function returns
     *         {@code -1}; otherwise, the remapped value or the output of the second function.
     */
    public long getLongBySignature(long[] jArr) {
        // An empty function has neither a first nor a second function to query.
        if (this.n == 0) {
            return this.defRetValue;
        }
        if (this.firstFunction != null) {
            final int rank = (int) this.firstFunction.getLongBySignature(jArr);
            if (rank == -1) {
                return this.defRetValue;
            }
            if (rank != this.escape) {
                return this.remap[rank];
            }
        }
        return this.secondFunction.getLongBySignature(jArr);
    }

    /**
     * Returns the number of keys this function was built on.
     *
     * @return the number of keys.
     */
    @Override
    public long size64() {
        return n;
    }

    /**
     * Returns the number of bits used by this structure.
     *
     * <p>The result is the sum of the bits reported by the first function (if any), the second function
     * and the transformation strategy, plus 64 bits for each entry of {@link #remap}. For a function
     * with no keys only the transformation strategy is counted, as no other component exists.
     *
     * @return the number of bits used by this structure.
     */
    public long numBits() {
        if (this.n == 0) {
            // Both functions and remap are null for an empty function.
            return this.transform.numBits();
        }
        final long firstBits = this.firstFunction != null ? this.firstFunction.numBits() : 0L;
        // Long arithmetic: remap.length * 64 can exceed the int range.
        final long remapBits = (long) this.remap.length * Long.SIZE;
        return firstBits + this.secondFunction.numBits() + this.transform.numBits() + remapBits;
    }

    /**
     * Command-line entry point: builds a two-steps GOV3 function from a list of strings and a file of
     * values, and serialises it.
     *
     * <p>The strings are read from standard input (and loaded into a list) when the string file is
     * {@code -}; otherwise they are read through a {@link FileLinesCollection}. The values are loaded
     * from the file named by the {@code values} option.
     *
     * @param strArr the command-line arguments.
     * @throws NoSuchMethodException declared for the creation of the charset string parser.
     * @throws IOException if reading the input or storing the function fails.
     * @throws JSAPException if the command-line parser cannot be set up.
     */
    public static void main(String[] strArr) throws NoSuchMethodException, IOException, JSAPException {
        final SimpleJSAP jsap = new SimpleJSAP(TwoStepsGOV3Function.class.getName(), "Builds a two-steps GOV3 function mapping a newline-separated list of strings to their ordinal position, or to specific values.", new Parameter[] {
            new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding."),
            new FlaggedOption("tempDir", FileStringParser.getParser(), JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'T', "temp-dir", "A directory for temporary files."),
            new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
            new Switch("utf32", JSAP.NO_SHORTFLAG, "utf-32", "Use UTF-32 internally (handles surrogate pairs)."),
            new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."),
            new FlaggedOption("values", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'v', "values", "A binary file in DataInput format containing a long for each string (otherwise, the values will be the ordinal positions of the strings)."),
            new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised two-steps GOV3 function."),
            new UnflaggedOption("stringFile", JSAP.STRING_PARSER, "-", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The name of a file containing a newline-separated list of strings, or - for standard input; in the first case, strings will not be loaded into core memory.")
        });

        final JSAPResult jsapResult = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }

        final String functionName = jsapResult.getString("function");
        final String stringFile = jsapResult.getString("stringFile");
        final Charset encoding = (Charset) jsapResult.getObject("encoding");
        final File tempDir = jsapResult.getFile("tempDir");
        final boolean zipped = jsapResult.getBoolean("zipped");
        final boolean iso = jsapResult.getBoolean("iso");
        final boolean utf32 = jsapResult.getBoolean("utf32");

        // Typed as Iterable: the stdin branch yields a list, the file branch a collection that is not a list.
        final Iterable<? extends CharSequence> lines;
        if ("-".equals(stringFile)) {
            final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
            progressLogger.displayLocalSpeed = true;
            progressLogger.displayFreeMemory = true;
            progressLogger.start("Loading strings...");
            lines = new LineIterator(new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding)), progressLogger).allLines();
            progressLogger.done();
        } else {
            lines = new FileLinesCollection(stringFile, encoding.toString(), zipped);
        }

        final TransformationStrategy<CharSequence> transformationStrategy = iso
                ? TransformationStrategies.rawIso()
                : utf32
                        ? TransformationStrategies.rawUtf32()
                        : TransformationStrategies.rawUtf16();

        final LongBigList values = LongBigArrayBigList.wrap(BinIO.loadLongsBig(jsapResult.getString("values")));
        BinIO.storeObject(new TwoStepsGOV3Function<CharSequence>(lines, transformationStrategy, values, tempDir, null), functionName);
        LOGGER.info("Completed.");
    }
}
