package edu.umn.biomedicus.normalization;

import com.google.inject.Inject;
import com.google.inject.Module;
import edu.umn.biomedicus.common.dictionary.BidirectionalDictionary;
import edu.umn.biomedicus.common.dictionary.StringIdentifier;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.Bootstrapper;
import edu.umn.biomedicus.vocabulary.Vocabulary;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.PathOptionHandler;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/umn/biomedicus/normalization/NormalizerModelBuilder.class */
public final class NormalizerModelBuilder {
    /**
     * Position of the inflected form in a '|'-split LRAGR line; presumably a zero-based index
     * into the split record (process() reads this same position) - confirm against the LRAGR
     * format documentation.
     */
    public static final int LRAGR_INFLECTIONAL_VARIANT = 1;
    /** Position of the syntactic category (e.g. "noun", "verb") in a '|'-split LRAGR line. */
    public static final int LRAGR_SYNTACTIC_CATEGORY = 2;
    /** Position of the agreement/inflection code (e.g. "past_part") in a '|'-split LRAGR line. */
    public static final int LRAGR_AGREEMENT_INFLECTION_CODE = 3;
    /** Position of the base form in a '|'-split LRAGR line. */
    public static final int LRAGR_BASE_FORM = 4;
    /** Class logger; assigned in the static initializer. */
    private static final Logger LOGGER;
    /** Inflected forms longer than this many characters are skipped when building the model. */
    private static final int IGNORE_WHEN_LONGER = 100;
    /** Primary LRAGR (category, agreement code) to Penn part-of-speech table; filled in the static initializer. */
    private static final Map<LragrPos, PartOfSpeech> LRAGR_TO_PENN;
    /**
     * Secondary table consulted in addition to {@link #LRAGR_TO_PENN}; it only has noun entries,
     * with the NN/NNS tags swapped relative to the primary table.
     */
    private static final Map<LragrPos, PartOfSpeech> LRAGR_TO_PENN_FALLBACK;
    /** Dictionary used to look up identifiers of base (normalized) forms. */
    private final BidirectionalDictionary normsIndex;
    /** Dictionary used to look up identifiers of inflected word forms. */
    private final BidirectionalDictionary wordsIndex;

    /** Path of the LRAGR input file; populated by args4j from the required {@code -l} option. */
    @Nullable
    @Option(name = "-l", required = true, handler = PathOptionHandler.class, usage = "path to SPECIALIST Lexicon LRAGR file.")
    private Path lragrPath;

    /** Output location of the normalization database; populated by args4j from the required positional argument. */
    @Argument(required = true, handler = PathOptionHandler.class, usage = "output path of normalization model")
    @Nullable
    private Path dbPath;
    /** Cached negation of this class's desired assertion status; assigned in the static initializer. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:edu/umn/biomedicus/normalization/NormalizerModelBuilder$LragrPos.class */
    /**
     * Immutable key made of an LRAGR syntactic category and agreement/inflection code.
     *
     * <p>Equality, hashing and ordering all use both components; ordering compares the
     * syntactic category first and the agreement/inflection code second.
     */
    private static class LragrPos implements Comparable<LragrPos> {
        private final String syntacticCategory;
        private final String agreementInflectionCode;

        /**
         * @param category syntactic category, must not be null
         * @param agreement agreement/inflection code, must not be null
         * @throws NullPointerException if either argument is null
         */
        public LragrPos(String category, String agreement) {
            this.syntacticCategory = Objects.requireNonNull(category);
            this.agreementInflectionCode = Objects.requireNonNull(agreement);
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null || obj.getClass() != getClass()) {
                return false;
            }
            LragrPos other = (LragrPos) obj;
            return agreementInflectionCode.equals(other.agreementInflectionCode)
                    && syntacticCategory.equals(other.syntacticCategory);
        }

        @Override
        public int hashCode() {
            int result = syntacticCategory.hashCode();
            result = 31 * result + agreementInflectionCode.hashCode();
            return result;
        }

        @Override
        public int compareTo(LragrPos lragrPos) {
            int byCategory = syntacticCategory.compareTo(lragrPos.syntacticCategory);
            return byCategory != 0
                    ? byCategory
                    : agreementInflectionCode.compareTo(lragrPos.agreementInflectionCode);
        }
    }

    /**
     * Creates a builder that resolves terms through the indexes of the given vocabulary.
     *
     * @param vocabulary supplies the norms index (base forms) and the words index (inflected forms)
     */
    @Inject
    public NormalizerModelBuilder(Vocabulary vocabulary) {
        normsIndex = vocabulary.getNormsIndex();
        wordsIndex = vocabulary.getWordsIndex();
    }

    /**
     * Command-line entry point: obtains a {@link NormalizerModelBuilder} from the bootstrapper
     * and hands it the raw arguments. Failures are printed as a stack trace.
     *
     * @param strArr command-line arguments, passed unchanged to {@link #process(String[])}
     */
    public static void main(String[] strArr) {
        try {
            Object instance = Bootstrapper.create(new Module[0]).getInstance(NormalizerModelBuilder.class);
            NormalizerModelBuilder builder = (NormalizerModelBuilder) instance;
            builder.process(strArr);
        } catch (BiomedicusException | IOException failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Parses the command-line arguments, reads the SPECIALIST Lexicon LRAGR file and writes the
     * collected (word, part of speech) to base-form entries into a new RocksDB database.
     *
     * <p>Steps, in order: parse arguments (printing usage and returning on a parse error),
     * delete any existing file at the output location (printing the failure and returning if
     * that is not possible), read the LRAGR file into a sorted map, then bulk-load the map into
     * RocksDB.
     *
     * @param strArr command-line arguments: {@code -l <lragr file>} and the output db path
     * @throws IOException if the LRAGR file cannot be opened for reading
     * @throws NullPointerException if either path is still unset after a successful parse
     */
    public void process(String[] strArr) throws IOException {
        CmdLineParser cmdLineParser = new CmdLineParser(this);
        try {
            cmdLineParser.parseArgument(strArr);
        } catch (CmdLineException e) {
            System.err.println(e.getLocalizedMessage());
            System.err.println("java edu.umn.biomedicus.normalization.NormalizerModelBuilder -l [path-to-lragr] [path-to-output-db]");
            cmdLineParser.printUsage(System.err);
            return;
        }

        // Both are declared with required = true, so a successful parse is expected to have set them.
        Path lragr = Objects.requireNonNull(this.lragrPath,
                "should be non-null by this point based on required = true");
        Path db = Objects.requireNonNull(this.dbPath,
                "should be non-null by this point based on required = true");

        System.out.println("Starting building normalizer model from: " + lragr.toString());

        // Only the delete is guarded here, so that a failure to read the LRAGR file is not
        // misreported as a failure to delete the old database.
        try {
            Files.deleteIfExists(db);
        } catch (IOException e) {
            System.out.println("Failed to delete an existing db at location: " + db.toString());
            e.printStackTrace();
            return;
        }

        // Sorted so that keys are handed to RocksDB in TermPos order during the bulk load.
        Map<TermPos, TermString> builder = new TreeMap<>();
        Pattern excluded = Pattern.compile(".*[|$#,@;:<>?\\[\\]{}\\d.].*");
        long total = countLines(lragr);
        AtomicLong processed = new AtomicLong();
        try (Stream<String> lines = Files.lines(lragr)) {
            lines.map(line -> line.split("\\|")).forEach(fields -> {
                // NOTE: only entries that pass the filters are counted, so the reported number
                // can stay below the total line count.
                if (addEntry(fields, excluded, builder) && processed.incrementAndGet() % 10000 == 0) {
                    System.out.println("Read " + processed.get() + " of " + total + " from LRAGR.");
                }
            });
        }

        writeDatabase(builder, db);
    }

    /** Counts the lines of the given file, closing the underlying stream afterwards. */
    private static long countLines(Path file) throws IOException {
        try (Stream<String> lines = Files.lines(file)) {
            return lines.count();
        }
    }

    /**
     * Adds the mappings for one split LRAGR line to {@code builder}.
     *
     * @return {@code true} if the line counts towards the progress total; {@code false} if it
     *     was skipped because the inflected form matches {@code excluded}, is longer than
     *     {@link #IGNORE_WHEN_LONGER}, or is unknown to the words index
     */
    private boolean addEntry(String[] fields, Pattern excluded, Map<TermPos, TermString> builder) {
        String inflected = fields[LRAGR_INFLECTIONAL_VARIANT];
        if (excluded.matcher(inflected).matches() || inflected.length() > IGNORE_WHEN_LONGER) {
            return false;
        }
        String category = fields[LRAGR_SYNTACTIC_CATEGORY].trim();
        String agreement = fields[LRAGR_AGREEMENT_INFLECTION_CODE].trim();
        String baseForm = fields[LRAGR_BASE_FORM].trim();
        if (inflected.endsWith(baseForm)) {
            // Nothing is stored when the inflected form already ends with the base form.
            return true;
        }

        LragrPos lragrPos = new LragrPos(category, agreement);
        PartOfSpeech primary = LRAGR_TO_PENN.get(lragrPos);
        StringIdentifier termIdentifier = this.wordsIndex.getTermIdentifier(inflected);
        if (termIdentifier.isUnknown()) {
            return false;
        }
        storeNorm(builder, termIdentifier, primary, baseForm);
        storeNorm(builder, termIdentifier, LRAGR_TO_PENN_FALLBACK.get(lragrPos), baseForm);
        return true;
    }

    /** Records the base form for (term, part of speech); does nothing when the part of speech is null. */
    private void storeNorm(Map<TermPos, TermString> builder, StringIdentifier termIdentifier,
            @Nullable PartOfSpeech partOfSpeech, String baseForm) {
        if (partOfSpeech != null) {
            builder.put(new TermPos(termIdentifier, partOfSpeech),
                    new TermString(this.normsIndex.getTermIdentifier(baseForm), baseForm));
        }
    }

    /**
     * Writes every entry of {@code builder} to a RocksDB database at {@code db}, opened with
     * create-if-missing and bulk-load options. A {@link RocksDBException} from opening the
     * database is printed; one from an individual put is rethrown wrapped in a
     * {@link RuntimeException}.
     */
    private static void writeDatabase(Map<TermPos, TermString> builder, Path db) {
        RocksDB.loadLibrary();
        System.out.println("Creating normalizer db from " + builder.size() + " terms");
        try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) {
            try (RocksDB rocksDB = RocksDB.open(options, db.toString())) {
                builder.forEach((termPos, termString) -> {
                    try {
                        rocksDB.put(termPos.getBytes(), termString.getBytes());
                    } catch (RocksDBException e) {
                        throw new RuntimeException(e);
                    }
                });
            } catch (RocksDBException e) {
                e.printStackTrace();
            }
        }
    }

    /*
     * Initializes the assertion flag, the logger and the two LRAGR-to-Penn lookup tables.
     * Both tables are published as unmodifiable views of typed maps.
     */
    static {
        $assertionsDisabled = !NormalizerModelBuilder.class.desiredAssertionStatus();
        LOGGER = LoggerFactory.getLogger(NormalizerModelBuilder.class);

        // Primary table: one Penn tag for every supported (category, agreement code) pair.
        Map<LragrPos, PartOfSpeech> primary = new HashMap<>();
        primary.put(new LragrPos("noun", "uncount(thr_plur)"), PartOfSpeech.NNS);
        primary.put(new LragrPos("noun", "count(thr_plur)"), PartOfSpeech.NNS);
        primary.put(new LragrPos("noun", "uncount(thr_sing)"), PartOfSpeech.NN);
        primary.put(new LragrPos("noun", "count(thr_sing)"), PartOfSpeech.NN);
        primary.put(new LragrPos("verb", "infinitive"), PartOfSpeech.VB);
        primary.put(new LragrPos("verb", "pres(thr_sing)"), PartOfSpeech.VBZ);
        primary.put(new LragrPos("verb", "past"), PartOfSpeech.VBD);
        primary.put(new LragrPos("verb", "past_part"), PartOfSpeech.VBN);
        primary.put(new LragrPos("verb", "pres_part"), PartOfSpeech.VBG);
        primary.put(new LragrPos("adj", "comparative"), PartOfSpeech.JJR);
        primary.put(new LragrPos("adj", "superlative"), PartOfSpeech.JJS);
        primary.put(new LragrPos("adj", "positive"), PartOfSpeech.JJ);
        primary.put(new LragrPos("adv", "comparative"), PartOfSpeech.RBR);
        primary.put(new LragrPos("adv", "superlative"), PartOfSpeech.RBS);
        primary.put(new LragrPos("adv", "positive"), PartOfSpeech.RB);
        LRAGR_TO_PENN = Collections.unmodifiableMap(primary);

        // Fallback table: nouns only, with the singular/plural tags swapped relative to the
        // primary table, so each noun entry is also stored under the opposite tag.
        Map<LragrPos, PartOfSpeech> fallback = new HashMap<>();
        fallback.put(new LragrPos("noun", "uncount(thr_plur)"), PartOfSpeech.NN);
        fallback.put(new LragrPos("noun", "count(thr_plur)"), PartOfSpeech.NN);
        fallback.put(new LragrPos("noun", "uncount(thr_sing)"), PartOfSpeech.NNS);
        fallback.put(new LragrPos("noun", "count(thr_sing)"), PartOfSpeech.NNS);
        LRAGR_TO_PENN_FALLBACK = Collections.unmodifiableMap(fallback);
    }
}
