package edu.umn.biomedicus.vocabulary;

import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Module;
import com.google.inject.Stage;
import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.Bootstrapper;
import edu.umn.biomedicus.tokenization.ParseToken;
import edu.umn.biomedicus.tokenization.TermTokenMerger;
import edu.umn.biomedicus.tokenization.TokenResult;
import edu.umn.biomedicus.tokenization.Tokenizer;
import edu.umn.nlpengine.Span;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.PathOptionHandler;

/**
 * Command-line tool that reads the {@code LRAGR} file of a SPECIALIST Lexicon installation and
 * the {@code MRCONSO.RRF} file of a UMLS installation, tokenizes the phrases found in them and
 * feeds the resulting strings into the words, terms and norms index builders obtained from a
 * {@link VocabularyBuilder}.
 *
 * <p>Usage: {@code VocabularyInitializer -s <specialist path> -u <umls path> <output path>}.
 */
public class VocabularyInitializer {
    /** Splits one pipe-delimited line into its columns. */
    private static final Pattern PIPE_SPLITTER = Pattern.compile("\\|");

    /** A progress message is printed every time this many lines have been read. */
    private static final int PROGRESS_INTERVAL = 10000;

    private final VocabularyBuilder builder;
    private TermIndexBuilder normsIndexBuilder;
    private TermIndexBuilder termsIndexBuilder;
    private TermIndexBuilder wordsIndexBuilder;

    @Nullable
    @Option(name = "-s", required = true, handler = PathOptionHandler.class, usage = "path to SPECIALIST Lexicon installation.")
    private Path specialistPath;

    @Nullable
    @Option(name = "-u", required = true, handler = PathOptionHandler.class, usage = "path to UMLS installation.")
    private Path umlsPath;

    @Inject
    private VocabularyInitializer(VocabularyBuilder vocabularyBuilder) {
        this.builder = vocabularyBuilder;
    }

    /**
     * Receives the positional output-path argument from the command-line parser, forwards it to
     * the vocabulary builder and then creates the three index builders from it.
     *
     * @param path the output location passed on to {@link VocabularyBuilder#setOutputPath}
     */
    @Argument(required = true, handler = PathOptionHandler.class)
    public void setOutputPath(Path path) {
        this.builder.setOutputPath(path);
        this.normsIndexBuilder = this.builder.createNormsIndexBuilder();
        this.termsIndexBuilder = this.builder.createTermsIndexBuilder();
        this.wordsIndexBuilder = this.builder.createWordsIndexBuilder();
    }

    /**
     * Program entry point: obtains an instance through the bootstrapper and runs it with the
     * given arguments. A {@link BiomedicusException} is reported by printing its stack trace.
     *
     * @param strArr the raw command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            VocabularyInitializer initializer = (VocabularyInitializer) Bootstrapper
                    .create(Guice.createInjector(Stage.DEVELOPMENT))
                    .getInstance(VocabularyInitializer.class);
            initializer.doMain(strArr);
        } catch (BiomedicusException e) {
            e.printStackTrace();
        }
    }

    /**
     * Tokenizes a phrase, adds the text of every token to the words index, and adds the text of
     * every token produced by running a {@link TermTokenMerger} over those tokens to the terms
     * index.
     *
     * @param str the phrase to index
     * @throws BiomedicusException propagated from the index builders
     */
    void addPhrase(String str) throws BiomedicusException {
        Iterator<TokenResult> tokens = Tokenizer.tokenize(str).iterator();
        List<ParseToken> parseTokens = new ArrayList<>();

        // One-token lookahead: each token is emitted together with a flag computed from the
        // token that follows it.
        TokenResult current = tokens.hasNext() ? tokens.next() : null;
        while (current != null) {
            TokenResult next = tokens.hasNext() ? tokens.next() : null;
            int start = current.getStartIndex();
            int end = current.getEndIndex();
            String text = str.substring(start, end);
            this.wordsIndexBuilder.addTerm(text);

            // NOTE(review): this compares the current token's START with the next token's END.
            // If the flag is meant to signal whitespace after the token, the intended test is
            // presumably current.getEndIndex() != next.getStartIndex(). Behavior is preserved
            // here because ParseToken's contract is not visible; confirm before changing.
            boolean flag = next != null && start != next.getEndIndex();
            parseTokens.add(new ParseToken(start, end, text, flag));
            current = next;
        }

        TermTokenMerger merger = new TermTokenMerger(parseTokens.iterator());
        while (merger.hasNext()) {
            this.termsIndexBuilder.addTerm(merger.next().getText());
        }
    }

    /**
     * Tokenizes a phrase and adds the covered text of every token to the norms index.
     *
     * @param str the phrase to index
     * @throws BiomedicusException propagated from the index builder
     */
    void addNormPhrase(String str) throws BiomedicusException {
        for (TokenResult tokenResult : Tokenizer.tokenize(str)) {
            Span span = new Span(tokenResult.getStartIndex(), tokenResult.getEndIndex());
            this.normsIndexBuilder.addTerm(span.coveredString(str).toString());
        }
    }

    /**
     * Parses the command line, indexes {@code LRAGR} and {@code MRCONSO.RRF}, writes the three
     * indexes and shuts the builder down.
     *
     * <p>Invalid arguments print the parser's message and usage to standard error. An
     * {@link IOException} while reading the input files prints its stack trace and ends the run
     * without writing the indexes.
     *
     * @param strArr the raw command-line arguments
     * @throws BiomedicusException if {@code MRCONSO.RRF} is missing, or propagated from the
     *     index builders
     */
    private void doMain(String[] strArr) throws BiomedicusException {
        CmdLineParser cmdLineParser = new CmdLineParser(this);
        try {
            cmdLineParser.parseArgument(strArr);
        } catch (CmdLineException e) {
            System.err.println(e.getLocalizedMessage());
            System.err.println("java edu.umn.biomedicus.vocabulary.VocabularyInitializer [options...]");
            cmdLineParser.printUsage(System.err);
            return;
        }

        Path mrconso = this.umlsPath.resolve("MRCONSO.RRF");
        if (Files.notExists(mrconso)) {
            throw new BiomedicusException("Could not find MRCONSO.RRF at " + this.umlsPath.toString());
        }
        Path lragr = this.specialistPath.resolve("LRAGR");

        try {
            indexLragr(lragr);
            indexMrconso(mrconso);
            writeIndexes();
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        try {
            this.builder.doShutdown();
        } catch (BiomedicusException e) {
            e.printStackTrace();
        }
    }

    /** Counts the lines of a file, closing the underlying stream when done. */
    private static long countLines(Path file) throws IOException {
        try (Stream<String> lines = Files.lines(file)) {
            return lines.count();
        }
    }

    /**
     * Indexes every line of LRAGR: column 1 goes through {@link #addPhrase} and column 4
     * through {@link #addNormPhrase}.
     */
    private void indexLragr(Path lragr) throws IOException, BiomedicusException {
        long total = countLines(lragr);
        try (Stream<String> lines = Files.lines(lragr)) {
            Iterator<String[]> rows = lines.map(PIPE_SPLITTER::split).iterator();
            int read = 0;
            while (rows.hasNext()) {
                String[] row = rows.next();
                // presumably the inflected form (1) and citation form (4) columns of LRAGR;
                // verify against the SPECIALIST Lexicon documentation
                addPhrase(row[1]);
                addNormPhrase(row[4]);
                read++;
                if (read % PROGRESS_INTERVAL == 0) {
                    System.out.println("Read " + read + " / " + total + " lines from LRAGR.");
                }
            }
        }
    }

    /** Indexes every line of MRCONSO.RRF by passing column 14 through {@link #addPhrase}. */
    private void indexMrconso(Path mrconso) throws IOException, BiomedicusException {
        long total = countLines(mrconso);
        try (Stream<String> lines = Files.lines(mrconso)) {
            Iterator<String[]> rows = lines.map(PIPE_SPLITTER::split).iterator();
            int read = 0;
            while (rows.hasNext()) {
                // presumably the STR (string) column of MRCONSO; verify against the UMLS docs
                addPhrase(rows.next()[14]);
                read++;
                if (read % PROGRESS_INTERVAL == 0) {
                    System.out.println("Read " + read + " / " + total + " lines from MRCONSO.RRF.");
                }
            }
        }
    }

    /** Writes the words, norms and terms indexes, in that order, reporting progress. */
    private void writeIndexes() throws BiomedicusException {
        System.out.println("Writing words");
        this.wordsIndexBuilder.doWrite();
        System.out.println("Writing norms");
        this.normsIndexBuilder.doWrite();
        System.out.println("Writing terms");
        this.termsIndexBuilder.doWrite();
        System.out.println("Done writing");
    }
}
