package edu.umn.biomedicus.acronym;

import com.google.inject.Inject;
import com.google.inject.ProvidedBy;
import com.google.inject.Singleton;
import edu.umn.biomedicus.annotations.Setting;
import edu.umn.biomedicus.common.collect.HashIndexMap;
import edu.umn.biomedicus.common.collect.IndexMap;
import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.DataLoader;
import edu.umn.biomedicus.tokenization.Token;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.yaml.snakeyaml.Yaml;

@ProvidedBy(Loader.class)
/* loaded from: input_file:edu/umn/biomedicus/acronym/OrthographicAcronymModel.class */
public class OrthographicAcronymModel implements Serializable {
    static final IndexMap<Character> CASE_SENS_SYMBOLS = new HashIndexMap((Set) "abcdefghijklmnopqrstuvwxyz.-ABCDEFGHIJKLMNOPQRSTUVWXYZ0?^$".chars().mapToObj(i -> {
        return Character.valueOf((char) i);
    }).collect(Collectors.toSet()));
    static final Set<Character> CASE_SENS_CHARS = (Set) "abcdefghijklmnopqrstuvwxyz.-ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars().mapToObj(i -> {
        return Character.valueOf((char) i);
    }).collect(Collectors.toSet());
    static final IndexMap<Character> CASE_INSENS_SYMBOLS = new HashIndexMap((Set) "abcdefghijklmnopqrstuvwxyz.-0?^$".chars().mapToObj(i -> {
        return Character.valueOf((char) i);
    }).collect(Collectors.toSet()));
    static final Set<Character> CASE_INSENS_CHARS = (Set) "abcdefghijklmnopqrstuvwxyz.-".chars().mapToObj(i -> {
        return Character.valueOf((char) i);
    }).collect(Collectors.toSet());
    private final double[][][] abbrevProbs;
    private final double[][][] longformProbs;
    private final boolean caseSensitive;
    private final Set<String> longformsLower;
    private final transient IndexMap<Character> symbols;
    private final transient Set<Character> chars;

    @Singleton
    /* loaded from: input_file:edu/umn/biomedicus/acronym/OrthographicAcronymModel$Loader.class */
    static class Loader extends DataLoader<OrthographicAcronymModel> {
        private final Path orthographicModel;
        private IndexMap<Character> symbols;

        @Inject
        Loader(@Setting("acronym.orthographicModel.asDataPath") Path path) {
            this.orthographicModel = path;
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /* JADX WARN: Can't rename method to resolve collision */
        @Override // edu.umn.biomedicus.framework.DataLoader
        public OrthographicAcronymModel loadModel() throws BiomedicusException {
            try {
                Map map = (Map) new Yaml().load(Files.newBufferedReader(this.orthographicModel));
                boolean booleanValue = ((Boolean) map.get("caseSensitive")).booleanValue();
                this.symbols = booleanValue ? OrthographicAcronymModel.CASE_SENS_SYMBOLS : OrthographicAcronymModel.CASE_INSENS_SYMBOLS;
                double[][][] expandProbs = expandProbs((Map) map.get("abbrevProbs"));
                double[][][] expandProbs2 = expandProbs((Map) map.get("longformProbs"));
                HashSet hashSet = new HashSet();
                hashSet.addAll((List) map.get("longformsLower"));
                return new OrthographicAcronymModel(expandProbs, expandProbs2, booleanValue, hashSet);
            } catch (IOException e) {
                throw new BiomedicusException(e);
            }
        }

        private double[][][] expandProbs(Map<String, Double> map) {
            double[][][] dArr = new double[this.symbols.size()][this.symbols.size()][this.symbols.size()];
            for (Map.Entry<String, Double> entry : map.entrySet()) {
                String key = entry.getKey();
                dArr[this.symbols.indexOf(Character.valueOf(key.charAt(0))).intValue()][this.symbols.indexOf(Character.valueOf(key.charAt(1))).intValue()][this.symbols.indexOf(Character.valueOf(key.charAt(2))).intValue()] = entry.getValue().doubleValue();
            }
            return dArr;
        }
    }

    private OrthographicAcronymModel(double[][][] dArr, double[][][] dArr2, boolean z, Set<String> set) {
        this.abbrevProbs = dArr;
        this.longformProbs = dArr2;
        this.caseSensitive = z;
        this.longformsLower = set;
        this.symbols = z ? CASE_SENS_SYMBOLS : CASE_INSENS_SYMBOLS;
        this.chars = z ? CASE_SENS_CHARS : CASE_INSENS_CHARS;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public boolean seemsLikeAbbreviation(Token token) {
        String text = token.getText();
        String lowerCase = text.toLowerCase();
        if ((this.longformsLower != null && this.longformsLower.contains(lowerCase)) || text.length() < 2 || lowerCase.matches("[^a-z]*")) {
            return false;
        }
        if (lowerCase.matches("[^bcdfghjklmnpqrstvwxz]*") || lowerCase.matches("[^aeiouy]*")) {
            return true;
        }
        return seemsLikeAbbrevByTrigram(text);
    }

    private boolean seemsLikeAbbrevByTrigram(String str) {
        return (this.abbrevProbs == null || this.longformProbs == null || getWordLikelihood(str, this.abbrevProbs) <= getWordLikelihood(str, this.longformProbs)) ? false : true;
    }

    private double getWordLikelihood(String str, double[][][] dArr) {
        char c = '^';
        char c2 = '^';
        char c3 = '^';
        double d = 0.0d;
        for (int i = 0; i < str.length(); i++) {
            c3 = fixChar(str.charAt(i));
            d += dArr[this.symbols.indexOf(Character.valueOf(c)).intValue()][this.symbols.indexOf(Character.valueOf(c2)).intValue()][this.symbols.indexOf(Character.valueOf(c3)).intValue()];
            c = c2;
            c2 = c3;
        }
        return d + dArr[this.symbols.indexOf(Character.valueOf(c2)).intValue()][this.symbols.indexOf(Character.valueOf(c3)).intValue()][this.symbols.indexOf('$').intValue()] + dArr[this.symbols.indexOf(Character.valueOf(c3)).intValue()][this.symbols.indexOf('$').intValue()][this.symbols.indexOf('$').intValue()];
    }

    private char fixChar(char c) {
        if (!this.caseSensitive) {
            c = Character.toLowerCase(c);
        }
        if (Character.isDigit(c)) {
            c = '0';
        } else if (!this.chars.contains(Character.valueOf(c))) {
            c = '?';
        }
        return c;
    }
}
