package edu.umn.biomedicus.tnt;

import edu.umn.biomedicus.common.tuples.PosCap;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.tagging.PosTag;
import edu.umn.biomedicus.tokenization.ParseToken;
import java.util.Arrays;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:edu/umn/biomedicus/tnt/PosCapTrigramModelTrainer.class */
/**
 * Accumulates unigram, bigram and trigram counts of {@link PosCap} values (a part of speech
 * combined with a capitalization flag) over training sentences and turns those counts into a
 * {@link PosCapTrigramModel}.
 *
 * <p>Typical use: create a trainer, call {@link #addSentence(List, List)} once per training
 * sentence, then call {@link #build()}.
 *
 * <p>Instances hold mutable counters and perform no synchronization.
 */
public class PosCapTrigramModelTrainer {
    private static final Logger LOGGER = LoggerFactory.getLogger(PosCapTrigramModelTrainer.class);
    private static final PosCap BBS_POS_CAP = PosCap.create(PartOfSpeech.BBS, false);
    private static final PosCap BOS_POS_CAP = PosCap.create(PartOfSpeech.BOS, false);
    private static final PosCap EOS_POS_CAP = PosCap.create(PartOfSpeech.EOS, false);

    /** Occurrence count of each PosCap, indexed by ordinal. */
    private final int[] tagFrequencies;

    /** Occurrence count of each PosCap pair, indexed {@code [first][second]} by ordinal. */
    private final int[][] bigramFrequencies;

    /** Occurrence count of each PosCap triple, indexed {@code [first][second][third]}. */
    private final int[][][] trigramFrequencies;

    /** Total number of tags counted so far, including the sentence-start markers. */
    private int taggedTokens;

    /**
     * Creates a trainer that continues from previously accumulated counts.
     *
     * <p>The arrays are stored by reference, not copied, so subsequent calls to
     * {@link #addSentence(List, List)} modify the arrays passed in.
     *
     * @param taggedTokens the number of tags already counted
     * @param trigramFrequencies trigram counts indexed {@code [first][second][third]}
     * @param bigramFrequencies bigram counts indexed {@code [first][second]}
     * @param tagFrequencies unigram counts indexed by PosCap ordinal
     */
    public PosCapTrigramModelTrainer(int taggedTokens, int[][][] trigramFrequencies,
            int[][] bigramFrequencies, int[] tagFrequencies) {
        this.taggedTokens = taggedTokens;
        this.trigramFrequencies = trigramFrequencies;
        this.bigramFrequencies = bigramFrequencies;
        this.tagFrequencies = tagFrequencies;
    }

    /**
     * Creates a trainer with all counts at zero. Each table has {@link PosCap#cardinality()}
     * entries per dimension.
     */
    public PosCapTrigramModelTrainer() {
        int cardinality = PosCap.cardinality();
        // Newly allocated int arrays are already zero-filled, so no explicit fill is needed.
        this.trigramFrequencies = new int[cardinality][cardinality][cardinality];
        this.bigramFrequencies = new int[cardinality][cardinality];
        this.tagFrequencies = new int[cardinality];
        this.taggedTokens = 0;
    }

    /**
     * Adds the tag sequence of one sentence to the running counts.
     *
     * <p>The sequence is prefixed with the BBS and BOS markers. Both markers and their bigram
     * are counted once per sentence, then every trigram ending in one of the sentence's own
     * tags is counted together with its trailing bigram and unigram.
     *
     * @param tokens the tokens of the sentence; a token counts as capitalized when the first
     *     character of its text is upper case (an empty text counts as not capitalized)
     * @param partsOfSpeech the tag for each token, aligned by index with {@code tokens}
     * @throws IndexOutOfBoundsException if {@code partsOfSpeech} has fewer elements than
     *     {@code tokens}
     */
    public void addSentence(List<ParseToken> tokens, List<PosTag> partsOfSpeech) {
        int tokenCount = tokens.size();

        // Layout: [BBS, BOS, tag_0 .. tag_(n-1), EOS]
        int[] tags = new int[tokenCount + 3];
        tags[0] = BBS_POS_CAP.ordinal();
        tags[1] = BOS_POS_CAP.ordinal();
        for (int i = 0; i < tokenCount; i++) {
            CharSequence text = tokens.get(i).getText();
            boolean capitalized = text.length() > 0 && Character.isUpperCase(text.charAt(0));
            tags[i + 2] = PosCap.create(partsOfSpeech.get(i).getPartOfSpeech(), capitalized).ordinal();
        }
        int length = tags.length;
        tags[length - 1] = EOS_POS_CAP.ordinal();

        // The two start markers are not the third element of any trigram, so count them here.
        this.tagFrequencies[tags[0]]++;
        this.tagFrequencies[tags[1]]++;
        this.bigramFrequencies[tags[0]][tags[1]]++;
        this.taggedTokens += 2;

        // NOTE(review): with this bound the highest index read is length - 2, so the EOS slot
        // written above is never counted. This may be an off-by-one (length - 2 would include
        // EOS); left unchanged because altering it changes the trained statistics - confirm.
        for (int start = 0; start < length - 3; start++) {
            int first = tags[start];
            int second = tags[start + 1];
            int third = tags[start + 2];
            this.trigramFrequencies[first][second][third]++;
            this.bigramFrequencies[second][third]++;
            this.tagFrequencies[third]++;
            this.taggedTokens++;
        }
    }

    /**
     * Converts the accumulated counts into a model.
     *
     * <p>Unigram, bigram and trigram probabilities are relative frequencies: the count of the
     * n-gram divided by the count of its context (the total tag count for unigrams). The three
     * interpolation coefficients are estimated by letting every observed trigram vote, with a
     * weight equal to its count, for whichever of the three leave-one-out estimates
     * {@code (count - 1) / (contextCount - 1)} is largest; the votes are then normalized to sum
     * to one. This appears to follow the deleted-interpolation scheme of the TnT tagger.
     *
     * <p>Any ratio with a zero denominator is defined as zero, so building from a trainer with
     * no data yields all-zero probabilities and coefficients instead of failing.
     *
     * @return a model holding the probabilities and interpolation coefficients
     */
    public PosCapTrigramModel build() {
        LOGGER.info("Building pos cap trigram model");
        LOGGER.debug("Computing unigram probabilities");
        double[] unigramProbabilities = Arrays.stream(this.tagFrequencies)
                .mapToDouble(count -> ratio(count, this.taggedTokens))
                .toArray();

        LOGGER.debug("Computing bigram probabilities");
        int cardinality = PosCap.cardinality();
        double[][] bigramProbabilities = new double[cardinality][cardinality];
        for (int first = 0; first < cardinality; first++) {
            for (int second = 0; second < cardinality; second++) {
                bigramProbabilities[first][second] =
                        ratio(this.bigramFrequencies[first][second], this.tagFrequencies[first]);
            }
        }

        long unigramVotes = 0;
        long bigramVotes = 0;
        long trigramVotes = 0;
        LOGGER.debug("Computing trigram probabilities and smoothing coefficients");
        double[][][] trigramProbabilities = new double[cardinality][cardinality][cardinality];
        for (int first = 0; first < cardinality; first++) {
            for (int second = 0; second < cardinality; second++) {
                int contextCount = this.bigramFrequencies[first][second];
                for (int third = 0; third < cardinality; third++) {
                    int trigramCount = this.trigramFrequencies[first][second][third];
                    trigramProbabilities[first][second][third] = ratio(trigramCount, contextCount);
                    if (trigramCount == 0) {
                        // An unseen trigram would add zero votes; skip the comparisons.
                        continue;
                    }
                    double trigramEstimate = ratio(trigramCount - 1, contextCount - 1);
                    double bigramEstimate = ratio(this.bigramFrequencies[second][third] - 1,
                            this.tagFrequencies[second] - 1);
                    double unigramEstimate = ratio(this.tagFrequencies[third] - 1,
                            this.taggedTokens - 1);
                    // Ties favor the higher-order estimate.
                    if (trigramEstimate >= bigramEstimate && trigramEstimate >= unigramEstimate) {
                        trigramVotes += trigramCount;
                    } else if (bigramEstimate >= unigramEstimate) {
                        bigramVotes += trigramCount;
                    } else {
                        unigramVotes += trigramCount;
                    }
                }
            }
        }

        double totalVotes = (double) unigramVotes + bigramVotes + trigramVotes;
        double unigramLambda = totalVotes == 0.0d ? 0.0d : unigramVotes / totalVotes;
        double bigramLambda = totalVotes == 0.0d ? 0.0d : bigramVotes / totalVotes;
        double trigramLambda = totalVotes == 0.0d ? 0.0d : trigramVotes / totalVotes;
        LOGGER.info("Finished build pos cap trigram model");
        return new PosCapTrigramModel(unigramProbabilities, bigramProbabilities,
                trigramProbabilities, unigramLambda, bigramLambda, trigramLambda);
    }

    /**
     * Divides two counts in floating point, defining the result as zero when the denominator
     * is zero. Dividing the ints directly would truncate the ratio and would throw on a zero
     * denominator.
     */
    private static double ratio(int numerator, int denominator) {
        return denominator == 0 ? 0.0d : (double) numerator / denominator;
    }
}
