package edu.umn.biomedicus.tnt;

import edu.umn.biomedicus.common.tuples.WordCap;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
import edu.umn.biomedicus.tagging.PosTag;
import edu.umn.biomedicus.tokenization.ParseToken;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Collects training statistics from tagged sentences and turns them into a {@link TntModel}.
 *
 * <p>Each sentence given to {@link #addSentence(List, List)} is forwarded to a
 * {@link PosCapTrigramModelTrainer}, and its words are counted in every configured
 * {@link FilteredWordPosFrequencies} table. {@link #createModel()} then trains one
 * {@link KnownWordProbabilityModel} and one {@link SuffixWordProbabilityModel} per table.
 *
 * <p>Instances are obtained through {@link #builder()}.
 */
public class TntModelTrainer {
    private static final Logger LOGGER = LoggerFactory.getLogger(TntModelTrainer.class);
    private final List<FilteredWordPosFrequencies> filteredWordPosFrequencies;
    private final PosCapTrigramModelTrainer posCapTrigramModelTrainer;
    private final int maxSuffixLength;
    private final int maxWordFrequency;
    private final boolean useMslSuffixModel;
    private final boolean restrictToOpenClass;
    private final DataStoreFactory dataStoreFactory;

    /**
     * Fluent builder for {@link TntModelTrainer}.
     *
     * <p>Defaults: max suffix length 5, max word frequency 10, capitalization enabled, MSL suffix
     * model disabled, open-class restriction disabled, no data store factory.
     */
    public static final class Builder {
        private int maxSuffixLength = 5;
        private int maxWordFrequency = 10;
        private boolean useCapitalization = true;
        private boolean useMslSuffixModel = false;
        private boolean restrictToOpenClass = false;
        private DataStoreFactory dataStoreFactory;

        private Builder() {
        }

        /**
         * Sets the suffix length passed to the suffix expansion and to each suffix model.
         *
         * @param maxSuffixLength the maximum suffix length
         * @return this builder
         */
        public Builder maxSuffixLength(int maxSuffixLength) {
            this.maxSuffixLength = maxSuffixLength;
            return this;
        }

        /**
         * Sets the occurrence cut-off used to select the words that feed the suffix models.
         *
         * @param maxWordFrequency the cut-off passed to
         *     {@code WordPosFrequencies.onlyWordsOccurringUpTo}
         * @return this builder
         */
        public Builder maxWordFrequency(int maxWordFrequency) {
            this.maxWordFrequency = maxWordFrequency;
            return this;
        }

        /**
         * Chooses between the two capitalization-specific frequency tables and a single table.
         *
         * @param useCapitalization {@code true} to create two filtered tables, {@code false} to
         *     create one
         * @return this builder
         */
        public Builder useCapitalization(boolean useCapitalization) {
            this.useCapitalization = useCapitalization;
            return this;
        }

        /**
         * Requests the MSL suffix model. This is currently unsupported: a trainer built with
         * {@code true} throws {@link UnsupportedOperationException} from
         * {@link TntModelTrainer#createModel()}.
         *
         * @param useMslSuffixModel whether to request the MSL suffix model
         * @return this builder
         */
        public Builder useMslSuffixModel(boolean useMslSuffixModel) {
            this.useMslSuffixModel = useMslSuffixModel;
            return this;
        }

        /**
         * Selects the tag set handed to the word models during training.
         *
         * @param restrictToOpenClass {@code true} to train with
         *     {@code PartsOfSpeech.getOpenClass()}, {@code false} to train with
         *     {@code PartsOfSpeech.getRealTags()}
         * @return this builder
         */
        public Builder restrictToOpenClass(boolean restrictToOpenClass) {
            this.restrictToOpenClass = restrictToOpenClass;
            return this;
        }

        /**
         * Sets the factory handed to every word model's {@code createDataStore} call.
         *
         * @param dataStoreFactory the data store factory
         * @return this builder
         */
        public Builder dataStoreFactory(DataStoreFactory dataStoreFactory) {
            this.dataStoreFactory = dataStoreFactory;
            return this;
        }

        /**
         * Creates the trainer from the current builder settings.
         *
         * @return a new trainer with empty statistics
         */
        public TntModelTrainer build() {
            // NOTE(review): the meaning of the boolean flags is defined by WordCapFilter and
            // WordCapAdapter, which are not visible from this file; values are kept as they were.
            List<FilteredWordPosFrequencies> frequencyTables = new ArrayList<>();
            if (useCapitalization) {
                frequencyTables.add(new FilteredWordPosFrequencies(
                        new WordCapFilter(true, false), new WordCapAdapter(true, false)));
                frequencyTables.add(new FilteredWordPosFrequencies(
                        new WordCapFilter(false, true), new WordCapAdapter(true, false)));
            } else {
                frequencyTables.add(new FilteredWordPosFrequencies(
                        new WordCapFilter(false, false), new WordCapAdapter(true, true)));
            }
            return new TntModelTrainer(frequencyTables, new PosCapTrigramModelTrainer(),
                    maxSuffixLength, maxWordFrequency, useMslSuffixModel, restrictToOpenClass,
                    dataStoreFactory);
        }
    }

    private TntModelTrainer(List<FilteredWordPosFrequencies> filteredWordPosFrequencies,
            PosCapTrigramModelTrainer posCapTrigramModelTrainer, int maxSuffixLength,
            int maxWordFrequency, boolean useMslSuffixModel, boolean restrictToOpenClass,
            DataStoreFactory dataStoreFactory) {
        this.filteredWordPosFrequencies = filteredWordPosFrequencies;
        this.posCapTrigramModelTrainer = posCapTrigramModelTrainer;
        this.maxSuffixLength = maxSuffixLength;
        this.maxWordFrequency = maxWordFrequency;
        this.useMslSuffixModel = useMslSuffixModel;
        this.restrictToOpenClass = restrictToOpenClass;
        this.dataStoreFactory = dataStoreFactory;
    }

    /**
     * Creates a builder initialized with the default settings.
     *
     * @return a new builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Adds one tagged sentence to the training statistics.
     *
     * <p>For every token, the word and whether its first character is upper case are recorded
     * against the token's part of speech in each frequency table. Tokens whose tag has no part of
     * speech are not counted. The complete sentence is then forwarded to the trigram trainer.
     *
     * @param tokens the tokens of the sentence
     * @param posTags the tags of the sentence; the tag at index {@code i} belongs to the token at
     *     index {@code i}, so this list must be at least as long as {@code tokens}
     */
    public void addSentence(List<ParseToken> tokens, List<PosTag> posTags) {
        for (int index = 0; index < tokens.size(); index++) {
            String text = tokens.get(index).getText();
            // An empty token has no first character; treat it as not capitalized instead of
            // failing with StringIndexOutOfBoundsException.
            boolean capitalized = !text.isEmpty() && Character.isUpperCase(text.charAt(0));
            PartOfSpeech partOfSpeech = posTags.get(index).getPartOfSpeech();
            if (partOfSpeech == null) {
                continue;
            }
            WordCap wordCap = new WordCap(text, capitalized);
            for (FilteredWordPosFrequencies frequencies : filteredWordPosFrequencies) {
                frequencies.addWord(wordCap, partOfSpeech);
            }
        }
        // NOTE(review): whether the trigram trainer itself tolerates empty token text cannot be
        // seen from this file - confirm in PosCapTrigramModelTrainer.addSentence.
        posCapTrigramModelTrainer.addSentence(tokens, posTags);
    }

    /**
     * Trains all models from the statistics collected so far.
     *
     * <p>The resulting word model list contains every known-word model (ids {@code 0..n-1})
     * followed by every suffix model (ids {@code n..2n-1}), where {@code n} is the number of
     * frequency tables.
     *
     * @return the trained model
     * @throws UnsupportedOperationException if the trainer was built with
     *     {@code useMslSuffixModel(true)}
     */
    // The element type expected by the TntModel constructor is not visible from this file, so the
    // combined list stays raw exactly as before.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public TntModel createModel() {
        // Fail before building anything: previously this was only detected inside the loop,
        // after the trigram model, a data store and a known-word model had already been created.
        if (useMslSuffixModel) {
            throw new UnsupportedOperationException("MSL suffix model training is not supported");
        }

        PosCapTrigramModel trigramModel = posCapTrigramModelTrainer.build();
        Set<PartOfSpeech> tagSet = restrictToOpenClass
                ? PartsOfSpeech.getOpenClass()
                : PartsOfSpeech.getRealTags();

        int tableCount = filteredWordPosFrequencies.size();
        List wordModels = new ArrayList();
        List suffixModels = new ArrayList();
        int index = 0;
        for (FilteredWordPosFrequencies table : filteredWordPosFrequencies) {
            wordModels.add(trainKnownWordModel(index, table, tagSet));
            // Suffix model ids continue after the known-word model ids.
            suffixModels.add(trainSuffixModel(tableCount + index, table, tagSet));
            index++;
        }
        wordModels.addAll(suffixModels);

        LOGGER.debug("Word models: {}", wordModels);
        return new TntModel(trigramModel, wordModels);
    }

    /** Configures and trains the known-word model for one frequency table. */
    private KnownWordProbabilityModel trainKnownWordModel(int id, FilteredWordPosFrequencies table,
            Set<PartOfSpeech> tagSet) {
        KnownWordProbabilityModel model = new KnownWordProbabilityModel();
        model.setId(id);
        model.setFilter(table.getFilter());
        model.setWordCapAdapter(table.getWordCapAdapter());
        model.createDataStore(dataStoreFactory);
        model.train(table.getWordPosFrequencies(), tagSet);
        return model;
    }

    /** Configures and trains the suffix model for one frequency table. */
    private SuffixWordProbabilityModel trainSuffixModel(int id, FilteredWordPosFrequencies table,
            Set<PartOfSpeech> tagSet) {
        WordPosFrequencies suffixFrequencies = table.getWordPosFrequencies()
                .onlyWordsOccurringUpTo(maxWordFrequency)
                .expandSuffixes(maxSuffixLength);
        SuffixWordProbabilityModel model = new SuffixWordProbabilityModel();
        model.setMaxSuffixLength(maxSuffixLength);
        model.setId(id);
        model.setWordCapAdapter(table.getWordCapAdapter());
        model.createDataStore(dataStoreFactory);
        model.setFilter(table.getFilter());
        model.trainPI(suffixFrequencies, tagSet);
        return model;
    }
}
