package edu.umn.biomedicus.concepts;

import edu.umn.biomedicus.acronyms.Acronym;
import edu.umn.biomedicus.common.dictionary.StringsBag;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
import edu.umn.biomedicus.normalization.NormForm;
import edu.umn.biomedicus.sentences.Sentence;
import edu.umn.biomedicus.tagging.PosTag;
import edu.umn.biomedicus.tokenization.TermToken;
import edu.umn.biomedicus.tokenization.Token;
import edu.umn.nlpengine.Document;
import edu.umn.nlpengine.DocumentTask;
import edu.umn.nlpengine.LabelIndex;
import edu.umn.nlpengine.Labeler;
import edu.umn.nlpengine.Span;
import edu.umn.nlpengine.TextRange;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/umn/biomedicus/concepts/DetectDictionaryConcepts.class */
/**
 * Document task that looks up runs of term tokens in a {@link ConceptDictionary} and, for every
 * hit, adds one {@link DictionaryTerm} label plus one {@link UmlsConcept} label per matching
 * dictionary row.
 *
 * <p>For each sentence, every window of up to {@value #SPAN_SIZE} consecutive term tokens is
 * tried against the dictionary in three stages, stopping at the first stage that matches:
 * <ol>
 *   <li>the text covered by the window in the document,</li>
 *   <li>the same window with tokens replaced by their co-located {@link Acronym} labels
 *       (where one exists),</li>
 *   <li>the bag of norm forms inside the window (multi-token windows only).</li>
 * </ol>
 *
 * <p>NOTE(review): the label indexes and labelers are stored in instance fields that are
 * reassigned on every {@link #run(Document)} call, so a single instance does not look safe to
 * run on several documents concurrently - confirm how the framework instantiates tasks.
 */
class DetectDictionaryConcepts implements DocumentTask {
    private static final Logger LOGGER = LoggerFactory.getLogger(DetectDictionaryConcepts.class);

    /** Parts of speech that, on their own, never justify a dictionary lookup. */
    private static final Set<PartOfSpeech> TRIVIAL_POS = buildTrivialPos();

    /** Maximum number of consecutive term tokens considered as one candidate phrase. */
    private static final int SPAN_SIZE = 5;

    private final ConceptDictionary conceptDictionary;

    // Per-document state, (re)assigned at the start of run(Document).
    private Labeler<DictionaryTerm> termLabeler;
    private LabelIndex<PosTag> posTags;
    private LabelIndex<NormForm> normIndexes;
    private Labeler<UmlsConcept> conceptLabeler;

    /**
     * Creates the task.
     *
     * @param conceptDictionary dictionary queried for phrases, lowercase phrases and norm bags
     */
    @Inject
    DetectDictionaryConcepts(ConceptDictionary conceptDictionary) {
        this.conceptDictionary = conceptDictionary;
    }

    /**
     * Builds the immutable set of trivial parts of speech: a fixed list of function-word tags
     * plus everything in {@link PartsOfSpeech#getPunctuationClass()}.
     */
    private static Set<PartOfSpeech> buildTrivialPos() {
        Set<PartOfSpeech> trivial = new HashSet<>();
        Collections.addAll(trivial, PartOfSpeech.DT, PartOfSpeech.CD, PartOfSpeech.WDT, PartOfSpeech.TO, PartOfSpeech.CC, PartOfSpeech.PRP, PartOfSpeech.PRP$, PartOfSpeech.MD, PartOfSpeech.EX, PartOfSpeech.IN, PartOfSpeech.XX);
        trivial.addAll(PartsOfSpeech.getPunctuationClass());
        return Collections.unmodifiableSet(trivial);
    }

    /**
     * Looks a phrase up in the dictionary, first verbatim and then (optionally) lowercased, and
     * labels the span on the first hit.
     *
     * @param span span in the document that receives the labels
     * @param phrase text to look up
     * @param exactCaseOnly when {@code true} the lowercase fallback is skipped
     * @param penalty amount subtracted from the base score ({@code 1.0} for a verbatim hit,
     *     {@code 0.6} for a lowercase hit) before it is stored on the concepts
     * @return {@code true} if either lookup returned rows and labels were added
     */
    private boolean checkPhrase(Span span, String phrase, boolean exactCaseOnly, double penalty) {
        List<ConceptRow> exactRows = this.conceptDictionary.forPhrase(phrase);
        if (exactRows != null) {
            makeTerm(span, exactRows, 1.0d - penalty);
            return true;
        }
        if (exactCaseOnly) {
            return false;
        }
        List<ConceptRow> lowercaseRows =
                this.conceptDictionary.forLowercasePhrase(phrase.toLowerCase(Locale.ENGLISH));
        if (lowercaseRows == null) {
            return false;
        }
        makeTerm(span, lowercaseRows, 0.6d - penalty);
        return true;
    }

    /**
     * Looks up the bag of norm forms covered by a multi-token window and labels the window with
     * score {@code 0.3} when the dictionary returns rows. Norm forms whose co-located part of
     * speech tag is in {@link #TRIVIAL_POS} are left out of the bag; norm forms without a tag
     * are kept. Windows of zero or one token are ignored.
     *
     * @param tokens consecutive term tokens making up the window
     */
    private void checkTokenSet(List<TermToken> tokens) {
        if (tokens.size() <= 1) {
            return;
        }
        TermToken first = tokens.get(0);
        TermToken last = tokens.get(tokens.size() - 1);
        Span window = new Span(first.getStartIndex(), last.getEndIndex());

        StringsBag.Builder normBag = StringsBag.builder();
        for (NormForm normForm : this.normIndexes.inside(window)) {
            PosTag posTag = this.posTags.firstAtLocation(normForm);
            if (posTag == null || !TRIVIAL_POS.contains(posTag.getPartOfSpeech())) {
                normBag.addTerm(normForm.normIdentifier());
            }
        }

        List<ConceptRow> rows = this.conceptDictionary.forNorms(normBag.build());
        if (rows != null) {
            makeTerm(window, rows, 0.3d);
        }
    }

    /**
     * Adds one {@link UmlsConcept} per dictionary row and a single {@link DictionaryTerm} for
     * the given range. A row whose source cannot be resolved is recorded with the source name
     * {@code "unknown"} and a warning is logged.
     *
     * @param textRange location that the new labels cover
     * @param rows dictionary rows that matched
     * @param score value stored as the last constructor argument of each {@link UmlsConcept}
     *     (presumably a confidence - verify against {@code UmlsConcept})
     */
    private void makeTerm(TextRange textRange, List<ConceptRow> rows, double score) {
        for (ConceptRow row : rows) {
            String source = this.conceptDictionary.source(row.getSource());
            if (source == null) {
                source = "unknown";
                LOGGER.warn("Unknown source");
            }
            this.conceptLabeler.add(new UmlsConcept(textRange, row.getSui().toString(), row.getCui().toString(), row.getTui().toString(), source, score));
        }
        this.termLabeler.add(new DictionaryTerm(textRange));
    }

    /**
     * Detects dictionary concepts in every sentence of the document.
     *
     * @param document document providing the sentence, norm form, part of speech, term token
     *     and acronym label indexes, and receiving the new term and concept labels
     */
    @Override
    public void run(@Nonnull Document document) {
        LOGGER.debug("Finding concepts in document.");
        LabelIndex<Sentence> sentences = document.labelIndex(Sentence.class);
        this.normIndexes = document.labelIndex(NormForm.class);
        this.termLabeler = document.labeler(DictionaryTerm.class);
        this.conceptLabeler = document.labeler(UmlsConcept.class);
        this.posTags = document.labelIndex(PosTag.class);
        LabelIndex<TermToken> termTokens = document.labelIndex(TermToken.class);
        LabelIndex<Acronym> acronyms = document.labelIndex(Acronym.class);
        String documentText = document.getText();

        for (Sentence sentence : sentences) {
            LOGGER.trace("Identifying concepts in a sentence");

            // Rebuild the sentence with each token swapped for its co-located acronym label
            // (when present), remembering where every token landed in the rebuilt string.
            StringBuilder editedSentence = new StringBuilder();
            List<Span> editedSpans = new ArrayList<>();
            List<TermToken> sentenceTokens = termTokens.inside(sentence).asList();
            for (TermToken termToken : sentenceTokens) {
                Acronym acronym = acronyms.firstAtLocation(termToken);
                Token token;
                if (acronym != null) {
                    token = acronym;
                } else {
                    token = termToken;
                }
                String tokenText = token.getText();
                int editedStart = editedSentence.length();
                editedSpans.add(new Span(editedStart, editedStart + tokenText.length()));
                editedSentence.append(tokenText);
                if (token.getHasSpaceAfter()) {
                    editedSentence.append(' ');
                }
            }

            for (int from = 0; from < sentenceTokens.size(); from++) {
                int to = Math.min(from + SPAN_SIZE, sentenceTokens.size());
                List<TermToken> window = sentenceTokens.subList(from, to);
                TermToken first = window.get(0);

                for (int size = 1; size <= window.size(); size++) {
                    List<TermToken> candidate = window.subList(0, size);
                    TermToken last = candidate.get(size - 1);
                    Span entire = new Span(first.getStartIndex(), last.getEndIndex());

                    // Skip candidates made up solely of trivial parts of speech. allMatch is
                    // true for an empty stream, so untagged candidates are skipped as well.
                    boolean onlyTrivial = this.posTags.inside(entire).stream()
                            .map(PosTag::getPartOfSpeech)
                            .allMatch(TRIVIAL_POS::contains);
                    if (onlyTrivial) {
                        continue;
                    }

                    // Single tokens are only matched with their exact casing.
                    boolean singleToken = size == 1;

                    if (checkPhrase(entire, entire.coveredString(documentText), singleToken, 0.0d)) {
                        continue;
                    }

                    int editedBegin = editedSpans.get(from).getStartIndex();
                    int editedEnd = editedSpans.get((from + size) - 1).getEndIndex();
                    String editedPhrase = editedSentence.substring(editedBegin, editedEnd);
                    if (checkPhrase(entire, editedPhrase, singleToken, 0.1d)) {
                        continue;
                    }

                    checkTokenSet(candidate);
                }
            }
        }
    }
}
