package org.apache.ctakes.coreference.ae.features;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;

/* loaded from: input_file:org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.class */
public class StringMatchingFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation, IdentifiedAnnotation> {
    public List<Feature> extract(JCas jCas, IdentifiedAnnotation identifiedAnnotation, IdentifiedAnnotation identifiedAnnotation2) throws AnalysisEngineProcessException {
        ArrayList arrayList = new ArrayList();
        if (isPronoun(identifiedAnnotation) || isPronoun(identifiedAnnotation2)) {
            return arrayList;
        }
        String coveredText = identifiedAnnotation.getCoveredText();
        String coveredText2 = identifiedAnnotation2.getCoveredText();
        Set<String> contentWords = contentWords(identifiedAnnotation);
        Set<String> contentWords2 = contentWords(identifiedAnnotation2);
        arrayList.add(new Feature("MATCH_EXACT", Boolean.valueOf(coveredText.equalsIgnoreCase(coveredText2))));
        arrayList.add(new Feature("MATCH_START", Boolean.valueOf(startMatch(coveredText, coveredText2))));
        arrayList.add(new Feature("MATCH_END", Boolean.valueOf(endMatch(coveredText, coveredText2))));
        arrayList.add(new Feature("MATCH_SOON", Boolean.valueOf(soonMatch(coveredText, coveredText2))));
        arrayList.add(new Feature("MATCH_OVERLAP", Boolean.valueOf(wordOverlap(contentWords, contentWords2))));
        arrayList.add(new Feature("MATCH_SUBSTRING", Boolean.valueOf(wordSubstring(contentWords, contentWords2))));
        return arrayList;
    }

    public static boolean startMatch(String str, String str2) {
        int indexOf = str.indexOf(" ");
        int indexOf2 = str2.indexOf(" ");
        return str.substring(0, indexOf == -1 ? str.length() > 5 ? 5 : str.length() : indexOf).equalsIgnoreCase(str2.substring(0, indexOf2 == -1 ? str2.length() > 5 ? 5 : str2.length() : indexOf2));
    }

    public static boolean endMatch(String str, String str2) {
        int lastIndexOf = str.lastIndexOf(" ");
        int lastIndexOf2 = str2.lastIndexOf(" ");
        return str.substring(lastIndexOf == -1 ? str.length() > 5 ? str.length() - 5 : 0 : lastIndexOf + 1).equalsIgnoreCase(str2.substring(lastIndexOf2 == -1 ? str2.length() > 5 ? str2.length() - 5 : 0 : lastIndexOf2 + 1));
    }

    public static boolean soonMatch(String str, String str2) {
        return nonDetSubstr(str.toLowerCase()).equals(nonDetSubstr(str2.toLowerCase()));
    }

    public static String nonDetSubstr(String str) {
        if (str.startsWith("the ")) {
            return str.substring(4);
        }
        if (str.startsWith("a ")) {
            return str.substring(2);
        }
        if (!str.startsWith("this ") && !str.startsWith("that ")) {
            if (!str.startsWith("these ") && !str.startsWith("those ")) {
                return str;
            }
            return str.substring(6);
        }
        return str.substring(5);
    }

    public static boolean wordOverlap(Set<String> set, Set<String> set2) {
        Iterator<String> it = set2.iterator();
        while (it.hasNext()) {
            if (set.contains(it.next())) {
                return true;
            }
        }
        return false;
    }

    public static boolean wordSubstring(Set<String> set, Set<String> set2) {
        for (String str : set) {
            for (String str2 : set2) {
                if (str.contains(str2) || str2.contains(str)) {
                    return true;
                }
            }
        }
        return false;
    }

    public static Set<String> contentWords(Annotation annotation) {
        HashSet hashSet = new HashSet();
        Iterator it = JCasUtil.selectCovered(BaseToken.class, annotation).iterator();
        while (it.hasNext()) {
            hashSet.add(((BaseToken) it.next()).getCoveredText().toLowerCase());
        }
        return hashSet;
    }

    public static boolean isPronoun(IdentifiedAnnotation identifiedAnnotation) {
        List selectCovered = JCasUtil.selectCovered(BaseToken.class, identifiedAnnotation);
        if (selectCovered.size() != 1) {
            return false;
        }
        BaseToken baseToken = (BaseToken) selectCovered.get(0);
        if (baseToken.getPartOfSpeech() == null) {
            return false;
        }
        return baseToken.getPartOfSpeech().startsWith("PRP") || baseToken.getPartOfSpeech().equals("DT");
    }

    public static boolean inQuote(JCas jCas, Annotation annotation) {
        boolean z = false;
        String documentText = jCas.getDocumentText();
        int lastIndexOf = documentText.lastIndexOf("\n", annotation.getBegin());
        if (lastIndexOf != 0 && documentText.indexOf(34, lastIndexOf) != 0) {
            z = true;
        }
        return z;
    }
}
