package gate.creole.orthomatcher;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Resource;
import gate.creole.ANNIEConstants;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.BomStrippingInputStreamReader;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import gate.util.Out;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

@CreoleResource(name = "ANNIE OrthoMatcher", comment = "ANNIE orthographical coreference component.", helpURL = "http://gate.ac.uk/userguide/sec:annie:orthomatcher", icon = "ortho-matcher")
/* loaded from: input_file:gate/creole/orthomatcher/OrthoMatcher.class */
public class OrthoMatcher extends AbstractLanguageAnalyser {
    private static final long serialVersionUID = -6258229350677707465L;
    public static final boolean DEBUG = false;

    // Public string constants. NOTE(review): judging by their names these look like
    // parameter names of this resource; nothing in this part of the file reads them - confirm.
    public static final String OM_DOCUMENT_PARAMETER_NAME = "document";
    public static final String OM_ANN_SET_PARAMETER_NAME = "annotationSetName";
    public static final String OM_CASE_SENSITIVE_PARAMETER_NAME = "caseSensitive";
    public static final String OM_ANN_TYPES_PARAMETER_NAME = "annotationTypes";
    public static final String OM_ORG_TYPE_PARAMETER_NAME = "organizationType";
    public static final String OM_PERSON_TYPE_PARAMETER_NAME = "personType";
    public static final String OM_EXT_LISTS_PARAMETER_NAME = "extLists";

    // List names recognised by createAnnotList(); each one selects the collection
    // (cdg, alias, def_art, prepos, connector, spur_match) that the list file is loaded into.
    protected static final String CDGLISTNAME = "cdg";
    protected static final String ALIASLISTNAME = "alias";
    protected static final String ARTLISTNAME = "def_art";
    protected static final String PREPLISTNAME = "prepos";
    protected static final String CONNECTORLISTNAME = "connector";
    protected static final String SPURLISTNAME = "spur_match";

    // Value of a token's "kind" feature that marks the token as punctuation.
    protected static final String PUNCTUATION_VALUE = "punctuation";
    // Leading token that normalizeOrganizationName() strips (compared ignoring case).
    protected static final String THE_VALUE = "The";

    // Name of the annotation set to process; execute() falls back to the default set when null.
    protected String annotationSetName;

    // Scratch state describing the pair currently being compared. Written by
    // matchAnnotations() and matchOtherAnnots(); reset to null at the end of execute().
    protected Annotation shortAnnot;
    protected Annotation longAnnot;
    protected ArrayList<Annotation> tokensLongAnnot;
    protected ArrayList<Annotation> tokensShortAnnot;
    protected ArrayList<Annotation> normalizedTokensLongAnnot;
    protected ArrayList<Annotation> normalizedTokensShortAnnot;

    // Inputs of init(): location and encoding of the definition file, and the value that
    // must be non-null when the definition file declares a "nickname" list.
    private URL definitionFileURL;
    private Double minimumNicknameLikelihood;
    private String encoding;
    // Helper created in init(); string extraction and match bookkeeping are delegated to it.
    private AnnotationOrthography orthoAnnotation;
    protected static final Logger log = Logger.getLogger(OrthoMatcher.class);

    // Pre-compiled patterns. punctPat is used by createAnnotList(); the other two are not
    // referenced in this part of the file.
    static Pattern periodPat = Pattern.compile("[\\.]+");
    static Pattern punctPat = Pattern.compile("[\\p{Punct}]+");
    static Pattern badMiddleTokens = Pattern.compile("[“”‘’'\\(\\)\"]+|^de$|^von$");

    // Annotation types to process; the constructor fills in the defaults.
    protected List<String> annotationTypes = new ArrayList(10);
    protected String organizationType = ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE;
    protected String personType = ANNIEConstants.PERSON_ANNOTATION_TYPE;
    protected String unknownType = "Unknown";

    // When false, execute() rebuilds the cdg set from the document via orthoAnnotation.buildTables().
    protected boolean extLists = true;
    // Selects which rule set apply_rules_namematch() runs for organization/Facility annotations.
    protected Boolean highPrecisionOrgs = false;
    protected boolean matchingUnknowns = true;
    // Read and reset by matchAnnotations(): when true, the new annotation must also match
    // every other annotation already linked to the previous one.
    protected boolean allMatchingNeeded = false;
    // When false, strings are lower-cased before being stored or compared.
    protected boolean caseSensitive = false;

    // Lookup tables populated by createAnnotList() from the list files.
    protected HashMap<String, String> alias = new HashMap<>(100);
    protected Set<String> cdg = new HashSet();
    protected HashMap<String, String> spur_match = new HashMap<>(100);
    protected HashMap<String, String> def_art = new HashMap<>(20);
    protected HashMap<String, String> connector = new HashMap<>(20);
    protected HashMap<String, String> prepos = new HashMap<>(30);

    // Per-document working state; cleared in the finally block of execute().
    protected AnnotationSet nameAllAnnots = null;
    // Annotation id -> (possibly lower-cased) annotation string for annotations already handled.
    protected HashMap<Integer, String> processedAnnots = new HashMap<>(150);
    // Annotation id of an unknown-type annotation -> type it should be re-created with (see matchUnknown()).
    protected HashMap<Integer, String> annots2Remove = new HashMap<>(75);
    // Match lists that execute() stores in the document coreference feature under the annotation set name.
    protected List<List<Integer>> matchesDocFeature = new ArrayList();
    // Annotation id -> tokens of that annotation, and the same after stop tokens were removed.
    protected HashMap<Integer, List<Annotation>> tokensMap = new HashMap<>(150);
    protected Map<Integer, List<Annotation>> normalizedTokensMap = new HashMap(150);
    // Rule number -> rule instance; filled by initRules().
    private Map<Integer, OrthoMatcherRule> rules = new HashMap();

    /**
     * Exposes the token lists collected for the annotations being processed.
     *
     * @return the live {@code tokensMap} field (not a copy), keyed by annotation id
     */
    public Map<Integer, List<Annotation>> getTokensMap() {
        return tokensMap;
    }

    /**
     * Creates a matcher whose default annotation types are, in order, the organization
     * type, the person type, and the ANNIE location and date types.
     */
    public OrthoMatcher() {
        Collections.addAll(
                annotationTypes,
                organizationType,
                personType,
                ANNIEConstants.LOCATION_ANNOTATION_TYPE,
                ANNIEConstants.DATE_ANNOTATION_TYPE);
    }

    /**
     * Registers the built-in rules {@code MatchRule0} .. {@code MatchRule17} in the
     * {@code rules} map, each under its own rule number.
     */
    private void initRules() {
        // The position in this array is the rule number used as the map key.
        OrthoMatcherRule[] builtIn = {
            new MatchRule0(this),
            new MatchRule1(this),
            new MatchRule2(this),
            new MatchRule3(this),
            new MatchRule4(this),
            new MatchRule5(this),
            new MatchRule6(this),
            new MatchRule7(this),
            new MatchRule8(this),
            new MatchRule9(this),
            new MatchRule10(this),
            new MatchRule11(this),
            new MatchRule12(this),
            new MatchRule13(this),
            new MatchRule14(this),
            new MatchRule15(this),
            new MatchRule16(this),
            new MatchRule17(this),
        };
        for (int ruleNumber = 0; ruleNumber < builtIn.length; ruleNumber++) {
            rules.put(ruleNumber, builtIn[ruleNumber]);
        }
    }

    /**
     * Extension hook invoked by {@code init()} right after {@code initRules()} with the
     * map of registered rules. This implementation does nothing.
     *
     * @param map the rule-number to rule map built by {@code initRules()}
     */
    protected void modifyRules(Map<Integer, OrthoMatcherRule> map) {
    }

    /**
     * Loads the definition file and prepares the matcher.
     *
     * <p>Each line of the definition file that contains a {@code ':'} is split at the first
     * colon into a file name (resolved relative to the definition file URL) and a list name.
     * Lists other than {@code "nickname"} are loaded through {@code createAnnotList}; the
     * nickname file is only remembered and its URL handed to the
     * {@code BasicAnnotationOrthography} helper. Finally the matching rules are registered
     * and {@code modifyRules} is given the chance to adjust them.
     *
     * @return this resource
     * @throws ResourceInstantiationException if no definition file URL is set, if a nickname
     *     list is declared while {@code minimumNicknameLikelihood} is null, or if reading
     *     any of the files fails
     */
    @Override // gate.creole.AbstractProcessingResource, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        if (this.definitionFileURL == null) {
            throw new ResourceInstantiationException("No URL provided for the definition file!");
        }
        String nicknameFile = null;
        try {
            // try-with-resources closes the reader on every path, including failures.
            try (BomStrippingInputStreamReader reader =
                    new BomStrippingInputStreamReader(this.definitionFileURL.openStream(), this.encoding)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    int colon = line.indexOf(":");
                    if (colon == -1) {
                        continue; // not a "file:listname" entry
                    }
                    String listFile = line.substring(0, colon);
                    String listName = line.substring(colon + 1);
                    if (!listName.equals("nickname")) {
                        createAnnotList(listFile, listName);
                    } else {
                        if (this.minimumNicknameLikelihood == null) {
                            throw new ResourceInstantiationException("No value for the required parameter minimumNicknameLikelihood!");
                        }
                        nicknameFile = listFile;
                    }
                }
            }
            URL nicknameUrl = null;
            if (nicknameFile != null) {
                nicknameUrl = new URL(this.definitionFileURL, nicknameFile);
            }
            this.orthoAnnotation = new BasicAnnotationOrthography(this.personType, this.extLists, this.unknownType, nicknameUrl, this.minimumNicknameLikelihood, this.encoding);
            initRules();
            modifyRules(this.rules);
            return this;
        } catch (IOException e) {
            throw new ResourceInstantiationException(e);
        }
    }

    /**
     * Runs the matcher over the current document.
     *
     * <p>Selects the annotation set (the default set when {@code annotationSetName} is null
     * or equals {@code OrthoMatcherRule.description}), clears earlier coreference data via
     * {@code docCleanup()}, runs {@code matchNameAnnotations()} and, when matches were
     * produced, stores them in the document coreference feature under the annotation set
     * name. All per-document working state is reset in the {@code finally} block.
     *
     * @throws ExecutionException if no document is set, or if matching fails
     */
    @Override // gate.creole.AbstractProcessingResource, gate.Executable
    public void execute() throws ExecutionException {
        try {
            if (document == null) {
                throw new ExecutionException("No document for namematch!");
            }
            fireStatusChanged("OrthoMatcher processing: " + document.getName());

            boolean useDefaultSet =
                    annotationSetName == null || annotationSetName.equals(OrthoMatcherRule.description);
            nameAllAnnots = useDefaultSet
                    ? document.getAnnotations()
                    : document.getAnnotations(annotationSetName);

            if (nameAllAnnots == null || nameAllAnnots.isEmpty()) {
                Out.prln("OrthoMatcher Warning: No annotations found for processing");
                return;
            }

            docCleanup();
            // Read after cleanup so that stale entries for this set are already gone.
            Map corefFeature = (Map) document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
            if (!extLists) {
                cdg = orthoAnnotation.buildTables(nameAllAnnots);
            }
            matchNameAnnotations();

            if (matchesDocFeature.isEmpty()) {
                return;
            }
            if (corefFeature == null) {
                corefFeature = new HashMap<Object, Object>();
            }
            corefFeature.put(nameAllAnnots.getName(), matchesDocFeature);
            document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME, corefFeature);
            // The stored list now belongs to the document; start a fresh one.
            matchesDocFeature = new ArrayList<>();
            fireStatusChanged("OrthoMatcher completed");
        } finally {
            nameAllAnnots = null;
            processedAnnots.clear();
            annots2Remove.clear();
            tokensMap.clear();
            normalizedTokensMap.clear();
            matchesDocFeature = new ArrayList<>();
            longAnnot = null;
            shortAnnot = null;
            tokensLongAnnot = null;
            tokensShortAnnot = null;
        }
    }

    /* JADX WARN: Removed duplicated region for block: B:36:0x01f6  */
    /* JADX WARN: Removed duplicated region for block: B:47:0x025c  */
    /* JADX WARN: Removed duplicated region for block: B:49:0x02af A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:50:0x021e  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the main matching pass, which is called from {@code execute()}.
     *
     * <p>The real body could not be recovered by the decompiler, so this method
     * unconditionally throws {@link UnsupportedOperationException}. As a consequence
     * {@code execute()} fails whenever it reaches this call. The implementation must be
     * restored from the original sources before this class can be used.
     *
     * @throws gate.creole.ExecutionException declared by the original signature; never
     *     thrown by this placeholder
     */
    protected void matchNameAnnotations() throws gate.creole.ExecutionException {
        /*
            Method dump skipped, instructions count: 710
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: gate.creole.orthomatcher.OrthoMatcher.matchNameAnnotations():void");
    }

    /**
     * Tries to link every annotation of the unknown type to an already processed annotation.
     *
     * <p>For each unknown annotation that contains at least one {@code Token}, its sorted
     * tokens are recorded, an exact string match against {@code processedAnnots} is tried,
     * and then either the hyphen-prefix match or the rule-based comparison with the
     * previous annotations is run. Unknown annotations that received a target type in
     * {@code annots2Remove} are finally re-created with that type and their id is replaced
     * in their own matches list.
     *
     * @param arrayList previously seen annotations to compare against
     * @throws ExecutionException declared for subclasses; not thrown directly here
     */
    protected void matchUnknown(ArrayList<Annotation> arrayList) throws ExecutionException {
        AnnotationSet unknowns = nameAllAnnots.get(unknownType);
        annots2Remove.clear();
        if (unknowns.isEmpty()) {
            return;
        }
        AnnotationSet allTokens = nameAllAnnots.get("Token");
        if (allTokens.isEmpty()) {
            return;
        }

        // An explicit iterator is required: matchHyphenatedUnknowns() may call remove() on it.
        Iterator<Annotation> unknownIt = unknowns.iterator();
        while (unknownIt.hasNext()) {
            Annotation unknown = unknownIt.next();
            String unknownText = orthoAnnotation.getStringForAnnotation(unknown, document);
            if (!caseSensitive) {
                unknownText = unknownText.toLowerCase();
            }

            ArrayList<Annotation> unknownTokens = new ArrayList<>(
                    allTokens.getContained(unknown.getStartNode().getOffset(), unknown.getEndNode().getOffset()));
            if (unknownTokens.isEmpty()) {
                continue;
            }
            Collections.sort(unknownTokens, new OffsetComparator());
            // The same list instance serves as both the raw and the normalized token list.
            tokensMap.put(unknown.getId(), unknownTokens);
            normalizedTokensMap.put(unknown.getId(), unknownTokens);

            if (processedAnnots.containsValue(unknownText)) {
                Annotation matched = orthoAnnotation.updateMatches(
                        unknown, unknownText, processedAnnots, nameAllAnnots, matchesDocFeature);
                if (matched != null) {
                    String newType = matched.getType().equals(unknownType)
                            ? annots2Remove.get(matched.getId())
                            : matched.getType();
                    annots2Remove.put(unknown.getId(), newType);
                    processedAnnots.put(unknown.getId(), unknownText);
                    unknown.getFeatures().put("NMRule", unknownType);
                } else {
                    log.debug("Orthomatcher: Unable to find the annotation: " + orthoAnnotation.getStringForAnnotation(unknown, document) + " in matchUnknown");
                }
            }

            boolean handledAsHyphenated = unknownTokens.size() == 1
                    && "hyphen".equals(unknown.getFeatures().get("kind"))
                    && matchHyphenatedUnknowns(unknown, unknownText, unknownIt);
            if (!handledAsHyphenated) {
                matchWithPrevious(unknown, unknownText, arrayList, arrayList.size());
            }
        }

        if (annots2Remove.isEmpty()) {
            return;
        }
        for (Map.Entry<Integer, String> replacement : annots2Remove.entrySet()) {
            Integer oldId = replacement.getKey();
            Annotation old = nameAllAnnots.get(oldId);
            Integer newId = nameAllAnnots.add(
                    old.getStartNode(), old.getEndNode(), replacement.getValue(), old.getFeatures());
            nameAllAnnots.remove(old);
            List matches = (List) old.getFeatures().get(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
            matches.remove(oldId);
            matches.add(newId);
        }
    }

    /**
     * Attempts to match only the part of a hyphenated unknown annotation that precedes
     * the first hyphen.
     *
     * <p>When that prefix equals an already processed string and a matching annotation is
     * found, the unknown annotation is removed through {@code it}, and a new annotation
     * spanning just the prefix is added with the matched annotation's type (or, if that is
     * itself of the unknown type, the type recorded for it in {@code annots2Remove}). The
     * old id is replaced by the new one in the annotation's matches list.
     *
     * @param annotation the unknown annotation being examined
     * @param str the (possibly lower-cased) string of {@code annotation}
     * @param it iterator currently positioned on {@code annotation}; used to remove it
     * @return {@code true} if the prefix was matched and the annotation replaced;
     *     {@code false} if {@code str} has no hyphen, the prefix is unknown, or no
     *     matching annotation could be found
     * @throws GateRuntimeException if the offsets for the new annotation are invalid
     */
    private boolean matchHyphenatedUnknowns(Annotation annotation, String str, Iterator<Annotation> it) {
        int hyphenAt = str.indexOf("-");
        if (hyphenAt < 0) {
            // Nothing to split on; let the caller fall back to the regular matching.
            return false;
        }
        String prefix = str.substring(0, hyphenAt);
        if (!this.processedAnnots.containsValue(prefix)) {
            return false;
        }
        Annotation matched = this.orthoAnnotation.updateMatches(annotation, prefix, this.processedAnnots, this.nameAllAnnots, this.matchesDocFeature);
        if (matched == null) {
            // Same situation matchUnknown() guards against: nothing to take the type from.
            log.debug("Orthomatcher: Unable to find the annotation: " + prefix + " in matchHyphenatedUnknowns");
            return false;
        }
        it.remove();
        String type = matched.getType().equals(this.unknownType)
                ? this.annots2Remove.get(matched.getId())
                : matched.getType();
        try {
            Long start = annotation.getStartNode().getOffset();
            Long prefixEnd = Long.valueOf(start.longValue() + hyphenAt);
            Integer newId = this.nameAllAnnots.add(start, prefixEnd, type, annotation.getFeatures());
            this.nameAllAnnots.remove(annotation);
            List matches = (List) annotation.getFeatures().get(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
            if (matches != null) {
                // getId() is an Integer, so this removes by value, not by index.
                matches.remove(annotation.getId());
                matches.add(newId);
            }
        } catch (InvalidOffsetException e) {
            GateRuntimeException wrapped = new GateRuntimeException(e.getMessage());
            wrapped.initCause(e); // keep the original stack trace
            throw wrapped;
        }
        return true;
    }

    /**
     * Compares an annotation with the previously seen annotations, from index
     * {@code i - 1} down to 0, and records every match.
     *
     * <p>A previous annotation is considered only if it still exists in the annotation set,
     * has the same type as {@code annotation} (or {@code annotation} is of the unknown
     * type, in which case the previous one must not be unknown as well), and the two are
     * not already matched. Person annotations whose gender features are both set and are
     * "male" versus "female" are never matched.
     *
     * <p>If {@code annotation} is of the unknown type and matched something, it is entered
     * into {@code annots2Remove} with the type to adopt and into {@code processedAnnots}.
     *
     * @param annotation the annotation to match
     * @param str the (possibly lower-cased) string of {@code annotation}
     * @param arrayList previously seen annotations
     * @param i number of leading entries of {@code arrayList} to consider
     */
    protected void matchWithPrevious(Annotation annotation, String str, ArrayList<Annotation> arrayList, int i) {
        boolean matchedUnknown = false;
        // A for loop guarantees the index is decremented on every path, including the skips.
        for (int index = i - 1; index >= 0; index--) {
            Annotation previous = this.nameAllAnnots.get(arrayList.get(index).getId());
            if (previous == null) {
                continue;
            }
            boolean currentIsUnknown = annotation.getType().equals(this.unknownType);
            if (!previous.getType().equals(annotation.getType()) && !currentIsUnknown) {
                continue;
            }
            if (currentIsUnknown && previous.getType().equals(this.unknownType)) {
                continue;
            }
            if (this.orthoAnnotation.matchedAlready(annotation, previous, this.matchesDocFeature, this.nameAllAnnots)) {
                continue;
            }
            if (previous.getType().equals(this.personType)) {
                String previousGender = (String) previous.getFeatures().get(ANNIEConstants.PERSON_GENDER_FEATURE_NAME);
                String currentGender = (String) annotation.getFeatures().get(ANNIEConstants.PERSON_GENDER_FEATURE_NAME);
                boolean conflictingGenders = previousGender != null
                        && currentGender != null
                        && ((currentGender.equalsIgnoreCase("female") && previousGender.equalsIgnoreCase("male"))
                            || (previousGender.equalsIgnoreCase("female") && currentGender.equalsIgnoreCase("male")));
                if (conflictingGenders) {
                    continue; // persons of different gender cannot be the same entity
                }
            }

            boolean hadMatchedWithLonger = previous.getFeatures().containsKey("matchedWithLonger");
            if (!matchAnnotations(annotation, str, previous)) {
                continue;
            }
            this.orthoAnnotation.updateMatches(annotation, previous, this.matchesDocFeature, this.nameAllAnnots);
            if (!hadMatchedWithLonger && previous.getFeatures().containsKey("matchedWithLonger")) {
                // The flag was set by this match; spread it to the identical mentions.
                propagatePropertyToExactMatchingMatches(previous, "matchedWithLonger", true);
            }
            if (annotation.getType().equals(this.unknownType)) {
                matchedUnknown = true;
                if (previous.getType().equals(this.unknownType)) {
                    this.annots2Remove.put(annotation.getId(), this.annots2Remove.get(previous.getId()));
                } else {
                    this.annots2Remove.put(annotation.getId(), previous.getType());
                }
                annotation.getFeatures().put("NMRule", this.unknownType);
            }
        }
        if (matchedUnknown) {
            this.processedAnnots.put(annotation.getId(), str);
        }
    }

    /**
     * Copies a feature onto those annotations in {@code annotation}'s matches list whose
     * string fuzzily matches the lower-cased string of {@code annotation}.
     *
     * <p>Any exception raised while doing so is logged and otherwise ignored.
     *
     * @param annotation the annotation whose matches list is walked
     * @param str name of the feature to set
     * @param obj value of the feature to set
     */
    protected void propagatePropertyToExactMatchingMatches(Annotation annotation, String str, Object obj) {
        try {
            List matchIds = (List) annotation.getFeatures().get(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
            if (matchIds != null && !matchIds.isEmpty()) {
                String reference = orthoAnnotation.getStringForAnnotation(annotation, document).toLowerCase();
                for (Object matchId : matchIds) {
                    Annotation candidate = nameAllAnnots.get((Integer) matchId);
                    String candidateText = orthoAnnotation.getStringForAnnotation(candidate, document);
                    if (orthoAnnotation.fuzzyMatch(candidateText, reference)) {
                        candidate.getFeatures().put(str, obj);
                    }
                }
            }
        } catch (Exception e) {
            log.error("Error in propogatePropertyToExactMatchingMatches", e);
        }
    }

    /**
     * Decides whether a new annotation matches a previously processed one.
     *
     * <p>The two strings are ordered by length (the previous annotation counts as the long
     * one unless the new string is strictly longer), the long/short annotation and token
     * fields are set accordingly, and the rules are applied. If the previous annotation
     * already has a non-empty matches list and {@code allMatchingNeeded} is set, the flag
     * is cleared and the new annotation must additionally match every other annotation in
     * that list.
     *
     * @param annotation the new annotation
     * @param str the (possibly lower-cased) string of {@code annotation}
     * @param annotation2 the previously processed annotation
     * @return {@code true} if the annotations match; {@code false} if they overlap, if
     *     {@code annotation2} has no entry in {@code processedAnnots}, or if the rules reject
     */
    protected boolean matchAnnotations(Annotation annotation, String str, Annotation annotation2) {
        if (annotation.overlaps(annotation2)) {
            return false;
        }
        String previousText = this.processedAnnots.get(annotation2.getId());
        if (previousText == null) {
            return false;
        }

        String longName = previousText;
        String shortName = str;
        this.longAnnot = annotation2;
        this.shortAnnot = annotation;
        boolean longerPrevious = true;
        if (shortName.length() > longName.length()) {
            // Swap through temporaries so that both names and both annotations really trade places.
            String swappedName = longName;
            longName = shortName;
            shortName = swappedName;
            Annotation swappedAnnot = this.longAnnot;
            this.longAnnot = this.shortAnnot;
            this.shortAnnot = swappedAnnot;
            longerPrevious = false;
        }

        this.tokensLongAnnot = (ArrayList<Annotation>) this.tokensMap.get(this.longAnnot.getId());
        this.normalizedTokensLongAnnot = (ArrayList<Annotation>) this.normalizedTokensMap.get(this.longAnnot.getId());
        this.tokensShortAnnot = (ArrayList<Annotation>) this.tokensMap.get(this.shortAnnot.getId());
        this.normalizedTokensShortAnnot = (ArrayList<Annotation>) this.normalizedTokensMap.get(this.shortAnnot.getId());

        List previousMatches = (List) annotation2.getFeatures().get(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
        if (!apply_rules_namematch(annotation2.getType(), shortName, longName, annotation2, annotation, longerPrevious)) {
            return false;
        }
        if (previousMatches == null || previousMatches.isEmpty() || !this.allMatchingNeeded) {
            return true;
        }

        this.allMatchingNeeded = false;
        ArrayList others = new ArrayList(previousMatches);
        // Cast makes the remove-by-value (not by index) intent explicit.
        others.remove((Object) annotation2.getId());
        return matchOtherAnnots(others, annotation, str);
    }

    /**
     * Checks that an annotation matches every annotation identified by {@code list}.
     *
     * <p>For each id the annotation's string is taken from {@code processedAnnots}, or read
     * from the document content when absent. The strings are ordered by length (the new
     * annotation counts as the long one when its string is at least as long), the
     * long/short annotation and token fields are set, and the rules are applied. The loop
     * stops at the first failure.
     *
     * @param list ids of the annotations that must all match
     * @param annotation the new annotation
     * @param str the (possibly lower-cased) string of {@code annotation}
     * @return {@code true} if {@code list} is empty or every annotation matched;
     *     {@code false} on the first mismatch or if document content cannot be read
     */
    protected boolean matchOtherAnnots(List<Integer> list, Annotation annotation, String str) {
        if (list.isEmpty()) {
            return true;
        }
        boolean allMatch = true;
        for (int i = 0; allMatch && i < list.size(); i++) {
            Annotation other = this.nameAllAnnots.get(list.get(i));
            String otherText = this.processedAnnots.get(other.getId());
            if (otherText == null) {
                try {
                    otherText = this.document.getContent().getContent(other.getStartNode().getOffset(), other.getEndNode().getOffset()).toString();
                } catch (InvalidOffsetException e) {
                    // Without the text no comparison is possible; treated as "no match".
                    return false;
                }
            }

            String longName = otherText;
            String shortName = str;
            this.longAnnot = other;
            this.shortAnnot = annotation;
            boolean longerPrevious = true;
            if (shortName.length() >= longName.length()) {
                // Swap through temporaries so that both names and both annotations really trade places.
                String swappedName = longName;
                longName = shortName;
                shortName = swappedName;
                Annotation swappedAnnot = this.longAnnot;
                this.longAnnot = this.shortAnnot;
                this.shortAnnot = swappedAnnot;
                longerPrevious = false;
            }

            this.tokensLongAnnot = (ArrayList<Annotation>) this.tokensMap.get(this.longAnnot.getId());
            this.normalizedTokensLongAnnot = (ArrayList<Annotation>) this.normalizedTokensMap.get(this.longAnnot.getId());
            this.tokensShortAnnot = (ArrayList<Annotation>) this.tokensMap.get(this.shortAnnot.getId());
            this.normalizedTokensShortAnnot = (ArrayList<Annotation>) this.normalizedTokensMap.get(this.shortAnnot.getId());

            allMatch = apply_rules_namematch(other.getType(), shortName, longName, other, annotation, longerPrevious);
        }
        return allMatch;
    }

    /**
     * Removes coreference data left over from an earlier run.
     *
     * <p>If the document coreference feature is a map, the entry for the current annotation
     * set name is removed; if it is some other non-null value, it is replaced by an empty
     * map. Afterwards the per-annotation coreference feature is removed from every
     * annotation in the current set that carries it.
     */
    protected void docCleanup() {
        Object docCoref = document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
        if (docCoref instanceof Map) {
            ((Map) docCoref).remove(nameAllAnnots.getName());
        } else if (docCoref != null) {
            document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME, new HashMap<Object, Object>());
        }

        HashSet<String> requiredFeatures = new HashSet<>();
        requiredFeatures.add(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
        // A null type selects annotations of any type that have the feature.
        AnnotationSet withMatches = nameAllAnnots.get((String) null, requiredFeatures);
        if (withMatches != null && !withMatches.isEmpty()) {
            for (Annotation carrier : withMatches) {
                carrier.getFeatures().remove(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
            }
        }
    }

    /**
     * Rebuilds the normalized token list of a person annotation without stop tokens.
     *
     * <p>Every token whose {@code "kind"} feature is {@code "punctuation"} is flagged with
     * the {@code "ortho_stop"} feature. The list stored in {@code normalizedTokensMap} is
     * then replaced by a new list that keeps, in order, only the tokens that do not carry
     * that flag.
     *
     * @param annotation the person annotation; must have an entry in {@code normalizedTokensMap}
     * @throws ExecutionException declared for subclasses; not thrown here
     */
    protected void normalizePersonName(Annotation annotation) throws ExecutionException {
        List<Annotation> tokens = this.normalizedTokensMap.get(annotation.getId());
        // Other methods cast the stored value to ArrayList, so the replacement must be one.
        ArrayList<Annotation> kept = new ArrayList<>(tokens.size());
        for (Annotation token : tokens) {
            // Constant-first equals: a token without a "kind" feature is simply not punctuation.
            if (PUNCTUATION_VALUE.equals(token.getFeatures().get("kind"))) {
                token.getFeatures().put("ortho_stop", true);
            }
            if (!token.getFeatures().containsKey("ortho_stop")) {
                kept.add(token);
            }
        }
        this.normalizedTokensMap.put(annotation.getId(), kept);
    }

    /**
     * Normalizes the token list of an organization annotation and returns the resulting name.
     *
     * <p>Working on the list stored in {@code tokensMap} (which is modified in place):
     * a leading "The" token is dropped; tokens that are punctuation, have category
     * {@code DT} or {@code IN}, or whose string is in {@code cdg} are flagged with
     * {@code "ortho_stop"}; and a trailing token whose string is in {@code cdg} is dropped.
     * A copy without the flagged tokens is stored in {@code normalizedTokensMap}.
     *
     * @param str not used by this implementation
     * @param annotation the organization annotation; must have an entry in {@code tokensMap}
     * @return the strings of the remaining tokens joined by single spaces, lower-cased
     *     unless {@code caseSensitive} is set; empty if no tokens remain
     */
    protected String normalizeOrganizationName(String str, Annotation annotation) {
        List<Annotation> tokens = this.tokensMap.get(annotation.getId());

        // Guarded so that an empty token list yields "" instead of an index error.
        if (!tokens.isEmpty()) {
            String first = (String) tokens.get(0).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
            if (first.equalsIgnoreCase(THE_VALUE)) {
                tokens.remove(0);
            }
        }

        if (!tokens.isEmpty()) {
            for (Annotation token : tokens) {
                String text = (String) token.getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
                Object kind = token.getFeatures().get("kind");
                String category = (String) token.getFeatures().get(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME);
                if (!this.caseSensitive) {
                    text = text.toLowerCase();
                }
                boolean stopToken = PUNCTUATION_VALUE.equals(kind)
                        || "DT".equals(category)
                        || "IN".equals(category)
                        || this.cdg.contains(text);
                if (stopToken) {
                    token.getFeatures().put("ortho_stop", true);
                }
            }

            int lastIndex = tokens.size() - 1;
            String last = (String) tokens.get(lastIndex).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
            if (!this.caseSensitive) {
                last = last.toLowerCase();
            }
            if (this.cdg.contains(last)) {
                tokens.remove(lastIndex);
            }
        }

        // Other methods cast the stored value to ArrayList, so the copy must be one.
        ArrayList<Annotation> normalized = new ArrayList<>(tokens.size());
        for (Annotation token : tokens) {
            if (!token.getFeatures().containsKey("ortho_stop")) {
                normalized.add(token);
            }
        }
        this.normalizedTokensMap.put(annotation.getId(), normalized);

        // The returned name is built from all remaining tokens, flagged ones included.
        StringBuilder name = new StringBuilder(50);
        for (int i = 0; i < tokens.size(); i++) {
            if (i > 0) {
                name.append(" ");
            }
            name.append((String) tokens.get(i).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME));
        }
        return this.caseSensitive ? name.toString() : name.toString().toLowerCase();
    }

    /**
     * Loads one list file into the collection selected by the list name.
     *
     * <p>For the {@code cdg} list every line is lower-cased, trimmed, has each run of
     * punctuation replaced by a space, and is added to {@code cdg}. For all other lists
     * only lines containing {@code '£'} are used: the text before the first {@code '£'}
     * (lower-cased unless {@code caseSensitive}) becomes the key and the text after it the
     * value in the map belonging to the list name. Unrecognised list names are ignored.
     *
     * @param str file name of the list, resolved relative to the definition file URL
     * @param str2 name of the list, e.g. {@code "cdg"} or {@code "alias"}
     * @throws IOException if the list file cannot be opened or read
     */
    protected void createAnnotList(String str, String str2) throws IOException {
        BomStrippingInputStreamReader listReader = null;
        try {
            listReader = new BomStrippingInputStreamReader(new URL(definitionFileURL, str).openStream(), encoding);
            for (String line = listReader.readLine(); line != null; line = listReader.readLine()) {
                if (str2.compareTo(CDGLISTNAME) == 0) {
                    String cleaned = punctPat.matcher(line.toLowerCase().trim()).replaceAll(" ").trim();
                    cdg.add(caseSensitive ? cleaned : cleaned.toLowerCase());
                    continue;
                }

                int separator = line.indexOf("£");
                if (separator == -1) {
                    continue;
                }
                String key = line.substring(0, separator);
                if (!caseSensitive) {
                    key = key.toLowerCase();
                }
                String value = line.substring(separator + 1);

                switch (str2) {
                    case ALIASLISTNAME:
                        alias.put(key, value);
                        break;
                    case ARTLISTNAME:
                        def_art.put(key, value);
                        break;
                    case PREPLISTNAME:
                        prepos.put(key, value);
                        break;
                    case CONNECTORLISTNAME:
                        connector.put(key, value);
                        break;
                    case SPURLISTNAME:
                        spur_match.put(key, value);
                        break;
                    default:
                        break;
                }
            }
        } finally {
            IOUtils.closeQuietly(listReader);
        }
    }

    /**
     * Compares two person-name strings using rules 0, 2 and 3.
     *
     * <p>The longer string (the second one on a tie) is passed to the rules first. A hit
     * on rule 0 rejects the pair; equality or a hit on rule 2 or rule 3 accepts it;
     * otherwise the result of evaluating rule 0 once more is returned.
     *
     * @param str first name string
     * @param str2 second name string
     * @return whether the two strings are considered a match
     */
    private boolean pairwise_person_name_match(String str, String str2) {
        boolean firstIsLonger = str.length() > str2.length();
        String longer = firstIsLonger ? str : str2;
        String shorter = firstIsLonger ? str2 : str;

        OrthoMatcherRule vetoRule = rules.get(0);
        if (vetoRule.value(longer, shorter)) {
            return false;
        }
        boolean accepted = longer.equals(shorter)
                || rules.get(2).value(longer, shorter)
                || rules.get(3).value(longer, shorter);
        return accepted || rules.get(0).value(longer, shorter);
    }

    /**
     * Runs rules 1, 5, 6, 13, 15 and 16 as a disjunction via {@code OrthoMatcherHelper}.
     *
     * @param str passed to the helper as its second string argument
     * @param str2 passed to the helper as its first string argument
     * @param zArr per-rule result flags handed through to the helper
     * @return the helper's result
     */
    private boolean basic_person_match_criteria(String str, String str2, boolean[] zArr) {
        int[] personRules = {1, 5, 6, 13, 15, 16};
        // Note the argument order: str2 goes first.
        return OrthoMatcherHelper.executeDisjunction(rules, personRules, str2, str, zArr);
    }

    /**
     * Applies the matching rules to a pair of name strings.
     *
     * <p>Order of evaluation: rule 0 rejects the pair; rules 2/3 accept it for any type;
     * organization and "Facility" types then try the organization rule set selected by
     * {@code highPrecisionOrgs}; finally, person types are accepted only if neither
     * {@code noMatchRule1} nor {@code noMatchRule2} fires and the basic person criteria
     * hold. For accepted persons the {@code "matchedWithLonger"} feature may be set on one
     * of the two annotations, depending on the string lengths and on which rule flags are set.
     *
     * @param str annotation type that selects the rule set
     * @param str2 the shorter name
     * @param str3 the longer name
     * @param annotation the previously processed annotation
     * @param annotation2 the new annotation
     * @param z {@code true} when the previous annotation holds the longer name
     * @return {@code true} if the names match
     */
    private boolean apply_rules_namematch(String str, String str2, String str3, Annotation annotation, Annotation annotation2, boolean z) {
        boolean[] fired = new boolean[rules.size()];

        if (rules.get(0).value(str3, str2)) {
            return false;
        }
        if (OrthoMatcherHelper.executeDisjunction(rules, new int[]{2, 3}, str3, str2, fired)) {
            return true;
        }

        if (str.equals(organizationType) || str.equals("Facility")) {
            int[] orgRules = highPrecisionOrgs.booleanValue()
                    ? new int[]{7, 8, 10, 11, 17}
                    : new int[]{4, 6, 7, 8, 9, 10, 11, 12, 14};
            if (OrthoMatcherHelper.executeDisjunction(rules, orgRules, str3, str2, fired)) {
                return true;
            }
        }

        // Everything below applies to person annotations only.
        if (!str.equals(personType)) {
            return false;
        }
        if (noMatchRule1(str3, str2, annotation, z)) {
            return false;
        }
        if (noMatchRule2(str3, str2)) {
            return false;
        }
        if (!basic_person_match_criteria(str2, str3, fired)) {
            return false;
        }

        boolean sameLength = str3.length() == str2.length();
        if (!sameLength && (fired[4] || fired[5] || fired[14] || fired[15])) {
            // Flag whichever annotation holds the shorter name.
            Annotation shorter = z ? annotation2 : annotation;
            shorter.getFeatures().put("matchedWithLonger", true);
            return true;
        }
        if (sameLength && fired[1] && annotation.getFeatures().containsKey("matchedWithLonger")) {
            annotation2.getFeatures().put("matchedWithLonger", true);
        }
        return true;
    }

    /**
     * Sets the "external lists" flag.
     *
     * @param bool new value; it is unboxed, so {@code null} raises a
     *             {@link NullPointerException}
     */
    @CreoleParameter(comment = "External lists otherwise internal", defaultValue = "true")
    @Optional
    public void setExtLists(Boolean bool) {
        boolean useExternal = bool.booleanValue();
        this.extLists = useExternal;
    }

    /**
     * Sets whether comparisons distinguish upper and lower case.
     *
     * @param bool new value; it is unboxed, so {@code null} raises a
     *             {@link NullPointerException}
     */
    @CreoleParameter(comment = "Should this resource diferentiate on case?", defaultValue = "false")
    @Optional
    public void setCaseSensitive(Boolean bool) {
        boolean sensitive = bool.booleanValue();
        this.caseSensitive = sensitive;
    }

    /**
     * Sets the name of the annotation set holding the annotation types.
     *
     * @param str annotation set name, stored as given
     */
    @CreoleParameter(comment = "Annotation set name where are the annotation types (annotationTypes)")
    @RunTime
    @Optional
    public void setAnnotationSetName(String str) {
        annotationSetName = str;
    }

    /**
     * Sets the annotation type names to use.
     *
     * @param list annotation type names; the reference is stored without copying
     */
    @CreoleParameter(comment = "Name of the annotation types to use", defaultValue = "Organization;Person;Location;Date")
    @RunTime
    @Optional
    public void setAnnotationTypes(List<String> list) {
        annotationTypes = list;
    }

    /**
     * Sets whether 'Unknown' annotations are processed.
     *
     * @param bool new value; it is unboxed, so {@code null} raises a
     *             {@link NullPointerException}
     */
    @CreoleParameter(comment = "Should we process 'Unknown' annotations?", defaultValue = "true")
    @Optional
    public void setProcessUnknown(Boolean bool) {
        boolean processUnknown = bool.booleanValue();
        this.matchingUnknowns = processUnknown;
    }

    /**
     * Sets the annotation type name used for organisations.
     *
     * @param str organisation annotation type, stored as given
     */
    @CreoleParameter(comment = "Annotation name for the organizations", defaultValue = ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE)
    @Optional
    public void setOrganizationType(String str) {
        organizationType = str;
    }

    /**
     * Sets the annotation type name used for persons.
     *
     * @param str person annotation type, stored as given
     */
    @CreoleParameter(comment = "Annotation name for the persons", defaultValue = ANNIEConstants.PERSON_ANNOTATION_TYPE)
    @Optional
    public void setPersonType(String str) {
        personType = str;
    }

    /**
     * @return the currently configured annotation set name
     */
    public String getAnnotationSetName() {
        return annotationSetName;
    }

    /**
     * @return the configured annotation type names (the stored list itself,
     *         not a copy)
     */
    public List<String> getAnnotationTypes() {
        return annotationTypes;
    }

    /**
     * @return the annotation type name used for organisations
     */
    public String getOrganizationType() {
        return organizationType;
    }

    /**
     * @return the annotation type name used for persons
     */
    public String getPersonType() {
        return personType;
    }

    /**
     * Returns the "external lists" flag as a boxed value.
     *
     * <p>Uses {@link Boolean#valueOf(boolean)} rather than the deprecated
     * {@code Boolean} constructor, so no new object is allocated per call.
     *
     * @return the current value of the flag
     */
    public Boolean getExtLists() {
        return Boolean.valueOf(this.extLists);
    }

    /**
     * Returns the case-sensitivity flag as a boxed value.
     *
     * <p>Uses {@link Boolean#valueOf(boolean)} rather than the deprecated
     * {@code Boolean} constructor, so no new object is allocated per call.
     *
     * @return the current value of the flag
     */
    public Boolean getCaseSensitive() {
        return Boolean.valueOf(this.caseSensitive);
    }

    /**
     * Returns the "process Unknown annotations" flag as a boxed value.
     *
     * <p>Uses {@link Boolean#valueOf(boolean)} rather than the deprecated
     * {@code Boolean} constructor, so no new object is allocated per call.
     *
     * @return the current value of the flag
     */
    public Boolean getProcessUnknown() {
        return Boolean.valueOf(this.matchingUnknowns);
    }

    /**
     * Veto rule based on the {@code "matchedWithLonger"} feature.
     *
     * @param str        not used by this rule
     * @param str2       not used by this rule
     * @param annotation annotation whose features are inspected
     * @param z          when {@code true} the rule never fires
     * @return {@code true} only if {@code z} is {@code false} and the
     *         annotation carries the {@code "matchedWithLonger"} feature
     */
    public boolean noMatchRule1(String str, String str2, Annotation annotation, boolean z) {
        if (z) {
            return false;
        }
        return annotation.getFeatures().containsKey("matchedWithLonger");
    }

    /**
     * Checks the interior tokens of a token list (everything except the first
     * and the last element) against the {@code badMiddleTokens} pattern.
     *
     * <p>Each token string is lower-cased and trimmed before matching. Lists
     * with fewer than three elements have no interior and yield {@code false}.
     *
     * @param arrayList token annotations to inspect
     * @return {@code true} as soon as one interior token matches the pattern
     */
    private boolean detectBadMiddleTokens(ArrayList<Annotation> arrayList) {
        final int lastIndex = arrayList.size() - 1;
        int position = 1;
        while (position < lastIndex) {
            Object rawToken = arrayList.get(position).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
            String normalised = ((String) rawToken).toLowerCase().trim();
            if (badMiddleTokens.matcher(normalised).find()) {
                return true;
            }
            position++;
        }
        return false;
    }

    /**
     * Veto rule that compares the normalised token lists of the two
     * annotations currently under consideration.
     *
     * <p>The rule only applies when both lists hold more than two tokens:
     * <ul>
     *   <li><b>Same number of tokens:</b> each interior token pair (first and
     *       last positions excluded) is compared, lower-cased first unless the
     *       matcher is case sensitive. The rule fires on the first pair that
     *       satisfies neither rule 1 nor
     *       {@code OrthoMatcherHelper.initialMatch}.</li>
     *   <li><b>Different number of tokens:</b> the rule fires when rule 1
     *       accepts both the first-token pair and the last-token pair, unless
     *       {@code detectBadMiddleTokens} flags either un-normalised token
     *       list. No lower-casing is applied on this path.</li>
     * </ul>
     *
     * @param str  not used by this rule
     * @param str2 not used by this rule
     * @return {@code true} if the pair must not be matched
     */
    public boolean noMatchRule2(String str, String str2) {
        final int longCount = this.normalizedTokensLongAnnot.size();
        final int shortCount = this.normalizedTokensShortAnnot.size();
        if (longCount <= 2 || shortCount <= 2) {
            return false;
        }

        if (longCount == shortCount) {
            // Walk the interior positions only; index 0 and the last index are skipped.
            for (int i = 1; i < longCount - 1; i++) {
                String longMiddle = (String) this.normalizedTokensLongAnnot.get(i).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
                String shortMiddle = (String) this.normalizedTokensShortAnnot.get(i).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
                if (!this.caseSensitive) {
                    longMiddle = longMiddle.toLowerCase();
                    shortMiddle = shortMiddle.toLowerCase();
                }
                if (!this.rules.get(1).value(longMiddle, shortMiddle) && !OrthoMatcherHelper.initialMatch(longMiddle, shortMiddle)) {
                    return true;
                }
            }
            return false;
        }

        String longFirst = (String) this.normalizedTokensLongAnnot.get(0).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
        String shortFirst = (String) this.normalizedTokensShortAnnot.get(0).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
        String longLast = (String) this.normalizedTokensLongAnnot.get(longCount - 1).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);
        String shortLast = (String) this.normalizedTokensShortAnnot.get(shortCount - 1).getFeatures().get(ANNIEConstants.TOKEN_STRING_FEATURE_NAME);

        // Both outer tokens must agree before the differing length can veto the match.
        boolean outerTokensAgree = this.rules.get(1).value(longFirst, shortFirst) && this.rules.get(1).value(longLast, shortLast);
        if (!outerTokensAgree) {
            return false;
        }
        // A flagged middle token in either list cancels the veto.
        return !(detectBadMiddleTokens(this.tokensLongAnnot) || detectBadMiddleTokens(this.tokensShortAnnot));
    }

    /**
     * Sets the location of the definition file.
     *
     * @param url URL of the definition file, stored as given
     */
    @CreoleParameter(comment = "The URL to the definition file", defaultValue = "resources/othomatcher/listsNM.def", suffixes = "def")
    public void setDefinitionFileURL(URL url) {
        definitionFileURL = url;
    }

    /**
     * @return the configured definition file URL
     */
    public URL getDefinitionFileURL() {
        return definitionFileURL;
    }

    /**
     * Sets the character encoding used for reading the definition file.
     *
     * @param str encoding name, stored as given
     */
    @CreoleParameter(comment = "The encoding used for reading the definition file", defaultValue = "UTF-8")
    public void setEncoding(String str) {
        encoding = str;
    }

    /**
     * @return the configured encoding name
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * @return the configured minimum nickname likelihood
     */
    public Double getMinimumNicknameLikelihood() {
        return minimumNicknameLikelihood;
    }

    /**
     * Sets the minimum likelihood that a name is a nickname.
     *
     * @param d new threshold, stored as given
     */
    @CreoleParameter(comment = "Minimum likelihood that a name is a nickname", defaultValue = "0.50")
    public void setMinimumNicknameLikelihood(Double d) {
        minimumNicknameLikelihood = d;
    }

    /**
     * @return the stored high-precision organisation matching flag
     */
    public Boolean getHighPrecisionOrgs() {
        return highPrecisionOrgs;
    }

    /**
     * Sets the high-precision organisation matching flag.
     *
     * @param bool new value; the boxed reference is stored without unboxing
     */
    @CreoleParameter(comment = "Use very safe features for matching orgs, such as ACME = ACME, Inc.", defaultValue = "false")
    @Optional
    public void setHighPrecisionOrgs(Boolean bool) {
        highPrecisionOrgs = bool;
    }

    /**
     * Replaces the {@code AnnotationOrthography} instance held by this matcher.
     *
     * @param annotationOrthography the instance to store
     */
    public void setOrthography(AnnotationOrthography annotationOrthography) {
        orthoAnnotation = annotationOrthography;
    }

    /**
     * @return the {@code AnnotationOrthography} instance held by this matcher
     */
    public AnnotationOrthography getOrthography() {
        return orthoAnnotation;
    }
}
