package liner2.chunker;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import liner2.structure.Annotation;
import liner2.structure.AnnotationSet;
import liner2.structure.Document;
import liner2.structure.Paragraph;
import liner2.structure.Sentence;
import liner2.structure.Token;
import liner2.structure.TokenAttributeIndex;
import liner2.tools.ParameterException;

/* loaded from: input_file:liner2/chunker/HeuristicChunker.class */
/**
 * Chunker that annotates sentences using a fixed set of hand-written heuristics.
 *
 * <p>Heuristics are grouped under the rule names {@code general-ign-dict},
 * {@code general-camel-base}, {@code road-prefix}, {@code person}, {@code city},
 * {@code road} and {@code nam}. An instance created with the no-argument constructor
 * applies every group; an instance created with {@link #HeuristicChunker(String[])}
 * applies only the listed groups.
 *
 * <p>All heuristics read token attributes (for example {@code orth}, {@code base},
 * {@code pattern}, {@code *_nam}) through the sentence's {@link TokenAttributeIndex};
 * they assume those attributes are present on every token.
 */
public class HeuristicChunker extends Chunker {

    /** Rule-group names accepted by {@link #HeuristicChunker(String[])}. */
    private static final String[] KNOWN_RULES = {
        "general-ign-dict", "general-camel-base", "person", "city", "road", "road-prefix", "nam"
    };

    /** Matches upper-case roman numerals; compiled once because it never changes. */
    private static final Pattern ROMAN_NUMERAL =
            Pattern.compile("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$");

    /** One upper-case letter followed by zero or more lower-case letters. */
    private static final Pattern CAPITALIZED_WORD = Pattern.compile("\\p{Lu}\\p{Ll}*");

    /** Enabled rule groups, or {@code null} when every group is enabled. */
    private final List<String> rules;

    /** Creates a chunker with every rule group enabled. */
    public HeuristicChunker() {
        this.rules = null;
    }

    /**
     * Creates a chunker restricted to the given rule groups.
     *
     * @param strArr names of the rule groups to enable
     * @throws ParameterException if any name is not one of the known rule groups
     */
    public HeuristicChunker(String[] strArr) throws ParameterException {
        List<String> enabled = new ArrayList<>();
        for (String name : strArr) {
            if (!isKnownRule(name)) {
                throw new ParameterException("HeuristicChunker: unknown heuristic " + name);
            }
            enabled.add(name);
        }
        this.rules = enabled;
    }

    /**
     * Runs the chunker over every sentence of every paragraph in the document.
     *
     * @param document document to process
     * @return map from each sentence to the annotations produced for it
     */
    @Override
    public HashMap<Sentence, AnnotationSet> chunk(Document document) {
        HashMap<Sentence, AnnotationSet> chunkings = new HashMap<>();
        for (Paragraph paragraph : document.getParagraphs()) {
            for (Sentence sentence : paragraph.getSentences()) {
                chunkings.put(sentence, chunkSentence(sentence));
            }
        }
        return chunkings;
    }

    /** Applies every active rule group to one sentence and merges the results. */
    private AnnotationSet chunkSentence(Sentence sentence) {
        AnnotationSet result = new AnnotationSet(sentence);
        if (ruleActive("general-ign-dict")) {
            result.union(ruleGeneralIgnDict(sentence));
        }
        if (ruleActive("general-camel-base")) {
            result.union(ruleGeneralCamelBase(sentence));
        }
        if (ruleActive("road-prefix")) {
            result.union(ruleRoadPrefix(sentence));
        }
        if (ruleActive("person")) {
            result.union(rulePersonPanFirstLastNoun(sentence));
            result.union(rulePersonPanInitialLast(sentence));
            result.union(rulePersonFirstLastMaiden(sentence));
            result.union(rulePersonNounFirstLast(sentence));
            result.union(rulePersonNounFirstInitialLast(sentence));
        }
        if (ruleActive("city")) {
            result.union(ruleCityPrefix(sentence));
            result.union(ruleCityPostal(sentence));
        }
        if (ruleActive("road")) {
            result.union(ruleRoadPrefixNumber(sentence));
        }
        if (ruleActive("nam")) {
            result.union(ruleNamUpperCamelCase(sentence));
            result.union(ruleNamQuotation(sentence));
            result.union(ruleNamAllUpper(sentence));
        }
        return result;
    }

    /** Returns true when the named rule group is enabled (always true if no list was given). */
    private boolean ruleActive(String str) {
        return this.rules == null || this.rules.contains(str);
    }

    /** Returns true when {@code name} is one of {@link #KNOWN_RULES}; false for {@code null}. */
    private static boolean isKnownRule(String name) {
        for (String known : KNOWN_RULES) {
            if (known.equals(name)) {
                return true;
            }
        }
        return false;
    }

    // ------------------------------------------------------------------ shared helpers

    /** Compares the attribute at position {@code attribute} of the token with {@code expected}. */
    private static boolean hasValue(Token token, int attribute, String expected) {
        return expected.equals(token.getAttributeValue(attribute));
    }

    /** Compares the attribute called {@code name} of the token with {@code expected}. */
    private static boolean hasValue(TokenAttributeIndex index, Token token, String name, String expected) {
        return expected.equals(index.getAttributeValue(token, name));
    }

    /**
     * Starting at {@code start}, walks right while the next token carries "I" in the given
     * attribute and returns the index of the last such token ({@code start} if none).
     */
    private static int extendWhileInside(List<Token> tokens, int start, int attribute) {
        int end = start;
        while (end + 1 < tokens.size() && hasValue(tokens.get(end + 1), attribute, "I")) {
            end++;
        }
        return end;
    }

    /**
     * Looks for the only attribute whose name ends with {@code _nam} and whose value on the
     * token is "B".
     *
     * @return that attribute's position, or -1 when there is no such attribute, when more than
     *         one {@code _nam} attribute is "B", or when any {@code _nam} attribute is "I"
     */
    private static int findSoleNameStart(Token token, TokenAttributeIndex index) {
        int found = -1;
        for (int attribute = 0; attribute < index.getLength(); attribute++) {
            if (!index.getName(attribute).endsWith("_nam")) {
                continue;
            }
            String value = token.getAttributeValue(attribute);
            if ("B".equals(value)) {
                if (found != -1) {
                    return -1; // ambiguous: a second dictionary also starts here
                }
                found = attribute;
            } else if ("I".equals(value)) {
                return -1; // token continues some other dictionary entry
            }
        }
        return found;
    }

    /**
     * If the token at {@code start} unambiguously begins one {@code _nam} entry, adds a chunk
     * covering that entry, labelled with the upper-cased attribute name.
     */
    private static void addSoleDictionaryChunk(AnnotationSet target, Sentence sentence,
            List<Token> tokens, TokenAttributeIndex index, int start) {
        int attribute = findSoleNameStart(tokens.get(start), index);
        if (attribute > -1) {
            int end = extendWhileInside(tokens, start, attribute);
            // Locale.ROOT keeps the label stable regardless of the JVM default locale.
            String label = index.getName(attribute).toUpperCase(Locale.ROOT);
            target.addChunk(new Annotation(start, end, label, sentence));
        }
    }

    /** True when the token's base form, lower-cased, is "pan" or "pani". */
    private static boolean isPanTitle(Token token, int baseIndex) {
        String base = token.getAttributeValue(baseIndex).toLowerCase(Locale.ROOT);
        return base.equals("pan") || base.equals("pani");
    }

    /** True when the token has pattern ALL_UPPER and its orth is exactly one character long. */
    private static boolean isSingleUpperLetter(Token token, int patternIndex, int orthIndex) {
        return hasValue(token, patternIndex, "ALL_UPPER")
                && token.getAttributeValue(orthIndex).length() == 1;
    }

    /** True when the token has pattern ALL_UPPER and its orth is longer than two characters. */
    private static boolean isLongAllUpper(Token token, int patternIndex, int orthIndex) {
        return hasValue(token, patternIndex, "ALL_UPPER")
                && token.getAttributeValue(orthIndex).length() > 2;
    }

    private static boolean isOpeningQuote(String orth) {
        return "„".equals(orth) || "“".equals(orth) || "\"".equals(orth) || "&quot;".equals(orth);
    }

    private static boolean isClosingQuote(String orth) {
        return "”".equals(orth) || "\"".equals(orth) || "&quot;".equals(orth);
    }

    // ------------------------------------------------------------------ "nam" rules

    /** Marks every token whose pattern is UPPER_CAMEL_CASE as NAM. */
    private AnnotationSet ruleNamUpperCamelCase(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        for (int i = 0; i < tokens.size(); i++) {
            if (hasValue(index, tokens.get(i), "pattern", "UPPER_CAMEL_CASE")) {
                chunks.addChunk(new Annotation(i, "NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Marks as NAM a span of one or two tokens enclosed in quotation marks, when the first
     * quoted token starts with an upper-case letter.
     */
    private AnnotationSet ruleNamQuotation(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int size = tokens.size();
        // NOTE(review): scanning starts at token 1, so a quote opening the sentence is never
        // considered; kept as in the original — confirm whether that is intended.
        int i = 1;
        while (i < size) {
            String orth = index.getAttributeValue(tokens.get(i), "orth");
            if (i + 2 < size && isOpeningQuote(orth)
                    && hasValue(index, tokens.get(i + 1), "starts_with_upper_case", "1")) {
                // Find the closing quote. The scanned token itself is tested (previously the
                // "&quot;" comparison looked at the opening token by mistake).
                int close = i + 1;
                while (close < size
                        && !isClosingQuote(index.getAttributeValue(tokens.get(close), "orth"))) {
                    close++;
                }
                int distance = close - i;
                // distance 2 or 3 means one or two tokens between the quotes.
                if (close < size && distance >= 2 && distance <= 3) {
                    chunks.addChunk(new Annotation(i + 1, close - 1, "NAM", sentence));
                    i = close; // resume after the closing quote
                }
            }
            i++;
        }
        return chunks;
    }

    /**
     * Marks as NAM each maximal run of ALL_UPPER tokens longer than two characters, unless
     * the run is a single token that looks like a roman numeral. Produces nothing when no
     * token in the sentence has {@code has_lower_case} equal to "1".
     */
    private AnnotationSet ruleNamAllUpper(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int hasLowerCaseIndex = index.getIndex("has_lower_case");
        int patternIndex = index.getIndex("pattern");
        int orthIndex = index.getIndex("orth");

        boolean anyLowerCase = false;
        for (Token token : tokens) {
            if (hasValue(token, hasLowerCaseIndex, "1")) {
                anyLowerCase = true;
                break;
            }
        }
        if (!anyLowerCase) {
            return chunks;
        }

        for (int start = 0; start < tokens.size(); start++) {
            if (!isLongAllUpper(tokens.get(start), patternIndex, orthIndex)) {
                continue;
            }
            int end = start + 1; // exclusive end of the run
            while (end < tokens.size() && isLongAllUpper(tokens.get(end), patternIndex, orthIndex)) {
                end++;
            }
            boolean singleRomanNumeral = end - start == 1
                    && ROMAN_NUMERAL.matcher(index.getAttributeValue(tokens.get(start), "orth")).find();
            if (!singleRomanNumeral) {
                chunks.addChunk(new Annotation(start, end - 1, "NAM", sentence));
            }
            // Continue after the run so its tail is not annotated again as nested chunks.
            start = end - 1;
        }
        return chunks;
    }

    // ------------------------------------------------------------------ dictionary rules

    /**
     * For each token whose {@code class} is "ign", adds a chunk if the token unambiguously
     * starts exactly one {@code _nam} dictionary entry.
     */
    private AnnotationSet ruleGeneralIgnDict(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        for (int i = 0; i < tokens.size(); i++) {
            if (hasValue(index, tokens.get(i), "class", "ign")) {
                addSoleDictionaryChunk(chunks, sentence, tokens, index, i);
            }
        }
        return chunks;
    }

    /**
     * For each token whose {@code base} is one upper-case letter followed only by lower-case
     * letters, adds a chunk if the token unambiguously starts exactly one {@code _nam}
     * dictionary entry.
     */
    private AnnotationSet ruleGeneralCamelBase(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        for (int i = 0; i < tokens.size(); i++) {
            String base = index.getAttributeValue(tokens.get(i), "base");
            if (CAPITALIZED_WORD.matcher(base).matches()) {
                addSoleDictionaryChunk(chunks, sentence, tokens, index, i);
            }
        }
        return chunks;
    }

    // ------------------------------------------------------------------ road rules

    /**
     * Marks a {@code road_nam} entry as ROAD_NAM when it directly follows a token with
     * {@code road_prefix} "B", optionally separated from it by one token of class "interp".
     */
    private AnnotationSet ruleRoadPrefix(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        for (int i = 1; i < tokens.size(); i++) {
            boolean prefixBefore = hasValue(index, tokens.get(i - 1), "road_prefix", "B")
                    || (hasValue(index, tokens.get(i - 1), "class", "interp")
                            && i != 1
                            && hasValue(index, tokens.get(i - 2), "road_prefix", "B"));
            if (prefixBefore && hasValue(index, tokens.get(i), "road_nam", "B")) {
                int end = i;
                while (end + 1 < tokens.size()
                        && hasValue(index, tokens.get(end + 1), "road_nam", "I")) {
                    end++;
                }
                chunks.addChunk(new Annotation(i, end, "ROAD_NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Matches "ul" "." UPPER_INIT DIGITS and marks the UPPER_INIT token as ROAD_NAM; if that
     * is followed by "/" UPPER_INIT DIGITS, the second UPPER_INIT token is marked as well.
     */
    private AnnotationSet ruleRoadPrefixNumber(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int orthIndex = index.getIndex("orth");
        int patternIndex = index.getIndex("pattern");
        for (int i = 0; i + 3 < tokens.size(); i++) {
            boolean matches = tokens.get(i).getAttributeValue(orthIndex).toLowerCase(Locale.ROOT).equals("ul")
                    && hasValue(tokens.get(i + 1), orthIndex, ".")
                    && hasValue(tokens.get(i + 2), patternIndex, "UPPER_INIT")
                    && hasValue(tokens.get(i + 3), patternIndex, "DIGITS");
            if (!matches) {
                continue;
            }
            chunks.addChunk(new Annotation(i + 2, "ROAD_NAM", sentence));
            if (i + 6 < tokens.size()
                    && hasValue(tokens.get(i + 4), orthIndex, "/")
                    && hasValue(tokens.get(i + 5), patternIndex, "UPPER_INIT")
                    && hasValue(tokens.get(i + 6), patternIndex, "DIGITS")) {
                chunks.addChunk(new Annotation(i + 5, "ROAD_NAM", sentence));
            }
        }
        return chunks;
    }

    // ------------------------------------------------------------------ person rules

    /**
     * Matches "pan"/"pani" + first name + last name, where the match either ends the sentence
     * or is followed by a token with {@code person_noun} "B"; marks the first and last name.
     */
    private AnnotationSet rulePersonPanFirstLastNoun(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int firstNameIndex = index.getIndex("person_first_nam");
        int lastNameIndex = index.getIndex("person_last_nam");
        int personNounIndex = index.getIndex("person_noun");
        int baseIndex = index.getIndex("base");
        int i = 0;
        while (i + 2 < tokens.size()) {
            if (isPanTitle(tokens.get(i), baseIndex)
                    && hasValue(tokens.get(i + 1), firstNameIndex, "B")
                    && hasValue(tokens.get(i + 2), lastNameIndex, "B")
                    && (i + 3 == tokens.size() || hasValue(tokens.get(i + 3), personNounIndex, "B"))) {
                chunks.addChunk(new Annotation(i + 1, "PERSON_FIRST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 2, "PERSON_LAST_NAM", sentence));
                i += 2; // skip the tokens just consumed
            }
            i++;
        }
        return chunks;
    }

    /**
     * Matches "pan"/"pani" + single upper-case letter + "." + single upper-case letter + "."
     * + last name, and marks the last name.
     */
    private AnnotationSet rulePersonPanInitialLast(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int lastNameIndex = index.getIndex("person_last_nam");
        int patternIndex = index.getIndex("pattern");
        int baseIndex = index.getIndex("base");
        int orthIndex = index.getIndex("orth");
        int i = 0;
        while (i + 5 < tokens.size()) {
            if (isPanTitle(tokens.get(i), baseIndex)
                    && isSingleUpperLetter(tokens.get(i + 1), patternIndex, orthIndex)
                    && hasValue(tokens.get(i + 2), orthIndex, ".")
                    && isSingleUpperLetter(tokens.get(i + 3), patternIndex, orthIndex)
                    && hasValue(tokens.get(i + 4), orthIndex, ".")
                    && hasValue(tokens.get(i + 5), lastNameIndex, "B")) {
                chunks.addChunk(new Annotation(i + 5, "PERSON_LAST_NAM", sentence));
                i += 5; // skip the tokens just consumed
            }
            i++;
        }
        return chunks;
    }

    /**
     * Matches a first name immediately followed by a last name, where neither token is
     * ambiguous between the two dictionaries and the following token (if any) has
     * {@code person_last_nam} "O".
     *
     * <p>This rule is not invoked by {@link #chunkSentence(Sentence)}; it is kept as is.
     */
    private AnnotationSet rulePersonFirstLast(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int lastNameIndex = index.getIndex("person_last_nam");
        int firstNameIndex = index.getIndex("person_first_nam");
        for (int i = 0; i + 1 < tokens.size(); i++) {
            if (hasValue(tokens.get(i), firstNameIndex, "B")
                    && hasValue(tokens.get(i), lastNameIndex, "O")
                    && hasValue(tokens.get(i + 1), lastNameIndex, "B")
                    && hasValue(tokens.get(i + 1), firstNameIndex, "O")
                    && (i + 2 == tokens.size() || hasValue(tokens.get(i + 2), lastNameIndex, "O"))) {
                chunks.addChunk(new Annotation(i, "PERSON_FIRST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 1, "PERSON_LAST_NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Matches first name + last name + "-" + last name and marks all three name tokens.
     *
     * <p>NOTE(review): the loop requires a token at {@code i + 4} and the trailing context
     * check inspects {@code i + 5}, so the token at {@code i + 4} is never examined. This is
     * preserved from the original; confirm whether {@code i + 4} was intended.
     */
    private AnnotationSet rulePersonFirstLastMaiden(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int lastNameIndex = index.getIndex("person_last_nam");
        int firstNameIndex = index.getIndex("person_first_nam");
        int personNounIndex = index.getIndex("person_noun");
        int startsLowerIndex = index.getIndex("starts_with_lower_case");
        int orthIndex = index.getIndex("orth");
        for (int i = 0; i + 4 < tokens.size(); i++) {
            if (hasValue(tokens.get(i), firstNameIndex, "B")
                    && hasValue(tokens.get(i + 1), lastNameIndex, "B")
                    && hasValue(tokens.get(i + 2), orthIndex, "-")
                    && hasValue(tokens.get(i + 3), lastNameIndex, "B")
                    && hasValue(tokens.get(i + 3), personNounIndex, "O")
                    && (i + 5 == tokens.size() || hasValue(tokens.get(i + 5), startsLowerIndex, "1"))) {
                chunks.addChunk(new Annotation(i, "PERSON_FIRST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 1, "PERSON_LAST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 3, "PERSON_LAST_NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Matches person noun + first name + last name (the last-name token must not also be a
     * first name), where the match ends the sentence or the next token has
     * {@code starts_with_upper_case} "0"; marks the first and last name.
     */
    private AnnotationSet rulePersonNounFirstLast(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int lastNameIndex = index.getIndex("person_last_nam");
        int firstNameIndex = index.getIndex("person_first_nam");
        int startsUpperIndex = index.getIndex("starts_with_upper_case");
        int personNounIndex = index.getIndex("person_noun");
        for (int i = 0; i + 2 < tokens.size(); i++) {
            if (hasValue(tokens.get(i), personNounIndex, "B")
                    && hasValue(tokens.get(i + 1), firstNameIndex, "B")
                    && hasValue(tokens.get(i + 2), lastNameIndex, "B")
                    && hasValue(tokens.get(i + 2), firstNameIndex, "O")
                    && (i + 3 == tokens.size() || hasValue(tokens.get(i + 3), startsUpperIndex, "0"))) {
                chunks.addChunk(new Annotation(i + 1, "PERSON_FIRST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 2, "PERSON_LAST_NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Matches person noun + first name + single upper-case letter + "." + last name and
     * marks the first and last name.
     */
    private AnnotationSet rulePersonNounFirstInitialLast(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int lastNameIndex = index.getIndex("person_last_nam");
        int firstNameIndex = index.getIndex("person_first_nam");
        int personNounIndex = index.getIndex("person_noun");
        int patternIndex = index.getIndex("pattern");
        int orthIndex = index.getIndex("orth");
        for (int i = 0; i + 4 < tokens.size(); i++) {
            if (hasValue(tokens.get(i), personNounIndex, "B")
                    && hasValue(tokens.get(i + 1), firstNameIndex, "B")
                    && isSingleUpperLetter(tokens.get(i + 2), patternIndex, orthIndex)
                    && hasValue(tokens.get(i + 3), orthIndex, ".")
                    && hasValue(tokens.get(i + 4), lastNameIndex, "B")) {
                chunks.addChunk(new Annotation(i + 1, "PERSON_FIRST_NAM", sentence));
                chunks.addChunk(new Annotation(i + 4, "PERSON_LAST_NAM", sentence));
            }
        }
        return chunks;
    }

    // ------------------------------------------------------------------ city rules

    /**
     * Marks a {@code city_nam} entry as CITY_NAM when it follows a token whose base is
     * "gmina", "gmin" or "miasto", the entry's first token has {@code case} "nom" and its
     * base is not itself "miasto".
     */
    private AnnotationSet ruleCityPrefix(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int baseIndex = index.getIndex("base");
        int cityIndex = index.getIndex("city_nam");
        int caseIndex = index.getIndex("case");
        for (int i = 0; i + 1 < tokens.size(); i++) {
            Token prefix = tokens.get(i);
            Token first = tokens.get(i + 1);
            boolean cityPrefix = hasValue(prefix, baseIndex, "gmina")
                    || hasValue(prefix, baseIndex, "gmin")
                    || hasValue(prefix, baseIndex, "miasto");
            if (cityPrefix
                    && hasValue(first, cityIndex, "B")
                    && hasValue(first, caseIndex, "nom")
                    && !hasValue(first, baseIndex, "miasto")) {
                int end = extendWhileInside(tokens, i + 1, cityIndex);
                chunks.addChunk(new Annotation(i + 1, end, "CITY_NAM", sentence));
            }
        }
        return chunks;
    }

    /**
     * Marks a {@code city_nam} entry as CITY_NAM when it follows two digits, "-", and three
     * digits.
     */
    private AnnotationSet ruleCityPostal(Sentence sentence) {
        AnnotationSet chunks = new AnnotationSet(sentence);
        List<Token> tokens = sentence.getTokens();
        TokenAttributeIndex index = sentence.getAttributeIndex();
        int orthIndex = index.getIndex("orth");
        int cityIndex = index.getIndex("city_nam");
        int patternIndex = index.getIndex("pattern");
        int i = 0;
        while (i + 3 < tokens.size()) {
            if (hasValue(tokens.get(i), patternIndex, "DIGITS")
                    && tokens.get(i).getAttributeValue(orthIndex).length() == 2
                    && hasValue(tokens.get(i + 1), orthIndex, "-")
                    && hasValue(tokens.get(i + 2), patternIndex, "DIGITS")
                    && tokens.get(i + 2).getAttributeValue(orthIndex).length() == 3
                    && hasValue(tokens.get(i + 3), cityIndex, "B")) {
                int end = extendWhileInside(tokens, i + 3, cityIndex);
                chunks.addChunk(new Annotation(i + 3, end, "CITY_NAM", sentence));
                i = end; // resume after the annotated city name
            }
            i++;
        }
        return chunks;
    }
}
