package liner2.features;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import liner2.features.annotations.AnnotationFeature;
import liner2.features.annotations.AnnotationFeatureClosestBase;
import liner2.features.annotations.AnnotationFeatureContextBase;
import liner2.features.annotations.AnnotationFeatureDict;
import liner2.features.annotations.AnnotationFeatureMalt;
import liner2.features.annotations.AnnotationFeatureNeFirstBase;
import liner2.features.annotations.AnnotationSentenceFeature;
import liner2.structure.Annotation;
import liner2.structure.Sentence;
import liner2.structure.Token;
import liner2.structure.TokenAttributeIndex;
import weka.core.xml.XMLDocument;

/* loaded from: input_file:liner2/features/AnnotationFeatureGenerator.class */
public class AnnotationFeatureGenerator {
    /** Features computed from a single annotation (see {@link #generate(Annotation)}). */
    private final List<AnnotationFeature> features = new ArrayList<>();

    /** Features computed from the MaltParser input built for a whole sentence. */
    private final List<AnnotationFeatureMalt> maltFeatures = new ArrayList<>();

    /** Features computed from a sentence together with its set of annotations. */
    private final List<AnnotationSentenceFeature> sentenceFeatures = new ArrayList<>();

    /** Maps the first segment of a token's {@code ctag} to the value written to CoNLL column 4. */
    private final HashMap<String, String> nkjpToCoNLLPos = getnkjpToCoNLLPos();

    // Feature-specification patterns. All of them are anchored only at the end of the
    // string ('$') and are applied with Matcher.find(), so any prefix is tolerated.
    // The numeric groups use '*', so an empty number is accepted by the pattern itself.

    /** {@code base:<signed int>} */
    private final Pattern patternBase = Pattern.compile("base:(-?[0-9]*)$");

    /** {@code dict:<group 1>:<group 2>} where neither group contains a colon. */
    private final Pattern patternDict = Pattern.compile("dict:([^:]*):([^:]*)$");

    /** {@code malt:<group 1>:<unsigned int>:(base|relation)} */
    private final Pattern patternMalt = Pattern.compile("malt:([^:]*):([0-9]*):(base|relation)$");

    /** {@code closest-base:<signed int>:<lowercase letters>} */
    private final Pattern patternClosestBase = Pattern.compile("closest-base:(-?[0-9]*):([a-z]+)$");

    /** {@code ne-first-base:<signed int>:<lowercase letters>} */
    private final Pattern patternNeFirstBase = Pattern.compile("ne-first-base:(-?[0-9]*):([a-z]+)$");

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:liner2/features/AnnotationFeatureGenerator$MaltFeatureSentence.class */
    /**
     * Immutable pair returned by {@code prepareSentenceForMaltparser}: the rows handed to
     * the malt features and the position of each annotation within those rows.
     *
     * <p>Both references are stored and returned as given; no defensive copies are made.
     */
    public final class MaltFeatureSentence {
        /** One entry per output row; each entry is a tab-joined CoNLL-style line. */
        private final String[] maltData;

        /** Zero-based index into {@link #maltData} of the row representing each annotation. */
        private final HashMap<Annotation, Integer> annotationIndices;

        /**
         * @param strArr  rows to expose through {@link #getMaltData()}
         * @param hashMap annotation-to-row-index map to expose through
         *                {@link #getAnnotationIndices()}
         */
        public MaltFeatureSentence(String[] strArr, HashMap<Annotation, Integer> hashMap) {
            maltData = strArr;
            annotationIndices = hashMap;
        }

        /** @return the row array passed to the constructor (same instance) */
        public String[] getMaltData() {
            return maltData;
        }

        /** @return the annotation-to-row-index map passed to the constructor (same instance) */
        public HashMap<Annotation, Integer> getAnnotationIndices() {
            return annotationIndices;
        }
    }

    /**
     * Builds the generator from textual feature specifications.
     *
     * <p>Each specification is tried against the patterns in a fixed order and the first
     * match wins:
     * <ol>
     *   <li>{@code base:N} adds an {@link AnnotationFeatureContextBase}</li>
     *   <li>{@code dict:A:B} adds an {@link AnnotationFeatureDict} (constructed with B, then A)</li>
     *   <li>{@code malt:A:N:(base|relation)} adds an {@link AnnotationFeatureMalt}</li>
     *   <li>{@code closest-base:N:type} adds an {@link AnnotationFeatureClosestBase}</li>
     *   <li>{@code ne-first-base:N:type} adds an {@link AnnotationFeatureNeFirstBase}</li>
     * </ol>
     * A specification that matches none of the patterns is skipped without any error.
     *
     * @param list feature specifications
     * @throws NumberFormatException if a matched numeric group is not a parsable integer
     *         (the patterns allow an empty number)
     */
    public AnnotationFeatureGenerator(List<String> list) {
        for (String spec : list) {
            Matcher m = this.patternBase.matcher(spec);
            if (m.find()) {
                int number = Integer.parseInt(m.group(1));
                this.features.add(new AnnotationFeatureContextBase(number));
                continue;
            }

            m = this.patternDict.matcher(spec);
            if (m.find()) {
                // Note the argument order: second captured group first.
                this.features.add(new AnnotationFeatureDict(m.group(2), m.group(1)));
                continue;
            }

            m = this.patternMalt.matcher(spec);
            if (m.find()) {
                int number = Integer.parseInt(m.group(2));
                this.maltFeatures.add(new AnnotationFeatureMalt(m.group(1), number, m.group(3)));
                continue;
            }

            m = this.patternClosestBase.matcher(spec);
            if (m.find()) {
                int number = Integer.parseInt(m.group(1));
                this.sentenceFeatures.add(new AnnotationFeatureClosestBase(m.group(2), number));
                continue;
            }

            m = this.patternNeFirstBase.matcher(spec);
            if (m.find()) {
                int number = Integer.parseInt(m.group(1));
                this.sentenceFeatures.add(new AnnotationFeatureNeFirstBase(m.group(2), number));
            }
            // Anything else is intentionally ignored.
        }
    }

    /**
     * Evaluates every annotation-level feature on the given annotation.
     *
     * <p>Malt and sentence-level features are not evaluated here; see
     * {@link #generate(Sentence, HashSet)}.
     *
     * @param annotation annotation to describe
     * @return one value per configured annotation-level feature, in configuration order
     */
    public List<String> generate(Annotation annotation) {
        // Typed list instead of the raw ArrayList so the declared return type is honoured
        // without an unchecked conversion.
        List<String> values = new ArrayList<>(this.features.size());
        for (AnnotationFeature feature : this.features) {
            values.add(feature.generate(annotation));
        }
        return values;
    }

    /**
     * Evaluates all malt features and all sentence-level features for a sentence.
     *
     * <p>The MaltParser input is built once (and only when at least one malt feature is
     * configured) and shared by all malt features. Results of the malt features come
     * first, followed by results of the sentence-level features.
     *
     * @param sentence sentence containing the annotations
     * @param hashSet  annotations of {@code sentence} to compute features for
     * @return one annotation-to-value map per malt/sentence feature
     */
    public List<HashMap<Annotation, String>> generate(Sentence sentence, HashSet<Annotation> hashSet) {
        // Typed list instead of the raw ArrayList so the declared return type is honoured
        // without an unchecked conversion.
        List<HashMap<Annotation, String>> results = new ArrayList<>();

        if (!this.maltFeatures.isEmpty()) {
            MaltFeatureSentence maltInput = prepareSentenceForMaltparser(sentence, hashSet);
            for (AnnotationFeatureMalt maltFeature : this.maltFeatures) {
                results.add(maltFeature.generate(maltInput.getMaltData(), maltInput.getAnnotationIndices()));
            }
        }

        for (AnnotationSentenceFeature sentenceFeature : this.sentenceFeatures) {
            results.add(sentenceFeature.generate(sentence, hashSet));
        }
        return results;
    }

    /**
     * @return the total number of configured features: annotation-level, malt and
     *         sentence-level features combined
     */
    public int getFeaturesCount() {
        int total = this.features.size();
        total += this.maltFeatures.size();
        total += this.sentenceFeatures.size();
        return total;
    }

    /**
     * Converts every token of a sentence into an 8-column CoNLL-style row.
     *
     * <p>Columns, by array index:
     * <ul>
     *   <li>0 - one-based token position</li>
     *   <li>1 - {@code orth} attribute</li>
     *   <li>2 - {@code base} attribute</li>
     *   <li>3 - first {@code ctag} segment mapped through the NKJP-to-CoNLL table;
     *       {@code null} when the segment is not in the table</li>
     *   <li>4 - first {@code ctag} segment, unmapped</li>
     *   <li>5 - remaining {@code ctag} segments joined with {@code "|"}, or {@code "_"}
     *       when there are none</li>
     *   <li>6, 7 - always {@code "_"}</li>
     * </ul>
     *
     * @param sentence sentence whose tokens carry {@code orth}, {@code base} and
     *                 {@code ctag} attributes
     * @return one row per token, in token order
     */
    public List<String[]> convertToCoNLL(Sentence sentence) {
        List<String[]> rows = new ArrayList<>();
        ListIterator<Token> tokens = sentence.getTokens().listIterator();
        TokenAttributeIndex attributeIndex = sentence.getAttributeIndex();
        while (tokens.hasNext()) {
            String[] row = new String[8];
            // nextIndex() must be read before next() advances the iterator.
            row[0] = String.valueOf(tokens.nextIndex() + 1);
            Token token = tokens.next();
            row[1] = token.getAttributeValue(attributeIndex.getIndex("orth"));
            row[2] = token.getAttributeValue(attributeIndex.getIndex("base"));

            List<String> ctagParts = Arrays.asList(token.getAttributeValue(attributeIndex.getIndex("ctag")).split(":"));
            String posTag = ctagParts.get(0);
            // NOTE(review): an unmapped tag leaves this column null, which is later
            // rendered as the text "null" when the row is joined - confirm this is intended.
            row[3] = this.nkjpToCoNLLPos.get(posTag);
            row[4] = posTag;

            // The separator is written as a literal: the previous reference to Weka's
            // XMLDocument.DTD_SEPARATOR has the same value ("|") but is unrelated to CoNLL.
            String feats = join(ctagParts.subList(1, ctagParts.size()), "|");
            row[5] = feats.isEmpty() ? "_" : feats;
            row[6] = "_";
            row[7] = "_";
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the MaltParser input for a sentence, collapsing every multi-token annotation
     * into a single row.
     *
     * <p>Tokens are first converted with {@link #convertToCoNLL(Sentence)}. The sentence is
     * then scanned left to right:
     * <ul>
     *   <li>a token where no annotation begins is copied as is;</li>
     *   <li>an annotation covering exactly one token keeps that token's row;</li>
     *   <li>an annotation covering several tokens is replaced by one row: the row of its
     *       first token tagged {@code subst} (or of its first token when none is), with the
     *       form and base columns overwritten by the annotation's text and base text. The
     *       scan then continues after the annotation's last token.</li>
     * </ul>
     * Rows are renumbered consecutively from 1 and joined with tabs.
     *
     * <p>Annotations are looked up by their begin index only, so when several annotations
     * begin at the same token just one of them is recorded, and an annotation beginning
     * inside an already collapsed span is not recorded at all.
     *
     * @param sentence sentence to convert
     * @param hashSet  annotations of {@code sentence}
     * @return the tab-joined rows and, for every recorded annotation, the zero-based index
     *         of its row
     */
    public MaltFeatureSentence prepareSentenceForMaltparser(Sentence sentence, HashSet<Annotation> hashSet) {
        List<String[]> conllRows = convertToCoNLL(sentence);

        HashMap<Integer, Annotation> annotationsByBegin = new HashMap<>();
        for (Annotation annotation : hashSet) {
            annotationsByBegin.put(annotation.getBegin(), annotation);
        }

        HashMap<Annotation, Integer> annotationIndices = new HashMap<>();
        List<String[]> maltRows = new ArrayList<>();

        int tokenIndex = 0;
        while (tokenIndex < conllRows.size()) {
            Annotation annotation = annotationsByBegin.get(tokenIndex);
            String[] row;
            if (annotation == null || annotation.getEnd() == tokenIndex) {
                // Plain token or single-token annotation: the token's own row is used.
                row = conllRows.get(tokenIndex);
            } else {
                // Multi-token annotation: take POS/features from its first "subst" token.
                // The previous code assigned the start index here, which made the search
                // a no-op and always selected the first token.
                int headIndex = tokenIndex;
                for (int annotationToken : annotation.getTokens()) {
                    if ("subst".equals(conllRows.get(annotationToken)[4])) {
                        headIndex = annotationToken;
                        break;
                    }
                }
                row = conllRows.get(headIndex);
                row[1] = annotation.getText();
                row[2] = annotation.getBaseText();
                // Skip the remaining tokens of the annotation (end index is inclusive).
                tokenIndex = annotation.getEnd();
            }

            // Exactly one row is emitted per iteration, so the one-based id is size + 1.
            row[0] = String.valueOf(maltRows.size() + 1);
            maltRows.add(row);
            if (annotation != null) {
                annotationIndices.put(annotation, maltRows.size() - 1);
            }
            tokenIndex++;
        }

        String[] maltData = new String[maltRows.size()];
        for (int rowIndex = 0; rowIndex < maltData.length; rowIndex++) {
            maltData[rowIndex] = join(Arrays.asList(maltRows.get(rowIndex)), "\t");
        }
        return new MaltFeatureSentence(maltData, annotationIndices);
    }

    /**
     * Concatenates the elements of a list, placing a separator between consecutive
     * elements.
     *
     * <p>A {@code null} element, and likewise a {@code null} separator when it is
     * actually used, is appended as the text {@code "null"}.
     *
     * @param list elements to concatenate, in iteration order
     * @param str  separator inserted between elements
     * @return the joined text; an empty string for an empty list
     */
    public String join(List<String> list, String str) {
        StringBuilder joined = new StringBuilder();
        Iterator<String> parts = list.iterator();
        if (parts.hasNext()) {
            joined.append(parts.next());
            while (parts.hasNext()) {
                joined.append(str);
                joined.append(parts.next());
            }
        }
        return joined.toString();
    }

    /**
     * Creates the table that maps NKJP grammatical class tags to the coarse part-of-speech
     * values used in the CoNLL rows.
     *
     * <p>Several verb-, noun-, adjective- and adverb-like classes are folded into
     * {@code verb}, {@code subst}, {@code adj} and {@code adv}; the remaining classes map
     * to themselves.
     *
     * @return a new, mutable map with 36 entries; every call returns a fresh instance
     */
    public static HashMap<String, String> getnkjpToCoNLLPos() {
        HashMap<String, String> mapping = new HashMap<>();

        // Each row: the coarse tag first, followed by every NKJP class folded into it.
        String[][] groups = {
            {"verb", "bedzie", "fin", "imps", "impt", "inf", "praet", "pred", "winien"},
            {"subst", "subst", "depr", "ger", "ppron12", "ppron3", "siebie"},
            {"adj", "adj", "adja", "adjc", "adjp", "pact", "ppas"},
            {"adv", "adv", "pant", "pcon"},
        };
        for (String[] group : groups) {
            for (int member = 1; member < group.length; member++) {
                mapping.put(group[member], group[0]);
            }
        }

        // Classes that keep their own name.
        String[] unchanged = {
            "aglt", "brev", "burk", "comp", "conj", "ign", "interj",
            "interp", "num", "numcol", "prep", "qub", "xxx",
        };
        for (String tag : unchanged) {
            mapping.put(tag, tag);
        }
        return mapping;
    }
}
