package com.kotlinnlp.tokensencoder.morpho;

import com.kotlinnlp.linguisticdescription.lexicon.LexiconDictionary;
import com.kotlinnlp.linguisticdescription.morphology.Morphology;
import com.kotlinnlp.morphologicalanalyzer.MorphologicalAnalysis;
import com.kotlinnlp.morphologicalanalyzer.MorphologicalAnalyzer;
import com.kotlinnlp.morphologicalanalyzer.MultiWordsMorphology;
import com.kotlinnlp.morphologicalanalyzer.dictionary.MorphologyEntry;
import com.kotlinnlp.neuraltokenizer.Token;
import com.kotlinnlp.tokensencoder.morpho.extractors.MorphoFeaturesExtractor;
import com.kotlinnlp.tokensencoder.morpho.extractors.MorphoFeaturesExtractorBuilder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.collections.CollectionsKt;
import kotlin.collections.IntIterator;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: FeaturesExtractor.kt */
@Metadata(mv = {1, 1, 10}, bv = {1, 0, 2}, k = 1, d1 = {"��N\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010 \n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0002\n��\n\u0002\u0010#\n��\n\u0002\u0010\"\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\u0018��2\u00020\u0001B-\u0012\f\u0010\u0002\u001a\b\u0012\u0004\u0012\u00020\u00040\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0006\u0012\u0006\u0010\u0007\u001a\u00020\b\u0012\b\u0010\t\u001a\u0004\u0018\u00010\n¢\u0006\u0002\u0010\u000bJ\u001c\u0010\u0010\u001a\u00020\u00112\u0012\u0010\u0012\u001a\u000e\u0012\n\u0012\b\u0012\u0004\u0012\u00020\b0\u00130\u0003H\u0002J\u0012\u0010\u0014\u001a\u000e\u0012\n\u0012\b\u0012\u0004\u0012\u00020\b0\u00150\u0003J\u0012\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\b0\u0003*\u00020\u0017H\u0002J\u0018\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u000f0\u0003*\b\u0012\u0004\u0012\u00020\u00040\u0003H\u0002R\u000e\u0010\f\u001a\u00020\rX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u0004¢\u0006\u0002\n��R\u0010\u0010\t\u001a\u0004\u0018\u00010\nX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u000f0\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0002\u001a\b\u0012\u0004\u0012\u00020\u00040\u0003X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u0019"}, d2 = {"Lcom/kotlinnlp/tokensencoder/morpho/FeaturesExtractor;", "", "tokens", "", "Lcom/kotlinnlp/neuralparser/language/Token;", "analyzer", "Lcom/kotlinnlp/morphologicalanalyzer/MorphologicalAnalyzer;", "langCode", "", "lexicalDictionary", "Lcom/kotlinnlp/linguisticdescription/lexicon/LexiconDictionary;", "(Ljava/util/List;Lcom/kotlinnlp/morphologicalanalyzer/MorphologicalAnalyzer;Ljava/lang/String;Lcom/kotlinnlp/linguisticdescription/lexicon/LexiconDictionary;)V", "analysis", 
"Lcom/kotlinnlp/morphologicalanalyzer/MorphologicalAnalysis;", "tkTokens", "Lcom/kotlinnlp/neuraltokenizer/Token;", "addMWEFeatures", "", "tokensFeatures", "", "extractFeatures", "", "toFeatures", "Lcom/kotlinnlp/morphologicalanalyzer/dictionary/MorphologyEntry;", "toTKTokens", "tokensencoder"})
/* loaded from: input_file:com/kotlinnlp/tokensencoder/morpho/FeaturesExtractor.class */
/**
 * Extracts string-encoded morphological (and, optionally, lexical) features for a list of tokens.
 *
 * <p>On construction the input tokens are converted to tokenizer tokens and passed to the given
 * {@link MorphologicalAnalyzer}; {@link #extractFeatures()} then turns the resulting analysis into
 * one set of feature strings per token.
 */
public final class FeaturesExtractor {
    /** Feature emitted for a token whose list of morphologies in the analysis is {@code null}. */
    private static final String NO_MORPHOLOGY_FEATURE = "i:0 _";

    /** Tokenizer-level view of {@link #tokens}, built once in the constructor. */
    private final List<Token> tkTokens;

    /** Result of running {@link #analyzer} on {@link #tkTokens}. */
    private final MorphologicalAnalysis analysis;

    private final List<com.kotlinnlp.neuralparser.language.Token> tokens;
    private final MorphologicalAnalyzer analyzer;
    private final String langCode;

    /** Optional dictionary used to add regency / subcategorization features; may be {@code null}. */
    @Nullable
    private final LexiconDictionary lexicalDictionary;

    /**
     * Creates the extractor and immediately runs the morphological analysis.
     *
     * @param tokens the tokens to extract features for
     * @param analyzer the analyzer that produces the morphological analysis
     * @param langCode the language code forwarded to the analyzer
     * @param lexicalDictionary an optional lexicon; when {@code null} no lexical features are added
     * @throws IllegalArgumentException presumably, when a {@code @NotNull} argument is {@code null}
     *     (raised by Kotlin's {@code Intrinsics.checkParameterIsNotNull} - confirm against the
     *     Kotlin runtime version in use)
     */
    public FeaturesExtractor(@NotNull List<com.kotlinnlp.neuralparser.language.Token> tokens, @NotNull MorphologicalAnalyzer analyzer, @NotNull String langCode, @Nullable LexiconDictionary lexicalDictionary) {
        Intrinsics.checkParameterIsNotNull(tokens, "tokens");
        Intrinsics.checkParameterIsNotNull(analyzer, "analyzer");
        Intrinsics.checkParameterIsNotNull(langCode, "langCode");
        this.tokens = tokens;
        this.analyzer = analyzer;
        this.langCode = langCode;
        this.lexicalDictionary = lexicalDictionary;
        this.tkTokens = toTKTokens(this.tokens);
        // The bitmask 31 asks joinToString to use its defaults for separator, prefix, postfix,
        // limit and truncated; only the transform (token form) is supplied explicitly.
        // NOTE(review): Kotlin's default separator is ", ", so the text handed to the analyzer is
        // the forms joined by ", " - confirm this is what the analyzer expects.
        String text = CollectionsKt.joinToString$default(this.tkTokens, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, new Function1<Token, String>() { // from class: com.kotlinnlp.tokensencoder.morpho.FeaturesExtractor$analysis$1
            @NotNull
            public final String invoke(@NotNull Token token) {
                Intrinsics.checkParameterIsNotNull(token, "it");
                return token.getForm();
            }
        }, 31, (Object) null);
        this.analysis = this.analyzer.analyze(text, this.tkTokens, this.langCode);
    }

    /**
     * Builds the feature sets of the analyzed tokens.
     *
     * <p>Analysis entries and tokenizer tokens are paired by position, up to the length of the
     * shorter list; positions whose token reports {@code isSpace()} are skipped. Each remaining
     * position yields one insertion-ordered set: the single feature {@code "i:0 _"} when the
     * analysis holds {@code null} for it, otherwise the union of the features of all its
     * morphology entries. Features of multi-word expressions are then added on top.
     *
     * @return a new mutable list with one feature set per non-space token
     */
    @NotNull
    public final List<Set<String>> extractFeatures() {
        List<List<MorphologyEntry>> analyzedTokens = this.analysis.getTokens();
        // Same pairing as a zip: stop at the end of the shorter list.
        int pairedCount = Math.min(analyzedTokens.size(), this.tkTokens.size());
        List<Set<String>> tokensFeatures = new ArrayList<>();
        for (int position = 0; position < pairedCount; position++) {
            if (this.tkTokens.get(position).isSpace()) {
                continue;
            }
            List<MorphologyEntry> morphologies = analyzedTokens.get(position);
            Set<String> features = new LinkedHashSet<>();
            if (morphologies == null) {
                features.add(NO_MORPHOLOGY_FEATURE);
            } else {
                for (MorphologyEntry entry : morphologies) {
                    features.addAll(toFeatures(entry));
                }
            }
            tokensFeatures.add(features);
        }
        addMWEFeatures(tokensFeatures);
        return tokensFeatures;
    }

    /**
     * Adds the features of every multi-word expression to each token set it covers.
     *
     * <p>NOTE(review): start/end token indices are used directly as positions in
     * {@code tokensFeatures}, which omits space tokens; this presumably relies on no token being
     * flagged as space - confirm.
     *
     * @param tokensFeatures the per-token feature sets, modified in place
     */
    private void addMWEFeatures(List<? extends Set<String>> tokensFeatures) {
        for (MultiWordsMorphology multiWord : this.analysis.getMultiWords()) {
            int first = multiWord.getStartToken();
            int last = multiWord.getEndToken();
            if (first > last) {
                // Empty range: nothing to update, so do not compute the features at all.
                continue;
            }
            // The features depend only on the multi-word, not on the token index: build them once.
            Set<String> multiWordFeatures = new LinkedHashSet<>();
            for (MorphologyEntry entry : multiWord.getMorphologies()) {
                multiWordFeatures.addAll(toFeatures(entry));
            }
            for (int position = first; position <= last; position++) {
                tokensFeatures.get(position).addAll(multiWordFeatures);
            }
        }
    }

    /**
     * Converts a morphology entry into feature strings, each prefixed with {@code i:<n>} where
     * {@code n} is the zero-based position of the originating morphology within the entry.
     *
     * @param morphologyEntry the entry to convert
     * @return a new list with the features of all the morphologies of the entry, in order
     */
    private List<String> toFeatures(@NotNull MorphologyEntry morphologyEntry) {
        List<String> features = new ArrayList<>();
        int index = 0;
        for (Morphology morphology : morphologyEntry.getList()) {
            addMorphologicalFeatures(features, index, morphology);
            addLexicalFeatures(features, index, morphology);
            index++;
        }
        return features;
    }

    /**
     * Appends the features given by the morphology's features extractor, or the fallback
     * {@code "i:<index> p:<type>"} when no extractor (or no feature list) is available.
     */
    private static void addMorphologicalFeatures(List<String> features, int index, Morphology morphology) {
        MorphoFeaturesExtractor extractor = MorphoFeaturesExtractorBuilder.INSTANCE.invoke(morphology);
        List<String> extracted = extractor == null ? null : extractor.get();
        if (extracted == null) {
            features.add(String.format("i:%d p:%s", index, morphology.getType()));
            return;
        }
        for (String feature : extracted) {
            features.add("i:" + index + ' ' + feature);
        }
    }

    /**
     * Appends regency ({@code r:}) and subcategorization ({@code s:}) features looked up in the
     * lexical dictionary by lemma and base annotation of the morphology type. Does nothing when
     * there is no dictionary, no matching entry or no syntactic information.
     */
    private void addLexicalFeatures(List<String> features, int index, Morphology morphology) {
        if (this.lexicalDictionary == null) {
            return;
        }
        LexiconDictionary.LexicalEntry lexicalEntry = this.lexicalDictionary.get(morphology.getLemma(), morphology.getType().getBaseAnnotation());
        LexiconDictionary.SyntacticInfo syntax = lexicalEntry == null ? null : lexicalEntry.getSyntax();
        if (syntax == null) {
            return;
        }
        addSyntacticFeatures(features, "i:%d p:%s r:%s", index, morphology, syntax.getRegencies());
        addSyntacticFeatures(features, "i:%d p:%s s:%s", index, morphology, syntax.getSubcategorization());
    }

    /**
     * Appends one feature per value, formatted with the given pattern from the index, the
     * morphology type and the value. A {@code null} list of values adds nothing.
     */
    private static void addSyntacticFeatures(List<String> features, String pattern, int index, Morphology morphology, @Nullable List<String> values) {
        if (values == null) {
            return;
        }
        for (String value : values) {
            features.add(String.format(pattern, index, morphology.getType(), value));
        }
    }

    /**
     * Converts parser tokens into tokenizer tokens built from each token's id and word; the
     * remaining constructor arguments are always {@code 0, 0, false}.
     *
     * @param tokens the parser tokens
     * @return a new list with one tokenizer token per input token, in the same order
     */
    private static List<Token> toTKTokens(@NotNull List<com.kotlinnlp.neuralparser.language.Token> tokens) {
        List<Token> converted = new ArrayList<>(tokens.size());
        for (com.kotlinnlp.neuralparser.language.Token token : tokens) {
            converted.add(new Token(token.getId(), token.getWord(), 0, 0, false));
        }
        return converted;
    }
}
