package com.code972.elasticsearch.analysis;

import com.code972.hebmorph.LookupTolerators;
import com.code972.hebmorph.MorphData;
import com.code972.hebmorph.Tokenizer;
import com.code972.hebmorph.datastructures.DictRadix;
import com.code972.hebmorph.hspell.LingInfo;
import com.code972.hebmorph.hspell.Loader;
import com.code972.hebmorph.lemmafilters.BasicLemmaFilter;
import com.code972.hebmorph.lemmafilters.LemmaFilterBase;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;

/* loaded from: input_file:com/code972/elasticsearch/analysis/HebrewAnalyzer.class */
/**
 * Common base for the Hebrew analyzers: owns the shared, statically loaded
 * dictionaries and offers static helpers for classifying a single word.
 *
 * <p>All dictionaries are loaded once, in the static initializer, through the
 * current thread's context class loader. Loading is best-effort: a failure is
 * logged and the corresponding static field is left {@code null}.
 */
public abstract class HebrewAnalyzer extends Analyzer {
    // Declared first so that it is initialized before the static block below uses it.
    private static final Logger LOGGER = Logger.getLogger(HebrewAnalyzer.class.getName());

    /** Main dictionary, loaded from {@code hspell-data-files/}; {@code null} if loading failed. */
    protected static DictRadix<MorphData> dictRadix;

    /** Optional custom dictionary; {@code null} until {@link #setCustomWords(InputStream)} provides one. */
    protected static DictRadix<MorphData> customWords;

    // Description-flag presets. NOTE(review): not referenced by any code visible in
    // this class; kept because other parts of the build may rely on them.
    private static final Integer[] descFlags_noun = {69};
    private static final Integer[] descFlags_person_name = {262145};
    private static final Integer[] descFlags_place_name = {262153};
    private static final Integer[] descFlags_empty = {0};

    /**
     * Entries read from {@code special-tokenization-cases.txt}; {@code null} when
     * that resource is missing or could not be read.
     */
    protected static DictRadix<Byte> SPECIAL_TOKENIZATION_CASES;

    protected static final Version matchVersion = Version.LUCENE_48;

    /** Maps a prefix string to its prefix mask. */
    protected static final DictRadix<Integer> prefixesTree = LingInfo.buildPrefixTree(false);

    /** Placeholder value stored with every {@link #SPECIAL_TOKENIZATION_CASES} entry. */
    private static final Byte dummyData = (byte) 0;

    // NOTE(review): presumably the marker appended to original (non-lemmatized)
    // terms by subclasses; the usage is not visible here - confirm.
    protected final char originalTermSuffix = '$';

    protected CharArraySet commonWords = null;
    protected final LemmaFilterBase lemmaFilter = new BasicLemmaFilter();

    static {
        ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();

        try {
            dictRadix = new Loader(contextClassLoader, "hspell-data-files/", true).loadDictionaryFromHSpellData();
        } catch (IOException e) {
            // Best-effort, as before, but no longer silent: without the dictionary
            // dictRadix stays null and isRecognizedWord cannot work.
            LOGGER.log(Level.SEVERE, "Failed to load the Hebrew dictionary from hspell-data-files/", e);
        }

        // try-with-resources tolerates a null resource, so a missing file is simply skipped.
        try (InputStream specialCases = contextClassLoader.getResourceAsStream("special-tokenization-cases.txt")) {
            if (specialCases != null) {
                CharArraySet snowballWordSet = WordlistLoader.getSnowballWordSet(
                        IOUtils.getDecodingReader(specialCases, StandardCharsets.UTF_8), matchVersion);
                DictRadix<Byte> radix = new DictRadix<>(false);
                for (Object entry : snowballWordSet) {
                    // CharArraySet iterates over its entries as char[]
                    radix.addNode((char[]) entry, dummyData);
                }
                SPECIAL_TOKENIZATION_CASES = radix;
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to load special-tokenization-cases.txt", e);
        }

        try (InputStream customWordsStream = contextClassLoader.getResourceAsStream("custom-words.txt")) {
            // Only hand an existing resource to the loader; customWords stays null otherwise.
            if (customWordsStream != null) {
                setCustomWords(customWordsStream);
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to load custom-words.txt", e);
        }
    }

    /** Classification produced by {@link #isRecognizedWord(String, boolean)}. */
    public enum WordType {
        /** Found in the main dictionary (also after dropping a trailing apostrophe). */
        HEBREW,
        /** A known prefix followed by a main-dictionary word that accepts that prefix. */
        HEBREW_WITH_PREFIX,
        /** Found in the main dictionary only through tolerant lookup. */
        HEBREW_TOLERATED,
        /** A known prefix followed by a word found through tolerant lookup. */
        HEBREW_TOLERATED_WITH_PREFIX,
        /** Contains no Hebrew letter. */
        NON_HEBREW,
        /** Contains a Hebrew letter but matched none of the lookups. */
        UNRECOGNIZED,
        /** Found in the custom dictionary. */
        CUSTOM,
        /** A known prefix followed by a custom-dictionary word that accepts that prefix. */
        CUSTOM_WITH_PREFIX
    }

    /**
     * Replaces the custom dictionary with the words read from the given stream.
     *
     * @param inputStream source of the custom words, handed to
     *                    {@link Loader#loadCustomWords} together with the main dictionary
     * @throws IOException if the loader fails to read the stream
     */
    public static void setCustomWords(InputStream inputStream) throws IOException {
        customWords = Loader.loadCustomWords(inputStream, dictRadix);
    }

    /**
     * Tells whether the given text contains at least one Hebrew letter.
     *
     * @param charSequence text to inspect
     * @return {@code true} as soon as one character is a Hebrew letter according to
     *         {@link Tokenizer#isHebrewLetter}; {@code false} otherwise, including
     *         for empty input
     */
    public static boolean isHebrewWord(CharSequence charSequence) {
        for (int i = 0; i < charSequence.length(); i++) {
            if (Tokenizer.isHebrewLetter(charSequence.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Classifies a word against the custom and main dictionaries.
     *
     * <p>Checks run in this order and the first match wins:
     * <ol>
     *   <li>custom dictionary, exact and then prefixed (only when custom words are loaded;
     *       note this happens before the Hebrew-letter check);</li>
     *   <li>no Hebrew letter at all: {@link WordType#NON_HEBREW};</li>
     *   <li>main dictionary, exact, then without a trailing apostrophe, then prefixed;</li>
     *   <li>when {@code z} is set and the word is at most 20 characters long:
     *       tolerant lookup, exact and then prefixed.</li>
     * </ol>
     *
     * @param str the word to classify
     * @param z   whether to fall back to tolerant lookups when the exact ones fail
     * @return the classification; {@link WordType#UNRECOGNIZED} when nothing matched
     */
    public static WordType isRecognizedWord(String str, boolean z) {
        if (customWords != null) {
            if (existsIn(customWords, str)) {
                return WordType.CUSTOM;
            }
            if (matchesWithPrefix(customWords, str)) {
                return WordType.CUSTOM_WITH_PREFIX;
            }
        }

        if (!isHebrewWord(str)) {
            return WordType.NON_HEBREW;
        }

        if (existsIn(dictRadix, str)) {
            return WordType.HEBREW;
        }
        if (str.endsWith("'") && existsIn(dictRadix, str.substring(0, str.length() - 1))) {
            return WordType.HEBREW;
        }
        if (matchesWithPrefix(dictRadix, str)) {
            return WordType.HEBREW_WITH_PREFIX;
        }

        if (z) {
            // Tolerant lookups are skipped for long tokens.
            if (str.length() > 20) {
                return WordType.UNRECOGNIZED;
            }
            List<?> tolerated = dictRadix.lookupTolerant(str, LookupTolerators.TolerateEmKryiaAll);
            if (tolerated != null && !tolerated.isEmpty()) {
                return WordType.HEBREW_TOLERATED;
            }
            if (matchesTolerantWithPrefix(str)) {
                return WordType.HEBREW_TOLERATED_WITH_PREFIX;
            }
        }
        return WordType.UNRECOGNIZED;
    }

    /** Exact lookup; an {@link IllegalArgumentException} from the dictionary counts as "not found". */
    private static boolean existsIn(DictRadix<MorphData> dictionary, String word) {
        try {
            return dictionary.lookup(word) != null;
        } catch (IllegalArgumentException ignored) {
            return false;
        }
    }

    /**
     * Tries every split of {@code word} into a non-empty prefix and a non-empty
     * remainder, looking for a known prefix followed by a dictionary entry that
     * accepts it.
     */
    private static boolean matchesWithPrefix(DictRadix<MorphData> dictionary, String word) {
        // An int index (the original used a byte) so that tokens longer than 127
        // characters cannot wrap the prefix length into a negative substring bound.
        for (int prefixLength = 1; prefixLength < word.length(); prefixLength++) {
            try {
                Integer prefixMask = prefixesTree.lookup(word.substring(0, prefixLength));
                if (prefixMask == null) {
                    continue; // nothing to test against; avoids unboxing null
                }
                MorphData morphData;
                try {
                    morphData = dictionary.lookup(word.substring(prefixLength));
                } catch (IllegalArgumentException ignored) {
                    morphData = null;
                }
                if (morphData != null
                        && (morphData.getPrefixes() & prefixMask) > 0
                        && anyLemmaAllowsPrefix(morphData, prefixMask)) {
                    return true;
                }
            } catch (IllegalArgumentException ignored) {
                // An IllegalArgumentException for this split is treated as "no match";
                // keep trying the longer prefixes, as the original loop did.
            }
        }
        return false;
    }

    /** Same split search as {@link #matchesWithPrefix}, using tolerant lookup in the main dictionary. */
    private static boolean matchesTolerantWithPrefix(String word) {
        for (int prefixLength = 1; prefixLength < word.length(); prefixLength++) {
            try {
                Integer prefixMask = prefixesTree.lookup(word.substring(0, prefixLength));
                if (prefixMask == null) {
                    continue;
                }
                List<DictRadix.LookupResult> candidates =
                        dictRadix.lookupTolerant(word.substring(prefixLength), LookupTolerators.TolerateEmKryiaAll);
                if (candidates == null) {
                    continue;
                }
                for (DictRadix.LookupResult candidate : candidates) {
                    if (anyLemmaAllowsPrefix((MorphData) candidate.getData(), prefixMask)) {
                        return true;
                    }
                }
            } catch (IllegalArgumentException ignored) {
                // Treated as "no match" for this split; continue with the next one.
            }
        }
        return false;
    }

    /** True when at least one lemma's description flags, mapped through {@code DMask2ps}, overlap the mask. */
    private static boolean anyLemmaAllowsPrefix(MorphData morphData, int prefixMask) {
        int lemmaCount = morphData.getLemmas().length;
        for (int i = 0; i < lemmaCount; i++) {
            if ((LingInfo.DMask2ps(morphData.getDescFlags()[i]) & prefixMask) > 0) {
                return true;
            }
        }
        return false;
    }
}
