package hivemall.nlp.tokenizer;

import hivemall.annotations.VisibleForTesting;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.ArrayUtils;
import hivemall.utils.lang.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hive UDF {@code stoptags_exclude(array<string> excludeTags [, const string lang='ja'])}.
 *
 * <p>Returns the built-in list of part-of-speech stop tags ({@link #STOPTAGS_JA}) with the
 * given tags, and every sub-tag below them in the '-' separated hierarchy, removed. Only the
 * language code {@code "ja"} (case-insensitive) is accepted.
 */
@UDFType(deterministic = true, stateful = false)
@Description(name = "stoptags_exclude", value = "_FUNC_(array<string> excludeTags, [, const string lang='ja']) - Returns stoptags excluding given tags", extended = "SELECT stoptags_exclude(array('名詞-固有名詞', '形容詞'))")
/* loaded from: input_file:hivemall/nlp/tokenizer/StoptagsExcludeUDF.class */
public final class StoptagsExcludeUDF extends GenericUDF {
    /**
     * Japanese part-of-speech stop tags. The array is sorted in natural {@link String} order by
     * the static initializer below, because {@link #getStoptags} looks tags up with
     * {@link Arrays#binarySearch(Object[], Object)}.
     */
    static final String[] STOPTAGS_JA = {"名詞", "名詞-一般", "名詞-固有名詞", "名詞-固有名詞-一般", "名詞-固有名詞-人名", "名詞-固有名詞-人名-一般", "名詞-固有名詞-人名-姓", "名詞-固有名詞-人名-名", "名詞-固有名詞-組織", "名詞-固有名詞-地域", "名詞-固有名詞-地域-一般", "名詞-固有名詞-地域-国", "名詞-代名詞", "名詞-代名詞-一般", "名詞-代名詞-縮約", "名詞-副詞可能", "名詞-サ変接続", "名詞-形容動詞語幹", "名詞-数", "名詞-非自立", "名詞-非自立-一般", "名詞-非自立-副詞可能", "名詞-非自立-助動詞語幹", "名詞-非自立-形容動詞語幹", "名詞-特殊", "名詞-特殊-助動詞語幹", "名詞-接尾", "名詞-接尾-一般", "名詞-接尾-人名", "名詞-接尾-地域", "名詞-接尾-サ変接続", "名詞-接尾-助動詞語幹", "名詞-接尾-形容動詞語幹", "名詞-接尾-副詞可能", "名詞-接尾-助数詞", "名詞-接尾-特殊", "名詞-接続詞的", "名詞-動詞非自立的", "名詞-引用文字列", "名詞-ナイ形容詞語幹", "接頭詞", "接頭詞-名詞接続", "接頭詞-動詞接続", "接頭詞-形容詞接続", "接頭詞-数接", "動詞", "動詞-自立", "動詞-非自立", "動詞-接尾", "形容詞", "形容詞-自立", "形容詞-非自立", "形容詞-接尾", "副詞", "副詞-一般", "副詞-助詞類接続", "連体詞", "接続詞", "助詞", "助詞-格助詞", "助詞-格助詞-一般", "助詞-格助詞-引用", "助詞-格助詞-連語", "助詞-接続助詞", "助詞-係助詞", "助詞-副助詞", "助詞-間投助詞", "助詞-並立助詞", "助詞-終助詞", "助詞-副助詞／並立助詞／終助詞", "助詞-連体化", "助詞-副詞化", "助詞-特殊", "助動詞", "感動詞", "記号", "記号-一般", "記号-読点", "記号-句点", "記号-空白", "記号-括弧開", "記号-括弧閉", "記号-アルファベット", "その他", "その他-間投", "フィラー", "非言語音", "語断片", "未知語"};

    static {
        // Arrays.binarySearch in getStoptags is only defined for a sorted array.
        Arrays.sort(STOPTAGS_JA);
    }

    /** Inspector for the first argument (the array of tags to exclude); set in initialize. */
    private ListObjectInspector tagsOI;

    /** Stop tag list for the selected language; set in initialize. */
    private String[] stopTags;

    /**
     * Result computed once in initialize when the first argument is a constant; {@code null}
     * when the result has to be computed per call.
     */
    @Nullable
    private List<String> result;

    /**
     * Validates the argument inspectors and, when the tag array is a constant, computes the
     * result up front.
     *
     * @param objectInspectorArr inspectors for one or two arguments: an {@code array<string>}
     *        and an optional constant string language code
     * @return a standard list inspector over Java strings
     * @throws UDFArgumentException if the argument count or types are wrong, or the language
     *         is not {@code "ja"}
     */
    public ObjectInspector initialize(ObjectInspector[] objectInspectorArr) throws UDFArgumentException {
        final int numArgs = objectInspectorArr.length;
        if (numArgs != 1 && numArgs != 2) {
            throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) takes one or two arguments: " + numArgs);
        }

        final ObjectInspector tagsArg = objectInspectorArr[0];
        if (!HiveUtils.isStringListOI(tagsArg)) {
            throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) expects array<string> for the first argument : " + tagsArg.getTypeName());
        }
        this.tagsOI = HiveUtils.asListOI(tagsArg);

        if (numArgs == 2) {
            final ObjectInspector langArg = objectInspectorArr[1];
            if (!HiveUtils.isConstString(langArg)) {
                throw new UDFArgumentException("stoptags_exclude(array<string> tags, [, const string lang='ja']) expects const string for the second argument: " + langArg.getTypeName());
            }
            final String lang = HiveUtils.getConstString(langArg);
            if (!"ja".equalsIgnoreCase(lang)) {
                throw new UDFArgumentException("Unsupported lang: " + lang);
            }
        }
        this.stopTags = STOPTAGS_JA;

        if (ObjectInspectorUtils.isConstantObjectInspector(this.tagsOI)) {
            final String[] constExcludeTags = HiveUtils.getConstStringArray(this.tagsOI);
            // NOTE(review): defensive guard — assumes the helper may return null for a NULL
            // constant; confirm against HiveUtils. When null, the per-call path handles it.
            if (constExcludeTags != null) {
                this.result = getStoptags(this.stopTags, constExcludeTags);
            }
        }
        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    /* renamed from: evaluate, reason: merged with bridge method [inline-methods] */
    /**
     * Computes the stop tags remaining after exclusion for one call.
     *
     * @param deferredObjectArr call arguments; only the first (the tag array) is read
     * @return the precomputed result if one was built in initialize; otherwise the list built
     *         from the full stop tag array when the argument converts to {@code null}, or the
     *         filtered list
     * @throws HiveException declared for the argument conversion helper
     */
    public List<String> m238evaluate(GenericUDF.DeferredObject[] deferredObjectArr) throws HiveException {
        if (this.result != null) {
            return this.result;
        }
        Objects.requireNonNull(this.stopTags);

        final String[] excludeTags = HiveUtils.asStringArray(deferredObjectArr[0], this.tagsOI);
        if (excludeTags == null) {
            // Nothing to exclude: hand back the complete list.
            return ArrayUtils.asKryoSerializableList(this.stopTags);
        }
        return getStoptags(this.stopTags, excludeTags);
    }

    /**
     * Entry point under the original method name. The decompiler renamed the implementation to
     * {@link #m238evaluate}; this delegate restores the {@code evaluate(DeferredObject[])}
     * method that {@code GenericUDF} declares, while keeping the renamed method available.
     *
     * @param arguments call arguments, passed through unchanged
     * @return the list produced by {@link #m238evaluate}
     * @throws HiveException propagated from {@link #m238evaluate}
     */
    public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
        return m238evaluate(arguments);
    }

    /**
     * Returns the entries of {@code strArr} that are neither equal to an exclude tag nor a
     * sub-tag of one (an entry of the form {@code tag + "-" + ...}).
     *
     * <p>An exclude tag that is not itself present in {@code strArr} has no effect, and
     * {@code null} exclude tags are skipped.
     *
     * @param strArr stop tags; must be sorted in natural order because it is binary-searched
     * @param strArr2 tags to exclude
     * @return a new list of the surviving tags, in the order of {@code strArr}
     */
    @VisibleForTesting
    @Nonnull
    static List<String> getStoptags(@Nonnull String[] strArr, @Nonnull String[] strArr2) {
        final String[] remaining = strArr.clone();
        for (String excludeTag : strArr2) {
            if (excludeTag == null) {
                continue; // binarySearch would throw NullPointerException on a null key
            }
            final int found = Arrays.binarySearch(strArr, excludeTag);
            if (found < 0) {
                continue; // unknown tag: nothing to remove
            }
            final int prefixLen = excludeTag.length();
            // A sub-tag extends its parent and so sorts after it; start at the match.
            for (int i = found; i < remaining.length; i++) {
                final String candidate = remaining[i];
                if (candidate == null || !candidate.startsWith(excludeTag)) {
                    continue;
                }
                // Remove the tag itself, or a sub-tag whose next character is the '-'
                // separator; a longer tag that merely shares the prefix is kept.
                if (candidate.length() <= prefixLen || candidate.charAt(prefixLen) == '-') {
                    remaining[i] = null;
                }
            }
        }

        final ArrayList<String> kept = new ArrayList<String>(remaining.length);
        for (String tag : remaining) {
            if (tag != null) {
                kept.add(tag);
            }
        }
        return kept;
    }

    /**
     * Builds the display form of a call to this function.
     *
     * @param strArr display strings of the arguments
     * @return {@code "stoptags_exclude("}, the arguments joined with ',', then {@code ")"}
     */
    public String getDisplayString(String[] strArr) {
        return "stoptags_exclude(" + StringUtils.join((Object[]) strArr, ',') + ')';
    }
}
