package hivemall.tools.text;

import hivemall.utils.lang.StringUtils;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that expands a list of words into word n-grams.
 *
 * <p>For every start position in the input, one n-gram is emitted for each size {@code n} with
 * {@code minSize <= n <= maxSize} that still fits inside the list. The words of an n-gram are
 * joined with a single space. Output is ordered by start position first, then by increasing
 * {@code n}.
 */
@UDFType(deterministic = true, stateful = false)
@Description(name = "word_ngrams",
        value = "_FUNC_(array<string> words, int minSize, int maxSize) - Returns list of n-grams for given words, where `minSize <= n <= maxSize`",
        extended = "SELECT word_ngrams(tokenize('Machine learning is fun!', true), 1, 2);\n\n [\"machine\",\"machine learning\",\"learning\",\"learning is\",\"is\",\"is fun\",\"fun\"]")
public final class WordNgramsUDF extends UDF {

    /**
     * Validates the arguments and builds the n-grams.
     *
     * @param words   the input words; {@code null} yields {@code null}
     * @param minSize smallest n-gram size, must be greater than zero
     * @param maxSize largest n-gram size, must be greater than or equal to {@code minSize}
     * @return the generated n-grams, or {@code null} when {@code words} is {@code null}
     * @throws HiveException (as {@link UDFArgumentException}) if {@code minSize <= 0}, if
     *         {@code minSize > maxSize}, or if a {@code null} word is encountered while
     *         building an n-gram
     */
    @Nullable
    public List<Text> evaluate(@Nullable List<Text> words, int minSize, int maxSize) throws HiveException {
        if (words == null) {
            return null;
        }
        if (minSize <= 0) {
            throw new UDFArgumentException("`minSize` must be greater than zero: " + minSize);
        }
        if (minSize > maxSize) {
            throw new UDFArgumentException("`maxSize` must be greater than or equal to `minSize`: " + maxSize);
        }
        return getNgrams(words, minSize, maxSize);
    }

    /**
     * Builds all n-grams of size {@code minSize..maxSize} over {@code words}.
     *
     * @param words   the input words
     * @param minSize smallest n-gram size
     * @param maxSize largest n-gram size
     * @return a new list holding one {@link Text} per n-gram; empty when no n-gram fits
     * @throws HiveException (as {@link UDFArgumentException}) if a word that is part of an
     *         emitted n-gram is {@code null}
     */
    @Nonnull
    private static List<Text> getNgrams(@Nonnull List<Text> words, @Nonnegative int minSize, @Nonnegative int maxSize) throws HiveException {
        final List<Text> ngrams = new ArrayList<Text>();
        final StringBuilder buf = new StringBuilder();
        final int numWords = words.size();

        for (int start = 0; start < numWords; start++) {
            // Clamp the largest size to the words remaining from `start`. Iterating all the
            // way up to `maxSize` made `start + n` overflow for a huge `maxSize`, which emitted
            // empty n-grams and kept the loop from terminating.
            final int longest = Math.min(maxSize, numWords - start);
            for (int n = minSize; n <= longest; n++) {
                final int end = start + n; // exclusive; at most numWords thanks to the clamp
                StringUtils.clear(buf);
                for (int pos = start; pos < end; pos++) {
                    final Text word = words.get(pos);
                    if (word == null) {
                        throw new UDFArgumentException("`array<string> words` must not contain NULL element");
                    }
                    if (pos > start) {
                        buf.append(" ");
                    }
                    buf.append(word.toString());
                }
                ngrams.add(new Text(buf.toString()));
            }
        }
        return ngrams;
    }
}
