package fi.seco.lexical;

import java.util.Arrays;
import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:fi/seco/lexical/LexicalAnalysisUtil.class */
/**
 * Static regex-based helpers for lightweight lexical processing of text:
 * tokenizing on whitespace and adjacent punctuation, splitting text into
 * sentences, normalizing separators and recognizing purely numeric strings.
 *
 * <p>All patterns are compiled once and shared; every method throws
 * {@link NullPointerException} when given a {@code null} string.
 */
public class LexicalAnalysisUtil {
    /**
     * A token separator: a whitespace run, the start of the input, or the end
     * of the input, together with any punctuation directly attached to it.
     * The whitespace alternative is tried before the start anchor so that
     * whitespace at the very beginning of the input is consumed instead of
     * being shadowed by an empty match at index 0.
     */
    private static final Pattern SEPARATOR = Pattern.compile("\\p{P}*(\\s+|^|$)\\p{P}*");

    /** A run of characters that are not control, punctuation, whitespace or symbol characters. */
    private static final Pattern DATA = Pattern.compile("([^\\p{C}\\p{P}\\s\\p{S}]+)");

    /** Whitespace preceded by one of {@code . ? ! ;} and followed by an uppercase letter. */
    private static final Pattern SENTENCE_BOUNDARY = Pattern.compile("(?<=[.?!;])\\s+(?=\\p{Lu})");

    /** One or more characters from the Unicode number category. */
    private static final Pattern NUMBER = Pattern.compile("\\p{N}+");

    /**
     * Splits {@code str} into tokens at whitespace, stripping punctuation that
     * touches a whitespace run or either end of the input. Punctuation inside
     * a token (for example an apostrophe or a hyphen between letters) is kept.
     *
     * <p>Empty pieces are never returned, so input that is empty or consists
     * only of separators yields an empty collection.
     *
     * @param str the text to tokenize
     * @return the non-empty tokens in order of appearance, as a fixed-size list
     */
    public static Collection<String> tokenize(String str) {
        String[] pieces = SEPARATOR.split(str);
        // Adjacent separator matches and a separator at index 0 both yield
        // empty pieces; compact the array in place to drop them.
        int kept = 0;
        for (String piece : pieces) {
            if (!piece.isEmpty()) {
                pieces[kept++] = piece;
            }
        }
        return Arrays.asList(Arrays.copyOf(pieces, kept));
    }

    /**
     * Splits {@code str} into sentences at whitespace that follows one of
     * {@code . ? ! ;} and precedes an uppercase letter. The terminating
     * punctuation stays with the preceding sentence.
     *
     * @param str the text to split
     * @return the sentences in order of appearance, as a fixed-size list
     */
    public static Collection<String> split(String str) {
        return Arrays.asList(SENTENCE_BOUNDARY.split(str));
    }

    /**
     * Returns the tokens of {@code str} (as produced by {@link #tokenize})
     * joined by single spaces, with the result trimmed.
     *
     * @param str the text to normalize
     * @return the normalized text; empty when {@code str} contains no tokens
     */
    public static String normalize(String str) {
        StringBuilder joined = new StringBuilder(str.length());
        for (String token : tokenize(str)) {
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(token);
        }
        return joined.toString().trim();
    }

    /**
     * Creates a matcher that finds token separators in {@code str}.
     *
     * @param str the text to scan
     * @return a new matcher of the separator pattern over {@code str}
     */
    public static Matcher spaceMatcher(String str) {
        return SEPARATOR.matcher(str);
    }

    /**
     * Creates a matcher that finds runs of data characters in {@code str},
     * i.e. characters that are not control, punctuation, whitespace or symbol
     * characters. Each run is also available as capturing group 1.
     *
     * @param str the text to scan
     * @return a new matcher of the data pattern over {@code str}
     */
    public static Matcher dataMatcher(String str) {
        return DATA.matcher(str);
    }

    /**
     * Tells whether {@code str} is non-empty and consists solely of
     * characters from the Unicode number category.
     *
     * @param str the text to test
     * @return {@code true} if the entire string is numeric
     */
    public static boolean isNumber(String str) {
        return NUMBER.matcher(str).matches();
    }
}
