package be.bagofwords.util;

import be.bagofwords.application.BaseServer;
import be.bagofwords.text.BowString;
import be.bagofwords.text.BowStringImpl;
import be.bagofwords.text.HTMLEntities;
import be.bagofwords.text.MappedText;
import be.bagofwords.text.Match;
import be.bagofwords.text.WordIterator;
import be.bagofwords.ui.UI;
import it.unimi.dsi.fastutil.chars.CharArrayList;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:be/bagofwords/util/StringUtils.class */
public class StringUtils extends org.apache.commons.lang3.StringUtils {
    /** Padding width; presumably the number of spaces added on each side by the index padding helper (confirm). */
    public static final int NUM_OF_PADDED_SPACES = 1;

    /** Quote characters (ASCII double/single quote and the curly variants U+2018, U+2019, U+201C, U+201D). */
    static final char[] quotes = {'"', '\'', '\u2018', '\u2019', '\u201c', '\u201d'};

    /** Cache indexed by character code; the value 65535 marks a slot that has not been computed yet. */
    private static final CharArrayList characterMapping = new CharArrayList();

    /** Character-to-escape-code mappings; all but the first are created in the static initializer. */
    private static final Map<Character, Character> escapeFileNameMapping = new HashMap<Character, Character>();
    private static final Map<Character, Character> escapeTab;
    private static final Map<Character, Character> escapePath;
    private static final Map<Character, Character> escapeNewLine;

    /** Domain suffixes (without leading dot) checked when guessing whether a token is a URL. */
    private static final String[] topLevelDomains;

    /** Regular expressions assigned in the static initializer; not referenced elsewhere in this class. */
    private static final List<String> certainErrorRegex;

    /** Pattern used by {@code isPossibleName}. */
    private static Pattern namePattern;

    /**
     * Replaces, in place, every character of {@code cArr} that occurs in {@code quotes}
     * with a plain ASCII apostrophe.
     *
     * @param cArr the characters to normalize; modified in place
     */
    public static void normalizeQuotationMarks(char[] cArr) {
        for (int pos = 0; pos < cArr.length; pos++) {
            for (char quote : quotes) {
                if (quote == cArr[pos]) {
                    cArr[pos] = '\'';
                    break;
                }
            }
        }
    }

    /**
     * Tells whether {@code c} is one of the lower-case letters a, e, i, o, u or y.
     *
     * @param c the character to test
     * @return {@code true} for a lower-case ASCII vowel (y included)
     */
    public static boolean isASCIIVowel(char c) {
        return "aeiouy".indexOf(c) >= 0;
    }

    /**
     * Tells whether {@code c} is a lower-case ASCII consonant. The letter y is not
     * counted as a consonant here.
     *
     * @param c the character to test
     * @return {@code true} for one of the twenty lower-case consonants b..z (excluding y)
     */
    public static boolean isASCIIConsonant(char c) {
        return "bcdfghjklmnpqrstvwxz".indexOf(c) >= 0;
    }

    /** Matches runs of combining diacritical marks left behind by NFD decomposition. */
    private static final Pattern COMBINING_DIACRITICAL_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Strips accents from a string by decomposing it (NFD) and deleting the combining
     * marks. The slashed o (upper and lower case) has no decomposition, so it is mapped
     * to a plain o explicitly first.
     *
     * @param str the text to strip
     * @return the text without combining diacritical marks
     */
    public static String removeAccentsSlow(String str) {
        // Character-based replace: no regex involved and a no-op when the character is absent.
        String withoutSlashedO = str.replace('\u00f8', 'o').replace('\u00d8', 'O');
        String decomposed = Normalizer.normalize(withoutSlashedO, Normalizer.Form.NFD);
        return COMBINING_DIACRITICAL_MARKS.matcher(decomposed).replaceAll("");
    }

    /**
     * Strips HTML markup from {@code mappedText} in place: script bodies are removed first,
     * then style blocks, then line-breaking tags (br, p, div, h1-h4) become newlines, all
     * remaining tags are dropped, and finally HTML entities are replaced.
     *
     * @param mappedText the text to clean; modified in place
     */
    public static void removeHTML(MappedText mappedText) {
        replaceScripts(mappedText);
        // {regex, replacement} pairs, applied in this exact order.
        String[][] tagRules = {
            {"<style[^<]+</style>", ""},
            {"<br[^>]*>", "\n"},
            {"</?p[^>]*>", "\n"},
            {"</?div[^>]*>", "\n"},
            {"</?h[1234][^>]*>", "\n"},
            {"<[^>]*>", ""},
        };
        for (String[] rule : tagRules) {
            replaceAll(rule[0], rule[1], mappedText);
        }
        replaceHTMLEntities(mappedText);
    }

    /**
     * Removes the content between each opening {@code <script ...>} tag and the
     * {@code </script} that has the same ordinal position in the text. The tags themselves
     * are left in place.
     *
     * @param mappedText the text to clean; modified in place
     * @throws IllegalArgumentException if the number of opening and closing script tags differs
     */
    private static void replaceScripts(MappedText mappedText) {
        String text = mappedText.getText();
        Matcher openTag = Pattern.compile("<script[^>]*>").matcher(text);
        Matcher closeTag = Pattern.compile("</script").matcher(text);

        // Only the end of each opening tag and the start of each closing tag are needed.
        List<Integer> bodyStarts = new ArrayList<Integer>();
        while (openTag.find()) {
            bodyStarts.add(openTag.end());
        }
        List<Integer> bodyEnds = new ArrayList<Integer>();
        while (closeTag.find()) {
            bodyEnds.add(closeTag.start());
        }
        if (bodyStarts.size() != bodyEnds.size()) {
            throw new IllegalArgumentException("Could not parse this file!");
        }

        // The i-th opening tag is paired with the i-th closing tag.
        List<Pair<Integer, Integer>> scriptBodies = new ArrayList<Pair<Integer, Integer>>(bodyStarts.size());
        for (int idx = 0; idx < bodyStarts.size(); idx++) {
            scriptBodies.add(new Pair<Integer, Integer>(bodyStarts.get(idx), bodyEnds.get(idx)));
        }
        replaceMatches(scriptBodies, "", mappedText);
    }

    /**
     * Applies all replacements described by {@code list} to {@code mappedText} in one pass,
     * rebuilding both the text and its mapping-to-original array.
     *
     * <p>The list is sorted in place and then checked for overlaps. Characters outside of
     * any match keep their original mapping. Characters produced by a replacement get
     * mapping -1, except that when the match asks to keep its word mapping the first
     * replacement character is mapped to the start of the replaced range and the last one
     * to the end of that range minus one.
     *
     * @param list the matches to apply; sorted in place by this method
     * @param mappedText the text to rewrite; its text and mapping are replaced
     * @throws RuntimeException if matches overlap or are otherwise out of order
     */
    public static void replaceMatches(List<Match> list, MappedText mappedText) {
        Collections.sort(list);
        checkForOverlapping(list);
        int computeNewLength = computeNewLength(mappedText, list);
        char[] cArr = new char[computeNewLength];
        int[] iArr = new int[computeNewLength];
        // i: read position in the old text; i2: offset such that i + i2 is the write position.
        int i = 0;
        int i2 = 0;
        for (Match match : list) {
            if (i > match.getStart()) {
                throw new RuntimeException("Something went wrong while replacing matches in text.");
            }
            String replacement = match.getReplacement();
            // Copy the untouched stretch in front of this match.
            while (i < match.getStart()) {
                cArr[i + i2] = mappedText.getTextArray()[i];
                iArr[i + i2] = mappedText.getMappingToOrig()[i];
                i++;
            }
            Pair<Integer, Integer> mappingToOrig = match.keepWordMapping() ? mappedText.getMappingToOrig(match.getStart(), match.getEnd()) : null;
            int i3 = 0;
            while (i3 < replacement.length()) {
                cArr[i + i2 + i3] = replacement.charAt(i3);
                // First char -> range start, last char -> range end - 1 (only when a mapping is kept), otherwise -1.
                iArr[i + i2 + i3] = (mappingToOrig == null || i3 != 0) ? (mappingToOrig == null || i3 != replacement.length() - 1) ? -1 : mappingToOrig.getSecond().intValue() - 1 : mappingToOrig.getFirst().intValue();
                i3++;
            }
            // The write offset shifts by the length difference; the read position skips the matched span.
            i2 += replacement.length() - (match.getEnd() - match.getStart());
            i += match.getEnd() - match.getStart();
        }
        // Copy whatever follows the last match.
        while (i < mappedText.getTextArray().length) {
            cArr[i + i2] = mappedText.getTextArray()[i];
            iArr[i + i2] = mappedText.getMappingToOrig()[i];
            i++;
        }
        mappedText.setText(cArr);
        mappedText.setMappingToOrig(iArr);
    }

    /**
     * Computes the length the text will have once every match has been replaced.
     *
     * @param mappedText the text being rewritten
     * @param list the matches that will be applied
     * @return current text length, minus each matched span, plus each replacement length
     */
    private static int computeNewLength(MappedText mappedText, List<Match> list) {
        int newLength = mappedText.getText().length();
        for (Match current : list) {
            int removed = current.getEnd() - current.getStart();
            int added = current.getReplacement().length();
            newLength += added - removed;
        }
        return newLength;
    }

    /**
     * Verifies that no match in {@code list} starts before an earlier list element ends.
     *
     * @param list the matches to check, in the order they will be applied
     * @throws RuntimeException naming the first offending pair of matches
     */
    private static void checkForOverlapping(List<Match> list) {
        int count = list.size();
        for (int earlier = 0; earlier < count; earlier++) {
            Match first = list.get(earlier);
            for (int later = earlier + 1; later < count; later++) {
                Match second = list.get(later);
                boolean overlaps = second.getStart() < first.getEnd();
                if (overlaps) {
                    throw new RuntimeException("Found overlapping matches " + first + " " + second);
                }
            }
        }
    }

    /**
     * Replaces every (start, end) span in {@code list} with the same replacement string.
     *
     * @param list spans to replace, as pairs of start and end offsets
     * @param str the replacement used for every span
     * @param mappedText the text to rewrite; modified in place
     */
    public static void replaceMatches(List<Pair<Integer, Integer>> list, String str, MappedText mappedText) {
        List<Match> matches = new ArrayList<Match>();
        for (Pair<Integer, Integer> span : list) {
            int start = span.getFirst().intValue();
            int end = span.getSecond().intValue();
            matches.add(new Match(start, end, str));
        }
        replaceMatches(matches, mappedText);
    }

    /**
     * Replaces HTML entities in {@code mappedText} in place. Numeric entities of the form
     * {@code &#NNN;} (1-6 decimal digits) become the character with that code point; any
     * other entity is looked up in {@code HTMLEntities.entityMapping}. Entities missing
     * from the mapping are left untouched.
     *
     * @param mappedText the text to rewrite; modified in place
     */
    private static void replaceHTMLEntities(MappedText mappedText) {
        Matcher matcher = HTMLEntities.htmlEntityPattern.matcher(mappedText.getText());
        Pattern numericEntity = Pattern.compile("&#\\d{1,6};");
        List<Match> matches = new ArrayList<Match>();
        while (matcher.find()) {
            String entity = matcher.group();
            if (numericEntity.matcher(entity).matches()) {
                int codePoint = Integer.parseInt(entity.substring(2, entity.length() - 1));
                // At most 999999, which is a valid code point; toChars yields a surrogate
                // pair above U+FFFF instead of truncating to 16 bits.
                matches.add(new Match(matcher.start(), matcher.end(), new String(Character.toChars(codePoint))));
            } else {
                // Direct lookup; an unknown entity yields null and is skipped rather than
                // being searched for indefinitely.
                Character mapped = HTMLEntities.entityMapping.get(entity);
                if (mapped != null) {
                    matches.add(new Match(matcher.start(), matcher.end(), String.valueOf(mapped.charValue())));
                }
            }
        }
        replaceMatches(matches, mappedText);
    }

    /**
     * Returns a copy of {@code str} with HTML entities replaced. Numeric entities of the
     * form {@code &#NNN;} (1-6 decimal digits) become the character with that code point;
     * any other entity is looked up in {@code HTMLEntities.entityMapping}. Entities missing
     * from the mapping are copied through unchanged.
     *
     * @param str the text that may contain HTML entities
     * @return the text with known entities replaced
     */
    public static String replaceHTMLEntities(String str) {
        Matcher matcher = HTMLEntities.htmlEntityPattern.matcher(str);
        Pattern numericEntity = Pattern.compile("&#\\d{1,6};");
        StringBuilder result = new StringBuilder(str.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            String entity = matcher.group();
            String replacement = null;
            if (numericEntity.matcher(entity).matches()) {
                int codePoint = Integer.parseInt(entity.substring(2, entity.length() - 1));
                // At most 999999, which is a valid code point; toChars yields a surrogate
                // pair above U+FFFF instead of truncating to 16 bits.
                replacement = new String(Character.toChars(codePoint));
            } else {
                // Direct lookup; an unknown entity yields null and is skipped rather than
                // being searched for indefinitely.
                Character mapped = HTMLEntities.entityMapping.get(entity);
                if (mapped != null) {
                    replacement = String.valueOf(mapped.charValue());
                }
            }
            if (replacement != null) {
                result.append(str, copiedUpTo, matcher.start());
                result.append(replacement);
                copiedUpTo = matcher.end();
            }
        }
        result.append(str, copiedUpTo, str.length());
        return result.toString();
    }

    /**
     * Lower-cases every character of {@code cArr} in place using
     * {@link Character#toLowerCase(char)}.
     *
     * @param cArr the characters to convert; modified in place
     */
    public static void convertToLowerCase(char[] cArr) {
        int pos = 0;
        for (char original : cArr) {
            cArr[pos++] = Character.toLowerCase(original);
        }
    }

    /**
     * Truncates {@code mappedText} in place to the half-open range {@code [i, i2)},
     * keeping the matching slice of the mapping-to-original array.
     *
     * @param mappedText the text to cut; its text and mapping are replaced
     * @param i index of the first character to keep
     * @param i2 index just past the last character to keep
     */
    public static void split(MappedText mappedText, int i, int i2) {
        int sliceLength = i2 - i;
        char[] textSlice = new char[sliceLength];
        int[] mappingSlice = new int[sliceLength];
        for (int offset = 0; offset < sliceLength; offset++) {
            textSlice[offset] = mappedText.getTextArray()[i + offset];
            mappingSlice[offset] = mappedText.getMappingToOrig()[i + offset];
        }
        mappedText.setText(textSlice);
        mappedText.setMappingToOrig(mappingSlice);
    }

    /**
     * Replaces the single span {@code [i, i2)} of {@code mappedText} with {@code str}.
     *
     * @param i start offset of the span
     * @param i2 end offset of the span
     * @param str the replacement text
     * @param mappedText the text to rewrite; modified in place
     */
    public static void replaceString(int i, int i2, String str, MappedText mappedText) {
        Match onlyMatch = new Match(i, i2, str);
        List<Match> single = Arrays.asList(onlyMatch);
        replaceMatches(single, mappedText);
    }

    /**
     * Replaces every match of the regular expression {@code str} in {@code mappedText}
     * with the literal string {@code str2}.
     *
     * @param str the regular expression to search for
     * @param str2 the replacement inserted for each match
     * @param mappedText the text to rewrite; modified in place
     */
    public static void replaceAll(String str, String str2, MappedText mappedText) {
        Pattern pattern = Pattern.compile(str);
        Matcher matcher = pattern.matcher(mappedText.getText());
        List<Pair<Integer, Integer>> spans = new ArrayList<Pair<Integer, Integer>>();
        while (matcher.find()) {
            spans.add(new Pair<Integer, Integer>(Integer.valueOf(matcher.start()), Integer.valueOf(matcher.end())));
        }
        replaceMatches(spans, str2, mappedText);
    }

    /**
     * Encodes {@code str} as UTF-8 and drops any trailing zero bytes.
     *
     * <p>The charset is given explicitly so the result round-trips through the byte-based
     * {@code convert} overloads, which decode as utf-8, regardless of the platform default.
     *
     * @param str the text to encode
     * @return the UTF-8 bytes of {@code str} without trailing zero bytes
     */
    public static byte[] convert(String str) {
        byte[] encoded = str.getBytes(Charset.forName("utf-8"));
        int usedLength = encoded.length;
        while (usedLength > 0 && encoded[usedLength - 1] == 0) {
            usedLength--;
        }
        return usedLength < encoded.length ? Arrays.copyOf(encoded, usedLength) : encoded;
    }

    /**
     * Encodes a character array by delegating to the {@code String} overload.
     *
     * @param cArr the characters to encode
     * @return the encoded bytes
     */
    public static byte[] convert(char[] cArr) {
        String asString = String.valueOf(cArr);
        return convert(asString);
    }

    /**
     * Decodes the whole byte array by delegating to the offset/length overload.
     *
     * @param bArr the bytes to decode
     * @return the decoded text
     */
    public static String convert(byte[] bArr) {
        int wholeLength = bArr.length;
        return convert(bArr, 0, wholeLength);
    }

    /**
     * Decodes {@code i2} bytes of {@code bArr}, starting at offset {@code i}, as utf-8.
     *
     * @param bArr the source bytes
     * @param i offset of the first byte to decode
     * @param i2 number of bytes to decode
     * @return the decoded text
     */
    public static String convert(byte[] bArr, int i, int i2) {
        Charset utf8 = Charset.forName("utf-8");
        ByteBuffer window = ByteBuffer.wrap(bArr, i, i2);
        return utf8.decode(window).toString();
    }

    /**
     * Decodes the remaining content of {@code byteBuffer} as utf-8.
     *
     * @param byteBuffer the buffer to decode
     * @return the decoded text
     */
    public static String convert(ByteBuffer byteBuffer) {
        Charset utf8 = Charset.forName("utf-8");
        return utf8.decode(byteBuffer).toString();
    }

    /**
     * Normalizes a word with {@code reduceCharacterDiversityLevel2} and surrounds the
     * result with padding spaces.
     *
     * @param str the word to prepare
     * @return the normalized, space-padded word
     */
    public static String prepareWordForIndex(String str) {
        String normalized = reduceCharacterDiversityLevel2(str);
        char[] padded = addSpaces(normalized);
        return String.valueOf(padded);
    }

    /**
     * Copies {@code charSequence} into a new array with one space in front and one behind.
     *
     * @param charSequence the characters to pad
     * @return a new array of length {@code charSequence.length() + 2}
     */
    private static char[] addSpaces(CharSequence charSequence) {
        int contentLength = charSequence.length();
        char[] padded = new char[contentLength + 2 * NUM_OF_PADDED_SPACES];
        Arrays.fill(padded, ' ');
        for (int k = 0; k < contentLength; k++) {
            padded[k + NUM_OF_PADDED_SPACES] = charSequence.charAt(k);
        }
        return padded;
    }

    /**
     * Replaces each character of {@code cArr} in place with its accent-free counterpart
     * and returns the result as a string.
     *
     * @param cArr the characters to strip; modified in place
     * @return a string built from the modified array
     */
    public static String removeAccents(char[] cArr) {
        int pos = 0;
        for (char original : cArr) {
            cArr[pos++] = removeAccent(original);
        }
        return String.valueOf(cArr);
    }

    /**
     * Returns the accent-free counterpart of {@code c}. Characters with a code of at most
     * 128 are returned unchanged without consulting the mapping cache.
     *
     * @param c the character to strip
     * @return {@code c} itself, or its cached accent-free mapping
     */
    public static char removeAccent(char c) {
        if (c <= 128) {
            return c;
        }
        return getMappedCharacter(c);
    }

    /**
     * Looks up the accent-free counterpart of {@code c} in {@code characterMapping},
     * computing and caching it with {@code removeAccentsSlow} on first use. The value
     * 65535 marks cache slots that have not been filled yet.
     *
     * <p>NOTE(review): the first lookup reads the cache outside the lock while writers
     * grow it under the lock; confirm this is acceptable for the list implementation.
     *
     * @param c the character to map
     * @return the first character of the accent-stripped form, or {@code c} if that form is empty
     */
    private static char getMappedCharacter(char c) {
        final char unmapped = '\uffff';
        char mapped = c < characterMapping.size() ? characterMapping.getChar(c) : unmapped;
        if (mapped == unmapped) {
            synchronized (characterMapping) {
                String stripped = removeAccentsSlow(String.valueOf(c));
                mapped = stripped.isEmpty() ? c : stripped.charAt(0);
                // An int counter is required: a char counter wraps around at 65535 and
                // "next <= c" would never become false for c == '\uffff'.
                for (int next = characterMapping.size(); next <= c; next++) {
                    characterMapping.add(unmapped);
                }
                characterMapping.set(c, mapped);
            }
        }
        return mapped;
    }

    /**
     * Returns {@code str} with every character replaced by its accent-free counterpart.
     *
     * @param str the text to strip
     * @return the accent-free text
     */
    public static String removeAccents(String str) {
        char[] characters = str.toCharArray();
        return removeAccents(characters);
    }

    /**
     * Replaces every en dash (U+2013) in {@code str} with an ASCII hyphen.
     *
     * @param str the text to fix
     * @return the text with en dashes replaced
     */
    public static String removeIncorrectDash(String str) {
        final char enDash = '\u2013';
        return str.replace(enDash, '-');
    }

    /**
     * Returns the partition for the first character of {@code str}, or 0 for an empty string.
     *
     * @param str the text whose first character selects the partition
     * @param i the number of partitions
     * @return the partition index
     */
    public static int getPartition(String str, int i) {
        return str.isEmpty() ? 0 : getPartition(str.charAt(0), i);
    }

    /**
     * Maps a letter onto one of {@code i} partitions by its position in the alphabet
     * (case-insensitive). Any character that is not a-z after lower-casing maps to 0.
     *
     * @param c the character selecting the partition
     * @param i the number of partitions
     * @return {@code (alphabetIndex * i) / 26}, or 0 for non-letters
     */
    public static int getPartition(char c, int i) {
        int alphabetIndex = Character.toLowerCase(c) - 'a';
        boolean isAsciiLetter = alphabetIndex >= 0 && alphabetIndex <= 25;
        if (!isAsciiLetter) {
            return 0;
        }
        return (alphabetIndex * i) / 26;
    }

    /**
     * Tells whether {@code str} contains a carriage return, a line feed or a tab.
     *
     * @param str the text to inspect
     * @return {@code true} if any of those three characters occurs in {@code str}
     */
    public static boolean crossesParagraphBoundaries(String str) {
        return str.indexOf('\r') >= 0
                || str.indexOf('\n') >= 0
                || str.indexOf('\t') >= 0;
    }

    /**
     * Escapes {@code str} with '_' as the escape character and the file-name mapping.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeFileName(String str) {
        final char escapeChar = '_';
        return escapeString(str, escapeChar, escapeFileNameMapping);
    }

    /**
     * Reverses {@code escapeFileName}: '_' is the escape character and the file-name
     * mapping is applied backwards.
     *
     * @param str the escaped text
     * @return the original text
     */
    public static String unescapeFileName(String str) {
        final char escapeChar = '_';
        return unescapeString(str, escapeChar, escapeFileNameMapping);
    }

    /**
     * Escapes {@code str}: the escape character {@code c} itself is doubled, and every
     * character that is a key of {@code map} is written as {@code c} followed by its
     * mapped value. All other characters are copied unchanged.
     *
     * @param str the text to escape
     * @param c the escape character
     * @param map characters to escape, mapped to the code written after {@code c}
     * @return the escaped text
     */
    private static String escapeString(String str, char c, Map<Character, Character> map) {
        StringBuilder escaped = new StringBuilder();
        for (char current : str.toCharArray()) {
            // The escape character takes precedence over any mapping for the same character.
            Character code = current == c ? Character.valueOf(c) : map.get(Character.valueOf(current));
            if (code == null) {
                escaped.append(current);
            } else {
                escaped.append(c).append(code.charValue());
            }
        }
        return escaped.toString();
    }

    /**
     * Reverses {@code escapeString}: every escape character {@code c} must be followed by
     * either {@code c} itself or a value of {@code map}, and the two characters are
     * replaced by the original character.
     *
     * @param str the escaped text
     * @param c the escape character
     * @param map the mapping that was used for escaping
     * @return the unescaped text
     * @throws RuntimeException if an escape character is last in the string or is followed by an unknown code
     */
    private static String unescapeString(String str, char c, Map<Character, Character> map) {
        StringBuilder plain = new StringBuilder();
        int length = str.length();
        for (int pos = 0; pos < length; pos++) {
            char current = str.charAt(pos);
            if (current != c) {
                plain.append(current);
                continue;
            }
            if (pos + 1 >= length) {
                throw new RuntimeException("Incorrect string " + str + " for escape char " + c);
            }
            Character original = findOrig(c, map, str.charAt(pos + 1));
            if (original == null) {
                throw new RuntimeException("Incorrect string " + str + " for escape char " + c);
            }
            plain.append(original.charValue());
            // Skip the code character that followed the escape character.
            pos++;
        }
        return plain.toString();
    }

    /**
     * Finds the original character for the code {@code c2} that followed an escape character.
     *
     * @param c the escape character
     * @param map the mapping that was used for escaping
     * @param c2 the code found after the escape character
     * @return {@code c} if the code is the escape character itself, otherwise the first key
     *         of {@code map} whose value is {@code c2}, or {@code null} if there is none
     */
    private static Character findOrig(char c, Map<Character, Character> map, char c2) {
        Character original = null;
        if (c2 == c) {
            original = Character.valueOf(c);
        } else {
            for (Map.Entry<Character, Character> candidate : map.entrySet()) {
                if (candidate.getValue().charValue() == c2) {
                    original = candidate.getKey();
                    break;
                }
            }
        }
        return original;
    }

    /**
     * Escapes {@code str} with '_' as the escape character and the tab mapping.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeTab(String str) {
        final char escapeChar = '_';
        return escapeString(str, escapeChar, escapeTab);
    }

    /**
     * Reverses {@code escapeTab}.
     *
     * @param str the escaped text
     * @return the original text
     */
    public static String unescapeTab(String str) {
        final char escapeChar = '_';
        return unescapeString(str, escapeChar, escapeTab);
    }

    /**
     * Reverses {@code escapePathName}.
     *
     * @param str the escaped text
     * @return the original text
     */
    public static String unescapePathName(String str) {
        final char escapeChar = '_';
        return unescapeString(str, escapeChar, escapePath);
    }

    /**
     * Escapes {@code str} with the section sign (U+00A7) as the escape character and the
     * new-line mapping.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeNewLine(String str) {
        final char escapeChar = '\u00a7';
        return escapeString(str, escapeChar, escapeNewLine);
    }

    /**
     * Reverses {@code escapeNewLine}.
     *
     * @param str the escaped text
     * @return the original text
     */
    public static String unescapeNewLine(String str) {
        final char escapeChar = '\u00a7';
        return unescapeString(str, escapeChar, escapeNewLine);
    }

    /**
     * Escapes {@code str} with '_' as the escape character and the path mapping.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapePathName(String str) {
        final char escapeChar = '_';
        return escapeString(str, escapeChar, escapePath);
    }

    /** Matches one or more consecutive whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Collapses every run of whitespace in {@code str} to a single space and trims the result.
     *
     * <p>A single pass also handles inputs that contain only plain spaces, e.g.
     * {@code "a  b"} becomes {@code "a b"}.
     *
     * @param str the text to clean
     * @return the text with single spaces between tokens and no leading or trailing whitespace
     */
    public static String removeRedundantWhiteSpace(String str) {
        return WHITESPACE_RUN.matcher(str).replaceAll(" ").trim();
    }

    /**
     * Tells whether the whole of {@code str} matches {@code namePattern}.
     *
     * @param str the text to test
     * @return {@code true} if the complete string matches the name pattern
     */
    public static boolean isPossibleName(String str) {
        Matcher nameMatcher = namePattern.matcher(str);
        return nameMatcher.matches();
    }

    /**
     * Applies level-1 normalization to the string returned by {@code bowString.getS()}.
     *
     * @param bowString the source of the text
     * @return the normalized text
     */
    public static String reduceCharacterDiversityLevel1(BowString bowString) {
        String text = bowString.getS();
        return reduceCharacterDiversityLevel1(text);
    }

    /**
     * Applies level-1 normalization (lower-casing plus the level-0 digit replacement) to a
     * copy of {@code str}.
     *
     * @param str the text to normalize
     * @return the normalized text
     */
    public static String reduceCharacterDiversityLevel1(String str) {
        char[] characters = str.toCharArray();
        return reduceCharacterDiversityLevel1(characters);
    }

    /**
     * Lower-cases {@code cArr} in place and then applies level-0 normalization to it.
     *
     * @param cArr the characters to normalize; modified in place
     * @return a string built from the modified array
     */
    public static String reduceCharacterDiversityLevel1(char[] cArr) {
        convertToLowerCase(cArr);
        String normalized = reduceCharacterDiversityLevel0(cArr);
        return normalized;
    }

    /**
     * Applies level-0 normalization (digit replacement) to a copy of {@code str}.
     *
     * @param str the text to normalize
     * @return the normalized text
     */
    public static String reduceCharacterDiversityLevel0(String str) {
        char[] characters = str.toCharArray();
        return reduceCharacterDiversityLevel0(characters);
    }

    /**
     * Replaces the digits in {@code cArr} in place and returns the result as a string.
     *
     * @param cArr the characters to normalize; modified in place
     * @return a string built from the modified array
     */
    public static String reduceCharacterDiversityLevel0(char[] cArr) {
        replaceDigits(cArr);
        return String.valueOf(cArr);
    }

    /**
     * Applies level-1 normalization and then removes accents.
     *
     * @param str the text to normalize
     * @return the normalized, accent-free text
     */
    public static String reduceCharacterDiversityLevel2(String str) {
        String level1 = reduceCharacterDiversityLevel1(str);
        return removeAccents(level1);
    }

    /**
     * Applies level-2 normalization to the string returned by {@code bowString.getS()}.
     *
     * @param bowString the source of the text
     * @return the normalized text
     */
    public static String reduceCharacterDiversityLevel2(BowString bowString) {
        String text = bowString.getS();
        return reduceCharacterDiversityLevel2(text);
    }

    /**
     * Applies level-2 normalization and then maps the result through the
     * vowel/consonant classifier {@code toVownNonVowl}.
     *
     * @param str the text to normalize
     * @return the classified text
     */
    public static String reduceCharacterDiversityLevel3(String str) {
        String level2 = reduceCharacterDiversityLevel2(str);
        return toVownNonVowl(level2);
    }

    /**
     * Dispatches to the normalization routine for the requested level.
     *
     * @param i the normalization level, 0 to 3
     * @param str the text to normalize
     * @return the text normalized at level {@code i}
     * @throws RuntimeException if {@code i} is not in the range 0..3
     */
    public static String reduceCharacterDiversityLevel(int i, String str) {
        switch (i) {
            // Plain literal: level 0 has nothing to do with the UI constant that happens to share the value 0.
            case 0:
                return reduceCharacterDiversityLevel0(str);
            case 1:
                return reduceCharacterDiversityLevel1(str);
            case 2:
                return reduceCharacterDiversityLevel2(str);
            case 3:
                return reduceCharacterDiversityLevel3(str);
            default:
                throw new RuntimeException("Unknown level " + i);
        }
    }

    /**
     * Maps every character of {@code str} to a class symbol: 'v' for a lower-case ASCII
     * vowel, 'c' for a lower-case ASCII consonant, a space for a space, and 'o' for
     * anything else.
     *
     * @param str the text to classify
     * @return a string of the same length made of 'v', 'c', ' ' and 'o'
     */
    private static String toVownNonVowl(String str) {
        // Work on a copy of the input characters; classifying a freshly allocated array
        // would only ever see NUL characters and yield 'o' everywhere.
        char[] symbols = str.toCharArray();
        for (int pos = 0; pos < symbols.length; pos++) {
            char current = symbols[pos];
            if (isASCIIVowel(current)) {
                symbols[pos] = 'v';
            } else if (isASCIIConsonant(current)) {
                symbols[pos] = 'c';
            } else if (current != ' ') {
                symbols[pos] = 'o';
            }
        }
        return new String(symbols);
    }

    /**
     * Replaces, in place, every character for which {@link Character#isDigit(char)} holds
     * with the character '0'.
     *
     * @param cArr the characters to normalize; modified in place
     */
    private static void replaceDigits(char[] cArr) {
        int pos = 0;
        for (char current : cArr) {
            if (Character.isDigit(current)) {
                cArr[pos] = '0';
            }
            pos++;
        }
    }

    /**
     * URL-encodes {@code str} using the encoding named by {@code BaseServer.ENCODING}.
     *
     * @param str the text to encode
     * @return the encoded text
     * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} if the encoding is unknown
     */
    public static String urlEncode(String str) {
        String encoded;
        try {
            encoded = URLEncoder.encode(str, BaseServer.ENCODING);
        } catch (UnsupportedEncodingException unsupported) {
            throw new RuntimeException(unsupported);
        }
        return encoded;
    }

    /**
     * URL-decodes {@code str} using the encoding named by {@code BaseServer.ENCODING}.
     *
     * @param str the text to decode
     * @return the decoded text
     * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} if the encoding is unknown
     */
    public static String urlDecode(String str) {
        String decoded;
        try {
            decoded = URLDecoder.decode(str, BaseServer.ENCODING);
        } catch (UnsupportedEncodingException unsupported) {
            throw new RuntimeException(unsupported);
        }
        return decoded;
    }

    /**
     * Heuristically decides whether {@code str} looks like a URL, an e-mail address or a
     * hash tag. The check is done on the lower-cased text:
     * <ul>
     *   <li>anything containing '@' or '#' qualifies;</li>
     *   <li>otherwise the text must contain a '.', and qualifies if it also contains '/'
     *       or "www", or contains ".&lt;tld&gt;" for a known top-level domain that is
     *       followed by the end of the text or by a non-word character.</li>
     * </ul>
     *
     * @param str the token to inspect
     * @return {@code true} if the token might be a URL, e-mail address or hash tag
     */
    public static boolean mightBeUrlOrEmailOrHashTag(String str) {
        String lowerCase = str.toLowerCase();
        if (lowerCase.contains("@") || lowerCase.contains("#")) {
            return true;
        }
        if (!lowerCase.contains(".")) {
            return false;
        }
        if (lowerCase.contains("/") || lowerCase.contains("www")) {
            return true;
        }
        for (String tld : topLevelDomains) {
            // Quote the domain so that the dot in e.g. "co.uk" is matched literally.
            String regex = ".*\\." + Pattern.quote(tld) + "(?:\\W.*)?";
            if (lowerCase.matches(regex)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects every element produced by a {@code WordIterator} over {@code str}.
     *
     * @param str the text to split
     * @param set passed through to the {@code WordIterator} constructor
     * @return the words in iteration order
     */
    public static List<BowString> splitInWords(String str, Set<String> set) {
        List<BowString> words = new ArrayList<BowString>();
        for (WordIterator iterator = new WordIterator(str, set); iterator.hasNext(); ) {
            // Explicit cast keeps this compiling whether or not the iterator is generically typed.
            words.add((BowString) iterator.next());
        }
        return words;
    }

    /**
     * Builds a one-line snippet showing {@code bowString} inside its surrounding text:
     * up to {@code i} characters before it, the string itself wrapped in {@code **}, and
     * up to {@code i} characters after it. New lines and tabs become spaces and runs of
     * spaces are collapsed.
     *
     * @param bowString the string to show in context
     * @param i the number of context characters on each side
     * @return the context snippet
     */
    public static String createStringContext(BowString bowString, int i) {
        String fullText = bowString.getTextS();
        int start = bowString.getStart();
        int end = bowString.getEnd();

        String before = fullText.substring(Math.max(0, start - i), start);
        String highlighted = "**" + fullText.substring(start, end) + "**";
        String after = fullText.substring(end, Math.min(fullText.length(), end + i));

        String snippet = (before + highlighted + after).replace('\n', ' ').replace('\t', ' ');
        while (snippet.contains("  ")) {
            snippet = snippet.replaceAll("  ", " ");
        }
        return snippet;
    }

    /**
     * Searches to the left of {@code bowString}'s start for a word, using
     * {@code WordIterator.findWord}.
     *
     * @param bowString the reference string
     * @param set passed through to {@code WordIterator.findWord}
     * @return whatever {@code WordIterator.findWord} returns for the left direction
     */
    public static BowStringImpl getPrevWord(BowString bowString, Set<String> set) {
        int searchFrom = bowString.getStart();
        return WordIterator.findWord(bowString.getText(), searchFrom, Direction.Left, set);
    }

    /**
     * Searches to the right of {@code bowString}'s end for a word, using
     * {@code WordIterator.findWord}.
     *
     * @param bowString the reference string
     * @param set passed through to {@code WordIterator.findWord}
     * @return whatever {@code WordIterator.findWord} returns for the right direction
     */
    public static BowStringImpl getNextWord(BowString bowString, Set<String> set) {
        int searchFrom = bowString.getEnd();
        return WordIterator.findWord(bowString.getText(), searchFrom, Direction.Right, set);
    }

    /**
     * Returns the last {@code i} characters of {@code bowString.getS()}. A string shorter
     * than {@code i} is first padded with spaces at the end.
     *
     * @param bowString the source of the text
     * @param i the n-gram length
     * @return the trailing n-gram
     */
    public static String getLastNgram(BowString bowString, int i) {
        StringBuilder padded = new StringBuilder(bowString.getS());
        while (padded.length() < i) {
            padded.append(' ');
        }
        String text = padded.toString();
        return text.substring(text.length() - i);
    }

    /**
     * Returns the first {@code i} characters of {@code bowString.getS()}. A string shorter
     * than {@code i} is first padded with spaces at the front.
     *
     * @param bowString the source of the text
     * @param i the n-gram length
     * @return the leading n-gram
     */
    public static String getFirstNgram(BowString bowString, int i) {
        StringBuilder padded = new StringBuilder(bowString.getS());
        while (padded.length() < i) {
            padded.insert(0, ' ');
        }
        String text = padded.toString();
        return text.substring(0, i);
    }

    /**
     * Tells whether {@code c}, after accent removal, is an upper-case character.
     *
     * @param c the character to test
     * @return {@code true} if the accent-free character is upper case
     */
    public static boolean isCapital(char c) {
        char withoutAccent = removeAccent(c);
        return Character.isUpperCase(withoutAccent);
    }

    /**
     * Normalizes {@code str} at level {@code i2} and returns its last {@code i} characters,
     * or the whole normalized string if it is shorter than that.
     *
     * @param str the text to take the suffix of
     * @param i the maximum suffix length
     * @param i2 the normalization level
     * @return the suffix of the normalized text
     */
    public static String getSuffix(String str, int i, int i2) {
        String normalized = reduceCharacterDiversityLevel(i2, str);
        int from = normalized.length() - i;
        if (from < 0) {
            from = 0;
        }
        return normalized.substring(from);
    }

    /**
     * Normalizes {@code str} at level {@code i2} and returns its first {@code i} characters,
     * or the whole normalized string if it is shorter than that.
     *
     * @param str the text to take the prefix of
     * @param i the maximum prefix length
     * @param i2 the normalization level
     * @return the prefix of the normalized text
     */
    public static String getPrefix(String str, int i, int i2) {
        String normalized = reduceCharacterDiversityLevel(i2, str);
        int to = normalized.length();
        if (i < to) {
            to = i;
        }
        return normalized.substring(0, to);
    }

    /**
     * Returns the normalized suffix of the string returned by {@code bowString.getS()}.
     *
     * @param bowString the source of the text
     * @param i the maximum suffix length
     * @param i2 the normalization level
     * @return the suffix of the normalized text
     */
    public static String getSuffix(BowString bowString, int i, int i2) {
        String text = bowString.getS();
        return getSuffix(text, i, i2);
    }

    /**
     * Splits {@code str} on new lines, trims every line and concatenates the lines,
     * skipping the second and later lines of any run of blank lines.
     *
     * <p>NOTE(review): no separator is written between lines, so the output is a single
     * line and skipping blank lines has no visible effect; a line break may have been
     * intended here - confirm with callers before changing.
     *
     * @param str the text to clean
     * @return the trimmed lines joined without separators
     */
    public static String cleanForPrint(String str) {
        StringBuilder out = new StringBuilder();
        int consecutiveBlankLines = 0;
        for (String rawLine : str.split("\n")) {
            String line = rawLine.trim();
            if (line.isEmpty()) {
                consecutiveBlankLines++;
            } else {
                consecutiveBlankLines = 0;
            }
            if (consecutiveBlankLines > 1) {
                continue;
            }
            out.append(line);
        }
        return out.toString();
    }

    static {
        escapeFileNameMapping.put('/', '+');
        escapeFileNameMapping.put('\\', '-');
        escapeTab = new HashMap();
        escapeTab.put('\t', ' ');
        escapePath = new HashMap();
        escapePath.put('/', '-');
        escapePath.put('\r', '+');
        escapePath.put('\n', '$');
        escapePath.put('\t', (char) 167);
        escapePath.put(' ', '.');
        escapeNewLine = new HashMap();
        escapeNewLine.put('\r', 'r');
        escapeNewLine.put('\n', 'n');
        topLevelDomains = new String[]{"com", "co.uk", "net", "org", "nl", "be", "me", "nu"};
        certainErrorRegex = Arrays.asList(".*\\d+[A-Za-z]{3,}.*", ".*\\d+\\.[A-Za-z]{3,}.*");
        namePattern = Pattern.compile("([^a-zA-Z]*[A-Z][a-z]*[^a-zA-Z]*)+");
    }
}
