package uk.ac.open.crc.intt;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:uk/ac/open/crc/intt/NumericTokeniser.class */
/**
 * Splits a string that contains digits into a list of tokens.
 *
 * <p>Substrings reported by the digit-abbreviation dictionary are always kept
 * intact. When the dictionary reports none, the shape of the string (letters
 * then digits, digits then letters, digits embedded in other characters)
 * selects the splitting strategy, and the aggregated dictionary is consulted to
 * choose between candidate splits.
 */
public class NumericTokeniser {
    private static final Logger LOGGER = LoggerFactory.getLogger(NumericTokeniser.class);

    // Precompiled once; each is applied with Matcher.matches(), i.e. against the whole input.
    private static final Pattern LETTERS_THEN_DIGITS = Pattern.compile("^[a-zA-Z]+[0-9]+$");
    private static final Pattern DIGITS_THEN_LETTERS = Pattern.compile("^[0-9]+[a-zA-Z]+$");
    private static final Pattern EMBEDDED_DIGITS = Pattern.compile("^.+[0-9]+.+$");
    private static final Pattern LETTER_DIGIT_LETTER = Pattern.compile("^[A-Z][0-9][A-Za-z]$");
    private static final Pattern TEXT_SPEAK = Pattern.compile("^[a-zA-Z]+(2|4)[a-zA-Z]+$");
    private static final Pattern CONTAINS_DIGIT = Pattern.compile("^.*[0-9]+.*$");

    private final AggregatedDictionary aggregatedDictionary;
    private final DigitAbbreviationDictionary numericAbbreviationDictionary;

    /**
     * Creates a tokeniser backed by the aggregated dictionary and the
     * digit-abbreviation dictionary of the given set.
     *
     * @param dictionarySet source of the two dictionaries used by this tokeniser
     */
    public NumericTokeniser(DictionarySet dictionarySet) {
        this.aggregatedDictionary = dictionarySet.getAggregatedDictionary();
        this.numericAbbreviationDictionary = dictionarySet.getDigitAbbreviationDictionary();
    }

    /**
     * Tokenises a string containing digits.
     *
     * <p>If the digit-abbreviation dictionary reports known substrings, the
     * string is cut around them. Otherwise:
     * <ul>
     *   <li>letters followed by digits: split at the letter/digit boundary when
     *       {@code z} is true, returned whole when it is false;</li>
     *   <li>digits followed by letters: always split at the boundary;</li>
     *   <li>digits with at least one character on each side: split using case
     *       boundaries and dictionary look-ups;</li>
     *   <li>anything else: returned as a single token.</li>
     * </ul>
     *
     * @param str the string to tokenise
     * @param z   whether a letters-then-digits string is split at the
     *            letter/digit boundary; also passed to the recursive call made
     *            on the tail of an upper/lower-case boundary split
     * @return the tokens in order of appearance
     */
    public ArrayList<String> tokenise(String str, boolean z) {
        List<String> knownSubstrings = this.numericAbbreviationDictionary.findKnownSubstrings(str);
        if (!knownSubstrings.isEmpty()) {
            return splitAroundKnownSubstrings(str, knownSubstrings);
        }

        ArrayList<String> tokens = new ArrayList<>();
        if (LETTERS_THEN_DIGITS.matcher(str).matches()) {
            if (z) {
                tokens.addAll(tokeniseOnDigits(str));
            } else {
                tokens.add(str);
            }
        } else if (DIGITS_THEN_LETTERS.matcher(str).matches()) {
            tokens.addAll(tokeniseOnDigits(str));
        } else if (EMBEDDED_DIGITS.matcher(str).matches()) {
            tokens.addAll(tokeniseEmbeddedDigits(str, z));
        } else {
            tokens.add(str);
        }
        return tokens;
    }

    /**
     * Cuts {@code str} around each known substring, keeping the text before,
     * between and after the matches as separate tokens.
     *
     * <p>Matching is case-insensitive and each search starts where the previous
     * match ended, so repeated abbreviations are located in sequence. An
     * abbreviation that cannot be found from the current position is logged and
     * skipped rather than causing an index error.
     */
    private ArrayList<String> splitAroundKnownSubstrings(String str, List<String> knownSubstrings) {
        ArrayList<String> tokens = new ArrayList<>();
        String lowerCased = str.toLowerCase();
        int cursor = 0;
        for (String known : knownSubstrings) {
            String needle = known.toLowerCase();
            int start = lowerCased.indexOf(needle, cursor);
            if (start < 0) {
                LOGGER.warn("Known substring \"{}\" not found at or after index {} of \"{}\"", known, cursor, str);
                continue;
            }
            int end = start + needle.length();
            if (start > cursor) {
                tokens.add(str.substring(cursor, start));
            }
            // Take the text from the original string so its case is preserved.
            tokens.add(str.substring(start, end));
            cursor = end;
        }
        if (cursor < str.length()) {
            tokens.add(str.substring(cursor));
        }
        return tokens;
    }

    /**
     * Tokenises a string that has digits with at least one character on each
     * side.
     *
     * <p>A three-character upper-case-letter/digit/letter string is kept whole.
     * Otherwise the string is split at an upper/lower-case boundary when one is
     * found after index 0, or by pairing digit and non-digit runs. If the string
     * is letters around a single '2' or '4', the three-way split around that
     * digit replaces the first result whenever the dictionary knows at least as
     * large a percentage of its tokens.
     */
    private ArrayList<String> tokeniseEmbeddedDigits(String str, boolean z) {
        ArrayList<String> tokens = new ArrayList<>();
        if (LETTER_DIGIT_LETTER.matcher(str).matches()) {
            tokens.add(str);
        } else {
            int boundary = getUcLcBoundary(str);
            if (boundary > 0) {
                tokens.addAll(tokeniseOnUcLcBoundary(str, boundary, z));
            } else {
                tokens.addAll(splitMixedString(str));
            }
        }

        if (TEXT_SPEAK.matcher(str).matches()) {
            List<String> textSpeak = textSpeakSplit(str);
            if (this.aggregatedDictionary.percentageKnown(textSpeak)
                    >= this.aggregatedDictionary.percentageKnown(tokens)) {
                return new ArrayList<>(textSpeak);
            }
        }
        return tokens;
    }

    /**
     * Splits {@code str} next to the given upper/lower-case boundary and
     * recursively tokenises whichever half may still contain digits.
     *
     * <p>The split is made at {@code boundary} when the dictionary recognises
     * either resulting half as a word, and one character later otherwise. If the
     * head contains a digit it is tokenised further (without splitting trailing
     * digits) and the tail is kept whole; otherwise the head is kept whole and
     * the tail is tokenised with the caller's {@code z}.
     */
    private List<String> tokeniseOnUcLcBoundary(String str, int boundary, boolean z) {
        boolean splitBeforeCapital = this.aggregatedDictionary.isWord(str.substring(0, boundary))
                || this.aggregatedDictionary.isWord(str.substring(boundary));
        int splitAt = splitBeforeCapital ? boundary : boundary + 1;
        String head = str.substring(0, splitAt);
        String tail = str.substring(splitAt);

        List<String> tokens = new ArrayList<>();
        if (CONTAINS_DIGIT.matcher(head).matches()) {
            tokens.addAll(tokenise(head, false));
            tokens.add(tail);
        } else {
            tokens.add(head);
            tokens.addAll(tokenise(tail, z));
        }
        return tokens;
    }

    /**
     * Splits {@code str} into the text before, the digit itself, and the text
     * after the first '2', or the first '4' when there is no '2'.
     *
     * <p>The caller must ensure the string contains one of those digits.
     */
    private List<String> textSpeakSplit(String str) {
        int digitIndex = str.indexOf("2");
        if (digitIndex == -1) {
            digitIndex = str.indexOf("4");
        }
        List<String> parts = new ArrayList<>();
        parts.add(str.substring(0, digitIndex));
        parts.add(str.substring(digitIndex, digitIndex + 1));
        parts.add(str.substring(digitIndex + 1));
        return parts;
    }

    /**
     * Splits {@code str} into maximal runs of digits and non-digits, in order.
     * An empty string yields an empty list.
     */
    private ArrayList<String> tokeniseOnDigits(String str) {
        ArrayList<String> runs = new ArrayList<>();
        if (str.isEmpty()) {
            return runs;
        }
        int runStart = 0;
        boolean inDigits = isDigitAt(str, 0);
        for (int i = 1; i < str.length(); i++) {
            boolean digit = isDigitAt(str, i);
            if (digit != inDigits) {
                runs.add(str.substring(runStart, i));
                runStart = i;
                inDigits = digit;
            }
        }
        runs.add(str.substring(runStart));
        return runs;
    }

    /** Reports whether the code point starting at char index {@code index} is a digit. */
    private static boolean isDigitAt(String str, int index) {
        return Character.isDigit(str.codePointAt(index));
    }

    /**
     * Finds the first upper-case character that is followed by a lower-case
     * character and is either the first character or preceded by another
     * upper-case character.
     *
     * @return the index of that upper-case character, or -1 when there is none
     */
    private int getUcLcBoundary(String str) {
        for (int i = 0; i < str.length() - 1; i++) {
            boolean upperThenLower = Character.isUpperCase(str.codePointAt(i))
                    && Character.isLowerCase(str.codePointAt(i + 1));
            if (upperThenLower && (i == 0 || Character.isUpperCase(str.codePointAt(i - 1)))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Splits {@code str} into digit/non-digit runs and re-joins neighbouring
     * runs in pairs.
     *
     * <p>Runs are normally joined as (0,1), (2,3), and so on. If, at the current
     * position, the run two places ahead is not a dictionary word but the
     * current run is, the current run is emitted on its own and all remaining
     * runs are paired from the next position to the end.
     */
    private ArrayList<String> splitMixedString(String str) {
        ArrayList<String> tokens = new ArrayList<>();
        ArrayList<String> runs = tokeniseOnDigits(str);
        int i = 0;
        while (i < runs.size()) {
            boolean standAlone = i + 2 < runs.size()
                    && !this.aggregatedDictionary.isWord(runs.get(i + 2))
                    && this.aggregatedDictionary.isWord(runs.get(i));
            if (standAlone) {
                tokens.add(runs.get(i));
                // Pair everything that is left; this loop always ends at the
                // end of the list, after which nothing remains to process.
                for (int j = i + 1; j < runs.size(); j += 2) {
                    tokens.add(joinWithNext(runs, j));
                }
                break;
            }
            tokens.add(joinWithNext(runs, i));
            i += 2;
        }
        return tokens;
    }

    /** Returns the run at {@code index} joined to the following run, or alone when it is the last. */
    private static String joinWithNext(List<String> runs, int index) {
        if (index + 1 < runs.size()) {
            return runs.get(index) + runs.get(index + 1);
        }
        return runs.get(index);
    }
}
