package pl.edu.icm.sedno.service.similarity;

import ch.qos.logback.core.rolling.helper.IntegerTokenConverter;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;

/* loaded from: input_file:WEB-INF/lib/sedno-tools-1.3.7.jar:pl/edu/icm/sedno/service/similarity/LenientLevenshteinDistance.class */
/**
 * Static helpers for comparing strings while ignoring letter case, diacritics and,
 * depending on the method, whitespace, punctuation, word order and a fixed set of
 * stop words.
 *
 * <p>The class is a pure utility holder: it has no state and cannot be instantiated.
 */
public class LenientLevenshteinDistance {
    /** Matches a single punctuation or whitespace character. */
    private static final Pattern PUNCT_OR_WHITESPACE = Pattern.compile("\\p{Punct}|\\p{Space}");

    /** Matches a single whitespace character. */
    private static final Pattern WHITESPACE = Pattern.compile("\\p{Space}");

    /** Matches a run of punctuation/whitespace characters; used as the word separator. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("(\\p{Punct}|\\p{Space})+");

    /** Matches the combining marks left behind by NFD decomposition. */
    private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /** Matches one ASCII letter optionally followed by a dot. */
    private static final Pattern INITIAL = Pattern.compile("\\p{Alpha}\\.?");

    /**
     * Words that are removed before word-level comparison. The first entry is the
     * plain literal "i"; it must not be taken from an unrelated library constant.
     */
    private static final ImmutableSet<String> PL_STOPWORDS =
            ImmutableSet.of("i", "z", "o", "dawniej", "sp", "im", "w", "we");

    /**
     * One word taken from one of the two compared strings, together with the
     * adjacent words of the <em>other</em> string in the merged, sorted word list.
     * Items order themselves by {@link #word} only.
     */
    public static class WordItem implements Comparable<WordItem> {
        /** 0 for a word of the first string, 1 for a word of the second string. */
        int listNo;
        String word;
        /** Preceding item in the sorted list, set only if it belongs to the second string. */
        WordItem loNeighbour;
        /** Following item in the sorted list, set only if it belongs to the second string. */
        WordItem hiNeighbour;
        double loScore;
        double hiScore;
        /** Set once this item has been paired by another item's {@link #popScore()}. */
        boolean consumed;

        private WordItem(int i, String str) {
            this.consumed = false;
            Preconditions.checkNotNull(str);
            this.listNo = i;
            this.word = str;
        }

        @Override // java.lang.Comparable
        public int compareTo(WordItem wordItem) {
            return this.word.compareTo(wordItem.word);
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("list ").append(this.listNo)
                    .append(" [").append(StringUtils.rightPad(this.word, 15)).append("] ");
            if (this.loNeighbour != null) {
                text.append(", loNeighbour: [").append(this.loNeighbour.word).append("]  ").append(this.loScore);
            }
            if (this.hiNeighbour != null) {
                text.append(", hiNeighbour: [").append(this.hiNeighbour.word).append("]  ").append(this.hiScore);
            }
            return text.toString();
        }

        /**
         * Picks the better-scoring neighbour that has not been consumed yet, marks it
         * as consumed and returns its score.
         *
         * @return the score of the chosen neighbour, or 0 when no unconsumed neighbour exists
         */
        public double popScore() {
            WordItem best = null;
            double bestScore = 0.0d;
            if (this.loNeighbour != null && !this.loNeighbour.consumed) {
                best = this.loNeighbour;
                bestScore = this.loScore;
            }
            // The high neighbour wins only when it is strictly better than an available low one.
            if (this.hiNeighbour != null && !this.hiNeighbour.consumed
                    && (best == null || bestScore < this.hiScore)) {
                best = this.hiNeighbour;
                bestScore = this.hiScore;
            }
            if (best == null) {
                return 0.0d;
            }
            best.consumed = true;
            return bestScore;
        }

        /** Stores the low neighbour and pre-computes its score against this word. */
        public void setLoNeighbour(WordItem wordItem) {
            this.loNeighbour = wordItem;
            this.loScore = calcScore(wordItem);
        }

        /** Stores the high neighbour and pre-computes its score against this word. */
        public void setHiNeighbour(WordItem wordItem) {
            this.hiNeighbour = wordItem;
            this.hiScore = calcScore(wordItem);
        }

        private boolean isA() {
            return this.listNo == 0;
        }

        /** @return {@code true} when this word comes from the second string */
        public boolean isB() {
            return this.listNo == 1;
        }

        /**
         * Scores this word against another one by their common prefix.
         *
         * @return common prefix length divided by the longer word's length when the
         *         prefix covers the whole shorter word or more than half of it
         *         (integer half); otherwise 0
         */
        private double calcScore(WordItem wordItem) {
            int shorter = Math.min(this.word.length(), wordItem.word.length());
            int longer = Math.max(this.word.length(), wordItem.word.length());
            if (longer == 0) {
                // Two empty words: nothing to score, and avoids dividing by zero.
                return 0.0d;
            }
            int prefix = LenientLevenshteinDistance.commonPrefixLength(this.word, wordItem.word);
            if (prefix == shorter || prefix > shorter / 2) {
                // Floating-point division on purpose: int/int would collapse every partial match to 0.
                return (double) prefix / longer;
            }
            return 0.0d;
        }
    }

    private LenientLevenshteinDistance() {
    }

    /**
     * Tells whether the text is an initial: exactly one ASCII letter (after trimming
     * and accent removal), optionally followed by a dot.
     *
     * @param str text to test, may be {@code null}
     * @return {@code false} for {@code null} or empty input
     */
    public static boolean isInitial(String str) {
        if (StringUtils.isEmpty(str)) {
            return false;
        }
        return INITIAL.matcher(removeAccents(str.trim())).matches();
    }

    /**
     * Strips diacritics by NFD-decomposing the text and dropping combining marks.
     * The Polish L with stroke has no canonical decomposition, so both of its cases
     * are mapped to the plain letter explicitly.
     *
     * @param str text to normalise, must not be {@code null}
     * @return the text without diacritics; letter case is preserved
     */
    public static String removeAccents(String str) {
        String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        return COMBINING_MARKS.matcher(decomposed).replaceAll("")
                .replace('\u0142', 'l')   // lower-case l with stroke
                .replace('\u0141', 'L');  // upper-case L with stroke
    }

    private static String removeWhitespaces(String str) {
        return WHITESPACE.matcher(str).replaceAll("");
    }

    private static String removePunctuationAndWhitespaces(String str) {
        return PUNCT_OR_WHITESPACE.matcher(str).replaceAll("");
    }

    /** Lower-cases, removes whitespace and accents; {@code null} becomes "". */
    private static String filterAWC(String str) {
        if (str == null) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the JVM default locale.
        return removeAccents(removeWhitespaces(str.toLowerCase(Locale.ROOT)));
    }

    /**
     * Lower-cases every word and removes its accents. Words that are empty after
     * normalisation are dropped so they are never counted or compared.
     */
    private static List<String> filterAC(List<String> list) {
        List<String> normalised = Lists.newArrayList();
        for (String raw : list) {
            String word = removeAccents(raw.toLowerCase(Locale.ROOT));
            if (!word.isEmpty()) {
                normalised.add(word);
            }
        }
        return normalised;
    }

    /**
     * Lower-cases the text and removes punctuation, whitespace and accents.
     *
     * @param str text to normalise, may be {@code null}
     * @return the normalised text, or "" for {@code null}
     */
    public static String filterAPWC(String str) {
        if (str == null) {
            return "";
        }
        return removeAccents(removePunctuationAndWhitespaces(str.toLowerCase(Locale.ROOT)));
    }

    /**
     * Levenshtein distance between the two texts after lower-casing and removing
     * whitespace and accents. {@code null} is treated as "".
     */
    public static int lenientDistance(String str, String str2) {
        return StringUtils.getLevenshteinDistance(filterAWC(str), filterAWC(str2));
    }

    /**
     * Levenshtein distance between the two texts after lower-casing and removing
     * punctuation, whitespace and accents. {@code null} is treated as "".
     */
    public static int punctuationLenientDistance(String str, String str2) {
        return StringUtils.getLevenshteinDistance(filterAPWC(str), filterAPWC(str2));
    }

    /**
     * Splits the text on runs of punctuation/whitespace and normalises each word.
     * The returned list is mutable and contains no empty words.
     */
    private static List<String> tokenizeAndFilterAC(String str) {
        return filterAC(Lists.newArrayList(TOKEN_SEPARATOR.split(str)));
    }

    /**
     * Counts the normalised, non-stop words the two texts share, respecting
     * multiplicity (multiset intersection).
     *
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static int lenientWordsInCommon(String str, String str2) {
        Preconditions.checkArgument(str != null);
        Preconditions.checkArgument(str2 != null);
        return Multisets.intersection(createWordsMultiset(str), createWordsMultiset(str2)).size();
    }

    /**
     * Fractional variant of {@link #lenientWordsInCommon(String, String)}: sums
     * {@link WordItem#popScore()} over the merged word list, so words that merely
     * share a long enough prefix contribute a partial score.
     *
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static double lenientWordsInCommonD(String str, String str2) {
        Preconditions.checkArgument(str != null);
        Preconditions.checkArgument(str2 != null);
        double total = 0.0d;
        for (WordItem item : createWordDualList(str, str2)) {
            total += item.popScore();
        }
        return total;
    }

    /** Builds the multiset of normalised words of the text, without stop words. */
    private static Multiset<String> createWordsMultiset(String str) {
        Multiset<String> words = HashMultiset.create();
        words.addAll(tokenizeAndFilterAC(str));
        words.removeAll(PL_STOPWORDS);
        return words;
    }

    /**
     * Merges the words of both texts into one list sorted by word and links every
     * first-text item to its directly adjacent second-text items, if any.
     */
    private static List<WordItem> createWordDualList(String str, String str2) {
        List<WordItem> items = Lists.newArrayList();
        for (String word : createWordsMultiset(str)) {
            items.add(new WordItem(0, word));
        }
        for (String word : createWordsMultiset(str2)) {
            items.add(new WordItem(1, word));
        }
        Collections.sort(items);
        for (int i = 0; i < items.size(); i++) {
            WordItem current = items.get(i);
            if (current.isB()) {
                continue;
            }
            int below = i - 1;
            int above = i + 1;
            if (below >= 0 && items.get(below).isB()) {
                current.setLoNeighbour(items.get(below));
            }
            if (above < items.size() && items.get(above).isB()) {
                current.setHiNeighbour(items.get(above));
            }
        }
        return items;
    }

    /**
     * Counts the normalised, non-stop words of the text.
     *
     * @param str text to count words in, may be {@code null}
     * @return the word count, 0 for {@code null}
     */
    public static int lenientWordsCount(String str) {
        if (str == null) {
            return 0;
        }
        return createWordsMultiset(str).size();
    }

    /**
     * Word-order-insensitive similarity: both texts are tokenised, normalised,
     * sorted and re-joined with single spaces, then compared as
     * {@code (maxLen - levenshtein) / maxLen}, where {@code maxLen} is the length
     * of the longer joined text.
     *
     * <p>NOTE(review): when neither text contains any word there is nothing to
     * compare and 0 is returned instead of dividing by zero - confirm this suits
     * the callers.
     *
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static double orderLenientSimilarity(String str, String str2) {
        Preconditions.checkArgument(str != null);
        Preconditions.checkArgument(str2 != null);
        List<String> list = tokenizeAndFilterAC(str);
        List<String> list2 = tokenizeAndFilterAC(str2);
        Collections.sort(list);
        Collections.sort(list2);
        String joined = StringUtils.join(list, " ");
        String joined2 = StringUtils.join(list2, " ");
        int maxLength = Math.max(joined.length(), joined2.length());
        if (maxLength == 0) {
            return 0.0d;
        }
        int distance = StringUtils.getLevenshteinDistance(joined, joined2);
        return (maxLength - distance) / (double) maxLength;
    }

    /**
     * Returns the number of leading characters the two strings have in common.
     *
     * @param str first string, must not be {@code null}
     * @param str2 second string, must not be {@code null}
     */
    public static int commonPrefixLength(String str, String str2) {
        int min = Math.min(str.length(), str2.length());
        for (int i = 0; i < min; i++) {
            if (str.charAt(i) != str2.charAt(i)) {
                return i;
            }
        }
        return min;
    }
}
