package pl.edu.icm.termtrans.translator;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/eudml-term-translator-2.0.4-SNAPSHOT.jar:pl/edu/icm/termtrans/translator/NodeTermTranslator.class */
/**
 * {@link TermTranslator} that looks up (possibly multi-word) terms in a trie of
 * {@link TermNode}s keyed by normalized word tokens.
 *
 * <p>The dictionary must be loaded with {@link #buildDictionary(Map)} before
 * {@link #partiallyTranslate(String)} is called; until then {@code trigrams} and
 * {@code terms} are {@code null}.
 *
 * <p>Only the aggregated counters are updated under the instance lock; the dictionary
 * fields themselves are not synchronized.
 */
public class NodeTermTranslator implements TermTranslator {
    // The logger category is the TermTranslator interface, exactly as in the original code,
    // so that existing logging configuration keeps matching.
    private static final Logger logger = LoggerFactory.getLogger(TermTranslator.class);

    /** Regex used to cut the input at word boundaries. */
    private static final Pattern WORD_BOUNDARY = Pattern.compile("\\b");

    /** Matches runs of combining diacritical marks left over after NFD decomposition. */
    private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /** Trigrams (see {@link #trigram(String)}) of the first word of every dictionary term. */
    Set<String> trigrams;

    /** Root level of the term trie: normalized first word of a term mapped to its node. */
    Map<String, TermNode> terms;

    /**
     * When {@code true} the matched source text is kept and the translation is appended in
     * braces; when {@code false} the matched text is replaced by the translation.
     */
    boolean extend = true;

    long totalInputTokens = 0;
    int translationsInDictionary = 0;
    long totalTranslations = 0;
    long totalTrigramHit = 0;

    /**
     * Result of a trie lookup: the translation found and the index of the last input token
     * that belongs to the matched term.
     *
     * <p>Declared {@code public} in this (decompiled) source; the decompiler reported the
     * original access modifier as {@code protected}.
     */
    public class MatchResult {
        String translation;
        int lastIdx;

        protected MatchResult() {
        }
    }

    /**
     * Scans {@code str} token by token and annotates or replaces every dictionary term found.
     *
     * <p>For each word token whose trigram is known, the longest term starting at that token
     * is looked up. On a hit the output receives either the matched tokens followed by
     * <code>{translation}</code> (extend mode) or just the translation, and scanning resumes
     * after the last matched token. All other tokens are copied unchanged.
     *
     * @param str text to translate; must not be {@code null}
     * @return the text with translations inserted
     */
    @Override // pl.edu.icm.termtrans.translator.TermTranslator
    public String partiallyTranslate(String str) {
        logger.debug("Starting to translate partially string, len={}", Integer.valueOf(str.length()));
        String[] tokens = tokenize(str);
        StringBuilder out = new StringBuilder();
        int translations = 0;
        int trigramHits = 0;

        int idx = 0;
        while (idx < tokens.length) {
            String token = tokens[idx];
            MatchResult match = null;
            if (isWordToken(token)) {
                String normalized = normalizeToken(token);
                // Cheap pre-filter: only consult the trie when the trigram is known.
                if (this.trigrams.contains(trigram(normalized))) {
                    trigramHits++;
                    TermNode root = this.terms.get(normalized);
                    if (root != null) {
                        match = bestMatch(tokens, idx, root);
                    }
                }
            }

            if (match == null) {
                out.append(token);
                idx++;
                continue;
            }

            if (this.extend) {
                for (int k = idx; k <= match.lastIdx; k++) {
                    out.append(tokens[k]);
                }
                out.append("{" + match.translation + "}");
            } else {
                out.append(match.translation);
            }
            translations++;
            // Resume right after the matched term. The matched tokens must not be emitted a
            // second time and the token following the match must not be skipped.
            idx = match.lastIdx + 1;
        }

        long aggregatedTokens;
        long aggregatedTranslations;
        long aggregatedTrigramHits;
        synchronized (this) {
            this.totalInputTokens += tokens.length;
            this.totalTranslations += translations;
            this.totalTrigramHit += trigramHits;
            // Snapshot under the same lock so the logged aggregate is a consistent triple.
            aggregatedTokens = this.totalInputTokens;
            aggregatedTranslations = this.totalTranslations;
            aggregatedTrigramHits = this.totalTrigramHit;
        }
        logger.debug("Translation finished.");
        logger.info("Translation stats: inputTokens={}, translations={}, trigramHit={}", Integer.valueOf(tokens.length), Integer.valueOf(translations), Integer.valueOf(trigramHits));
        logger.info("Aggregated stats: inputTokens={}, translations={}, trigramHit={}", Long.valueOf(aggregatedTokens), Long.valueOf(aggregatedTranslations), Long.valueOf(aggregatedTrigramHits));
        return out.toString();
    }

    public boolean isExtend() {
        return this.extend;
    }

    public void setExtend(boolean z) {
        this.extend = z;
    }

    /**
     * Finds the longest dictionary term that starts at token {@code i}, given that
     * {@code termNode} is the trie node already matched for that token.
     *
     * <p>Non-word tokens between words are skipped when descending into the trie. A longer
     * match is preferred; if none exists the current node is used when it is terminal.
     *
     * @param strArr   all input tokens
     * @param i        index of the token matched by {@code termNode}
     * @param termNode trie node matched so far
     * @return the match, or {@code null} if neither this node nor any descendant reached
     *         along the input is terminal
     */
    protected MatchResult bestMatch(String[] strArr, int i, TermNode termNode) {
        Map<String, TermNode> children = termNode.getNextNodes();
        if (children != null && !children.isEmpty()) {
            // Locate the next word token, stepping over separators.
            int nextWordIdx = i + 1;
            while (nextWordIdx < strArr.length && !isWordToken(strArr[nextWordIdx])) {
                nextWordIdx++;
            }
            if (nextWordIdx < strArr.length) {
                TermNode child = children.get(normalizeToken(strArr[nextWordIdx]));
                if (child != null) {
                    MatchResult longer = bestMatch(strArr, nextWordIdx, child);
                    if (longer != null) {
                        return longer;
                    }
                }
            }
        }
        if (!termNode.isTerminal()) {
            return null;
        }
        MatchResult result = new MatchResult();
        result.lastIdx = i;
        result.translation = termNode.getTranslation();
        return result;
    }

    /**
     * Splits {@code str} at regex word boundaries, keeping separators as tokens so that
     * concatenating the result reproduces the input.
     *
     * <p>Depending on the Java version, splitting on a zero-width match at position 0 may
     * yield a leading empty string; only that empty element is dropped. A real first token
     * is always kept.
     *
     * <p>NOTE(review): boundaries come from the regex engine's {@code \b}, whose notion of a
     * word character is not identical to {@link #isWordToken(String)} (for example
     * {@code '_'}); such tokens are simply treated as separators by the callers.
     *
     * @param str text to split; must not be {@code null}
     * @return the tokens in input order, possibly empty
     */
    public static String[] tokenize(String str) {
        String[] parts = WORD_BOUNDARY.split(str);
        if (parts.length > 0 && parts[0].isEmpty()) {
            return Arrays.copyOfRange(parts, 1, parts.length);
        }
        return parts;
    }

    /**
     * Returns the elements of {@code strArr} accepted by {@link #isWordToken(String)}, in
     * their original order.
     *
     * @param strArr tokens to filter
     * @return a new array containing only the word tokens
     */
    public static String[] wordsOnly(String[] strArr) {
        List<String> words = new ArrayList<String>(strArr.length);
        for (String token : strArr) {
            if (isWordToken(token)) {
                words.add(token);
            }
        }
        return words.toArray(new String[words.size()]);
    }

    /**
     * Tells whether {@code str} is non-empty and consists solely of letters and digits.
     *
     * <p>The check is done per Unicode code point, so letters outside the Basic Multilingual
     * Plane (encoded as surrogate pairs) are recognised as well.
     *
     * @param str token to test; may be {@code null}
     * @return {@code true} for a word token, {@code false} for {@code null}, empty, or any
     *         token containing a non letter-or-digit code point
     */
    public static boolean isWordToken(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int pos = 0;
        while (pos < str.length()) {
            int codePoint = str.codePointAt(pos);
            if (!Character.isLetterOrDigit(codePoint)) {
                return false;
            }
            pos += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Produces the dictionary key for a token: lower-cased, NFD-decomposed, stripped of
     * combining diacritical marks, and with the Polish letter l-with-stroke mapped to
     * {@code l} (it has no canonical decomposition, so NFD alone does not cover it).
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that keys do not depend on the JVM's
     * default locale.
     *
     * @param str token to normalize; must not be {@code null}
     * @return the normalized key
     */
    protected String normalizeToken(String str) {
        String decomposed = Normalizer.normalize(str.toLowerCase(Locale.ROOT), Normalizer.Form.NFD);
        return COMBINING_MARKS.matcher(decomposed).replaceAll("").replace('\u0142', 'l');
    }

    /**
     * Returns the lower-cased prefix of {@code str}, at most three characters long.
     *
     * @param str token (normally already normalized); must not be {@code null}
     * @return the first up-to-three characters of the lower-cased token
     */
    protected String trigram(String str) {
        String lowered = str.toLowerCase(Locale.ROOT);
        // Bound by the lowered string's own length: lower-casing may change the length.
        return lowered.substring(0, Math.min(3, lowered.length()));
    }

    /**
     * Replaces the current dictionary with the terms from {@code map}.
     *
     * <p>Each key is tokenized and reduced to its word tokens; the normalized words form a
     * path in the trie and the last node on that path receives the entry's value as its
     * translation. Keys without any word token are ignored. The trigram of every term's
     * first word is recorded for the fast pre-filter used during translation.
     *
     * <p>NOTE(review): this relies on {@code TermNode.getNextNodes()} returning a non-null,
     * mutable map for freshly created nodes — confirm against {@code TermNode}.
     *
     * @param map source term mapped to its translation; must not be {@code null}
     */
    public void buildDictionary(Map<String, String> map) {
        int maxDepth = 0;
        this.trigrams = new HashSet<String>();
        this.terms = new HashMap<String, TermNode>();
        // The dictionary is rebuilt from scratch, so the entry counter starts over as well.
        this.translationsInDictionary = 0;
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String[] words = wordsOnly(tokenize(entry.getKey()));
            if (words.length == 0) {
                continue;
            }
            this.translationsInDictionary++;
            this.trigrams.add(trigram(normalizeToken(words[0])));
            Map<String, TermNode> level = this.terms;
            for (int depth = 0; depth < words.length; depth++) {
                String key = normalizeToken(words[depth]);
                TermNode node = level.get(key);
                if (node == null) {
                    node = new TermNode(words[depth]);
                    level.put(key, node);
                }
                if (depth == words.length - 1) {
                    node.setTranslation(entry.getValue());
                }
                level = node.getNextNodes();
            }
            maxDepth = Math.max(words.length, maxDepth);
        }
        logger.info("Finished term dictionary building, total {} entries, max depth: {}, trigrams: {}", Integer.valueOf(map.size()), Integer.valueOf(maxDepth), Integer.valueOf(this.trigrams.size()));
    }

    public long getTotalInputTokens() {
        return this.totalInputTokens;
    }

    public long getTotalTranslations() {
        return this.totalTranslations;
    }

    public long getTotalTrigramHit() {
        return this.totalTrigramHit;
    }
}
