package pl.edu.icm.yadda.parsing.authorparser;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/yadda-content-1.12.6.jar:pl/edu/icm/yadda/parsing/authorparser/AuthorParser.class */
/**
 * Static helpers that turn a free-text author list (for example the author
 * part of a bibliographic citation) into normalized names, split into a
 * surname and the text that precedes it.
 *
 * <p>All regular expressions are compiled once. The methods keep no shared
 * mutable state, but several of them modify the {@link StringBuilder} they
 * are given.
 */
public class AuthorParser {
    /** A token consisting solely of "and" or "&amp;": separates two authors. */
    private static final Pattern CONJUNCTION = Pattern.compile("^(&|and)$", Pattern.CASE_INSENSITIVE);
    /** A token ending with a comma. */
    private static final Pattern TRAILING_COMMA = Pattern.compile(",$", Pattern.CASE_INSENSITIVE);
    /** A surname whose parts were glued with '_' by {@link #joinMultiWordNames(StringBuilder)}. */
    private static final Pattern COMPOUND_SURNAME =
            Pattern.compile("\\b[a-zA-Z0-9]+_\\w+\\b", Pattern.CASE_INSENSITIVE);

    /** "X, Y": group 1 is the text before the comma, group 2 the text after it. */
    private static final Pattern COMMA_SEPARATED_PARTS = Pattern.compile("(.+),\\s*(.+)");
    private static final Pattern DOT_HYPHEN = Pattern.compile("\\.\\-");
    private static final Pattern COMMA_OR_DOT = Pattern.compile("[\\,\\.]");
    private static final Pattern SPACE_RUN = Pattern.compile("  +");
    private static final Pattern LEADING_WHITESPACE = Pattern.compile("^\\s+");
    private static final Pattern TRAILING_WHITESPACE = Pattern.compile("\\s+$");
    /** A word of two or more characters followed only by single characters or "X-Y" pairs. */
    private static final Pattern SURNAME_THEN_INITIALS =
            Pattern.compile("^[^\\s][^\\s]+(\\s+[^\\s]|\\s+[^\\s]\\-[^\\s])+$");

    private static final Pattern ET_AL_TAIL = Pattern.compile("et\\.? al\\.?.*$");
    private static final Pattern LEADING_SENTENCE =
            Pattern.compile("^.*?[\\p{IsUpper}\\p{IsLower}][\\p{IsUpper}\\p{IsLower}]+\\. ");
    private static final Pattern PAREN_PAIR = Pattern.compile("\\(.*?\\)");
    private static final Pattern UP_TO_CLOSING_PAREN = Pattern.compile("^.*?\\)\\.?");
    private static final Pattern FROM_OPENING_PAREN = Pattern.compile("\\(.*?$");
    private static final Pattern BRACKET_PAIR = Pattern.compile("\\[.*?\\]");
    private static final Pattern UP_TO_CLOSING_BRACKET = Pattern.compile("^.*?\\]\\.?");
    private static final Pattern FROM_OPENING_BRACKET = Pattern.compile("\\[.*?$");
    private static final Pattern UNWANTED_CHARACTER = Pattern.compile(
            "[\\:\\\"\\<\\>\\/\\?\\{\\}\\[\\]\\+\\=\\(\\)\\*\\^\\%\\$\\#\\@\\!\\~\\_]");
    private static final Pattern NAME_CHARACTER = Pattern.compile("[\\p{IsUpper}\\p{IsLower}&]");
    private static final Pattern NAME_SUFFIX =
            Pattern.compile("^(jr|sr|ph\\.?d|m\\.?d|esq)\\.?\\,?$", Pattern.CASE_INSENSITIVE);
    private static final Pattern ROMAN_NUMERAL =
            Pattern.compile("^[IVX][IVX]+\\.?\\,?$", Pattern.CASE_INSENSITIVE);

    /** A surname particle followed by whitespace; group 1 is the particle itself. */
    private static final Pattern SURNAME_PARTICLE =
            Pattern.compile("\\b(van|von|der|den|de|di|le|el)\\b\\s+", Pattern.CASE_INSENSITIVE);

    /**
     * A single word character standing between two non-word characters, unless
     * it is already followed by a dot. Lookarounds are used so that adjacent
     * initials ("A B C") are all found.
     */
    private static final Pattern STANDALONE_LETTER = Pattern.compile(
            "(?<=\\W)\\w(?=[^\\w.])", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
    /** A single word character at the very end, preceded by a non-word character. */
    private static final Pattern TRAILING_LETTER = Pattern.compile(
            "\\W\\w$", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Splits a free-text author list into individual authors and appends, for
     * each author, one entry to each of the two output lists.
     *
     * <p>Authors are separated by a stand-alone "and"/"&amp;" token, or by a
     * token ending with a comma that is not the first token of the current
     * name. Each author is normalized with
     * {@link #normalizeAuthorName(LinkedList)}. The surname is the compound
     * surname (particle + name) when one is present, otherwise the last
     * space-separated word.
     *
     * <p>The two output lists are kept index-aligned: when an author has no
     * text preceding the surname, {@code null} is appended to
     * {@code forenames}.
     *
     * @param authorText raw author text; it is modified in place while being cleaned
     * @param forenames  receives, per author, the text preceding the surname, or {@code null}
     * @param surnames   receives, per author, the surname
     */
    public static void normalizeAuthorNames(StringBuilder authorText, List<StringBuilder> forenames,
            List<String> surnames) {
        LinkedList<String> names = new LinkedList<>();
        LinkedList<String> current = new LinkedList<>();
        boolean atNameStart = true;
        for (String token : repairAndTokenizeAuthorText(authorText)) {
            if (CONJUNCTION.matcher(token).find()) {
                // A conjunction right after a comma-terminated name (", and") finds the
                // buffer empty; flushing must then be a no-op rather than a crash.
                flushName(current, names);
                atNameStart = true;
            } else if (atNameStart) {
                // The first token of a name never ends it, even with a trailing comma
                // ("Smith, J." starts with "Smith,").
                current.add(token);
                atNameStart = false;
            } else {
                current.add(token);
                if (TRAILING_COMMA.matcher(token).find()) {
                    flushName(current, names);
                    atNameStart = true;
                }
            }
        }
        flushName(current, names);

        for (String name : names) {
            Matcher compound = COMPOUND_SURNAME.matcher(name);
            if (compound.find()) {
                surnames.add(name.substring(compound.start(), compound.end()).replace('_', ' '));
                if (compound.start() > 1) {
                    // start - 1 drops the separator in front of the surname.
                    forenames.add(new StringBuilder(name.substring(0, compound.start() - 1)));
                } else {
                    forenames.add(null);
                }
                continue;
            }
            int lastSpace = name.lastIndexOf(" ");
            if (lastSpace != -1) {
                surnames.add(name.substring(lastSpace + 1));
                forenames.add(new StringBuilder(name.substring(0, lastSpace)));
            } else {
                surnames.add(name);
                forenames.add(null);
            }
        }
    }

    /** Normalizes the buffered tokens into one name, appends it to {@code names} and empties the buffer. */
    private static void flushName(LinkedList<String> buffer, List<String> names) {
        if (!buffer.isEmpty()) {
            names.add(normalizeAuthorName(buffer));
            buffer.clear();
        }
    }

    /**
     * Joins the given tokens with single spaces and normalizes the result.
     *
     * @param linkedList tokens of one author name
     * @return the normalized name; an empty string when the list is empty
     */
    public static String normalizeAuthorName(LinkedList<String> linkedList) {
        StringBuilder joined = new StringBuilder();
        for (String token : linkedList) {
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(token);
        }
        return str_normalizeAuthorName(joined);
    }

    /**
     * Same as {@link #strBld_normalizeAuthorName(StringBuilder)} but returns a {@code String}.
     *
     * @param sb name to normalize; modified in place
     * @return the normalized name
     */
    public static String str_normalizeAuthorName(StringBuilder sb) {
        return strBld_normalizeAuthorName(sb).toString();
    }

    /**
     * Normalizes a single author name in place.
     *
     * <ol>
     * <li>"X, Y" becomes "Y X";</li>
     * <li>".-" becomes "-", and every remaining comma or dot becomes a space;</li>
     * <li>runs of spaces are collapsed, leading and trailing whitespace removed;</li>
     * <li>a name of the form "Word I J-K" (a word followed only by single
     *     characters or hyphenated pairs) is reordered to "I J-K Word".</li>
     * </ol>
     *
     * @param sb name to normalize; modified in place
     * @return the same {@code StringBuilder} instance
     */
    public static StringBuilder strBld_normalizeAuthorName(StringBuilder sb) {
        Matcher parts = COMMA_SEPARATED_PARTS.matcher(sb);
        if (parts.find()) {
            String swapped = parts.group(2) + " " + parts.group(1);
            sb.setLength(0);
            sb.append(swapped);
        }
        replaceRepeatedly(sb, DOT_HYPHEN, "-");
        replaceRepeatedly(sb, COMMA_OR_DOT, " ");
        replaceRepeatedly(sb, SPACE_RUN, " ");
        deleteFirst(sb, LEADING_WHITESPACE);
        deleteFirst(sb, TRAILING_WHITESPACE);

        if (SURNAME_THEN_INITIALS.matcher(sb).find()) {
            String[] words = sb.toString().split("\\s+");
            sb.setLength(0);
            for (int i = 1; i < words.length; i++) {
                sb.append(words[i]).append(' ');
            }
            sb.append(words[0]);
        }
        return sb;
    }

    /**
     * Strips text that does not belong to the author list, unifies separators
     * and splits the result into tokens.
     *
     * <p>In order: the "et al" tail is removed; a leading run ending with a
     * two-or-more-letter word followed by ". " is removed; parenthesized and
     * bracketed fragments (balanced, or unbalanced at either end) are removed;
     * ';' becomes ',', every ',' gets a following space, ':' becomes a space;
     * remaining special characters are dropped; surname particles are glued to
     * the following word. The text is then split on whitespace and filtered.
     *
     * @param sb raw author text; modified in place
     * @return the retained tokens, in order
     */
    private static LinkedList<String> repairAndTokenizeAuthorText(StringBuilder sb) {
        deleteFirst(sb, ET_AL_TAIL);
        deleteFirst(sb, LEADING_SENTENCE);

        // Each pattern is re-applied from the start of the shrunken text after every
        // deletion: a Matcher must not be advanced over a buffer that is being shortened.
        replaceRepeatedly(sb, PAREN_PAIR, "");
        replaceRepeatedly(sb, UP_TO_CLOSING_PAREN, "");
        replaceRepeatedly(sb, FROM_OPENING_PAREN, "");
        replaceRepeatedly(sb, BRACKET_PAIR, "");
        replaceRepeatedly(sb, UP_TO_CLOSING_BRACKET, "");
        replaceRepeatedly(sb, FROM_OPENING_BRACKET, "");

        for (int i = 0; i < sb.length(); i++) {
            if (sb.charAt(i) == ';') {
                sb.setCharAt(i, ',');
            }
        }
        for (int i = sb.indexOf(","); i != -1; i = sb.indexOf(",", i + 1)) {
            sb.insert(i + 1, ' ');
        }
        for (int i = 0; i < sb.length(); i++) {
            if (sb.charAt(i) == ':') {
                sb.setCharAt(i, ' ');
            }
        }
        replaceRepeatedly(sb, UNWANTED_CHARACTER, "");

        // Must run after the clean-up above: that step deletes '_', which is the
        // character used here to glue particles to surnames.
        joinMultiWordNames(sb);

        String[] words = sb.toString().split("\\s+");
        LinkedList<String> tokens = new LinkedList<>();
        for (int i = 0; i < words.length; i++) {
            String word = words[i];
            if (!NAME_CHARACTER.matcher(word).find()) {
                // A word without any name character: in the second half of the text it
                // ends tokenization, in the first half it discards what was collected so far.
                if (i >= words.length / 2) {
                    break;
                }
                tokens.clear();
                continue;
            }
            // A suffix token is dropped only when the previously kept token is a suffix too.
            boolean repeatedSuffix = NAME_SUFFIX.matcher(word).find()
                    && !tokens.isEmpty()
                    && NAME_SUFFIX.matcher(tokens.peekLast()).find();
            if (!repeatedSuffix && !ROMAN_NUMERAL.matcher(word).find()) {
                tokens.add(word);
            }
        }
        return tokens;
    }

    /**
     * Glues each surname particle (van, von, der, den, de, di, le, el) to the
     * word that follows it by replacing the whitespace after the particle with
     * a single '_', e.g. "van der Waals" becomes "van_der_Waals".
     *
     * <p>A particle that is not followed by whitespace (e.g. "El-Sayed") is
     * left untouched.
     *
     * @param sb text to process; modified in place
     */
    private static void joinMultiWordNames(StringBuilder sb) {
        String joined = SURNAME_PARTICLE.matcher(sb).replaceAll("$1_");
        sb.setLength(0);
        sb.append(joined);
    }

    /**
     * Appends a dot after every one-character name part, e.g.
     * "J-P Sartre" becomes "J.-P. Sartre" and "John A" becomes "John A.".
     *
     * <p>A single-character input always gets a dot. A leading character gets
     * one only when followed by a space or a hyphen. A character that is
     * already followed by a dot is left alone. Word characters are recognized
     * using Unicode character classes.
     *
     * @param sb text to process; modified in place, may be {@code null}
     * @return the same instance that was passed in
     */
    public static StringBuilder addDotsAfterShortNames(StringBuilder sb) {
        if (sb == null || sb.length() == 0) {
            return sb;
        }
        if (sb.length() == 1) {
            sb.append(".");
            return sb;
        }
        if (sb.charAt(1) == ' ' || sb.charAt(1) == '-') {
            sb.insert(1, ".");
        }
        // Computed on an immutable snapshot and written back afterwards, so that no
        // matcher ever runs over a buffer that is being modified.
        String dotted = STANDALONE_LETTER.matcher(sb).replaceAll("$0.");
        sb.setLength(0);
        sb.append(dotted);

        Matcher trailing = TRAILING_LETTER.matcher(sb);
        if (trailing.find()) {
            sb.insert(trailing.end(), ".");
        }
        return sb;
    }

    /**
     * Applies {@link #addDotsAfterShortNames(StringBuilder)} to every element
     * of the list; {@code null} elements are skipped by that method.
     *
     * @param linkedList builders to process in place
     */
    public static void addDotsAfterShortNames(LinkedList<StringBuilder> linkedList) {
        for (StringBuilder name : linkedList) {
            addDotsAfterShortNames(name);
        }
    }

    /** Deletes the first match of {@code pattern} from {@code text}, if there is one. */
    private static void deleteFirst(StringBuilder text, Pattern pattern) {
        Matcher matcher = pattern.matcher(text);
        if (matcher.find()) {
            text.delete(matcher.start(), matcher.end());
        }
    }

    /**
     * Replaces the first match of {@code pattern} with {@code replacement},
     * then starts over on the modified text, until nothing matches.
     *
     * <p>Callers must pass a pattern that cannot match the empty string and a
     * replacement that cannot keep producing new matches, otherwise this
     * method does not terminate.
     */
    private static void replaceRepeatedly(StringBuilder text, Pattern pattern, String replacement) {
        for (Matcher matcher = pattern.matcher(text); matcher.find(); matcher = pattern.matcher(text)) {
            text.replace(matcher.start(), matcher.end(), replacement);
        }
    }
}
