package pl.edu.icm.cermine.metadata.extraction.enhancers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.json.util.JSONUtils;
import org.apache.commons.cli.HelpFormatter;
import org.jdom.Element;
import org.jdom.filter.Filter;
import org.springframework.beans.factory.support.PropertiesBeanDefinitionReader;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.2-SNAPSHOT.jar:pl/edu/icm/cermine/metadata/extraction/enhancers/Enhancers.class */
/**
 * Static helpers that read and write bibliographic metadata inside a JDOM element tree.
 *
 * <p>Every public method receives an element and works on descendants addressed by a path of
 * tag names (for example {@code front/article-meta/contrib-group/contrib}). Missing intermediate
 * elements on such a path are created on demand by {@link #getElement} and {@link #getElements},
 * so even the read-style methods may add empty container elements to the tree.
 */
public class Enhancers {
    /** Affiliations whose text is shorter than this many characters are removed by {@link #cleanAffiliations}. */
    private static final int MAX_JUNK_AFF_LENGTH = 5;
    private static final String TAG_ABSTRACT = "abstract";
    private static final String TAG_ACCEPTED = "accepted";
    private static final String TAG_AFF = "aff";
    private static final String TAG_ARTICLE_ID = "article-id";
    private static final String TAG_ARTICLE_META = "article-meta";
    private static final String TAG_ARTICLE_TITLE = "article-title";
    private static final String TAG_AUTHOR = "author";
    private static final String TAG_CONTRIB = "contrib";
    private static final String TAG_CONTRIB_GROUP = "contrib-group";
    private static final String TAG_CONTRIB_TYPE = "contrib-type";
    private static final String TAG_DATE = "date";
    private static final String TAG_DATE_TYPE = "date-type";
    private static final String TAG_DAY = "day";
    private static final String TAG_EDITOR = "editor";
    private static final String TAG_EMAIL = "email";
    private static final String TAG_FPAGE = "fpage";
    private static final String TAG_FRONT = "front";
    private static final String TAG_HISTORY = "history";
    private static final String TAG_ISSN = "issn";
    private static final String TAG_ISSUE = "issue";
    private static final String TAG_JOURNAL_META = "journal-meta";
    private static final String TAG_JOURNAL_TITLE = "journal-title";
    private static final String TAG_JOURNAL_TITLE_GROUP = "journal-title-group";
    private static final String TAG_KWD = "kwd";
    private static final String TAG_KWD_GROUP = "kwd-group";
    private static final String TAG_LPAGE = "lpage";
    private static final String TAG_MONTH = "month";
    private static final String TAG_P = "p";
    private static final String TAG_PPUB = "ppub";
    private static final String TAG_PUB_DATE = "pub-date";
    private static final String TAG_PUB_ID_TYPE = "pub-id-type";
    private static final String TAG_PUB_TYPE = "pub-type";
    private static final String TAG_PUBLISHER = "publisher";
    private static final String TAG_PUBLISHER_NAME = "publisher-name";
    private static final String TAG_RECEIVED = "received";
    private static final String TAG_REVISED = "revised";
    private static final String TAG_STRING_NAME = "string-name";
    private static final String TAG_TITLE_GROUP = "title-group";
    private static final String TAG_VOLUME = "volume";
    private static final String TAG_YEAR = "year";

    /**
     * Matches a fragment that ends with a hyphen-like character immediately followed by a line
     * break; group 1 is the fragment without that trailing hyphen. Compiled once because
     * {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern HYPHENATION_PATTERN =
            Pattern.compile("([^-\u00ad‐‑‒–—―⁻₋−-]*\\S+)[-\u00ad‐‑‒–—―⁻₋−-]\n", Pattern.DOTALL);

    /**
     * Collects the {@code string-name} text of every contributor marked as an author.
     *
     * @param element element under which {@code front/article-meta/contrib-group} is looked up
     * @return one entry per author contributor, in document order; an entry is {@code null}
     *         when the contributor has no {@code string-name} child
     */
    public static List<String> getAuthorNames(Element element) {
        List<String> names = new ArrayList<String>();
        for (Element contrib : getContributors(element)) {
            if (TAG_AUTHOR.equals(contrib.getAttributeValue(TAG_CONTRIB_TYPE))) {
                names.add(contrib.getChildText(TAG_STRING_NAME));
            }
        }
        return names;
    }

    /**
     * Appends a new {@code article-id} element under {@code front/article-meta}.
     *
     * @param element element under which the path is resolved
     * @param type    value stored in the {@code pub-id-type} attribute
     * @param id      text of the new element
     */
    public static void addArticleId(Element element, String type, String id) {
        addValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_ARTICLE_ID}, TAG_PUB_ID_TYPE, type, id);
    }

    /**
     * Appends an author contributor whose name has ligatures and typographic quotes/dashes
     * normalised.
     *
     * @param element      element under which the contributor group is resolved
     * @param name         author name; must not be {@code null}
     * @param affiliations texts stored as {@code aff} children, or {@code null} for none
     */
    public static void addAuthor(Element element, String name, List<String> affiliations) {
        addContributor(element, cleanOther(cleanLigatures(name)), TAG_AUTHOR, affiliations);
    }

    /**
     * Appends an editor contributor (ligatures expanded, no affiliations).
     *
     * @param element element under which the contributor group is resolved
     * @param name    editor name; must not be {@code null}
     */
    public static void addEditor(Element element, String name) {
        addContributor(element, cleanLigatures(name), TAG_EDITOR, null);
    }

    /**
     * Attaches an {@code email} element to the single contributor whose name matches the address.
     *
     * <p>A contributor matches when any space-separated part of its {@code string-name} that is
     * longer than two characters occurs (case-insensitively) in the address. The e-mail is added
     * only when exactly one contributor matches; with zero or several matches the tree is left
     * without a new {@code email} element. Contributors lacking a {@code string-name} child are
     * ignored.
     *
     * @param element element under which the contributors are looked up
     * @param email   e-mail address; {@code null} is ignored
     */
    public static void addEmail(Element element, String email) {
        if (email == null) {
            return;
        }
        String lowerEmail = email.toLowerCase();
        Element owner = null;
        for (Element contrib : getContributors(element)) {
            Element nameElement = contrib.getChild(TAG_STRING_NAME);
            if (nameElement == null) {
                continue;
            }
            for (String part : nameElement.getText().split(" ")) {
                if (part.length() > 2 && lowerEmail.contains(part.toLowerCase())) {
                    if (owner != null) {
                        // A second contributor matches as well: the assignment is ambiguous.
                        return;
                    }
                    owner = contrib;
                    // One matching part is enough for this contributor.
                    break;
                }
            }
        }
        if (owner == null) {
            return;
        }
        Element emailElement = new Element(TAG_EMAIL);
        emailElement.setText(cleanLigatures(email));
        owner.addContent(emailElement);
    }

    /**
     * Appends a {@code kwd} element under {@code front/article-meta/kwd-group}.
     *
     * @param element element under which the path is resolved
     * @param keyword keyword text; must not be {@code null}
     */
    public static void addKeyword(Element element, String keyword) {
        addValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_KWD_GROUP, TAG_KWD},
                cleanOther(cleanLigatures(keyword)));
    }

    /**
     * Sets the text of {@code front/article-meta/abstract/p} after expanding ligatures and
     * joining words hyphenated across line breaks.
     *
     * @param element      element under which the path is resolved
     * @param abstractText raw abstract text; must not be {@code null}
     */
    public static void setAbstract(Element element, String abstractText) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_ABSTRACT, TAG_P}, clean(abstractText));
    }

    /**
     * Replaces the history date of type {@code accepted}.
     *
     * @param element element under which the history is resolved
     * @param day     text of the {@code day} child
     * @param month   text of the {@code month} child
     * @param year    text of the {@code year} child
     */
    public static void setAcceptedDate(Element element, String day, String month, String year) {
        setHistoryDate(element, TAG_ACCEPTED, day, month, year);
    }

    /**
     * Writes an affiliation text into the contributors.
     *
     * <p>When {@code label} is {@code null} or empty, a new {@code aff} element holding the
     * affiliation is appended to every author contributor. In addition, for a non-null
     * {@code label}, every existing {@code aff} child whose text equals {@code label} has its
     * text replaced by the affiliation.
     *
     * @param element     element under which the contributors are looked up
     * @param label       current text of the {@code aff} elements to overwrite; may be
     *                    {@code null} or empty
     * @param affiliation new affiliation text (ligatures are expanded before it is stored)
     */
    public static void setAffiliation(Element element, String label, String affiliation) {
        boolean unlabelled = label == null || label.isEmpty();
        for (Element contrib : getContributors(element)) {
            // Fetched before the optional addContent below, exactly as the statement order
            // has always been; whether the list reflects that addition is up to JDOM.
            List<?> affs = contrib.getChildren(TAG_AFF);
            if (unlabelled && TAG_AUTHOR.equals(contrib.getAttributeValue(TAG_CONTRIB_TYPE))) {
                Element aff = new Element(TAG_AFF);
                aff.setText(cleanLigatures(affiliation));
                contrib.addContent(aff);
            }
            if (label == null) {
                // No element text can equal a null label, so there is nothing to overwrite.
                continue;
            }
            for (Object child : affs) {
                if ((child instanceof Element) && label.equals(((Element) child).getText())) {
                    ((Element) child).setText(cleanLigatures(affiliation));
                }
            }
        }
    }

    /**
     * Sets the text of {@code front/article-meta/issue}.
     *
     * @param element element under which the path is resolved
     * @param issue   issue text
     */
    public static void setIssue(Element element, String issue) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_ISSUE}, issue);
    }

    /**
     * Sets the text of {@code front/journal-meta/journal-title-group/journal-title}.
     *
     * @param element element under which the path is resolved
     * @param journal journal title; must not be {@code null}
     */
    public static void setJournal(Element element, String journal) {
        setValue(element, new String[]{TAG_FRONT, TAG_JOURNAL_META, TAG_JOURNAL_TITLE_GROUP, TAG_JOURNAL_TITLE},
                cleanLigatures(journal));
    }

    /**
     * Sets the text of {@code front/journal-meta/issn} and marks it with
     * {@code pub-type="ppub"}.
     *
     * @param element element under which the path is resolved
     * @param issn    ISSN text
     */
    public static void setJournalIssn(Element element, String issn) {
        setValue(element, new String[]{TAG_FRONT, TAG_JOURNAL_META, TAG_ISSN}, TAG_PUB_TYPE, TAG_PPUB, issn);
    }

    /**
     * Sets the text of {@code fpage} and {@code lpage} under {@code front/article-meta}.
     *
     * @param element   element under which the paths are resolved
     * @param firstPage text of {@code fpage}
     * @param lastPage  text of {@code lpage}
     */
    public static void setPages(Element element, String firstPage, String lastPage) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_FPAGE}, firstPage);
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_LPAGE}, lastPage);
    }

    /**
     * Sets the {@code day}, {@code month} and {@code year} children of
     * {@code front/article-meta/pub-date}.
     *
     * @param element element under which the paths are resolved
     * @param day     text of the {@code day} child
     * @param month   text of the {@code month} child
     * @param year    text of the {@code year} child
     */
    public static void setPublishedDate(Element element, String day, String month, String year) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_PUB_DATE, TAG_DAY}, day);
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_PUB_DATE, TAG_MONTH}, month);
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_PUB_DATE, TAG_YEAR}, year);
    }

    /**
     * Sets the text of {@code front/journal-meta/publisher/publisher-name}.
     *
     * @param element   element under which the path is resolved
     * @param publisher publisher name; must not be {@code null}
     */
    public static void setPublisher(Element element, String publisher) {
        setValue(element, new String[]{TAG_FRONT, TAG_JOURNAL_META, TAG_PUBLISHER, TAG_PUBLISHER_NAME},
                cleanLigatures(publisher));
    }

    /**
     * Replaces the history date of type {@code received}.
     *
     * @param element element under which the history is resolved
     * @param day     text of the {@code day} child
     * @param month   text of the {@code month} child
     * @param year    text of the {@code year} child
     */
    public static void setReceivedDate(Element element, String day, String month, String year) {
        setHistoryDate(element, TAG_RECEIVED, day, month, year);
    }

    /**
     * Replaces the history date of type {@code revised}.
     *
     * @param element element under which the history is resolved
     * @param day     text of the {@code day} child
     * @param month   text of the {@code month} child
     * @param year    text of the {@code year} child
     */
    public static void setRevisedDate(Element element, String day, String month, String year) {
        setHistoryDate(element, TAG_REVISED, day, month, year);
    }

    /**
     * Sets the text of {@code front/article-meta/title-group/article-title}.
     *
     * @param element element under which the path is resolved
     * @param title   article title; must not be {@code null}
     */
    public static void setTitle(Element element, String title) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_TITLE_GROUP, TAG_ARTICLE_TITLE},
                cleanOther(cleanLigatures(title)));
    }

    /**
     * Sets the text of {@code front/article-meta/volume}.
     *
     * @param element element under which the path is resolved
     * @param volume  volume text
     */
    public static void setVolume(Element element, String volume) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_VOLUME}, volume);
    }

    /**
     * Sets the text of {@code front/article-meta/pub-date/year}.
     *
     * @param element element under which the path is resolved
     * @param year    year text
     */
    public static void setYear(Element element, String year) {
        setValue(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_PUB_DATE, TAG_YEAR}, year);
    }

    /**
     * Removes, from every contributor, the {@code aff} children whose text is shorter than
     * {@link #MAX_JUNK_AFF_LENGTH} characters.
     *
     * @param element element under which the contributors are looked up
     */
    public static void cleanAffiliations(Element element) {
        for (Element contrib : getContributors(element)) {
            List<Element> junk = new ArrayList<Element>();
            for (Object child : contrib.getChildren(TAG_AFF)) {
                if ((child instanceof Element) && ((Element) child).getText().length() < MAX_JUNK_AFF_LENGTH) {
                    junk.add((Element) child);
                }
            }
            // Removal happens in a second pass so the child list is not modified while iterated.
            for (Element aff : junk) {
                contrib.removeContent(aff);
            }
        }
    }

    /**
     * Walks down {@code strArr} from {@code element}, creating each missing child on the way.
     *
     * @param element starting element
     * @param strArr  tag names of the successive children to descend into
     * @return the element reached after the last path step ({@code element} itself for an empty path)
     */
    protected static Element getElement(Element element, String[] strArr) {
        Element current = element;
        for (String tag : strArr) {
            Element child = current.getChild(tag);
            if (child == null) {
                current.addContent(new Element(tag));
                child = current.getChild(tag);
            }
            current = child;
        }
        return current;
    }

    /**
     * Returns all children named by the last entry of {@code strArr} found under the parent
     * addressed by the preceding entries; missing parents are created, missing leaves are not.
     *
     * @param element starting element
     * @param strArr  path of tag names; must contain at least one entry
     * @return a new list holding the matching child elements in the order JDOM reports them
     */
    protected static List<Element> getElements(Element element, String[] strArr) {
        int last = strArr.length - 1;
        Element parent = element;
        for (int i = 0; i < last; i++) {
            String tag = strArr[i];
            Element child = parent.getChild(tag);
            if (child == null) {
                parent.addContent(new Element(tag));
                child = parent.getChild(tag);
            }
            parent = child;
        }
        List<Element> found = new ArrayList<Element>();
        for (Object child : parent.getChildren(strArr[last])) {
            if (child instanceof Element) {
                found.add((Element) child);
            }
        }
        return found;
    }

    /** Returns the {@code contrib} elements under {@code front/article-meta/contrib-group}. */
    private static List<Element> getContributors(Element element) {
        return getElements(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_CONTRIB_GROUP, TAG_CONTRIB});
    }

    /** Appends a new element named by the last path entry, with the given text, to the parent path. */
    private static void addValue(Element element, String[] path, String text) {
        Element parent = getElement(element, Arrays.copyOfRange(path, 0, path.length - 1));
        Element leaf = new Element(path[path.length - 1]);
        leaf.setText(text);
        parent.addContent(leaf);
    }

    /** Same as the three-argument {@code addValue}, additionally setting one attribute on the new element. */
    private static void addValue(Element element, String[] path, String attrName, String attrValue, String text) {
        Element parent = getElement(element, Arrays.copyOfRange(path, 0, path.length - 1));
        Element leaf = new Element(path[path.length - 1]);
        leaf.setText(text);
        leaf.setAttribute(attrName, attrValue);
        parent.addContent(leaf);
    }

    /** Sets the text of the (possibly newly created) element addressed by {@code path}. */
    private static void setValue(Element element, String[] path, String text) {
        getElement(element, path).setText(text);
    }

    /** Sets both the text and one attribute of the (possibly newly created) element addressed by {@code path}. */
    private static void setValue(Element element, String[] path, String attrName, String attrValue, String text) {
        Element target = getElement(element, path);
        target.setText(text);
        target.setAttribute(attrName, attrValue);
    }

    /**
     * Appends a {@code contrib} element with the given {@code contrib-type}, a
     * {@code string-name} child and one {@code aff} child per affiliation.
     */
    private static void addContributor(Element element, String name, String type, List<String> affiliations) {
        Element group = getElement(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_CONTRIB_GROUP});
        Element contrib = new Element(TAG_CONTRIB);
        contrib.setAttribute(TAG_CONTRIB_TYPE, type);
        Element nameElement = new Element(TAG_STRING_NAME);
        nameElement.setText(name);
        contrib.addContent(nameElement);
        if (affiliations != null) {
            for (String affiliation : affiliations) {
                Element aff = new Element(TAG_AFF);
                aff.setText(affiliation);
                contrib.addContent(aff);
            }
        }
        group.addContent(contrib);
    }

    /**
     * Removes any existing {@code date} of the given {@code date-type} from
     * {@code front/article-meta/history} and appends a fresh one with day, month and year children.
     */
    private static void setHistoryDate(Element element, final String dateType, String day, String month, String year) {
        Element history = getElement(element, new String[]{TAG_FRONT, TAG_ARTICLE_META, TAG_HISTORY});
        history.removeContent(new Filter() {
            @Override
            public boolean matches(Object obj) {
                return (obj instanceof Element)
                        && ((Element) obj).getName().equals(TAG_DATE)
                        && dateType.equals(((Element) obj).getAttributeValue(TAG_DATE_TYPE));
            }
        });
        Element date = new Element(TAG_DATE);
        date.setAttribute(TAG_DATE_TYPE, dateType);
        history.addContent(date);
        setValue(date, new String[]{TAG_DAY}, day);
        setValue(date, new String[]{TAG_MONTH}, month);
        setValue(date, new String[]{TAG_YEAR}, year);
    }

    /** Replaces typographic single quotes with an apostrophe and the en dash with a hyphen-minus. */
    private static String cleanOther(String str) {
        return str.replaceAll("[’‘]", "'").replaceAll("[–]", "-");
    }

    /** Expands typographic ligature characters (and {@code æ}) into their plain-letter spellings. */
    private static String cleanLigatures(String str) {
        return str.replace("ﬀ", "ff")
                .replace("ﬁ", "fi")
                .replace("ﬂ", "fl")
                .replace("ﬃ", "ffi")
                .replace("ﬄ", "ffl")
                .replace("ﬅ", "ft")
                .replace("ﬆ", "st")
                .replace("æ", "ae");
    }

    /**
     * Joins words split by a hyphen-like character at a line end and turns the remaining line
     * breaks into single spaces.
     */
    private static String cleanHyphenation(String str) {
        Matcher matcher = HYPHENATION_PATTERN.matcher(str);
        StringBuffer joined = new StringBuffer();
        while (matcher.find()) {
            // quoteReplacement keeps '$' and '\' in the captured text literal.
            matcher.appendReplacement(joined, Matcher.quoteReplacement(matcher.group(1)));
        }
        matcher.appendTail(joined);
        return joined.toString().replaceAll("\n", " ");
    }

    /** Applies ligature expansion followed by de-hyphenation. */
    private static String clean(String str) {
        return cleanHyphenation(cleanLigatures(str));
    }
}
