package eu.eudml.processing.node;

import eu.eudml.common.MSC.MSC2010Utils;
import eu.eudml.common.XmlFilterUtils;
import eu.eudml.processing.node.YElementToRelationServiceWriter;
import eu.eudml.service.relation.EudmlRelationService;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.synat.api.services.index.relations.model.RelationIndexDocument;
import pl.edu.icm.yadda.bwmeta.model.YCategoryRef;
import pl.edu.icm.yadda.bwmeta.model.YContributor;
import pl.edu.icm.yadda.bwmeta.model.YDate;
import pl.edu.icm.yadda.bwmeta.model.YDescription;
import pl.edu.icm.yadda.bwmeta.model.YElement;

/* loaded from: input_file:eu/eudml/processing/node/HierarchyToRSHelper.class */
/**
 * Shared utilities for subclasses that build {@link RelationIndexDocument}s from
 * {@link YElement} payloads: metadata extraction (abstract, MSC codes, authors,
 * publication year) and construction of year summaries and sort keys.
 *
 * <p>The text helpers are written to be locale independent: numbers are rendered
 * with ASCII digits and case conversion uses {@link Locale#ROOT}, so generated
 * keys do not depend on the JVM default locale.
 */
public abstract class HierarchyToRSHelper {
    /** Marker produced by {@link #unifyYear(String)} for text that is not a year or year range. */
    private static final String UNKNOWN_YEAR = "[unknown]";
    private static final String MSC_CLASSIFICATION = "bwmeta1.category-class.MSC";
    private static final String MSC_2010_CLASSIFICATION = "bwmeta1.category-class.MSC_2010";
    private static final String AUTHOR_ROLE = "author";

    /** A year, optionally followed by '-' or '/' and a second (possibly shortened) year. */
    private static final Pattern YEAR_PATTERN = Pattern.compile("(\\d+)(?:[-/](\\d+))?");
    /** A first page (arabic or roman), optionally followed by '-' and a last page. */
    private static final Pattern PAGES_PATTERN = Pattern.compile("(\\d+|[ixvlcmIXVLCM]+)(?:-(?:\\d+|[ixvlcmIXVLCM]+))?");
    private static final Pattern DIACRITICAL_PATTERN = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    private static final Pattern TAGS_PATTERN = Pattern.compile("<.*?>");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    protected EudmlRelationService service;
    /** Accepted MSC codes; {@link #getMSC(YElement)} assumes this set is non-null. */
    protected Set<String> mscCodes;
    protected final Logger log = LoggerFactory.getLogger(getClass());
    protected HashSet<String> agregatedDocModified = new HashSet<>();

    /**
     * @param eudmlRelationService relation service made available to subclasses
     * @param set                  MSC codes accepted by {@link #getMSC(YElement)}
     */
    public HierarchyToRSHelper(EudmlRelationService eudmlRelationService, Set<String> set) {
        this.service = eudmlRelationService;
        this.mscCodes = set;
    }

    /** Implemented by subclasses; returns the aggregated relation documents. */
    public abstract List<RelationIndexDocument> agregateInfo();

    /** Implemented by subclasses; turns the given payloads into relation documents. */
    public abstract List<RelationIndexDocument> process(EnrichedPayload<YElement>[] enrichedPayloadArr, YElementToRelationServiceWriter.Cache cache);

    /**
     * Returns the element's description filtered through
     * {@link XmlFilterUtils#onlyTextAndMML}, or an empty string when the element
     * has no description.
     */
    public String getAbstract(YElement yElement) {
        YDescription description = yElement.getOneDescription();
        if (description == null) {
            return "";
        }
        return XmlFilterUtils.onlyTextAndMML(description.getRichText());
    }

    /**
     * Collects the element's MSC codes.
     *
     * <p>Only category references classified as MSC or MSC 2010 are considered.
     * Their code text is split on whitespace, every token is normalised with
     * {@link MSC2010Utils#unify} and kept only when present in {@link #mscCodes}.
     * Malformed or unknown codes are logged and skipped.
     *
     * @return the accepted codes in natural (sorted) order; never {@code null}
     */
    public List<String> getMSC(YElement yElement) {
        List<String> codes = new ArrayList<>(yElement.getCategoryRefs().size());
        for (YCategoryRef categoryRef : yElement.getCategoryRefs()) {
            // Constant-first equals: a reference without classification is simply not MSC.
            String classification = categoryRef.getClassification();
            if (!MSC_CLASSIFICATION.equals(classification) && !MSC_2010_CLASSIFICATION.equals(classification)) {
                continue;
            }
            String rawCodes = categoryRef.getCode();
            if (rawCodes == null) {
                continue;
            }
            // Split on whitespace runs so repeated separators do not yield empty tokens.
            for (String rawCode : WHITESPACE_PATTERN.split(rawCodes.trim())) {
                if (rawCode.isEmpty()) {
                    continue;
                }
                try {
                    String unified = MSC2010Utils.unify(rawCode);
                    if (!unified.equals(rawCode)) {
                        this.log.debug("FIXING MSC {} -> {}", rawCode, unified);
                    }
                    if (this.mscCodes.contains(unified)) {
                        codes.add(unified);
                    } else {
                        this.log.warn("Code {} not in MSC2010. Classification: {}", unified, classification);
                    }
                } catch (MSC2010Utils.WrongMSCFormatException e) {
                    // Pass the exception itself so the stack trace is not lost.
                    this.log.error(e.getMessage(), e);
                }
            }
        }
        Collections.sort(codes);
        return codes;
    }

    /**
     * Returns the names of all contributors with role {@code "author"}, joined
     * with {@code ", "}. Contributors without a role or without a name are skipped.
     */
    public String getAuthors(YElement yElement) {
        List<String> names = new ArrayList<>();
        for (YContributor contributor : yElement.getContributors()) {
            if (AUTHOR_ROLE.equals(contributor.getRole()) && contributor.getOneName() != null) {
                names.add(contributor.getOneName().getText());
            }
        }
        return StringUtils.join(names, ", ");
    }

    /**
     * Derives the identifiers of the ancestors of a classification code from its
     * text, nearest ancestor first.
     *
     * <ul>
     *   <li>shorter than 5 characters, or ending with {@code "-XX"}: no ancestors;</li>
     *   <li>ending with {@code "xx"}: first two characters + {@code "-XX"};</li>
     *   <li>otherwise: first three characters + {@code "xx"}, then first two
     *       characters + {@code "-XX"}.</li>
     * </ul>
     */
    public String[] makeAncestorsId(String str) {
        if (str.length() < 5 || str.endsWith("-XX")) {
            return new String[0];
        }
        String topLevel = str.substring(0, 2) + "-XX";
        if (str.endsWith("xx")) {
            return new String[]{topLevel};
        }
        return new String[]{str.substring(0, 3) + "xx", topLevel};
    }

    /**
     * Returns the unified form (see {@link #unifyYear(String)}) of the element's
     * {@code "published"} date, or {@code "[unknown]"} when there is none.
     */
    public String getDate(YElement yElement) {
        YDate published = yElement.getDate("published");
        return unifyYear(published != null ? published.getText() : "");
    }

    /**
     * Normalises a year or year range.
     *
     * <p>{@code "1999"} stays {@code "1999"}; {@code "1999-2001"} and
     * {@code "1999/2001"} become {@code "1999/2001"}; an end year below 100 takes
     * the century of the start year ({@code "1998/99"} becomes {@code "1998/1999"}).
     *
     * @param str text to normalise; may be {@code null}
     * @return the normalised text, or {@code "[unknown]"} when {@code str} is
     *         {@code null}, does not have the expected form, or contains a number
     *         too large for an {@code int}
     */
    public String unifyYear(String str) {
        if (str == null) {
            return UNKNOWN_YEAR;
        }
        Matcher matcher = YEAR_PATTERN.matcher(str);
        if (!matcher.matches()) {
            return UNKNOWN_YEAR;
        }
        try {
            int startYear = Integer.parseInt(matcher.group(1));
            String endText = matcher.group(2);
            // Integer.toString / concatenation always emit ASCII digits, unlike
            // String.format("%d") which localises digits for some default locales.
            if (endText == null) {
                return Integer.toString(startYear);
            }
            return startYear + "/" + expandEndYear(startYear, Integer.parseInt(endText));
        } catch (NumberFormatException e) {
            // \d+ may capture more digits than fit into an int.
            this.log.debug("Year value out of range: {}", str);
            return UNKNOWN_YEAR;
        }
    }

    /**
     * Summarises a set of years / year ranges as a sorted, comma separated list
     * in which consecutive years are merged, e.g. {@code {"1999", "2000/01", "2005"}}
     * yields {@code "1999-2001, 2005"}.
     *
     * <p>Entries equal to {@code "[unknown]"} are ignored silently. Entries in
     * another format, with an end before the start, or spanning 100 years or
     * more are logged as errors and ignored.
     *
     * @param set year texts in the format accepted by {@link #unifyYear(String)};
     *            may be {@code null}
     * @return the summary, or an empty string when no usable year was found
     */
    public String yearSetToString(Set<String> set) {
        if (set == null || set.isEmpty()) {
            return "";
        }
        List<Integer> years = new ArrayList<>(set.size());
        for (String range : set) {
            Matcher matcher = range == null ? null : YEAR_PATTERN.matcher(range);
            if (matcher == null || !matcher.matches()) {
                if (!UNKNOWN_YEAR.equals(range)) {
                    this.log.error("Wrong year range format: {}", range);
                }
                continue;
            }
            int startYear;
            int endYear;
            try {
                startYear = Integer.parseInt(matcher.group(1));
                endYear = matcher.group(2) == null
                        ? startYear
                        : expandEndYear(startYear, Integer.parseInt(matcher.group(2)));
            } catch (NumberFormatException e) {
                this.log.error("Wrong year range format: {}", range);
                continue;
            }
            int span = endYear - startYear;
            if (span < 0 || span >= 100) {
                this.log.error("Wrong/strange year range: {}", range);
                continue;
            }
            // Iterate over the offset rather than the year so the loop terminates
            // even when endYear is Integer.MAX_VALUE.
            for (int offset = 0; offset <= span; offset++) {
                years.add(startYear + offset);
            }
        }
        if (years.isEmpty()) {
            return "";
        }
        Collections.sort(years);

        List<String> runs = new ArrayList<>();
        int runStart = years.get(0);
        int runEnd = runStart;
        for (int i = 1; i < years.size(); i++) {
            int year = years.get(i);
            // A gap of more than one year closes the current run; duplicates and
            // directly following years extend it. Subtraction cannot overflow
            // because all years are non-negative.
            if (year - runEnd > 1) {
                runs.add(formatRun(runStart, runEnd));
                runStart = year;
            }
            runEnd = year;
        }
        runs.add(formatRun(runStart, runEnd));
        return StringUtils.join(runs, ", ");
    }

    /**
     * Removes combining diacritical marks after NFD normalisation, so that for
     * example {@code "é"} becomes {@code "e"}.
     *
     * @param str text to process; must not be {@code null}
     */
    protected String deAccent(String str) {
        return DIACRITICAL_PATTERN.matcher(Normalizer.normalize(str, Normalizer.Form.NFD)).replaceAll("");
    }

    /**
     * Builds a sort key from free text: {@code <...>} tags are removed, whitespace
     * runs are collapsed to single spaces, the result is trimmed, stripped of
     * diacritics and upper-cased independently of the default locale.
     *
     * @param str text to convert; may be {@code null}
     * @return the sort key, or an empty string for {@code null} input
     */
    public String prepareSortKey(String str) {
        if (str == null) {
            return "";
        }
        String withoutTags = TAGS_PATTERN.matcher(str).replaceAll("");
        String collapsed = WHITESPACE_PATTERN.matcher(withoutTags).replaceAll(" ").trim();
        return deAccent(collapsed).toUpperCase(Locale.ROOT);
    }

    /**
     * Builds a sort key consisting of the first page number, zero padded to six
     * digits, followed by {@link #prepareSortKey(String)} of {@code str2}.
     *
     * <p>When the page text does not match the expected form, or its first page
     * is not an arabic number that fits an {@code int} (for example a roman
     * numeral), page number 0 is used.
     *
     * @param str  page range text such as {@code "12-34"}; {@code null} yields an
     *             empty key
     * @param str2 text appended to the page number after conversion to a sort key
     */
    public String getPageSortKey(String str, String str2) {
        if (str == null) {
            return "";
        }
        int firstPage = 0;
        Matcher matcher = PAGES_PATTERN.matcher(str);
        if (matcher.matches()) {
            String firstPageText = matcher.group(1);
            try {
                firstPage = Integer.parseInt(firstPageText);
            } catch (NumberFormatException e) {
                // The pattern also admits roman numerals, which parseInt rejects.
                this.log.debug("First page {} is not a usable arabic number", firstPageText);
            }
        } else {
            this.log.debug("String {} do not match pages pattern", str);
        }
        // Locale.ROOT guarantees ASCII digits whatever the default locale is.
        return String.format(Locale.ROOT, "%06d%s", firstPage, prepareSortKey(str2));
    }

    /**
     * Maps the literal text {@code "null"} to an empty string; any other value,
     * including a {@code null} reference, is returned unchanged.
     */
    public static String copeStringNull(String str) {
        return "null".equals(str) ? "" : str;
    }

    /** Completes an end year below 100 with the century of the start year. */
    private static int expandEndYear(int startYear, int endYear) {
        return endYear < 100 ? (startYear / 100) * 100 + endYear : endYear;
    }

    /** Renders a run of consecutive years as {@code "a"} or {@code "a-b"}. */
    private static String formatRun(int firstYear, int lastYear) {
        return firstYear == lastYear ? Integer.toString(firstYear) : firstYear + "-" + lastYear;
    }
}
