package pl.edu.icm.ceon.converters.dspace.uwb;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.XPath;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.ceon.converters.baztech.BaztechYaddaIdGenerator;
import pl.edu.icm.ceon.converters.commons.MetadataPart;
import pl.edu.icm.ceon.converters.dspace.DSpaceElementContentSource;
import pl.edu.icm.ceon.converters.dspace.HierarchyWithYExportablesInfo;
import pl.edu.icm.ceon.converters.dspace.HigherHierarchyLevelInformations;
import pl.edu.icm.ceon.converters.dspace.HigherLevelHarvester;
import pl.edu.icm.ceon.converters.dspace.IdInformation;
import pl.edu.icm.ceon.converters.dspace.MetsReader;
import pl.edu.icm.ceon.converters.dspace.SetInfromation;
import pl.edu.icm.ceon.converters.dspace.XPathHelper;
import pl.edu.icm.ceon.converters.dspace.XmlDataDownloader;
import pl.edu.icm.ceon.converters.mhp.MhpParser;
import pl.edu.icm.model.bwmeta.y.YAncestor;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YExportable;
import pl.edu.icm.model.bwmeta.y.YId;
import pl.edu.icm.model.bwmeta.y.YName;
import pl.edu.icm.model.bwmeta.y.YStructure;

/* loaded from: input_file:pl/edu/icm/ceon/converters/dspace/uwb/UwbStyleHigherLevelHarvester.class */
public class UwbStyleHigherLevelHarvester implements HigherLevelHarvester {
    /** Content source passed to {@code reader.read(...)} whenever a METS record is parsed. */
    DSpaceElementContentSource elementContSource;
    /** Base URL of the repository; {@code standardRepoPathEnd} is appended to it to fetch the community list. */
    String repoBaseUrl;
    /** CSV configuration rows keyed by their {@code id} field; looked up with the id derived from a reference's url. */
    HashMap<String, CsvData> csvDataMap;
    /** Downloader used for the community list and for METS records; assigned through {@code setDownloader}. */
    XmlDataDownloader downloader;
    /** XPath selecting the top-level references; assigned in the static initializer (the misspelling is part of the identifier). */
    static String topLevelRefernceSetXPath;
    /** XPath selecting the references nested below a reference; assigned in the static initializer. */
    static String subEls;
    private static final Logger log = LoggerFactory.getLogger(UwbStyleHigherLevelHarvester.class);
    /** Path appended to {@code repoBaseUrl} when the community list is downloaded. */
    static String standardRepoPathEnd = "/DRI/community-list";
    /** Classpath resource with the collection configuration, used by the two-argument constructor. */
    static String defaultCsvPath = "uwb_collections_example.csv";
    /** Classpath resource with the accepted titles and their earliest years, used by the two-argument constructor. */
    static String journalsWithAcceptedYears = "uwb_titles_years_list.csv";
    /** Namespace prefix to URI map installed on every XPath created by this class; filled in the static initializer. */
    static HashMap<String, String> namespaceUris = new HashMap<>();
    // The four defaults below are applied to a collection that has neither a CSV row nor a parent,
    // and only when skipUnknownCollections is false.
    String defaultLowLevel = "bwmeta1.level.hierarchy_Journal_Article";
    String defaultHierarchy = "bwmeta1.hierarchy-class.hierarchy_Journal";
    String defaultHighLevel = "bwmeta1.level.hierarchy_Journal_Journal";
    boolean defaultNoAncestors = false;
    /** When true, a top-level collection without a CSV row is ignored together with everything nested under it. */
    boolean skipUnknownCollections = true;
    /** Normalised title (lower-cased, only a-z kept) mapped to the earliest accepted year; 0 means "no limit". */
    HashMap<String, Integer> forThisTitlesYearsNotEarlierThan = new HashMap<>();
    /** Parser that turns a METS record into exportable elements. */
    MetsReader reader = new MetsReader();
    /** One non-validating SAX reader per thread, used to parse the community list. */
    protected ThreadLocal<SAXReader> saxReader = new ThreadLocal<SAXReader>() {
        /** Creates the calling thread's reader with validation switched off. */
        @Override
        public SAXReader initialValue() {
            SAXReader sAXReader = new SAXReader();
            sAXReader.setValidation(false);
            return sAXReader;
        }
    };

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:pl/edu/icm/ceon/converters/dspace/uwb/UwbStyleHigherLevelHarvester$CsvData.class */
    /**
     * One row of the collection configuration CSV.
     *
     * <p>A row has the form {@code id;hierarchy;level;lowLevel;use}. Fields after the
     * fifth are ignored.
     */
    public static class CsvData {
        /** Number of ';'-separated fields every row must provide. */
        private static final int FIELD_COUNT = 5;

        /** First field; the enclosing harvester uses it as the map key for the row. */
        String id;
        /** Second field: hierarchy identifier assigned to the collection. */
        String hierarchy;
        /** Third field: level assigned to the collection itself. */
        String level;
        /** Fourth field: level handed on as the default low level. */
        String lowLevel;
        /**
         * Fifth field, true only for the text "true" (case-insensitive). The enclosing
         * harvester treats a true value as "drop the ancestors and do not parse this
         * collection's own record".
         */
        boolean use;

        /**
         * Parses a single CSV line.
         *
         * @param str the line, fields separated by ';'
         * @throws IllegalArgumentException if the line has fewer than five fields
         */
        public CsvData(String str) {
            // A negative limit keeps trailing empty fields, so "a;b;c;d;" yields an empty
            // fifth field (use == false) instead of an index error.
            String[] fields = str.split(";", -1);
            if (fields.length < FIELD_COUNT) {
                throw new IllegalArgumentException("Expected " + FIELD_COUNT
                        + " ';'-separated fields but found " + fields.length + " in line: " + str);
            }
            this.id = fields[0];
            this.hierarchy = fields[1];
            this.level = fields[2];
            this.lowLevel = fields[3];
            this.use = "true".equalsIgnoreCase(fields[4]);
        }
    }

    /**
     * Creates a harvester configured from two CSV files on the file system.
     *
     * @param str base URL of the repository
     * @param str2 path of the collection configuration CSV
     * @param str3 path of the CSV listing accepted titles and their earliest years
     * @param dSpaceElementContentSource content source used when METS records are parsed
     * @throws FileNotFoundException if either file cannot be opened
     * @throws IOException if reading or closing either file fails
     */
    public UwbStyleHigherLevelHarvester(String str, String str2, String str3, DSpaceElementContentSource dSpaceElementContentSource) throws FileNotFoundException, IOException {
        this.repoBaseUrl = str;
        // Both files are fully consumed here, so close them instead of leaking the handles.
        try (InputStream collectionsCsv = new FileInputStream(str2)) {
            createCsvDataMap(collectionsCsv);
        }
        try (InputStream titlesCsv = new FileInputStream(str3)) {
            createAcceptedJournalsList(titlesCsv);
        }
        this.elementContSource = dSpaceElementContentSource;
    }

    /**
     * Creates a harvester configured from the CSV resources named by {@code defaultCsvPath}
     * and {@code journalsWithAcceptedYears}, resolved through {@code getClass().getResourceAsStream}.
     *
     * @param str base URL of the repository
     * @param dSpaceElementContentSource content source used when METS records are parsed
     * @throws FileNotFoundException if one of the resources cannot be found
     * @throws IOException if reading or closing a resource fails
     */
    public UwbStyleHigherLevelHarvester(String str, DSpaceElementContentSource dSpaceElementContentSource) throws FileNotFoundException, IOException {
        this.repoBaseUrl = str;
        try (InputStream collectionsCsv = getClass().getResourceAsStream(defaultCsvPath)) {
            // getResourceAsStream signals a missing resource with null; report it by name
            // rather than failing later with a bare NullPointerException.
            if (collectionsCsv == null) {
                throw new FileNotFoundException("Classpath resource not found: " + defaultCsvPath);
            }
            createCsvDataMap(collectionsCsv);
        }
        try (InputStream titlesCsv = getClass().getResourceAsStream(journalsWithAcceptedYears)) {
            if (titlesCsv == null) {
                throw new FileNotFoundException("Classpath resource not found: " + journalsWithAcceptedYears);
            }
            createAcceptedJournalsList(titlesCsv);
        }
        this.elementContSource = dSpaceElementContentSource;
    }

    /**
     * Replaces {@code csvDataMap} with the rows read from the given stream, one
     * {@link CsvData} per non-blank line, keyed by the row id. A later row with the
     * same id overwrites an earlier one.
     *
     * <p>The stream is read to its end but not closed; closing is left to the caller.
     *
     * @param inputStream CSV content, one row per line
     * @throws IOException if reading fails
     */
    void createCsvDataMap(InputStream inputStream) throws IOException {
        this.csvDataMap = new HashMap<>();
        BufferedReader lines = new BufferedReader(new InputStreamReader(inputStream));
        String line;
        while ((line = lines.readLine()) != null) {
            // A blank line carries no fields and would otherwise abort the whole load.
            if (line.trim().isEmpty()) {
                continue;
            }
            CsvData row = new CsvData(line);
            this.csvDataMap.put(row.id, row);
        }
    }

    /**
     * Registers a title together with the earliest year accepted for it.
     *
     * @param str the title; it is lower-cased and every character outside a-z is replaced
     *            with {@code MhpParser.NO_TITLE} before being used as the key
     * @param i the earliest accepted year
     */
    void addEarliestYearForTitle(String str, int i) {
        String lowerCased = str.toLowerCase(Locale.ENGLISH);
        String key = lowerCased.replaceAll("[^a-z]", MhpParser.NO_TITLE);
        Integer earliestYear = Integer.valueOf(i);
        this.forThisTitlesYearsNotEarlierThan.put(key, earliestYear);
    }

    /**
     * Registers a title that is accepted regardless of the year; it is stored with the
     * value 0.
     *
     * @param str the title; it is lower-cased and every character outside a-z is replaced
     *            with {@code MhpParser.NO_TITLE} before being used as the key
     */
    void addAcceptedNoLimitTitle(String str) {
        String lowerCased = str.toLowerCase(Locale.ENGLISH);
        String key = lowerCased.replaceAll("[^a-z]", MhpParser.NO_TITLE);
        this.forThisTitlesYearsNotEarlierThan.put(key, 0);
    }

    /**
     * Reads the list of accepted titles. Each line has the form {@code title} or
     * {@code title;year}: a line without a year registers the title with no limit, a line
     * with a year registers that year as the earliest accepted one.
     *
     * <p>Lines without a title are skipped. A year that is not an integer is logged and
     * the line is skipped. The stream is read to its end but not closed; closing is left
     * to the caller.
     *
     * @param inputStream CSV content, one title per line
     * @throws IOException if reading fails
     */
    void createAcceptedJournalsList(InputStream inputStream) throws IOException {
        BufferedReader lines = new BufferedReader(new InputStreamReader(inputStream));
        String line;
        while ((line = lines.readLine()) != null) {
            String[] fields = line.split(";");
            // Skip blank lines and lines with an empty title: registering the empty key
            // would accept every title that normalises to the empty string.
            if (fields.length == 0 || fields[0].trim().isEmpty()) {
                continue;
            }
            String title = fields[0];
            // Tolerate whitespace around the year, e.g. "Title; 2005".
            String year = fields.length < 2 ? "" : fields[1].trim();
            if (year.isEmpty()) {
                addAcceptedNoLimitTitle(title);
                continue;
            }
            try {
                addEarliestYearForTitle(title, Integer.parseInt(year));
            } catch (NumberFormatException e) {
                log.error("Cannot parse earliest year '" + year + "' for title '" + title + "'", e);
            }
        }
    }

    /**
     * Decides whether a metadata part is accepted, based on the registered titles.
     *
     * <p>The names of every entity at the journal level are collected as candidate titles,
     * and the last name of the last entity at the year level is taken as the year. The
     * candidates are then checked in order; the first one that is registered decides:
     * a registered limit of 0 accepts, otherwise the first four characters of the year
     * must parse to a number not lower than the limit. If the year cannot be read, the
     * problem is logged and the next candidate is tried.
     *
     * @param metadataPart the part whose entities are inspected
     * @return true if the part is accepted, false if no candidate title accepts it
     */
    @Override // pl.edu.icm.ceon.converters.dspace.HigherLevelHarvester
    public boolean testIfShouldBePassed(MetadataPart metadataPart) {
        List<String> journalTitles = new ArrayList<>();
        String yearName = null;
        for (Object entity : metadataPart.getEntities()) {
            if (!(entity instanceof YElement)) {
                continue;
            }
            YElement element = (YElement) entity;
            YStructure structure = element.getStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
            // Entities outside the journal hierarchy carry no such structure; skip them
            // instead of failing with a NullPointerException.
            if (structure == null || structure.getCurrent() == null) {
                continue;
            }
            String level = structure.getCurrent().getLevel();
            if ("bwmeta1.level.hierarchy_Journal_Journal".equals(level)) {
                for (Object name : element.getNames()) {
                    journalTitles.add(((YName) name).getText());
                }
            } else if ("bwmeta1.level.hierarchy_Journal_Year".equals(level)) {
                for (Object name : element.getNames()) {
                    yearName = ((YName) name).getText();
                }
            }
        }
        for (String title : journalTitles) {
            if (title == null) {
                continue;
            }
            String key = title.toLowerCase(Locale.ENGLISH).replaceAll("[^a-z]", MhpParser.NO_TITLE);
            Integer earliestYear = this.forThisTitlesYearsNotEarlierThan.get(key);
            if (earliestYear == null) {
                continue;
            }
            if (earliestYear.intValue() == 0) {
                return true;
            }
            Integer year = parseLeadingYear(yearName);
            if (year != null) {
                return earliestYear.intValue() <= year.intValue();
            }
            log.error("Cannot read a four-digit year from '" + yearName + "' while checking title '" + title + "'");
        }
        return false;
    }

    /** Parses the first four characters of the text as a year; returns null when that is not possible. */
    private static Integer parseLeadingYear(String yearName) {
        if (yearName == null || yearName.length() < 4) {
            return null;
        }
        try {
            return Integer.valueOf(yearName.substring(0, 4));
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Sets the downloader used to fetch the community list and the METS records.
     * No constructor assigns it, so it has to be set before harvesting.
     *
     * @param xmlDataDownloader the downloader to use
     */
    @Override // pl.edu.icm.ceon.converters.dspace.HigherLevelHarvester
    public void setDownloader(XmlDataDownloader xmlDataDownloader) {
        this.downloader = xmlDataDownloader;
    }

    /**
     * Downloads and parses the METS record of one collection, appends everything that was
     * parsed to {@code hierarchyWithYExportablesInfo.elements} and returns the first
     * parsed element.
     *
     * @param str id of the collection whose METS record is fetched
     * @param hierarchyWithYExportablesInfo info object that receives the parsed elements
     * @param hierarchyWithYExportablesInfo2 parent info, or null; when present its
     *        {@code dspaceId} is passed to the reader as the only set
     * @return the first element parsed from the record
     * @throws IOException if downloading or reading fails, or if nothing was parsed
     */
    YElement parseElement(String str, HierarchyWithYExportablesInfo hierarchyWithYExportablesInfo, HierarchyWithYExportablesInfo hierarchyWithYExportablesInfo2) throws IOException {
        String parentId = hierarchyWithYExportablesInfo2 == null ? null : hierarchyWithYExportablesInfo2.dspaceId;
        IdInformation idInformation = new IdInformation();
        idInformation.id = str;
        List<YExportable> parsed;
        // Close the downloaded stream once parsing is done; it was previously leaked.
        try (InputStream mets = this.downloader.getMets(str);
                InputStreamReader metsReader = new InputStreamReader(mets)) {
            SetInfromation sets = parentId == null ? null : new SetInfromation(Collections.singletonList(parentId));
            parsed = this.reader.read(metsReader, idInformation, sets, this.elementContSource);
        }
        if (parsed == null || parsed.isEmpty()) {
            throw new IOException("No elements were parsed from the METS record of " + str);
        }
        hierarchyWithYExportablesInfo.elements.addAll(parsed);
        return (YElement) parsed.get(0);
    }

    /**
     * Builds the hierarchy information for one reference of the community list, stores it
     * in {@code higherHierarchyLevelInformations.map} under the collection id, and then
     * recurses into the references nested under it.
     *
     * <p>The collection id is made of the third-last and second-last '/'-separated
     * segments of the reference's {@code url} attribute. Hierarchy and levels come from
     * the CSV row for that id if there is one, otherwise they are derived from the parent,
     * otherwise the class defaults are used (or the reference is ignored when
     * {@code skipUnknownCollections} is set).
     *
     * @param element the reference element being processed
     * @param hierarchyWithYExportablesInfo info of the enclosing collection, or null at the top level
     * @param higherHierarchyLevelInformations result object that collects the info of every collection
     * @throws IOException if the url attribute is unusable or a METS record cannot be read
     */
    void createHierarchyWithYExportablesInfo(Element element, HierarchyWithYExportablesInfo hierarchyWithYExportablesInfo, HigherHierarchyLevelInformations higherHierarchyLevelInformations) throws IOException {
        String url = element.attributeValue("url");
        String[] urlParts = url == null ? new String[0] : url.split("/");
        if (urlParts.length < 3) {
            throw new IOException("Reference has no usable url attribute: " + url);
        }
        String dspaceId = urlParts[urlParts.length - 3] + "/" + urlParts[urlParts.length - 2];

        String hierarchy;
        String lowLevel;
        String level;
        boolean noAncestors;
        CsvData csvData = this.csvDataMap.get(dspaceId);
        if (csvData != null) {
            hierarchy = csvData.hierarchy;
            level = csvData.level;
            // The CSV "use" flag is what switches ancestor handling off for the collection.
            noAncestors = csvData.use;
            lowLevel = csvData.lowLevel;
        } else if (hierarchyWithYExportablesInfo != null) {
            hierarchy = hierarchyWithYExportablesInfo.hierarchy;
            lowLevel = hierarchyWithYExportablesInfo.defaultLowLevel;
            noAncestors = hierarchyWithYExportablesInfo.getAncestors() == null;
            String parentLevel = hierarchyWithYExportablesInfo.level;
            if ("bwmeta1.level.hierarchy_Journal_Publisher".equalsIgnoreCase(parentLevel)) {
                level = "bwmeta1.level.hierarchy_Journal_Journal";
                noAncestors = false;
            } else if ("bwmeta1.level.hierarchy_Journal_Journal".equalsIgnoreCase(parentLevel)) {
                level = "bwmeta1.level.hierarchy_Journal_Volume";
            } else if ("bwmeta1.hierarchy-class.hierarchy_Journal".equalsIgnoreCase(hierarchy)) {
                level = "bwmeta1.level.hierarchy_Journal_Number";
            } else if ("bwmeta1.level.hierarchy_Book_Book".equalsIgnoreCase(parentLevel)
                    || "bwmeta1.level.hierarchy_Book_Part".equalsIgnoreCase(parentLevel)) {
                level = "bwmeta1.level.hierarchy_Book_Part";
            } else {
                level = parentLevel;
            }
        } else {
            if (this.skipUnknownCollections) {
                return;
            }
            hierarchy = this.defaultHierarchy;
            lowLevel = this.defaultLowLevel;
            level = this.defaultHighLevel;
            noAncestors = this.defaultNoAncestors;
        }

        HierarchyWithYExportablesInfo info = new HierarchyWithYExportablesInfo(hierarchy, level, lowLevel, dspaceId);
        if (noAncestors) {
            info.setAncestors(null);
        } else {
            // A collection configured only through its parent inherits the parent's
            // ancestors and elements, unless the parent is a publisher.
            if (csvData == null && hierarchyWithYExportablesInfo != null && !"bwmeta1.level.hierarchy_Journal_Publisher".equalsIgnoreCase(hierarchyWithYExportablesInfo.level)) {
                info.getAncestors().addAll(hierarchyWithYExportablesInfo.getAncestors());
                info.elements.addAll(hierarchyWithYExportablesInfo.elements);
            }
            YElement parsed = parseElement(dspaceId, info, hierarchyWithYExportablesInfo);
            if ("bwmeta1.level.hierarchy_Journal_Volume".equalsIgnoreCase(level)) {
                // Look for "yyyy" or "yyyy/yyyy" in the volume names; the last match wins.
                Pattern yearPattern = Pattern.compile("(\\d\\d\\d\\d)(/\\d\\d\\d\\d)?");
                String yearText = null;
                for (Object name : parsed.getNames()) {
                    Matcher matcher = yearPattern.matcher(((YName) name).getText());
                    if (matcher.find()) {
                        yearText = matcher.group();
                    }
                }
                List<YAncestor> parentAncestors = hierarchyWithYExportablesInfo == null ? null : hierarchyWithYExportablesInfo.getAncestors();
                if (yearText != null && (parentAncestors == null || parentAncestors.isEmpty())) {
                    // The year id is built from the parent's last ancestor; without one the
                    // year level cannot be synthesised.
                    log.warn("Volume " + dspaceId + " has no parent ancestor; year level " + yearText + " is not created");
                } else if (yearText != null) {
                    YElement yearElement = new YElement();
                    yearElement.setId(parentAncestors.get(parentAncestors.size() - 1).getIdentity() + BaztechYaddaIdGenerator.DEFAULT_PART_SEPARATOR + yearText.replaceAll("[^\\d]", BaztechYaddaIdGenerator.SPACE_SUBSTITUTE));
                    yearElement.addName(new YName(yearText));
                    yearElement.addStructure(info.getStructure("bwmeta1.level.hierarchy_Journal_Year", null));
                    YAncestor yearAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Year", yearElement.getId());
                    for (Object name : yearElement.getNames()) {
                        yearAncestor.addName((YName) name);
                    }
                    info.getAncestors().add(yearAncestor);
                    // Same position as before (two from the end), clamped so that a short
                    // element list cannot produce a negative index.
                    info.elements.add(Math.max(0, info.elements.size() - 2), yearElement);
                }
            }
            parsed.addStructure(info.getStructure(level, null));
            YAncestor selfAncestor = new YAncestor(level, parsed.getId());
            for (Object name : parsed.getNames()) {
                selfAncestor.addName((YName) name);
            }
            for (Object id : parsed.getIds()) {
                selfAncestor.addId((YId) id);
            }
            info.getAncestors().add(selfAncestor);
        }
        higherHierarchyLevelInformations.map.put(dspaceId, info);

        XPath nestedReferences = DocumentHelper.createXPath(subEls);
        nestedReferences.setNamespaceURIs(namespaceUris);
        Iterator<Element> children = XPathHelper.getElementsFromExpression(nestedReferences, element).iterator();
        while (children.hasNext()) {
            createHierarchyWithYExportablesInfo(children.next(), info, higherHierarchyLevelInformations);
        }
    }

    /**
     * Downloads the community list from {@code repoBaseUrl + standardRepoPathEnd}, parses
     * it and builds the hierarchy information for every top-level reference and everything
     * nested under it.
     *
     * @return the collected hierarchy information
     * @throws IOException if the download fails, the document cannot be parsed, or a METS
     *         record cannot be read
     */
    @Override // pl.edu.icm.ceon.converters.dspace.HigherLevelHarvester
    public HigherHierarchyLevelInformations harvestHigherLevels() throws IOException {
        String communityListUrl = this.repoBaseUrl + standardRepoPathEnd;
        InputStream inputStream = null;
        try {
            inputStream = this.downloader.getDataForUrl(communityListUrl);
            Document document = this.saxReader.get().read(inputStream);
            XPath topLevelReferences = DocumentHelper.createXPath(topLevelRefernceSetXPath);
            topLevelReferences.setNamespaceURIs(namespaceUris);
            HigherHierarchyLevelInformations result = new HigherHierarchyLevelInformations();
            Iterator<Element> references = XPathHelper.getElementsFromExpression(topLevelReferences, document.getRootElement()).iterator();
            while (references.hasNext()) {
                createHierarchyWithYExportablesInfo(references.next(), null, result);
            }
            return result;
        } catch (DocumentException e) {
            throw new IOException("Cannot parse the community list from " + communityListUrl, e);
        } finally {
            // Closing stays best-effort: a failure here must not hide the result or the
            // original exception, but it is no longer dropped without a trace.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (Exception closeFailure) {
                    log.debug("Failed to close the community list stream", closeFailure);
                }
            }
        }
    }

    // Registers the "dri" namespace prefix and sets the two XPath expressions used to walk
    // the community list: one for the top-level references, one for the references nested
    // under a reference.
    static {
        namespaceUris.put("dri", "http://di.tamu.edu/DRI/1.0/");
        topLevelRefernceSetXPath = "dri:body/dri:div/dri:referenceSet/dri:reference";
        subEls = "dri:referenceSet/dri:reference";
    }
}
