package pl.edu.icm.ceon.converters.pan;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.ceon.commons.Pair;
import pl.edu.icm.ceon.converters.baztech.BaztechConstants;
import pl.edu.icm.ceon.converters.baztech.BaztechYaddaIdGenerator;
import pl.edu.icm.ceon.converters.commons.ContentPart;
import pl.edu.icm.ceon.converters.commons.DataBatch;
import pl.edu.icm.ceon.converters.commons.ElementContentSource;
import pl.edu.icm.ceon.converters.commons.IMetadataSource;
import pl.edu.icm.ceon.converters.commons.MetadataPart;
import pl.edu.icm.ceon.converters.mhp.MhpParser;
import pl.edu.icm.model.bwmeta.utils.IdGenerator;
import pl.edu.icm.model.bwmeta.y.YAncestor;
import pl.edu.icm.model.bwmeta.y.YAttribute;
import pl.edu.icm.model.bwmeta.y.YContentFile;
import pl.edu.icm.model.bwmeta.y.YContributor;
import pl.edu.icm.model.bwmeta.y.YCurrent;
import pl.edu.icm.model.bwmeta.y.YDescription;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YExportable;
import pl.edu.icm.model.bwmeta.y.YId;
import pl.edu.icm.model.bwmeta.y.YName;
import pl.edu.icm.model.bwmeta.y.YStructure;
import pl.edu.icm.model.transformers.utils.AncestorsManagement;

/* loaded from: input_file:pl/edu/icm/ceon/converters/pan/PanMetadataAndContentSource.class */
public class PanMetadataAndContentSource implements IMetadataSource, ElementContentSource {
    private static final Logger log = LoggerFactory.getLogger(PanMetadataAndContentSource.class);
    /** Collection name given to the constructor; used to build the initial {@link Token}. */
    String collection;
    /** Monitor object used only for timed {@code wait} calls that space out HTTP requests. */
    Object timeBlocker = new Object();
    /** Number of metadata parts after which {@code getBatch} stops pulling further journals. */
    int batchSize = 1024;
    /** Element id to downloadable content, filled while scraping and read by {@code getDataForElement}. */
    HashMap<String, ContentInfo> idsToContents = new HashMap<>();
    /** Produces the suffix appended to article ids in {@code parseIssue}. */
    IdGenerator igen = new IdGenerator();
    /** Source of the random part of the delay before each page fetch. */
    Random rnd = new Random();
    /** Directory for cached downloads; {@code null} disables caching in {@code getFile}. */
    String cacheDir = null;
    /** Timestamp (milliseconds) recorded by {@code getFile} for its most recent download attempt. */
    long lastTime = 0;
    /** Minimum spacing in milliseconds that {@code getFile} checks against {@code lastTime}. */
    long minStep = 500;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:pl/edu/icm/ceon/converters/pan/PanMetadataAndContentSource$ContentInfo.class */
    /**
     * Describes one downloadable file attached to an element: where to fetch it,
     * its content-file description, and an optional page range.
     */
    public static class ContentInfo {
        /** Location the file is downloaded from. */
        URL url;
        /** Content-file description handed out together with the data. */
        YContentFile cf;
        /** First page to keep, or -1 when not set. */
        int fPage;
        /** Last page to keep, or -1 when not set. */
        int lPage;

        /**
         * Creates an entry with no page range set.
         *
         * @param url          location of the file
         * @param yContentFile description of the file
         */
        public ContentInfo(URL url, YContentFile yContentFile) {
            this.fPage = -1;
            this.lPage = -1;
            this.cf = yContentFile;
            this.url = url;
        }
    }

    /* loaded from: input_file:pl/edu/icm/ceon/converters/pan/PanMetadataAndContentSource$Token.class */
    /**
     * Resumption token holding the queue of journals still to be processed.
     *
     * <p>Each queue entry is a pair whose first component is the lower-cased,
     * trimmed property value from {@code urls_names.properties} and whose second
     * component is the matching property key. {@code getBatch} passes them to
     * {@code processJournal} as journal name and journal URL respectively.
     */
    public static class Token implements Serializable {
        /** Journals not yet handed out, consumed from the head. */
        ArrayDeque<Pair<String, String>> articlesToProc;

        /**
         * Builds the queue from the classpath resource {@code <str lower-cased>.list},
         * keeping only lines that match a value in {@code urls_names.properties}.
         *
         * @param str collection name; selects the {@code .list} resource
         * @throws IOException if a resource is missing or cannot be read
         */
        public Token(String str) throws IOException {
            Properties keyToValue = new Properties();
            try (InputStreamReader reader = openResource("urls_names.properties")) {
                keyToValue.load(reader);
            }
            // Invert the mapping so list entries can be looked up by normalised value.
            Properties valueToKey = new Properties();
            for (String key : keyToValue.stringPropertyNames()) {
                valueToKey.put(keyToValue.getProperty(key).toLowerCase().trim(), key);
            }
            try (BufferedReader reader = new BufferedReader(openResource(str.toLowerCase() + ".list"))) {
                List<Pair<String, String>> selected = new ArrayList<Pair<String, String>>();
                String line;
                while ((line = reader.readLine()) != null) {
                    String normalised = line.toLowerCase().trim();
                    String key = valueToKey.getProperty(normalised);
                    if (key != null) {
                        selected.add(new Pair<String, String>(normalised, key));
                    }
                }
                this.articlesToProc = new ArrayDeque<>(selected);
            } catch (Exception e) {
                PanMetadataAndContentSource.log.error("Wrong collection name");
                throw e;
            }
        }

        /**
         * Opens a classpath resource located next to this class as a UTF-8 reader.
         * Fails with an {@link IOException} instead of a bare NullPointerException
         * when the resource does not exist.
         */
        private InputStreamReader openResource(String name) throws IOException {
            java.io.InputStream stream = getClass().getResourceAsStream(name);
            if (stream == null) {
                throw new java.io.FileNotFoundException("Classpath resource not found: " + name);
            }
            return new InputStreamReader(stream, "UTF-8");
        }

        /**
         * Removes and returns the next queued journal.
         *
         * @return the head of the queue, or {@code null} when the queue is empty
         */
        public Pair<String, String> getNext() {
            return this.articlesToProc.pollFirst();
        }

        /**
         * Puts a journal back at the head of the queue so it is returned by the next {@link #getNext()}.
         *
         * @param pair the journal to re-queue
         */
        public void returnToQ(Pair<String, String> pair) {
            this.articlesToProc.push(pair);
        }
    }

    /**
     * Creates a source for the given collection.
     *
     * @param str collection name, later used to build the initial {@link Token}
     */
    public PanMetadataAndContentSource(String str) {
        collection = str;
    }

    /**
     * @return the number of metadata parts after which a batch stops growing
     */
    public int getBatchSize() {
        return batchSize;
    }

    /**
     * Sets the threshold checked by {@code getBatch} before it pulls another journal.
     *
     * @param i new batch size
     */
    public void setBatchSize(int i) {
        batchSize = i;
    }

    /**
     * Scrapes a journal home page and builds metadata for the journal and for every
     * issue linked from its {@code ul.latestnews} lists.
     *
     * <p>The journal element carries the name, ISSN, publisher, editorial-office text
     * and home-page URL found on the page. A thumbnail image, when present, is
     * registered in {@code idsToContents} under the journal id. For each issue link
     * optional year and volume levels are created, then the issue page is parsed by
     * {@link #parseIssue}.
     *
     * <p>The page fetch is preceded by a random 1-2 s wait and is retried for as long
     * as it ends in a {@link SocketTimeoutException}.
     *
     * @param str  journal name; added as the journal element's name when not blank
     * @param str2 URL of the journal home page
     * @return metadata parts for all articles found in the linked issues
     * @throws IOException if a page cannot be fetched for a reason other than a timeout,
     *                     or a URL is malformed
     */
    public List<MetadataPart> processJournal(String str, String str2) throws IOException {
        List<MetadataPart> parts = new ArrayList<MetadataPart>();
        Document document = null;
        while (document == null) {
            synchronized (this.timeBlocker) {
                try {
                    this.timeBlocker.wait(this.rnd.nextInt(1000) + 1000);
                } catch (InterruptedException e) {
                    // NOTE(review): interruption is only logged and the crawl continues,
                    // as before; confirm whether callers expect cancellation here.
                    log.error(e.getMessage(), e);
                }
            }
            try {
                document = Jsoup.connect(str2).timeout(20000).get();
            } catch (SocketTimeoutException e2) {
                document = null;
            }
        }

        // ISSN row: a cell with an <img> supplies the thumbnail, a plain cell the ISSN text.
        String imageUrl = null;
        String issn = null;
        for (Element row : document.select("tr:contains(issn)")) {
            for (Element cell : row.children()) {
                if (cell.text().toLowerCase().contains("issn")) {
                    continue;
                }
                Elements images = cell.getElementsByTag("img");
                if (images.isEmpty()) {
                    issn = cell.text().trim().replace(" ", MhpParser.NO_TITLE);
                } else {
                    for (Element image : images) {
                        if (image.hasAttr("src")) {
                            imageUrl = image.absUrl("src");
                        }
                    }
                }
            }
        }

        String publisher = null;
        for (Element row : document.select("tr:contains(publisher)")) {
            for (Element cell : row.children()) {
                if (!cell.text().toLowerCase().contains("publisher")) {
                    publisher = cell.text().trim();
                }
            }
        }

        String editorialOffice = null;
        for (Element heading : document.select("h2:contains(Redakcja)")) {
            editorialOffice = heading.parent().text();
        }
        String contact = null;
        for (Element heading : document.select("h2:contains(Kontakt)")) {
            contact = heading.parent().text();
        }

        // The journal id is derived from the first label of the host name.
        String journalId = "bwmeta1.element.pan-" + new URL(str2).getHost().split("\\.")[0];
        YElement journal = new YElement(journalId);
        if (StringUtils.isNotBlank(str)) {
            journal.addName(new YName(str));
            System.out.println(str);
        }
        if (StringUtils.isNotBlank(issn)) {
            journal.addId(new YId("bwmeta1.id-class.ISSN", issn));
            System.out.println(issn);
        }
        if (StringUtils.isNotBlank(publisher)) {
            YContributor publisherContributor = new YContributor("publisher", true);
            publisherContributor.addName(new YName(publisher));
            journal.addContributor(publisherContributor);
            System.out.println(publisher);
        }
        if (StringUtils.isNotBlank(str2)) {
            journal.addAttribute("journal.www", str2);
            System.out.println(str2);
        }
        if (StringUtils.isNotBlank(editorialOffice)) {
            YContributor office = new YContributor("editorial-office", true);
            office.addDescription(new YDescription().setText(editorialOffice));
            if (StringUtils.isNotBlank(contact)) {
                office.addAttribute("contact", contact);
                System.out.println(contact);
            }
            journal.addContributor(office);
            System.out.println(editorialOffice);
        }
        if (StringUtils.isNotBlank(imageUrl)) {
            List<String> locations = new ArrayList<String>();
            String[] pathSegments = imageUrl.split("/");
            YContentFile thumbnail = new YContentFile("journal-image", "thumbnail", "image/jpeg", locations);
            this.idsToContents.put(journalId, new ContentInfo(new URL(imageUrl), thumbnail));
            thumbnail.addName(new YName(pathSegments[pathSegments.length - 1].replace('%', '-')).setType("file-name"));
            System.out.println("adding journal image: " + imageUrl);
        }
        YStructure journalStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
        journalStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Journal"));
        journal.addStructure(journalStructure);

        // Two parallel stacks: ancestor references and the full elements they describe.
        Stack<YAncestor> ancestors = new Stack<YAncestor>();
        Stack<YExportable> entities = new Stack<>();
        YAncestor journalAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Journal", journalId);
        AncestorsManagement.copyDataToAncestor(journal, journalAncestor);
        ancestors.push(journalAncestor);
        entities.push(journal);

        // Compiled once instead of once per link.
        Pattern numberPattern = Pattern.compile("[Nn][Oo]\\s*(\\w*)");
        Pattern volumePattern = Pattern.compile("[Vv][oO][lL]\\s*(\\w*)");

        for (Element newsList : document.select("ul.latestnews")) {
            for (Element item : newsList.getElementsByTag("li")) {
                for (Element link : item.getElementsByTag("a")) {
                    // Remember the depth so everything pushed for this issue can be undone.
                    int baseDepth = ancestors.size();
                    String parentId = journalId;
                    String issueUrl = link.absUrl("href");
                    // Link text is split on "/": first part holds volume/number, second the year.
                    String[] labelParts = link.text().split("/");
                    // A label consisting only of "/" splits into an empty array.
                    String head = labelParts.length > 0 ? labelParts[0] : "";
                    Matcher numberMatcher = numberPattern.matcher(head);
                    String number = numberMatcher.find()
                            ? numberMatcher.group(1)
                            : head.trim().replaceAll("^No", MhpParser.NO_TITLE).trim();
                    String volume = null;
                    Matcher volumeMatcher = volumePattern.matcher(head);
                    if (volumeMatcher.find()) {
                        volume = volumeMatcher.group(1);
                    }
                    String year = null;
                    if (labelParts.length > 1) {
                        year = labelParts[1].trim();
                    }
                    if (StringUtils.isNotBlank(year)) {
                        if (year.contains(BaztechConstants.LANG_SEPARATOR)) {
                            year = year.replaceAll(BaztechConstants.LANG_SEPARATOR, BaztechYaddaIdGenerator.DEFAULT_PART_SEPARATOR);
                        }
                        String yearName = year.replaceAll("\\s", MhpParser.NO_TITLE);
                        String yearId = parentId + "-yid-" + yearName.trim();
                        YElement yearElement = new YElement(yearId);
                        parentId = yearId;
                        yearElement.addName(new YName(yearName));
                        YStructure yearStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
                        yearStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Year"));
                        yearStructure.setAncestors(new ArrayList<YAncestor>(ancestors));
                        yearElement.addStructure(yearStructure);
                        YAncestor yearAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Year", yearId);
                        AncestorsManagement.copyDataToAncestor(yearElement, yearAncestor);
                        ancestors.push(yearAncestor);
                        entities.push(yearElement);
                        System.out.println("\t" + yearName);
                    }
                    if (StringUtils.isNotBlank(volume)) {
                        String volumeId = parentId + "-vid-" + volume.trim();
                        YElement volumeElement = new YElement(volumeId);
                        parentId = volumeId;
                        volumeElement.addName(new YName(volume));
                        YStructure volumeStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
                        volumeStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Volume"));
                        volumeStructure.setAncestors(new ArrayList<YAncestor>(ancestors));
                        volumeElement.addStructure(volumeStructure);
                        YAncestor volumeAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Volume", volumeId);
                        AncestorsManagement.copyDataToAncestor(volumeElement, volumeAncestor);
                        ancestors.push(volumeAncestor);
                        entities.push(volumeElement);
                        System.out.println("\t" + volume);
                    }
                    System.out.println();
                    System.out.println("\t" + number);
                    String issueId = parentId + "-iid-" + number.trim().replaceAll("\\s", BaztechYaddaIdGenerator.SPACE_SUBSTITUTE);
                    YElement issueElement = new YElement(issueId);
                    issueElement.addName(new YName(number));
                    YStructure issueStructure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
                    issueStructure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Number"));
                    issueStructure.setAncestors(new ArrayList<YAncestor>(ancestors));
                    issueElement.addStructure(issueStructure);
                    System.out.println("\t" + issueUrl);
                    // NOTE(review): the attribute stores the journal URL (str2), not the issue URL
                    // printed above; kept as-is, confirm which one is intended.
                    issueElement.addAttribute(new YAttribute("journal.contents.url", str2));
                    YAncestor issueAncestor = new YAncestor("bwmeta1.level.hierarchy_Journal_Number", issueId);
                    AncestorsManagement.copyDataToAncestor(issueElement, issueAncestor);
                    ancestors.push(issueAncestor);
                    entities.push(issueElement);
                    parts.addAll(parseIssue(issueUrl, issueId, ancestors, entities));
                    // Unwind both stacks together. Previously the issue was popped from the
                    // ancestor stack only, so the entity stack kept stale year/volume/issue
                    // elements that were then copied into the parts of every later issue.
                    while (ancestors.size() > baseDepth) {
                        ancestors.pop();
                        entities.pop();
                    }
                }
            }
        }
        return parts;
    }

    /** Splits the final author chunk on the word "and", ignoring case. */
    private static final Pattern AUTHOR_CONJUNCTION = Pattern.compile(" and ", Pattern.CASE_INSENSITIVE);

    /**
     * Parses an author list and adds one "author" contributor per name to the element.
     *
     * <p>A trailing colon is dropped, the text is split on
     * {@code BaztechConstants.LANG_SEPARATOR}, and the last chunk is additionally split
     * on " and " so that "A, B and C" yields three authors. Blank names are skipped.
     *
     * @param yElement element that receives the contributors
     * @param str      raw author text; {@code null} adds nothing
     */
    void addAuthors(YElement yElement, String str) {
        if (str == null) {
            return;
        }
        String authorsText = str.trim();
        if (authorsText.endsWith(":")) {
            authorsText = authorsText.substring(0, authorsText.length() - 1);
        }
        List<String> names = new ArrayList<String>();
        String[] chunks = authorsText.split(BaztechConstants.LANG_SEPARATOR);
        for (int i = 0; i < chunks.length; i++) {
            String chunk = chunks[i];
            if (!StringUtils.isNotBlank(chunk)) {
                continue;
            }
            if (i < chunks.length - 1) {
                names.add(chunk.trim());
                continue;
            }
            // Last chunk: keep every name around "and". The old loop stopped one short
            // and so always lost the final author, and it split case-sensitively even
            // though the preceding check was case-insensitive.
            for (String name : AUTHOR_CONJUNCTION.split(chunk)) {
                if (StringUtils.isNotBlank(name)) {
                    names.add(name.trim());
                }
            }
        }
        for (String name : names) {
            yElement.addContributor((YContributor) new YContributor("author", false).addName(new YName(name).setType("canonical")));
        }
    }

    /**
     * Scrapes one issue page and builds a metadata part for every article listed in
     * {@code div.item-page ol>li}.
     *
     * <p>For each list item the {@code <em>} text is treated as the author list, the
     * text nodes of the links as title and alternative title, and the link target as
     * the full-text file. A {@code page=N} entry in the link's fragment sets the
     * article's first page and, when {@code N > 1}, the previous article's last page.
     * File information is registered in {@code idsToContents} under the article id.
     *
     * <p>The page fetch is preceded by a random 1-2 s wait and is retried for as long
     * as it ends in a {@link SocketTimeoutException}.
     *
     * @param str        URL of the issue page
     * @param str2       id of the issue element; prefix of the generated article ids
     * @param collection ancestors copied into each article's structure
     * @param stack      entities copied into each returned metadata part ahead of the article
     * @return one metadata part per article, in page order
     * @throws IOException if the page cannot be fetched for a reason other than a timeout,
     *                     or an article link is not a valid URL
     */
    public List<MetadataPart> parseIssue(String str, String str2, Collection<YAncestor> collection, Stack<YExportable> stack) throws IOException {
        List<MetadataPart> parts = new ArrayList<MetadataPart>();
        Document document = null;
        while (document == null) {
            synchronized (this.timeBlocker) {
                try {
                    this.timeBlocker.wait(this.rnd.nextInt(1000) + 1000);
                } catch (InterruptedException e) {
                    // NOTE(review): interruption is only logged and the crawl continues,
                    // as before; confirm whether callers expect cancellation here.
                    log.error(e.getMessage(), e);
                }
            }
            try {
                document = Jsoup.connect(str).timeout(20000).get();
            } catch (SocketTimeoutException e2) {
                document = null;
            }
        }
        int articleNo = 1;
        ContentInfo previousContent = null;
        for (Element item : document.select("div.item-page ol>li")) {
            // When several <em> elements exist the last one wins.
            String authors = null;
            for (Element emphasis : item.getElementsByTag("em")) {
                authors = emphasis.text();
            }
            String title = null;
            String alternativeTitle = null;
            String href = null;
            for (Element link : item.getElementsByTag("a")) {
                href = link.absUrl("href");
                for (TextNode textNode : link.textNodes()) {
                    if (title == null) {
                        title = textNode.text();
                    } else {
                        alternativeTitle = textNode.text();
                    }
                }
            }
            String articleId = str2 + "-art-" + this.igen.generateIdSuffix(articleNo);
            articleNo++;
            YElement article = new YElement(articleId);
            if (StringUtils.isNotBlank(title)) {
                article.addName(new YName(title));
                System.out.println("\t\t" + title);
            }
            if (StringUtils.isNotBlank(alternativeTitle)) {
                article.addName(new YName(alternativeTitle).setType("alternative"));
                System.out.println("\t\t" + alternativeTitle);
            }
            if (StringUtils.isNotBlank(authors)) {
                System.out.println("\t\t" + authors);
                addAuthors(article, authors);
            }
            ContentInfo content = null;
            if (StringUtils.isNotBlank(href)) {
                List<String> locations = new ArrayList<String>();
                URL url = new URL(href);
                String[] segments = href.split("/");
                String lastSegment = segments[segments.length - 1];
                // Separate the fragment BEFORE sanitising: the sanitiser strips '=' and '&',
                // which used to glue "page=12&zoom=100" into the bogus page number 12100,
                // and an empty fragment ("file.pdf#") used to overrun the split array.
                String namePart = lastSegment;
                String fragment = null;
                int hashIndex = lastSegment.indexOf('#');
                if (hashIndex >= 0) {
                    namePart = lastSegment.substring(0, hashIndex);
                    fragment = lastSegment.substring(hashIndex + 1);
                }
                String fileId = namePart.replaceAll("[^\\w-#]", MhpParser.NO_TITLE);
                if (fileId.endsWith("pdf")) {
                    // The dot was removed by the sanitiser; restore the extension.
                    fileId = fileId.substring(0, fileId.length() - 3) + ".pdf";
                }
                YContentFile contentFile = new YContentFile(fileId, "full-text", "application/pdf", locations);
                contentFile.addName(new YName(contentFile.getId()).setType("file-name"));
                content = new ContentInfo(url, contentFile);
                this.idsToContents.put(articleId, content);
                System.out.println("\t\t" + href);
                if (StringUtils.isNotBlank(fragment)) {
                    for (String parameter : fragment.split("&")) {
                        if (!parameter.contains("page")) {
                            continue;
                        }
                        try {
                            int firstPage = Integer.parseInt(parameter.replaceAll("[^0-9]", MhpParser.NO_TITLE));
                            content.fPage = firstPage;
                            int previousLastPage = firstPage - 1;
                            if (previousLastPage > 0 && previousContent != null) {
                                previousContent.lPage = previousLastPage;
                            }
                        } catch (NumberFormatException e3) {
                            // Best effort: an unparsable page number leaves the range unset.
                            log.debug("Cannot read page number from '{}' in {}", parameter, href);
                        }
                    }
                }
            }
            previousContent = content;
            YStructure structure = new YStructure("bwmeta1.hierarchy-class.hierarchy_Journal");
            structure.setCurrent(new YCurrent("bwmeta1.level.hierarchy_Journal_Article"));
            structure.setAncestors(new ArrayList<YAncestor>(collection));
            article.addStructure(structure);
            MetadataPart part = new MetadataPart();
            part.setEntities(new ArrayList<YExportable>(stack));
            part.addEntity(article);
            parts.add(part);
        }
        return parts;
    }

    /**
     * @return always {@code false}
     */
    public boolean doKnowsAboutFiles() {
        return false;
    }

    /**
     * @return always {@code false}; the random-access getter of this class throws
     *         {@link UnsupportedOperationException}
     */
    public boolean isRandomAccessSupported() {
        return false;
    }

    /**
     * @return always {@code true}; data is obtained batch by batch through {@code getBatch}
     */
    public boolean isSequentialAccessSupported() {
        return true;
    }

    /* renamed from: getData, reason: merged with bridge method [inline-methods] */
    /**
     * Random access by id is not supported.
     *
     * @param str element id (ignored)
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public MetadataPart m168getData(String str) {
        throw new UnsupportedOperationException("This converter does not support random access");
    }

    /**
     * Starts a fresh crawl of the configured collection and returns its first batch.
     *
     * @param date  must be {@code null}
     * @param date2 must be {@code null}
     * @return the first batch, or {@code null} when the token cannot be created
     *         because of an {@link IOException} (the error is logged)
     * @throws UnsupportedOperationException if either date is given
     */
    public DataBatch<MetadataPart> getBatch(Date date, Date date2) {
        boolean noRangeRequested = date == null && date2 == null;
        if (!noRangeRequested) {
            throw new UnsupportedOperationException("Date ranges are not supported");
        }
        DataBatch<MetadataPart> firstBatch;
        try {
            Token freshToken = new Token(this.collection);
            firstBatch = getBatch(freshToken);
        } catch (IOException ioe) {
            log.error(ioe.getLocalizedMessage(), ioe);
            firstBatch = null;
        }
        return firstBatch;
    }

    /**
     * Continues a crawl: pulls journals from the token until the payload reaches the
     * batch size or the token runs out of journals.
     *
     * <p>A journal that fails with an {@link IOException} is logged and skipped. The
     * token is attached as resumption token unless the last poll found the queue empty.
     *
     * @param serializable the resumption token; must be a {@link Token}
     * @return the batch with its payload and, possibly, a resumption token
     * @throws ClassCastException if the argument is not a {@link Token}
     */
    public DataBatch<MetadataPart> getBatch(Serializable serializable) {
        if (!(serializable instanceof Token)) {
            throw new ClassCastException();
        }
        Token token = (Token) serializable;
        ArrayList<MetadataPart> payload = new ArrayList<MetadataPart>();
        // Stays null when the loop never polls or the last poll found the queue empty.
        Pair<String, String> lastPolled = null;
        while (payload.size() < this.batchSize && (lastPolled = token.getNext()) != null) {
            try {
                String journalName = (String) lastPolled.getFirst();
                String journalUrl = (String) lastPolled.getSecond();
                payload.addAll(processJournal(journalName, journalUrl));
            } catch (IOException ioe) {
                log.error(ioe.getMessage(), ioe);
            }
        }
        DataBatch<MetadataPart> batch = new DataBatch<>();
        if (lastPolled != null) {
            batch.setResumptionToken(token);
        }
        batch.setPayload(payload);
        return batch;
    }

    /**
     * Sets the directory used to cache downloaded files.
     *
     * @param str cache directory path, or {@code null} to disable caching
     */
    public void setCacheDir(String str) {
        cacheDir = str;
    }

    /**
     * Maps a URL to the path of its cache file: the cache directory, a separator and
     * the URL's file part as returned by {@link URL#getFile()}.
     *
     * <p>NOTE(review): the file part is used verbatim, so ".." segments or a query
     * string coming from a scraped link end up in the path; confirm whether that
     * needs guarding.
     *
     * @param url location of the remote file
     * @return path of the corresponding cache file
     */
    String urlToCacheFileName(URL url) {
        StringBuilder path = new StringBuilder();
        path.append(this.cacheDir);
        path.append(File.separator);
        path.append(url.getFile());
        return path.toString();
    }

    /**
     * Returns the bytes behind a URL, using the cache directory when one is configured.
     *
     * <p>A cached copy is returned when it exists. Otherwise the file is downloaded
     * with 2 s connect and read timeouts and retried for as long as the attempt ends in
     * a {@link SocketTimeoutException}. If the previous attempt started less than
     * {@code minStep} ms ago, the thread first waits {@code minStep} ms. A successful
     * download is written to the cache.
     *
     * <p>Every stream opened here is closed before returning; the IOUtils helpers do
     * not close the streams passed to them.
     *
     * @param url location of the file
     * @return the file content
     * @throws IOException if reading the cache, downloading (other than by timeout)
     *                     or writing the cache fails
     */
    byte[] getFile(URL url) throws IOException {
        if (this.cacheDir != null) {
            File cached = new File(urlToCacheFileName(url));
            if (cached.exists()) {
                try (FileInputStream in = new FileInputStream(cached)) {
                    return IOUtils.toByteArray(in);
                }
            }
        }
        byte[] data = null;
        while (data == null) {
            long attemptStart = System.currentTimeMillis();
            if (attemptStart - this.lastTime < this.minStep) {
                try {
                    synchronized (this.timeBlocker) {
                        this.timeBlocker.wait(this.minStep);
                    }
                } catch (InterruptedException e) {
                    log.error(e.getMessage(), e);
                }
            }
            try {
                URLConnection connection = url.openConnection();
                connection.setConnectTimeout(2000);
                connection.setReadTimeout(2000);
                try (java.io.InputStream in = connection.getInputStream()) {
                    data = IOUtils.toByteArray(in);
                }
            } catch (SocketTimeoutException e2) {
                // Timed out: drop any partial result and try again.
                data = null;
            }
            this.lastTime = attemptStart;
        }
        if (this.cacheDir != null) {
            File target = new File(urlToCacheFileName(url));
            File parent = target.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
            try (FileOutputStream out = new FileOutputStream(target)) {
                IOUtils.write(data, out);
            }
        }
        return data;
    }

    /**
     * Downloads the content registered for an element and wraps it in a content part.
     *
     * <p>When a first or last page is set for the content, the data is passed through
     * {@code GetNewPdf.getOnlyPages}; if that fails the error is logged and the full
     * file is used instead.
     *
     * @param str element id
     * @return a content part with one file entry, or {@code null} when nothing is
     *         registered for the id, the download fails, or the data is empty
     */
    public ContentPart getDataForElement(String str) {
        ContentInfo info = this.idsToContents.get(str);
        if (info == null) {
            return null;
        }
        ContentPart part = new ContentPart();
        ContentPart.Entry fileEntry = new ContentPart.Entry();
        fileEntry.setDescription(info.cf);

        byte[] payload;
        try {
            payload = getFile(info.url);
        } catch (IOException ioe) {
            log.error(ioe.getMessage(), ioe);
            return null;
        }

        boolean pageRangeKnown = info.fPage >= 0 || info.lPage >= 0;
        if (pageRangeKnown) {
            try {
                payload = GetNewPdf.getOnlyPages(payload, info.fPage, info.lPage);
            } catch (Exception e) {
                // Extraction failed: keep the complete file.
                log.error(str);
                log.error(info.url.toString());
                log.error(e.getMessage(), e);
            }
        }

        if (payload == null || payload.length == 0) {
            return null;
        }
        fileEntry.setData(payload);
        part.addFile(fileEntry);
        return part;
    }
}
