package pl.edu.icm.ceon.converters.springer.newSpringerJournalsPack;

import au.com.bytecode.opencsv.CSVReader;
import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.ceon.commons.CeonGeneralException;
import pl.edu.icm.ceon.converters.springer.SpringerParser;
import pl.edu.icm.ceon.converters.springer.journalsPackCreator.JournalsFromCsvCreator;
import pl.edu.icm.ceon.tools.textcat.LanguageIdentifierBean;
import pl.edu.icm.model.bwmeta.utils.IdGenerator;
import pl.edu.icm.model.bwmeta.y.AbstractNDA;
import pl.edu.icm.model.bwmeta.y.YContributor;
import pl.edu.icm.model.bwmeta.y.YCurrent;
import pl.edu.icm.model.bwmeta.y.YDescription;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YExportable;
import pl.edu.icm.model.bwmeta.y.YId;
import pl.edu.icm.model.bwmeta.y.YLanguage;
import pl.edu.icm.model.bwmeta.y.YName;
import pl.edu.icm.model.bwmeta.y.YStructure;
import pl.edu.icm.model.general.MetadataTransformers;
import pl.edu.icm.model.transformers.MetadataWriter;
import pl.edu.icm.model.transformers.bwmeta.y.BwmetaTransformerConstants;

/**
 * Builds a zip archive of Springer journal descriptions from a CSV journal list.
 *
 * <p>Every CSV row whose first column parses as an integer is turned into a {@link YElement}.
 * For each such journal the archive receives a directory entry named after the element id and,
 * inside it, the BWMETA serialization of the journal. The journal page on link.springer.com is
 * downloaded in order to extract an abstract for the description.
 *
 * <p>Columns read from a row: 0 - Springer id, 1 - journal name, 2 - publisher name,
 * 3 - ISSN, 4 - EISSN.
 */
public class JournalsZipFromCsvCreator {
    private static final Logger log = LoggerFactory.getLogger(JournalsZipFromCsvCreator.class);

    /** Id scheme under which the Springer journal number is stored on the element. */
    private static final String SPRINGER_ID_CLASS = "bwmeta1.id-class.Springer";

    /** Number of columns a CSV row must have, since indexes 0..4 are read. */
    private static final int REQUIRED_COLUMNS = 5;

    /** Matches an {@code <img>} tag and captures the value of its {@code src} attribute. */
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("<img[^>]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>");

    /**
     * Per-thread language identifier. The initial value is {@code null} when the identifier
     * cannot be constructed; the failure is logged.
     */
    static ThreadLocal<LanguageIdentifierBean> bean = new ThreadLocal<LanguageIdentifierBean>() {
        @Override
        protected LanguageIdentifierBean initialValue() {
            try {
                return new LanguageIdentifierBean();
            } catch (IOException | CeonGeneralException e) {
                log.error(e.getMessage(), e);
                return null;
            }
        }
    };

    /** Per-thread BWMETA 2.2 writer used to serialize the journal elements. */
    static ThreadLocal<MetadataWriter<YExportable>> writer = new ThreadLocal<MetadataWriter<YExportable>>() {
        @Override
        protected MetadataWriter<YExportable> initialValue() {
            return MetadataTransformers.BTF.getWriter(BwmetaTransformerConstants.Y, BwmetaTransformerConstants.BWMETA_2_2);
        }
    };

    static IdGenerator idGenerator = new IdGenerator();

    /** Classpath resource used by {@link #main(String[])} when no resource argument is given. */
    static String defaultResource = "pl/edu/icm/ceon/converters/springer/newSpringerJournalsPack/journals_list_oct18_2012.csv";

    /**
     * Signals a Springer id that is not an integer. Kept as a public type for existing
     * references; the row conversion in this class now returns an empty list instead of
     * throwing and catching it internally.
     */
    public static class BadIdException extends RuntimeException {
    }

    /** Returns {@code true} when the value is non-null and not blank. */
    private static boolean hasText(String value) {
        return value != null && !value.trim().isEmpty();
    }

    /** Returns {@code true} when the value has text and is not the "n/a" placeholder. */
    private static boolean isKnownValue(String value) {
        return hasText(value) && !"n/a".equalsIgnoreCase(value.trim());
    }

    /**
     * Converts one CSV row into a journal element.
     *
     * @param strArr the row columns (see the class description for the column layout)
     * @return a list holding the single created element, or an empty list when the row has
     *         fewer than five columns or its Springer id is present but not an integer
     */
    static List<YExportable> getYexportablesFromSingleEntry(String[] strArr) {
        List<YExportable> result = new ArrayList<>();
        if (strArr == null || strArr.length < REQUIRED_COLUMNS) {
            log.warn("Skipping CSV row with fewer than {} columns", REQUIRED_COLUMNS);
            return result;
        }

        YElement journal = new YElement();
        int springerNumber = -1;
        if (isKnownValue(strArr[3])) {
            journal.addId(new YId("bwmeta1.id-class.ISSN", strArr[3]));
        }
        if (isKnownValue(strArr[4])) {
            journal.addId(new YId("bwmeta1.id-class.EISSN", strArr[4]));
        }
        if (hasText(strArr[0])) {
            journal.addId(new YId(SPRINGER_ID_CLASS, strArr[0]));
            try {
                springerNumber = Integer.parseInt(strArr[0]);
            } catch (NumberFormatException e) {
                // A non-numeric Springer id makes the whole row unusable.
                log.error(e.getMessage(), e);
                return result;
            }
        }

        YName name = new YName().setLanguage(YLanguage.Undetermined).setText(strArr[1]).setType("canonical");
        if (strArr[0] != null) {
            name.setSortKey(strArr[0]);
        }
        journal.addName(name);

        YStructure structure = new YStructure();
        structure.setHierarchy("bwmeta1.hierarchy-class.hierarchy_Journal");
        YCurrent current = new YCurrent();
        current.setLevel("bwmeta1.level.hierarchy_Journal_Journal");
        structure.setCurrent(current);

        if (hasText(strArr[2])) {
            YContributor publisher = new YContributor();
            publisher.setRole("publisher");
            publisher.setInstitution(true);
            publisher.addName(new YName(strArr[2]).setLanguage(YLanguage.Undetermined).setType("canonical"));
            journal.addContributor(publisher);
        }
        journal.addStructure(structure);

        // Prefer an id derived from the Springer number; otherwise let the generator
        // derive one from the element itself.
        journal.setId("bwmeta1.element." + (springerNumber >= 0
                ? SpringerParser.IDSUFFIX_SPRINGER + idGenerator.generateIdSuffix(springerNumber)
                : idGenerator.generateIdSuffix(new AbstractNDA[]{null, journal})));
        result.add(journal);
        return result;
    }

    /**
     * Extracts the cover image URL from the HTML of a journal page.
     *
     * @param str the page HTML
     * @return the image URL (prefixed with the link.springer.com host when it does not start
     *         with "http"), or {@code null} when the cover markup is not found
     */
    static String findCoverUrl_springer(String str) {
        String coverUrl = null;
        // substringBetween yields null when either delimiter is missing.
        // NOTE(review): the fragment starts after the marker's own "<img", so the pattern
        // can only match a later <img> tag inside the fragment - confirm against real pages.
        String fragment = StringUtils.substringBetween(str, "<img class=\"look-inside-cover\"", "</a>");
        if (fragment != null) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(fragment);
            if (matcher.find()) {
                coverUrl = matcher.group(1);
                if (!coverUrl.startsWith("http")) {
                    coverUrl = "http://link.springer.com" + coverUrl;
                }
            }
        }
        log.info("cover: " + coverUrl);
        return coverUrl;
    }

    /**
     * Builds the journal page URL from the element's first Springer id.
     *
     * @param yElement the journal element
     * @return the page URL; when the element has no Springer id this is logged and the URL
     *         ends with "null"
     */
    static String preparePageUrl_springer(YElement yElement) {
        List<String> springerIds = yElement.getIds(SPRINGER_ID_CLASS);
        String springerId = (springerIds == null || springerIds.isEmpty()) ? null : springerIds.get(0);
        if (springerId == null) {
            log.info("No springerId for " + yElement.getId());
        }
        return "http://link.springer.com/journal/" + springerId;
    }

    /**
     * Extracts the abstract markup from the HTML of a journal page.
     *
     * @param str the page HTML
     * @return the abstract as HTML text, or {@code null} unless exactly one abstract
     *         container is present
     */
    static String parseAbstract_springer(String str) {
        Elements containers = Jsoup.parse(str).select("div[class=abstract-content formatted]");
        if (containers.size() != 1) {
            log.info("No div!");
            return null;
        }
        String abstractText = prepareText(containers.get(0));
        if (StringUtils.isEmpty(abstractText)) {
            log.info("No abstract");
        }
        return abstractText;
    }

    /**
     * Downloads the content at the given URL.
     *
     * @param str the URL to fetch
     * @return the response body bytes (empty when the response has no body)
     * @throws HttpException when the response status is not 200
     * @throws IOException on transport failures
     */
    private static byte[] fetchFileContent(String str) throws IOException {
        HttpClient httpClient = new HttpClient();
        GetMethod getMethod = new GetMethod(str);
        getMethod.getParams().setParameter("http.method.retry-handler", new DefaultHttpMethodRetryHandler(3, false));
        try {
            if (httpClient.executeMethod(getMethod) == 200) {
                InputStream body = getMethod.getResponseBodyAsStream();
                return body == null ? new byte[0] : IOUtils.toByteArray(body);
            }
            log.info("http failed url: " + str);
            throw new HttpException("Method failed: " + getMethod.getStatusLine());
        } finally {
            // The connection must be handed back whether or not the request succeeded.
            getMethod.releaseConnection();
        }
    }

    /**
     * Concatenates the inner HTML of the abstract's direct children that either carry the
     * class "springerHTML" or are {@code <p>} elements, each followed by a line break tag.
     */
    private static String prepareText(Element element) {
        StringBuilder text = new StringBuilder();
        for (Element child : element.children()) {
            if ("springerHTML".equals(child.attr("class")) || "p".equals(child.tagName())) {
                text.append(child.html());
                text.append("<br />");
            }
        }
        return text.toString();
    }

    /**
     * Downloads the journal cover and stores it in the archive under the journal directory.
     * A missing cover (no URL in the page, or a non-200 response) is logged and skipped.
     *
     * @param str the journal directory name inside the archive
     * @param str2 the HTML of the journal page
     * @param zipOutputStream the archive being written
     * @throws IOException on transport or archive write failures
     */
    static void getCover(String str, String str2, ZipOutputStream zipOutputStream) throws IOException {
        String coverUrl = findCoverUrl_springer(str2);
        if (coverUrl == null) {
            log.info("Cover image not found for journal: " + str + " url: " + coverUrl);
            return;
        }
        try {
            byte[] content = fetchFileContent(coverUrl);
            String[] pathSegments = coverUrl.split("/");
            String entryName = JournalSourceFromZip.COVER_PREFIX;
            String fileName = pathSegments[pathSegments.length - 1];
            if (fileName.contains(".")) {
                // Carry over the extension of the remote file name.
                String[] nameParts = fileName.split("\\.");
                entryName = entryName + "." + nameParts[nameParts.length - 1];
            }
            zipOutputStream.putNextEntry(new ZipEntry(str + "/" + entryName));
            IOUtils.write(content, zipOutputStream);
        } catch (HttpException e) {
            log.info("Cover image not found for journal: " + str + " url: " + coverUrl);
        }
    }

    /** Classifies the text's language; falls back to Undetermined without an identifier. */
    private static YLanguage detectLanguage(String text) {
        LanguageIdentifierBean identifier = bean.get();
        if (identifier == null) {
            return YLanguage.Undetermined;
        }
        return YLanguage.byCode(identifier.classify(text));
    }

    /**
     * Writes one journal into the archive: a directory entry plus the BWMETA file. Rows that
     * yield no element and journals whose id was already written are skipped.
     *
     * @param strArr the CSV row
     * @param zipOutputStream the archive being written
     * @param arrayList ids already written; the new id is appended
     * @throws IOException on transport or archive write failures
     */
    static void processSingleEntry(String[] strArr, ZipOutputStream zipOutputStream, ArrayList<String> arrayList) throws IOException {
        List<YExportable> exportables = getYexportablesFromSingleEntry(strArr);
        if (exportables.isEmpty()) {
            return;
        }
        YElement journal = (YElement) exportables.get(0);
        String id = journal.getId();
        if (arrayList.contains(id)) {
            log.error("Already has entry " + id + " duplicate");
            return;
        }
        arrayList.add(id);
        zipOutputStream.putNextEntry(new ZipEntry(id + "/"));

        String pageUrl = preparePageUrl_springer(journal);
        try {
            // Decode explicitly instead of relying on the platform default charset.
            // NOTE(review): assumes the journal pages are served as UTF-8 - confirm.
            String page = new String(fetchFileContent(pageUrl), StandardCharsets.UTF_8);
            String abstractText = parseAbstract_springer(page);
            if (StringUtils.isNotEmpty(abstractText)) {
                journal.addDescription(new YDescription(detectLanguage(abstractText), abstractText, "abstract"));
            }
        } catch (HttpException e) {
            log.info("Journal page not found: " + pageUrl);
        }

        zipOutputStream.putNextEntry(new ZipEntry(id + "/" + JournalSourceFromZip.BWMETA_FILE_NAME));
        String bwmeta = writer.get().write(exportables, new Object[0]);
        // NOTE(review): encodes as UTF-8 rather than the platform default; confirm this
        // matches the encoding the BWMETA writer declares in its output.
        IOUtils.write(bwmeta, zipOutputStream, "UTF-8");
    }

    /**
     * Reads the CSV journal list from the classpath and writes the journal archive.
     * The first CSV row is discarded (presumably a header); rows whose first column is not
     * an integer are skipped. An {@link IOException} raised while processing rows is logged
     * and ends processing; the archive is closed in every case.
     *
     * @param str the classpath resource holding the CSV list
     * @param str2 the path of the zip file to create
     * @throws FileNotFoundException when the resource is missing or the file cannot be created
     * @throws IOException when closing the archive fails
     */
    public static void getListFromResource(String str, String str2) throws FileNotFoundException, IOException {
        InputStream resourceAsStream = ClassLoader.getSystemClassLoader().getResourceAsStream(str);
        if (resourceAsStream == null) {
            // Fail before the output file is created.
            throw new FileNotFoundException("No such classpath resource: " + str);
        }
        ArrayList<String> processedIds = new ArrayList<>();
        try (InputStream csvStream = resourceAsStream;
                ZipOutputStream zipOutputStream = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(str2)))) {
            // NOTE(review): the CSV is decoded with the platform default charset, as before.
            CSVReader csvReader = new CSVReader(new InputStreamReader(csvStream), ',', '\"');
            try {
                csvReader.readNext();
                String[] row;
                while ((row = csvReader.readNext()) != null) {
                    if (row.length == 0) {
                        continue;
                    }
                    try {
                        Integer.parseInt(row[0]);
                        processSingleEntry(row, zipOutputStream, processedIds);
                    } catch (NumberFormatException ignored) {
                        log.debug("Skipping CSV row: {}", ignored.getMessage());
                    }
                }
            } catch (IOException e) {
                log.error("Failed while processing journal list " + str, e);
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr {@code [0]} output zip path; {@code [1]} optional CSV classpath resource
     *               (defaults to {@link #defaultResource})
     * @throws IllegalArgumentException when the output path argument is missing
     * @throws IOException when the archive cannot be written
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: JournalsZipFromCsvCreator <output-zip> [csv-classpath-resource]");
        }
        String resource = strArr.length >= 2 ? strArr[1] : defaultResource;
        getListFromResource(resource, strArr[0]);
    }
}
