package pl.edu.icm.synat.process.common.harvesting.ieee.impl;

import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.OutputDocument;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.Tag;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.model.bwmeta.y.YCategoryRef;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.synat.process.common.harvesting.HarvestingResult;
import pl.edu.icm.synat.process.common.harvesting.MetadataHarvester;
import pl.edu.icm.synat.process.common.harvesting.SubjectDisciplineMapper;
import pl.edu.icm.synat.process.common.harvesting.TitleHistory;
import pl.edu.icm.synat.process.common.harvesting.TitleHistoryElement;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.content.PrepareContentUtils;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.yelement.PrepareYElementUtils;
import pl.edu.icm.synat.process.common.model.api.Document;

/* loaded from: input_file:pl/edu/icm/synat/process/common/harvesting/ieee/impl/IeeeMetadataHarvester.class */
/**
 * {@link MetadataHarvester} that enriches a document with data scraped from the IEEE Xplore
 * "about journal" page: the aims-and-scope description, the title history, the impact factor
 * and the subject categories.
 *
 * <p>Cover harvesting is not supported; {@link #findCover(Document)} always returns an empty
 * result.
 */
public class IeeeMetadataHarvester implements MetadataHarvester {
    /** Index of the first element of a list. */
    private static final int FIRST = 0;
    /** Flag handed to {@code isApplicableFor}; presumably excludes books - confirm against PrepareYElementUtils. */
    private static final boolean WITHOUT_BOOKS = true;

    private static final Logger LOGGER = LoggerFactory.getLogger(IeeeMetadataHarvester.class);

    /** Identifier scheme under which the IEEE publication number is stored in a {@link YElement}. */
    private static final String IEEE_ID_SCHEME = "bwmeta1.id-class.IEEE-AmsId";
    /** Base address of the "about journal" page; the publication number is appended to it. */
    private static final String ABOUT_JOURNAL_URL =
            "http://ieeexplore.ieee.org/xpl/aboutJournal.jsp?reload=true&punumber=";
    /** Extracts the value of the trailing {@code punumber} query parameter of a title-history link. */
    private static final Pattern PUBLICATION_NUMBER = Pattern.compile("punumber=(.*)$");

    /** Tags whose whole outer HTML is kept in the description. */
    private static final String[] WELL_FORMATTED_TAGS = {"ul"};
    /** Tags whose inner HTML (without the tag itself) is kept in the description. */
    private static final String[] WELL_FORMATTED_INNER_TAGS = {"p"};
    /** Tags stripped from any HTML that is kept in the description. */
    private static final List<String> TAGS_TO_REMOVE = Lists.newArrayList("a", "img");

    private final PrepareYElementUtils yelementUtil;
    private final PrepareContentUtils contentUtil;
    /** May be {@code null} until {@link #setMapper(SubjectDisciplineMapper)} is called. */
    private SubjectDisciplineMapper mapper = null;

    /**
     * Creates a harvester that maps scraped subjects with the given mapper.
     *
     * @param subjectDisciplineMapper mapper used to convert subject names to category references
     */
    public IeeeMetadataHarvester(SubjectDisciplineMapper subjectDisciplineMapper) {
        this();
        setMapper(subjectDisciplineMapper);
    }

    /**
     * Creates a harvester without a subject mapper. Categories are not harvested until a mapper
     * is supplied through {@link #setMapper(SubjectDisciplineMapper)}.
     */
    public IeeeMetadataHarvester() {
        this.yelementUtil = new PrepareYElementUtils();
        this.contentUtil = new PrepareContentUtils();
    }

    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public boolean isApplicable(Document document) {
        return this.yelementUtil.isApplicableFor(document, "ieee", WITHOUT_BOOKS);
    }

    /**
     * Covers are not harvested for IEEE.
     *
     * @return always a new, empty {@link HarvestingResult}
     */
    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public HarvestingResult findCover(Document document) {
        return new HarvestingResult();
    }

    /**
     * Downloads the "about journal" page of the given document and extracts its metadata.
     *
     * @param document document to harvest metadata for
     * @return the harvested data, or {@code null} when the document yields no element, carries no
     *         IEEE identifier, or the remote page could not be fetched
     */
    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public HarvestingResult findCoverAndMetadata(Document document) {
        YElement yElement = this.yelementUtil.prepareYElement(document);
        if (yElement == null) {
            return null;
        }
        String pageUrl = preparePageUrlIeee(yElement);
        if (pageUrl == null) {
            // Without an IEEE identifier there is no page to fetch.
            return null;
        }
        String pageContent = this.contentUtil.fetchRemoteContent(pageUrl);
        if (pageContent == null) {
            return null;
        }

        HarvestingResult harvestingResult = new HarvestingResult();
        harvestingResult.setSiteUrl(pageUrl);
        org.jsoup.nodes.Document page = Jsoup.parse(pageContent);
        harvestingResult.setDesciption(parseAbstractIeeeNew(page));
        harvestingResult.setOtherTitles(parseTitleHistory(page, yElement.getId()));

        String impactFactor = prepareImpactFactor(page);
        if (!StringUtils.isEmpty(impactFactor)) {
            harvestingResult.setImpactFactor(impactFactor);
        }
        List<YCategoryRef> categories = prepareCategories(page);
        if (categories != null) {
            harvestingResult.setCategories(categories);
        }
        return harvestingResult;
    }

    /**
     * Reads the list that follows the {@code #TitleHist} element. Every list item that contains a
     * link becomes one history entry: the link text is the title, the link target gives the
     * identifier and the item's own text gives the date range.
     *
     * @param document parsed "about journal" page
     * @param str identifier of the element currently being harvested
     * @return title history split around the current element
     */
    private TitleHistory parseTitleHistory(org.jsoup.nodes.Document document, String str) {
        List<TitleHistoryElement> entries = new ArrayList<TitleHistoryElement>();
        for (Element item : document.select("#TitleHist  ~ ul li")) {
            Element link = item.getElementsByTag("a").first();
            if (link == null) {
                // Items without a link carry no title to record.
                continue;
            }
            String href = link.attr("href");
            String ownText = item.ownText();

            TitleHistoryElement entry = new TitleHistoryElement();
            entry.setDateRange(StringUtils.isNotBlank(ownText) ? parseTitleYears(ownText) : null);
            entry.setId(StringUtils.isNotBlank(href) ? parseTitleLink(href) : null);
            entry.setTitleName(link.text());
            entries.add(entry);
        }
        return createTitleHistory(entries, str);
    }

    /**
     * Splits the given entries into previous and next titles. The entries are walked in reverse
     * order; everything met before the entry whose id equals {@code str} goes to the previous
     * titles, everything after it to the next titles. The matching entry itself is skipped.
     *
     * @param list entries in page order
     * @param str identifier of the current element; when {@code null} nothing matches and all
     *            entries end up as previous titles
     * @return the populated title history
     */
    TitleHistory createTitleHistory(List<TitleHistoryElement> list, String str) {
        TitleHistory titleHistory = new TitleHistory();
        List<TitleHistoryElement> target = titleHistory.getPreviousTitles();
        for (TitleHistoryElement entry : Lists.reverse(list)) {
            if (str != null && str.equals(entry.getId())) {
                target = titleHistory.getNextTitles();
            } else {
                target.add(entry);
            }
        }
        return titleHistory;
    }

    /**
     * Converts a title-history link into an element identifier.
     *
     * @param str link target expected to end with {@code punumber=<digits>}
     * @return {@code bwmeta1.element.ieee-pub-} followed by the number zero-padded to 12 digits, or
     *         {@code null} when the link has no purely numeric, int-sized publication number
     */
    String parseTitleLink(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = PUBLICATION_NUMBER.matcher(str);
        if (!matcher.find()) {
            return null;
        }
        String publicationNumber = matcher.group(1);
        // Only plain digits are accepted: decimal, hex or suffixed literals are not publication numbers.
        if (!NumberUtils.isDigits(publicationNumber)) {
            return null;
        }
        try {
            return String.format("bwmeta1.element.ieee-pub-%012d", Integer.valueOf(Integer.parseInt(publicationNumber)));
        } catch (NumberFormatException e) {
            // Digits only, but too large for an int; treat as unparseable instead of emitting a bogus id.
            return null;
        }
    }

    /**
     * Normalises the date range of a title-history item by trimming it and dropping a dangling
     * trailing dash, e.g. {@code "1990 -"} and {@code "1990-"} both become {@code "1990"}.
     *
     * @param str raw text of the list item
     * @return the cleaned range, or {@code null} when {@code str} is {@code null}
     */
    String parseTitleYears(String str) {
        if (str == null) {
            return null;
        }
        String years = str.trim();
        if (years.endsWith("-") && years.length() > 1) {
            years = years.substring(0, years.length() - 1).trim();
        }
        return years;
    }

    /**
     * Builds the address of the "about journal" page from the first IEEE identifier of the element.
     *
     * @return the page address, or {@code null} when the element has no usable IEEE identifier
     */
    private String preparePageUrlIeee(YElement yElement) {
        List<String> ids = yElement.getIds(IEEE_ID_SCHEME);
        if (ids == null || ids.isEmpty()) {
            LOGGER.info("No ieeeId for {}", yElement.getId());
            return null;
        }
        String ieeeId = ids.get(FIRST);
        if (ieeeId == null) {
            return null;
        }
        return ABOUT_JOURNAL_URL + ieeeId;
    }

    /**
     * Collects the description from the siblings that follow the {@code #AimsScope} element, up to
     * (not including) the first sibling whose text starts with "Persistent Link". Non-blank
     * fragments are joined with {@code <br/>}.
     *
     * @return the description, or {@code null} when nothing was found
     */
    private String parseAbstractIeeeNew(org.jsoup.nodes.Document document) {
        Element heading = document.select("#AimsScope").first();
        if (heading == null) {
            return null;
        }
        StringBuilder description = new StringBuilder();
        boolean needsSeparator = false;
        for (Element sibling = heading.nextElementSibling(); sibling != null; sibling = sibling.nextElementSibling()) {
            String text = sibling.text().trim();
            if (text.startsWith("Persistent Link")) {
                break;
            }
            if (StringUtils.isBlank(text)) {
                continue;
            }
            if (needsSeparator) {
                description.append("<br/>");
            }
            description.append(renderDescriptionFragment(sibling));
            needsSeparator = true;
        }
        if (description.length() == 0) {
            return null;
        }
        String result = description.toString();
        LOGGER.debug("Harvested IEEE description: {}", result);
        return result;
    }

    /**
     * Renders one description element: lists keep their outer HTML, paragraphs keep their inner
     * HTML (both with unsupported tags stripped), everything else is reduced to plain text.
     */
    private String renderDescriptionFragment(Element element) {
        String tagName = element.tagName();
        if (ArrayUtils.contains(WELL_FORMATTED_TAGS, tagName)) {
            return removeUnsupportedHtmlTags(element.outerHtml());
        }
        if (ArrayUtils.contains(WELL_FORMATTED_INNER_TAGS, tagName)) {
            return removeUnsupportedHtmlTags(element.html());
        }
        return element.text();
    }

    /**
     * Removes every start and end tag listed in {@link #TAGS_TO_REMOVE} from the given HTML.
     * Only the tags themselves are removed; text between them is left in place.
     */
    private String removeUnsupportedHtmlTags(String str) {
        Source source = new Source(str);
        source.fullSequentialParse();
        OutputDocument outputDocument = new OutputDocument(source);
        for (Tag tag : source.getAllTags()) {
            if (TAGS_TO_REMOVE.contains(tag.getName().toLowerCase())) {
                outputDocument.remove(tag);
            }
        }
        return outputDocument.toString();
    }

    /**
     * Reads the impact factor from the journal-metrics box.
     *
     * @return text of the first matching {@code span.num}, or {@code null} when the page has none
     */
    private String prepareImpactFactor(org.jsoup.nodes.Document document) {
        // Selecting on an empty result simply yields another empty result, so no
        // intermediate checks are needed; first() is null when nothing matched.
        Element value = document.select("div[class=jrnl-metrics cf]")
                .select("a[class=metric bg-org]")
                .select("span[class=num]")
                .first();
        return value == null ? null : value.text();
    }

    /** Selects the list items that directly follow a "Subjects" heading. */
    private Elements prepareCategoriesAsString(org.jsoup.nodes.Document document) {
        return document.select("h2:contains(Subjects) + ul > li");
    }

    /**
     * Converts the subjects listed on the page into category references using the configured mapper.
     *
     * @return the mapper's result, or {@code null} when no mapper has been configured
     */
    private List<YCategoryRef> prepareCategories(org.jsoup.nodes.Document document) {
        if (this.mapper == null) {
            LOGGER.warn("No subject discipline mapper configured, categories are not harvested");
            return null;
        }
        List<String> subjects = new ArrayList<String>();
        for (Element subject : prepareCategoriesAsString(document)) {
            subjects.add(subject.text());
        }
        return this.mapper.convertSubjects(subjects);
    }

    /**
     * Sets the mapper used to convert subject names into category references.
     *
     * @param subjectDisciplineMapper mapper to use; {@code null} disables category harvesting
     */
    public void setMapper(SubjectDisciplineMapper subjectDisciplineMapper) {
        this.mapper = subjectDisciplineMapper;
    }
}
