package pl.edu.icm.coansys.coansys.io.blog.crawler;

import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.url.WebURL;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:pl/edu/icm/coansys/coansys/io/blog/crawler/BlogCrawler.class */
/**
 * Crawler that only follows links whose host matches the domain of one of the
 * configured {@link SiteDataProcessor}s, and hands pages recognised as site
 * entries to the configured outputter.
 *
 * <p>The configuration is read from the controller's custom data in
 * {@link #onStart()}; it must be a {@link BlogCrawlConfig}.
 */
public class BlogCrawler extends WebCrawler {
    /** Crawl configuration; assigned in {@link #onStart()} from the controller's custom data. */
    BlogCrawlConfig config;
    private static final Logger LOGGER = LoggerFactory.getLogger(BlogCrawler.class);

    /**
     * URLs ending in one of these extensions are never visited. The pattern is
     * applied to the lower-cased URL, so the match is effectively case-insensitive.
     * NOTE(review): the original list contained "mp3" twice; the duplicate was
     * dropped, but a different extension may have been intended - confirm.
     */
    private static final Pattern FILTERS = Pattern.compile(".*(\\.(css|js|gif|jpg|png|mp3|zip|gz|jpeg))$");

    /**
     * Runs the superclass start-up hook, then loads the crawl configuration
     * from the controller's custom data.
     */
    public void onStart() {
        super.onStart();
        this.config = (BlogCrawlConfig) this.myController.getCustomData();
    }

    /**
     * Decides whether a discovered link should be crawled.
     *
     * @param page   the page on which the link was found (not used here)
     * @param webURL the candidate link
     * @return {@code true} only if the URL does not end with a filtered
     *         extension and its host equals (ignoring case) the domain of one
     *         of the configured processors; {@code false} otherwise, including
     *         when the URL cannot be parsed
     */
    public boolean shouldVisit(Page page, WebURL webURL) {
        try {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale
            // (e.g. Turkish, where 'I' would become a dotless 'ı' and defeat the filter).
            String href = webURL.getURL().toLowerCase(Locale.ROOT);
            URL url = new URL(href);
            String host = url.getHost();
            if (FILTERS.matcher(href).matches() || host == null) {
                return false;
            }
            for (SiteDataProcessor siteDataProcessor : this.config.processors) {
                if (host.equalsIgnoreCase(siteDataProcessor.domain)) {
                    return true;
                }
            }
            return false;
        } catch (MalformedURLException e) {
            LOGGER.error(e.getMessage(), e);
            return false;
        }
    }

    /**
     * Processes a fetched page: every configured processor that recognises the
     * page's URL as a site entry extracts data from the page, and the result is
     * passed to the configured outputter. A failure in one processor is logged
     * and does not stop the remaining processors.
     *
     * @param page the fetched page
     */
    public void visit(Page page) {
        WebURL pageUrl = page.getWebURL();
        for (SiteDataProcessor siteDataProcessor : this.config.processors) {
            if (!siteDataProcessor.isSiteEntry(pageUrl)) {
                continue;
            }
            // Report progress through the logger rather than stdout.
            LOGGER.info("Processing url:{}", pageUrl.getURL());
            try {
                this.config.outputter.outputBlog(pageUrl, siteDataProcessor.processPageFromSite(page));
            } catch (Exception e) {
                // Boundary catch: extraction/output errors must not abort the crawl.
                LOGGER.error(e.getMessage(), e);
            }
        }
    }
}
