package pl.edu.icm.coansys.coansys.io.blog.crawler;

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import pl.edu.icm.coansys.coansys.io.blog.crawler.outputters.DiskTestOutputter;
import pl.edu.icm.coansys.coansys.io.blog.crawler.sitesCheckers.SciamBlogsProcessor;

/* loaded from: input_file:pl/edu/icm/coansys/coansys/io/blog/crawler/DiskBlogDownloader.class */
/**
 * Command-line launcher that wires a crawler4j {@link CrawlController} to {@link BlogCrawler}
 * and hands the crawler a {@link BlogCrawlConfig} whose outputter is a {@link DiskTestOutputter}.
 */
public class DiskBlogDownloader {
    /**
     * Site processors passed to the crawler through {@link BlogCrawlConfig}; the {@code domain}
     * of each one is also used to build a seed URL.
     */
    static SiteDataProcessor[] processors = {new SciamBlogsProcessor()};

    /**
     * Builds the controller, attaches the blog-specific configuration, registers one
     * {@code http://} seed per configured processor and starts the crawl.
     *
     * @param args command-line arguments; not read
     * @throws Exception propagated unchanged from controller construction or from the crawl setup
     */
    public static void main(String[] args) throws Exception {
        CrawlController controller = newController();

        // Custom data is how the processors and the outputter are made available to the crawler.
        BlogCrawlConfig blogSettings = new BlogCrawlConfig();
        blogSettings.processors = processors;
        blogSettings.outputter = new DiskTestOutputter();
        controller.setCustomData(blogSettings);

        SiteDataProcessor[] seedSources = processors;
        for (int i = 0; i < seedSources.length; i++) {
            controller.addSeed("http://" + seedSources[i].domain);
        }

        // Second argument is passed as 1 (crawler4j's crawler count parameter).
        controller.start(BlogCrawler.class, 1);
    }

    /**
     * Creates a controller whose crawl storage folder is {@code /tmp/crawl} with resumable
     * crawling switched on, using a default {@link RobotstxtConfig}.
     */
    private static CrawlController newController() throws Exception {
        CrawlConfig settings = new CrawlConfig();
        settings.setCrawlStorageFolder("/tmp/crawl");
        settings.setResumableCrawling(true);

        // The same fetcher instance is shared by the robots.txt server and the controller.
        PageFetcher fetcher = new PageFetcher(settings);
        RobotstxtConfig robotsSettings = new RobotstxtConfig();
        RobotstxtServer robotsServer = new RobotstxtServer(robotsSettings, fetcher);
        return new CrawlController(settings, fetcher, robotsServer);
    }
}
