Merge pull request #8125 from eugenp/revert-8119-BAEL-3275-2

Revert "BAEL-3275: Using blocking queue for pub-sub"
This commit is contained in:
Eric Martin
2019-10-31 20:43:47 -05:00
committed by GitHub
parent db85c8f275
commit 3225470df5
20543 changed files with 1642750 additions and 0 deletions
@@ -0,0 +1,36 @@
package com.baeldung.crawler4j;
import java.io.File;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
public class ImageCrawlerController {
public static void main(String[] args) throws Exception {
File crawlStorage = new File("src/test/resources/crawler4j");
CrawlConfig config = new CrawlConfig();
config.setCrawlStorageFolder(crawlStorage.getAbsolutePath());
config.setIncludeBinaryContentInCrawling(true);
config.setMaxPagesToFetch(500);
File saveDir = new File("src/test/resources/crawler4j");
int numCrawlers = 12;
PageFetcher pageFetcher = new PageFetcher(config);
RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
controller.addSeed("https://www.baeldung.com/");
CrawlController.WebCrawlerFactory<ImageCrawler> factory = () -> new ImageCrawler(saveDir);
controller.start(factory, numCrawlers);
}
}