// Downloader and parser controllers; the same two instances are handed to
// every crawler created below.
DefaultDownloaderController downloaderController = new DefaultDownloaderController();
DefaultParserController parserController = new DefaultParserController();

// Configuration object that collects the crawlers and the per-site limits
CrawlerConfiguration configuration = new CrawlerConfiguration();

// Register five crawlers (one per worker thread, five threads in total)
for (int i = 5; i > 0; i--) {
    DefaultCrawler exampleCrawler = new ExampleCrawler();
    exampleCrawler.setParserController(parserController);
    exampleCrawler.setDownloaderController(downloaderController);
    configuration.addCrawler(exampleCrawler);
}

// Per-site throttling: at most one parallel request to a single site, and
// a period of 500 milliseconds between two requests to the same site
configuration.setMaxParallelRequests(1);
configuration.setPolitenessPeriod(500);

// HTTP error limits (502 Bad Gateway, 408 Request Timeout). Once a site
// violates one of these limits, the crawler stops processing that site.
configuration.setMaxHttpErrors(HttpURLConnection.HTTP_BAD_GATEWAY, 10);
configuration.setMaxHttpErrors(HttpURLConnection.HTTP_CLIENT_TIMEOUT, 10);

// Build the crawler controller from the finished configuration and give it
// the seed URL
CrawlerController crawlerController = new CrawlerController(configuration);
crawlerController.addSeed(new URL("http://en.wikipedia.org/"));