// Set connection manager parameters
ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
connPerRouteBean.setDefaultMaxPerRoute(2);
cmpb.setConnectionsPerRoute(connPerRouteBean);
DroidsHttpClient httpclient = new DroidsHttpClient(params);
HttpProtocol httpProtocol = new HttpProtocol(httpclient);
protocolFactory.setMap(new HashMap<String, Object>());
protocolFactory.getMap().put("http", httpProtocol);
protocolFactory.getMap().put("https", httpProtocol);
// Create URL filter factory.
URLFiltersFactory filtersFactory = new URLFiltersFactory();
RegexURLFilter defaultURLFilter = new RegexURLFilter();
defaultURLFilter.setFile("classpath:/regex-urlfilter.txt");
filtersFactory.setMap(new HashMap<String, Object>());
filtersFactory.getMap().put("default", defaultURLFilter);
// Create handler factory. Provide sysout handler only.
HandlerFactory handlerFactory = new HandlerFactory();
SysoutHandler defaultHandler = new SysoutHandler();
handlerFactory.setMap(new HashMap<String, Object>());
handlerFactory.getMap().put("default", defaultHandler);
// Create droid factory. Leave it empty for now.
DroidFactory<Link> droidFactory = new DroidFactory<Link>();
droidFactory.setMap(new HashMap<String, Object>());
// Create default droid
SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
simpleDelayTimer.setDelayMillis(100);
Queue<Link> simpleQueue = new LinkedList<Link>();
SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
taskMaster.setDelayTimer( simpleDelayTimer );
taskMaster.setExceptionHandler( new DefaultTaskExceptionHandler() );
CrawlingDroid helloCrawler = new SysoutCrawlingDroid( simpleQueue, taskMaster );
helloCrawler.setFiltersFactory(filtersFactory);
helloCrawler.setParserFactory(parserFactory);
helloCrawler.setProtocolFactory(protocolFactory);
Collection<String> initialLocations = new ArrayList<String>();
initialLocations.add( targetURL );
helloCrawler.setInitialLocations(initialLocations);
// Initialize and start the crawler
helloCrawler.init();
helloCrawler.start();
// Await termination
helloCrawler.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
// Shut down the HTTP connection manager
httpclient.getConnectionManager().shutdown();
}