*
* @throws IOException
* when the output folder cannot be created or emptied.
*/
public static void main(String[] args) throws IOException {
CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
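// Do not fill form inputs with random data; rely on the InputSpecification set below instead.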
builder.crawlRules().insertRandomDataInInputForms(false);
// Click the default clickable elements, as well as these extra ones
builder.crawlRules().clickDefaultElements();
builder.crawlRules().click("div").withAttribute("class", "clickable");
// but don't click these
builder.crawlRules().dontClick("a").withAttribute("class", "ignore");
builder.crawlRules().dontClick("a").underXPath("//DIV[@id='footer']");
// Set timeouts
builder.crawlRules().waitAfterReloadUrl(WAIT_TIME_AFTER_RELOAD, TimeUnit.MILLISECONDS);
builder.crawlRules().waitAfterEvent(WAIT_TIME_AFTER_EVENT, TimeUnit.MILLISECONDS);
// Add a crawl condition: only crawl states in which this XPath matches no element
builder.crawlRules().addCrawlCondition("No spans with foo as class",
new NotXPathCondition("//*[@class='foo']"));
// Provide fixed input values for specific form fields
builder.crawlRules().setInputSpec(getInputSpecification());
// Write the results to the output directory, emptying it first if it already exists.
File outFolder = new File("output");
if (outFolder.exists()) {
FileUtils.deleteDirectory(outFolder);
}
builder.setOutputDirectory(outFolder);
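// The CrawlOverview plugin writes an HTML overview of the crawled states to that directory.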
builder.addPlugin(new CrawlOverview());
// We want to use two browsers simultaneously.
builder.setBrowserConfig(new BrowserConfiguration(BrowserType.FIREFOX, 2));
CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
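// call() runs the crawl and blocks until it has finished.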
crawljax.call();
}