Package com.crawljax.core.configuration

Examples of com.crawljax.core.configuration.CrawljaxConfiguration$CrawljaxConfigurationBuilder


    CrawlSpecification spec = new CrawlSpecification(Utils.URL);
    spec.setMaximumStates(5);
    spec.clickDefaultElements();

    CrawljaxConfiguration config = new CrawljaxConfiguration();
    config.setBrowser(BrowserType.firefox);
    config.setCrawlSpecification(spec);

    SaveCrawlSession saveCrawlSessionPlugin = new SaveCrawlSession(Utils.SESSION_XML);
    saveCrawlSessionPlugin.setOutputFolder(Utils.OUTPUTFOLDER);
    config.addPlugin(saveCrawlSessionPlugin);

    try {
      CrawljaxController controller = new CrawljaxController(config);
      controller.run();
    } catch (ConfigurationException e) {
View Full Code Here


  private MetricRegistry registry;

  @Before
  public void setup() {
    registry = new MetricRegistry();
    CrawljaxConfiguration config = CrawljaxConfiguration.builderFor("http://localhost")
            .addPlugin(domChange, browserCreatedPlugin,
                    fireEventFailedPlugin, invariantViolationPlugin, newStatePlugin,
                    onRevisitStatePlugin,
                    urlLoadPlugin, postCrawlingPlugin, prestatePlugin).build();
    plugins = new Plugins(config, registry);
View Full Code Here

    when(formHandlerFactory.newFormHandler(browser)).thenReturn(formHandler);
    url = new URL("http://example.com");
    when(browser.getCurrentUrl()).thenReturn(url.toExternalForm());
    when(sessionProvider.get()).thenReturn(session);

    CrawljaxConfiguration config = Mockito.spy(CrawljaxConfiguration.builderFor(url).build());
    stateComparator = new StateComparator(config.getCrawlRules());

    when(extractor.extract(target)).thenReturn(ImmutableList.of(action));
    when(graphProvider.get()).thenReturn(graph);

    context =
View Full Code Here

  public void testIframeExclusions() throws CrawljaxException {
    CrawljaxConfigurationBuilder builder = setupConfig();
    builder.crawlRules().dontCrawlFrame("frame1");
    builder.crawlRules().dontCrawlFrame("sub");
    builder.crawlRules().dontCrawlFrame("frame0");
    CrawljaxConfiguration config = builder.build();
    crawljax = new CrawljaxRunner(config);
    CrawlSession session = crawljax.call();
    assertThat(session.getStateFlowGraph(), hasEdges(3));
    assertThat(session.getStateFlowGraph(), hasStates(4));
  }
View Full Code Here

  public void testExtract() throws InterruptedException, CrawljaxException {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().clickOnce(true);
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());
    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);
View Full Code Here

    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().dontClick("div").withAttribute("id", "menubar");
    builder.crawlRules().clickOnce(true);
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);
View Full Code Here

    server.before();
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration
                    .builderFor(server.getSiteUrl().toExternalForm() + "iframe/");
    builder.crawlRules().click("a");
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(new URL(server.getSiteUrl().toExternalForm() + "iframe/"));
    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);
View Full Code Here

   * Make sure the OnNewStatePlugin is executed.
   */
  @Test
  public void testOnNewStatePlugin() {
    hit = false;
    CrawljaxConfiguration config = CrawljaxConfiguration.builderFor(
            "http://localhost").addPlugin(new OnNewStatePlugin() {

      @Override
      public void onNewState(CrawlerContext context, StateVertex state) {
        hit = true;
View Full Code Here

  public static void main(String[] args) {

    try {
      // configure the crawling engine
      CrawljaxConfiguration config = getConfig();

      // add your plugin
      config.addPlugin(new SamplePlugin());

      // initialize and run Crawljax
      CrawljaxController crawljax = new CrawljaxController(config);
      crawljax.run();
View Full Code Here

  /**
   * Configure Crawljax to crawl Google.
   */
  private static CrawljaxConfiguration getConfig() {
    CrawljaxConfiguration config = new CrawljaxConfiguration();
    config.setBrowser(BrowserType.firefox);

    CrawlSpecification crawler = new CrawlSpecification("http://www.google.com");
    crawler.setWaitTimeAfterEvent(500);
    crawler.setWaitTimeAfterReloadUrl(500);

    // click on all anchor tags
    crawler.click("a");
    // and all input tags with "submit"
    crawler.click("input").withAttribute("type", "submit");

    // exclude these
    crawler.dontClick("a").underXPath("//DIV[@id='guser']");
    crawler.dontClick("a").withText("Language Tools");

    InputSpecification inputSpec = new InputSpecification();
    inputSpec.field("q").setValue("Crawljax");
    crawler.setInputSpecification(inputSpec);

    // Constrain the crawl to Google (no other web sites)
    crawler.addCrawlCondition("Only crawl Google", new UrlCondition("google"));

    // limit the crawling scope
    crawler.setMaximumStates(6);
    crawler.setDepth(2);

    config.setCrawlSpecification(crawler);

    return config;
  }
View Full Code Here

TOP

Related Classes of com.crawljax.core.configuration.CrawljaxConfiguration$CrawljaxConfigurationBuilder

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.