Package com.crawljax.core

Source Code of com.crawljax.core.CandidateElementExtractorTest

package com.crawljax.core;

import static org.hamcrest.collection.IsCollectionWithSize.hasSize;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import org.junit.After;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.runners.MockitoJUnitRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.crawljax.browser.EmbeddedBrowser;
import com.crawljax.browser.WebDriverBrowserBuilder;
import com.crawljax.condition.ConditionTypeChecker;
import com.crawljax.condition.crawlcondition.CrawlCondition;
import com.crawljax.condition.eventablecondition.EventableConditionChecker;
import com.crawljax.core.configuration.CrawljaxConfiguration;
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
import com.crawljax.core.plugin.Plugins;
import com.crawljax.core.state.StateMachine;
import com.crawljax.core.state.StateVertex;
import com.crawljax.forms.FormHandler;
import com.crawljax.test.BrowserTest;
import com.crawljax.test.RunWithWebServer;
import com.google.common.io.Resources;

@Category(BrowserTest.class)
@RunWith(MockitoJUnitRunner.class)
public class CandidateElementExtractorTest {

  private static final Logger LOG = LoggerFactory
          .getLogger(CandidateElementExtractorTest.class);
  private static final StateVertex DUMMY_STATE = StateMachine.createIndex("http://localhost",
          "", "");

  @Mock
  private Plugins plugins;

  @ClassRule
  public static final RunWithWebServer DEMO_SITE_SERVER = new RunWithWebServer("/demo-site");
  private EmbeddedBrowser browser;

  @After
  public void tearDown() {
    if (browser != null) {
      browser.closeOtherWindows();
      browser.close();
    }
  }

  @Test
  public void testExtract() throws InterruptedException, CrawljaxException {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().clickOnce(true);
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());
    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);

    assertNotNull(candidates);
    assertEquals(15, candidates.size());

  }

  private CandidateElementExtractor newElementExtractor(CrawljaxConfiguration config) {
    browser = new WebDriverBrowserBuilder(config, plugins).get();
    FormHandler formHandler = new FormHandler(browser, config.getCrawlRules());

    EventableConditionChecker eventableConditionChecker =
            new EventableConditionChecker(config.getCrawlRules());
    ConditionTypeChecker<CrawlCondition> crawlConditionChecker =
            new ConditionTypeChecker<>(config.getCrawlRules().getPreCrawlConfig()
                    .getCrawlConditions());
    ExtractorManager checker =
            new CandidateElementManager(eventableConditionChecker, crawlConditionChecker);
    CandidateElementExtractor extractor =
            new CandidateElementExtractor(checker, browser, formHandler, config);

    return extractor;
  }

  @Test
  public void testExtractExclude() throws Exception {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().dontClick("div").withAttribute("id", "menubar");
    builder.crawlRules().clickOnce(true);
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);

    assertNotNull(candidates);
    assertThat(candidates, hasSize(11));

  }

  @Test
  public void testExtractIframeContents() throws Exception {
    RunWithWebServer server = new RunWithWebServer("/site");
    server.before();
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration
            .builderFor(server.getSiteUrl().toExternalForm() + "iframe/");
    builder.crawlRules().click("a");
    CrawljaxConfiguration config = builder.build();

    CandidateElementExtractor extractor = newElementExtractor(config);
    browser.goToUrl(new URL(server.getSiteUrl().toExternalForm() + "iframe/"));
    List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);

    for (CandidateElement e : candidates) {
      LOG.debug("candidate: " + e.getUniqueString());
    }

    server.after();

    assertNotNull(extractor);
    assertNotNull(candidates);
    assertThat(candidates, hasSize(9));

  }

  @Test
  public void whenNoFollowExternalUrlDoNotFollow() throws IOException {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor("http://example.com");
    builder.crawlRules().click("a");
    CrawljaxConfiguration config = builder.build();
    CandidateElementExtractor extractor = newElementExtractor(config);

    List<CandidateElement> extract = extractFromTestFile(extractor);

    assertThat(config.getCrawlRules().followExternalLinks(), is(false));
    assertThat(extract, hasSize(2));
  }

  @Test
  public void whenFollowExternalUrlDoFollow() throws IOException {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor("http://example.com");
    builder.crawlRules().click("a");
    builder.crawlRules().followExternalLinks(true);
    CrawljaxConfiguration config = builder.build();
    CandidateElementExtractor extractor = newElementExtractor(config);

    List<CandidateElement> extract = extractFromTestFile(extractor);

    assertThat(config.getCrawlRules().followExternalLinks(), is(true));
    assertThat(extract, hasSize(3));
  }

  private List<CandidateElement> extractFromTestFile(CandidateElementExtractor extractor) {
    StateVertex currentState = Mockito.mock(StateVertex.class);
    String file = "/candidateElementExtractorTest/domWithOneExternalAndTwoInternal.html";
    URL dom = Resources.getResource(getClass(), file);
    browser.goToUrl(dom);
    List<CandidateElement> extract = extractor.extract(currentState);
    return extract;
  }

}
TOP

Related Classes of com.crawljax.core.CandidateElementExtractorTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.