Source Code of com.ontometrics.scraper.extraction.TextExtractorTest

package com.ontometrics.scraper.extraction;


import static com.ontometrics.scraper.HtmlSample.ProgramDetailPage;
import static com.ontometrics.scraper.extraction.HtmlExtractor.html;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;


import net.htmlparser.jericho.Source;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class TextExtractorTest {


  private static final Logger log = LoggerFactory.getLogger(TextExtractorTest.class);


  private String eligibilityClassName = "fld_applicant_eligibility";


  @Test
  public void getTextFromElement() {
    String text = new TextExtractor().source(
        html().url(ProgramDetailPage.getUrl()).ofClass(eligibilityClassName, 1)).getText();


    log.info("text from class: {} is: {}", eligibilityClassName, text);


  }


    @Test
    public void canGetSourceThenChainWithHtmlExtractor(){
        Source source = html().url(ProgramDetailPage.getUrl()).getSource();


        log.info("source: {}", source.toString());
        assertThat(source, notNullValue());


        String text = new TextExtractor().source(html().source(source).ofClass(eligibilityClassName, 1)).getText();


        log.info("text extracted from prefetched source: {}", text);


        assertThat(text, is("Applicant Eligibilty: 39-Anyone/general public/ 52-Libraries/lnformation/Statistics"));


    }


  @Test
  public void canGetAttributeOfElement() {
    String className = "lst-pp";
    Object attributeName = "onchange";
    String text = new TextExtractor()
        .source(html()
            .url(ProgramDetailPage.getUrl())
            .ofClass(eligibilityClassName, 1)
            .attribute(attributeName))
        .getText();


    log.info("attribute {} from class: {} is: {}", new Object[] { className, attributeName, text });


  }


}
Source Code of com.ontometrics.scraper.extraction.TextExtractorTest

Related Classes of com.ontometrics.scraper.extraction.TextExtractorTest