Examples of org.htmlparser.lexer.Page

org.htmlparser.lexer.Page
Represents the contents of an HTML page. Contains the source of characters and an index of positions of line separators (actually the first character position on the next line).

    return getMatchingTags(filter).size();
  }


  private NodeList getMatchingTags(NodeFilter filter) throws Exception {
    String html = examiner.html();
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList list = parser.parse(null);
    NodeList matches = list.extractAllNodesThatMatch(filter, true);
    return matches;
  }

View Full Code Here

    return createSlimTables(tableScanner);
  }


  private NodeList makeNodeList(TestPage pageToTest) {
    String html = pageToTest.getHtml();
    Parser parser = new Parser(new Lexer(new Page(html)));
    try {
      return parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }

View Full Code Here

    if (page == null || page.equals(""))
      page = "<i>This page intentionally left blank.</i>";


    NodeList htmlTree;
    try {
      Parser parser = new Parser(new Lexer(new Page(page)));
      htmlTree = parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }
    scanForTables(htmlTree);

View Full Code Here

public class HtmlParserToolsTest {


  @Test
  public void shoudlMakeExactCopy() throws ParserException, CloneNotSupportedException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);


    NodeList cloneTree = deepClone(tree);


    assertEquals(html, cloneTree.toHtml());

View Full Code Here

  }


  @Test
  public void shouldAlsoCloneAttributes() throws ParserException, CloneNotSupportedException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);


    NodeList cloneTree = deepClone(tree);


    assertSame(Div.class, cloneTree.elementAt(0).getClass());

View Full Code Here

  }


  @Test
  public void flatCloneShouldJustGiveACopyOfANode() throws ParserException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);


    Node copy = flatClone(tree.elementAt(0));


    assertNull(copy.getParent());

View Full Code Here

    // jspExec is null for attribute rewrite tests.
    context.setJspExec(jspExec);
    
    // and finally, parse, using the special lexer that knows how to
    // handle javascript blocks containing unescaped HTML entities:
    Page lexPage = new Page(bais,charSet);
    Lexer lexer = new Lexer(lexPage);
    Lexer.STRICT_REMARKS = false;
      ContextAwareLexer lex = new ContextAwareLexer(lexer, context);


      Node node;

View Full Code Here

          "<a href=\"http://example.com/api?a=1&amp;b=2&c=3&#34;\">anchor</a>" +
          "</body>" +
          "</html>";
      byte[] bytes = html.getBytes();
      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
      Page page = new Page(bais, "UTF-8");
      Lexer lexer = new Lexer(page);
      Node node;
      while ((node = lexer.nextNode()) != null) {
        if (node instanceof Tag) {
          Tag tag = (Tag)node;

View Full Code Here

    public void testHtmlParser_CDATA() throws Exception {
      final String html = "<![CDATA[aaaa\nbbbb]]>";


      byte[] bytes = html.getBytes();
      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
      Page page = new Page(bais, "UTF-8");
      Lexer lexer = new Lexer(page);
      Node node;


      node = lexer.nextNode();
      // HTMLParser returns CDATA section as TagNode

View Full Code Here

    context.setJspExec(jspExec);




    // and finally, parse, using the special lexer that knows how to
    // handle javascript blocks containing unescaped HTML entities:
    Page lexPage = new Page(decodedResource,charSet);
    Lexer lexer = new Lexer(lexPage);
    Lexer.STRICT_REMARKS = false;
    ContextAwareLexer lex = new ContextAwareLexer(lexer, context);
    Node node;
    try {

View Full Code Here

0 1 2 3

TOP

Related Classes of org.htmlparser.lexer.Page

com.google.gdt.eclipse.designer.util.Utils

fitnesse.fixtures.PageDriver

fitnesse.testsystems.slim.HtmlSlimTestSystem

fitnesse.testsystems.slim.HtmlTableScanner

fitnesse.util.HtmlParserToolsTest

net.sf.regain.crawler.preparator.HtmlPreparator

org.apache.isis.viewer.scimpi.dispatcher.processor.HtmlFileParser

org.archive.wayback.archivalurl.ArchivalUrlSAXRewriteReplayRenderer

org.archive.wayback.archivalurl.FastArchivalUrlReplayParseEventHandlerTest

org.archive.wayback.resourcestore.indexer.HTTPRecordAnnotater

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.