Package org.htmlparser.lexer

Examples of org.htmlparser.lexer.Page


    return getMatchingTags(filter).size();
  }

  private NodeList getMatchingTags(NodeFilter filter) throws Exception {
    String html = examiner.html();
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList list = parser.parse(null);
    NodeList matches = list.extractAllNodesThatMatch(filter, true);
    return matches;
  }
View Full Code Here


    return createSlimTables(tableScanner);
  }

  private NodeList makeNodeList(TestPage pageToTest) {
    String html = pageToTest.getHtml();
    Parser parser = new Parser(new Lexer(new Page(html)));
    try {
      return parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }
View Full Code Here

    if (page == null || page.equals(""))
      page = "<i>This page intentionally left blank.</i>";

    NodeList htmlTree;
    try {
      Parser parser = new Parser(new Lexer(new Page(page)));
      htmlTree = parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }
    scanForTables(htmlTree);
View Full Code Here

public class HtmlParserToolsTest {

  @Test
  public void shoudlMakeExactCopy() throws ParserException, CloneNotSupportedException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);

    NodeList cloneTree = deepClone(tree);

    assertEquals(html, cloneTree.toHtml());
View Full Code Here

  }

  @Test
  public void shouldAlsoCloneAttributes() throws ParserException, CloneNotSupportedException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);

    NodeList cloneTree = deepClone(tree);

    assertSame(Div.class, cloneTree.elementAt(0).getClass());
View Full Code Here

  }

  @Test
  public void flatCloneShouldJustGiveACopyOfANode() throws ParserException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);

    Node copy = flatClone(tree.elementAt(0));

    assertNull(copy.getParent());
View Full Code Here

    // jspExec is null for attribute rewrite tests.
    context.setJspExec(jspExec);
   
    // and finally, parse, using the special lexer that knows how to
    // handle javascript blocks containing unescaped HTML entities:
    Page lexPage = new Page(bais,charSet);
    Lexer lexer = new Lexer(lexPage);
    Lexer.STRICT_REMARKS = false;
      ContextAwareLexer lex = new ContextAwareLexer(lexer, context);

      Node node;
View Full Code Here

          "<a href=\"http://example.com/api?a=1&amp;b=2&c=3&#34;\">anchor</a>" +
          "</body>" +
          "</html>";
      byte[] bytes = html.getBytes();
      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
      Page page = new Page(bais, "UTF-8");
      Lexer lexer = new Lexer(page);
      Node node;
      while ((node = lexer.nextNode()) != null) {
        if (node instanceof Tag) {
          Tag tag = (Tag)node;
View Full Code Here

    public void testHtmlParser_CDATA() throws Exception {
      final String html = "<![CDATA[aaaa\nbbbb]]>";

      byte[] bytes = html.getBytes();
      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
      Page page = new Page(bais, "UTF-8");
      Lexer lexer = new Lexer(page);
      Node node;

      node = lexer.nextNode();
      // HTMLParser returns CDATA section as TagNode
View Full Code Here

    context.setJspExec(jspExec);


    // and finally, parse, using the special lexer that knows how to
    // handle javascript blocks containing unescaped HTML entities:
    Page lexPage = new Page(decodedResource,charSet);
    Lexer lexer = new Lexer(lexPage);
    Lexer.STRICT_REMARKS = false;
    ContextAwareLexer lex = new ContextAwareLexer(lexer, context);
    Node node;
    try {
View Full Code Here

TOP

Related Classes of org.htmlparser.lexer.Page

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.