Package org.htmlparser.visitors

Examples of org.htmlparser.visitors.TextExtractingVisitor


     */
    public String extractText(String htmlPath) {
        String result = "";
        try {
            Parser parser = new Parser (htmlPath);
            TextExtractingVisitor visitor = new TextExtractingVisitor ();
            parser.visitAllNodesWith (visitor);
            result = visitor.getExtractedText();

        } catch (ParserException e1) {
        }
        return result;
    }
View Full Code Here


    if(htmlCode == null){
      throw new NullPointerException("Input HTML code string is NULL");
    }
   
    Parser parser = Parser.createParser(htmlCode,"UTF-8");
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    try {
      parser.visitAllNodesWith(visitor);
      return visitor.getExtractedText();
    } catch (ParserException e) {
      logger.debug("HTML parsing error: " + htmlCode, e);
    }
    return "";
  }
View Full Code Here

    }

    public void testSimpleVisit() throws Exception
    {
        createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World",
            visitor.getExtractedText());
    }
View Full Code Here

    public void testSimpleVisitWithRegisteredScanners() throws Exception
    {
        createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
        parser.registerScanners();
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World",
            visitor.getExtractedText());
    }
View Full Code Here

    }

    public void testVisitHtmlWithSpecialChars() throws Exception
    {
        createParser("<BODY>Hello World&nbsp;&nbsp;</BODY>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World  ",
            visitor.getExtractedText());
    }
View Full Code Here

    }

    public void testVisitHtmlWithPreTags() throws Exception
    {
        createParser("Some text with &nbsp;<pre>this &nbsp; should be preserved</pre>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Some text with  this &nbsp; should be preserved",
            visitor.getExtractedText());
    }
View Full Code Here

    super(name);
  }

  public void testSimpleVisit() throws Exception {
    createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    parser.visitAllNodesWith(visitor);
    assertStringEquals("extracted text", "Hello World", visitor.getExtractedText());
  }
View Full Code Here

  }

  public void testSimpleVisitWithRegisteredScanners() throws Exception {
    createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
    parser.registerScanners();
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    parser.visitAllNodesWith(visitor);
    assertStringEquals("extracted text", "Hello World", visitor.getExtractedText());
  }
View Full Code Here

    assertStringEquals("extracted text", "Hello World", visitor.getExtractedText());
  }

  public void testVisitHtmlWithSpecialChars() throws Exception {
    createParser("<BODY>Hello World&nbsp;&nbsp;</BODY>");
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    parser.visitAllNodesWith(visitor);
    assertStringEquals("extracted text", "Hello World  ", visitor.getExtractedText());
  }
View Full Code Here

    assertStringEquals("extracted text", "Hello World  ", visitor.getExtractedText());
  }

  public void testVisitHtmlWithPreTags() throws Exception {
    createParser("Some text with &nbsp;<pre>this &nbsp; should be preserved</pre>");
    TextExtractingVisitor visitor = new TextExtractingVisitor();
    parser.visitAllNodesWith(visitor);
    assertStringEquals("extracted text", "Some text with  this &nbsp; should be preserved", visitor
        .getExtractedText());
  }
View Full Code Here

TOP

Related Classes of org.htmlparser.visitors.TextExtractingVisitor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.