Package net.htmlparser.jericho

Examples of net.htmlparser.jericho.Renderer$AlternateTextElementHandler


   * @param html
   * @return
   */
  public static String html2text(String html) {

    Renderer renderer = new Renderer(new Source(html)) {

      {
        setIncludeHyperlinkURLs(true);
      }

      @Override
      public String renderHyperlinkURL(StartTag startTag) {
        final String href = startTag.getAttributeValue("href");
        if (href == null || href.equals("#") || href.startsWith("javascript:")) {
          return null;
        }
        // TODO customize?
        return '<' + href + '>';
      }

    };

    String text = renderer.toString();

    String newLine = renderer.getNewLine();

    // strip leading new lines
    while (text != (text = StringUtils.stripLeading(text, newLine))) {
    }

View Full Code Here


    }
    return output.toString();
  }

  private String cleanHTML(String input) {
    Renderer renderer=new Source(input).getRenderer();

    // We don't want it to be wrapped
    renderer.setMaxLineLength(Integer.MAX_VALUE);
    renderer.setIncludeHyperlinkURLs(false);
   
    return renderer.toString();
  }
View Full Code Here

     * @return
     */
    public static String htmlToText(String html) {
        Source htmlSource = new Source(html);
        Segment htmlSeg = new Segment(htmlSource, 0, htmlSource.length());
        Renderer htmlRend = new Renderer(htmlSeg);
        return htmlRend.toString();
    }
View Full Code Here

    {
        final Source source = new Source ( html );
        final Tag[] tags = source.fullSequentialParse ();
        if ( tags.length > 0 )
        {
            final Renderer renderer = source.getRenderer ();
            renderer.setIncludeHyperlinkURLs ( false );
            renderer.setIncludeAlternateText ( false );
            renderer.setDecorateFontStyles ( false );
            renderer.setMaxLineLength ( Integer.MAX_VALUE );
            renderer.setBlockIndentSize ( 4 );
            renderer.setConvertNonBreakingSpaces ( false );
            renderer.setNewLine ( lineSeparator );
            return renderer.toString ();
        }
        else
        {
            return html;
        }
View Full Code Here

    }
   
    // Extract text from the source, nicely formatted with whitespace and
    // newlines where appropriate.
    private String renderHTMLAsPlainText(Source source) {
        Renderer renderer = source.getRenderer();
        renderer.setNewLine("\n");
        renderer.setIncludeHyperlinkURLs(false);
        renderer.setDecorateFontStyles(false);
        renderer.setIncludeAlternateText(false);
        return renderer.toString();
    }
View Full Code Here

TOP

Related Classes of net.htmlparser.jericho.Renderer$AlternateTextElementHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.