Package org.htmlcleaner

Examples of org.htmlcleaner.TagNode


    props.setAllowHtmlInsideAttributes( false );
    props.setPruneTags( "style, script" );

    HtmlCleaner cleaner = new HtmlCleaner( props );
    try {
      TagNode node = cleaner.clean( new StringReader( content ) );

      TagNode[] nodes = node.getElementsByName( "a", true );
      for (TagNode tagnode : nodes) {
        tagnode.removeAttribute( "target" );
        tagnode.addAttribute( "target", "_blank" );
      }
      nodes = node.getElementsByName( "img", true );
      for (TagNode tagnode : nodes) {
        tagnode.removeAttribute( "src" );
        tagnode.addAttribute( "src", "NO_IMAGE" );
      }
      System.out.println( node.getText() );

      return cleaner.getInnerHtml( node );

    } catch (IOException e) {
View Full Code Here


    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";

    try {
      TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

      TagNode[] nodes = rootNode.getElementsByName( "a", true );
      if ( nodes != null && nodes.length > 0 ) {
        for (TagNode tagnode : nodes) {
          tagnode.removeAttribute( "target" );
          tagnode.addAttribute( "target", "_blank" );
        }
      }

      nodes = rootNode.getElementsByName( "img", true );
      if ( nodes != null && nodes.length > 0 ) {
        hasImages[0] = true;
        if ( !loadImages ) {
          for (TagNode tagnode : nodes) {
            tagnode.removeAttribute( "src" );
View Full Code Here

  public static String convertHtml2PlainText( String htmlText ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );

    try {
      TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
      return rootNode.getText().toString();
    }
    catch (IOException e) {
      log.error( e.getMessage(), e );
    }
View Full Code Here

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";

    try {
      TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

      TagNode[] nodes = rootNode.getElementsByName( "a", true );
      if ( nodes != null && nodes.length > 0 ) {
        for (TagNode tagnode : nodes) {
          tagnode.removeAttribute( "target" );
          tagnode.addAttribute( "target", "_blank" );
        }
      }

      nodes = rootNode.getElementsByName( "img", true );
      if ( nodes != null && nodes.length > 0 ) {
        hasImages[0] = true;
        if ( !loadImages ) {
          for (TagNode tagnode : nodes) {
            tagnode.removeAttribute( "src" );
View Full Code Here

  public static String convertHtml2PlainText( String htmlText ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );

    try {
      TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
      return rootNode.getText().toString();
    }
    catch (IOException e) {
      log.error( e.getMessage(), e );
    }
View Full Code Here

 
  /* (non-Javadoc)
   * @see org.apache.wookie.util.html.IHtmlProcessor#injectScript(java.lang.String)
   */
  public void injectScript(String script) {
    TagNode js = new TagNode(SCRIPT_TAG);
    js.addAttribute(TYPE_ATTRIBUTE, TYPE_ATTRIBUTE_VALUE);
    js.addAttribute(SRC_ATTRIBUTE, script);
    headNode.addChild(js);
  }
View Full Code Here

  /* (non-Javadoc)
   * @see org.apache.wookie.util.html.IHtmlProcessor#injectStylesheet(java.lang.String)
   */
  public void injectStylesheet(String stylesheet) {
    TagNode js = new TagNode(LINK_TAG);
    js.addAttribute(TYPE_ATTRIBUTE, CSS_TYPE_ATTRIBUTE_VALUE);
    js.addAttribute(REL_ATTRIBUTE, CSS_REL_ATTRIBUTE_VALUE);
    js.addAttribute(HREF_ATTRIBUTE, stylesheet);
    headNode.addChild(js);
  }
View Full Code Here

   
    //
    // Check if the page already has a META http-equiv=content-type tag,
    // if it doesn't create one and add it to the head node
    //
    TagNode meta = headNode.findElementByAttValue("http-equiv", "content-type", true, false);
    if (meta == null) {
      meta = new TagNode(META_TAG);
      meta.addAttribute("http-equiv", "Content-Type");
      headNode.getChildren().add(0, meta);
    }
    //
    // Force UTF into lowercase
    //
    if (charset.equals("UTF-8")) charset = "utf-8";
   
    //
    // Override the charset and content-type values for the
    // META http-equiv=content-type tag
    //
    meta.addAttribute("content", type + ";charset=" + charset);
  }
View Full Code Here

      //
      // Check for charset overrides in the HTML start page
      //
      HtmlCleaner cleaner = new HtmlCleaner();
      TagNode httpEquivNode = cleaner.clean(get.getResponseBodyAsStream()).findElementByAttValue("http-equiv", "content-type", true, false);
      if (httpEquivNode != null && httpEquivNode.hasAttribute("content")){
        String value = httpEquivNode.getAttributeByName("content");
        int offset = value.indexOf("charset=");
        if (offset >= -1){
            charset = value.substring(offset+8).toUpperCase();
        }
      }
View Full Code Here

    
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setOmitComments(true);
    
    final TagNode tagNode = new HtmlCleaner(props).clean(xml);
    final String cleansData = new CompactXmlSerializer(props).getAsString(tagNode);
    
    final SAXParserFactory spf = SAXParserFactory.newInstance();
      spf.setNamespaceAware(true);
     
View Full Code Here

TOP

Related Classes of org.htmlcleaner.TagNode

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.