Package org.htmlcleaner

Examples of org.htmlcleaner.CleanerProperties


      try {
        createHtmlCleanerIfNeeded();

        TagNode node = cleaner.clean(new ByteArrayInputStream(field.getBytes()));

        Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
        XPath xpa = XPathFactory.newInstance().newXPath();       
       
        NodeList res = (NodeList)xpa.evaluate(script, doc, XPathConstants.NODESET);

        if (0 == res.getLength()) { // No match, just return "", unlike regex we don't want anything if we don't match...
View Full Code Here


 
  private void createHtmlCleanerIfNeeded()
  {
    if (null == cleaner) {
      cleaner = new HtmlCleaner();
      CleanerProperties props = cleaner.getProperties();
      props.setAllowHtmlInsideAttributes(true);
      props.setAllowMultiWordAttributes(true);
      props.setRecognizeUnicodeChars(true);
      props.setOmitComments(true);
      props.setTreatUnknownTagsAsContent(false);
      props.setTranslateSpecialEntities(true);
      props.setTransResCharsToNCR(true);
      props.setNamespacesAware(false);
    }   
  }
View Full Code Here

  private void init() {
   
    // Initialize HTMLCleaner
    cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);
    props.setNamespacesAware(false);
   
    // Initialize DomSerializer
    domSerializer = new DomSerializer(props);
   
    // Initialize xml parser   
View Full Code Here

    }
  }

  private static String htmlCleaner( String content ) {

    CleanerProperties props = new CleanerProperties();
    props.setUseCdataForScriptAndStyle( false );
    props.setAllowHtmlInsideAttributes( false );
    props.setPruneTags( "style, script" );

    HtmlCleaner cleaner = new HtmlCleaner( props );
    try {
      TagNode node = cleaner.clean( new StringReader( content ) );
View Full Code Here

   
    return res;
  }
 
  public static void processXml(final ContentHandler handler, final String xml) throws IOException {
    final CleanerProperties props = new CleanerProperties();
    
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setOmitComments(true);
    
    final TagNode tagNode = new HtmlCleaner(props).clean(xml);
    final String cleansData = new CompactXmlSerializer(props).getAsString(tagNode);
    
    final SAXParserFactory spf = SAXParserFactory.newInstance();
View Full Code Here

     *
     * @return The root Node of the resulting DOM
     */
    public static Node markupAsDOM(WicketTester tester) throws ParserConfigurationException
    {
        CleanerProperties props = new CleanerProperties();
        props.setNamespacesAware(false);
       
        HtmlCleaner cleaner = new HtmlCleaner(props);
        return new DomSerializer(props, true).createDOM(cleaner.clean(document(tester)));
    }
View Full Code Here

    private Logger log = LoggerFactory.getLogger(CleanHtmlFunction.class);

    public CleanHtmlFunction() {
        this.cleaner = new HtmlCleaner();
        CleanerProperties p = cleaner.getProperties();
        p.setOmitComments(true);
        p.setTranslateSpecialEntities(true);
        p.setTransResCharsToNCR(true);

        // remove all tags that contain uninteresting content
        p.setPruneTags("style,script,form,object,audio,video");
    }
View Full Code Here

    private Logger log = LoggerFactory.getLogger(CleanHtmlFunction.class);

    public CleanHtmlFunction() {
        this.cleaner = new HtmlCleaner();
        CleanerProperties p = cleaner.getProperties();
        p.setOmitComments(true);
        p.setTranslateSpecialEntities(true);
        p.setTransResCharsToNCR(true);

        // remove all tags that contain uninteresting content
        p.setPruneTags("style,script,form,object,audio,video");
    }
View Full Code Here

      throw new IOException(e);
    }

    // Try to convert html to xhtml
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties xhtmlProperties = cleaner.getProperties();
    TagNode xhtmlNode = cleaner.clean(html);
    if (xhtmlNode == null) {
      logger.warn("Error creating well-formed document from page {}", resource);
      return;
    }
View Full Code Here

TOP

Related Classes of org.htmlcleaner.CleanerProperties

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.