Package org.htmlcleaner

Examples of org.htmlcleaner.CleanerProperties


        // 1. get standard map
        // 2. goto (x,y)
//        String page = HttpServer.getHttpServer().getMapPage( coordX, coordY);
       
        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setRecognizeUnicodeChars( true);
        TagNode node = cleaner.clean( new File("tests/karte.htm"));
//        TagNode node = cleaner.clean( page);

        return getSquareUrl2( node);
    }
View Full Code Here


        // 1. get overview/innerview pantalla actual
//        String page =  HttpServer.getHttpServer().httpGetPage( url.toString().replace( "&", "&"));
        Thread.sleep( (long) (1000 + 2000*Math.random()));
       
        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setRecognizeUnicodeChars( true);
        TagNode node = cleaner.clean( new File("tests/karte_village.htm"));
//        TagNode node = cleaner.clean( page);

        return getSquareType2( node);

View Full Code Here

  {   
    FileWriter writer = null;
    writer = new FileWriter("resultats", false);
    String ajout = new String();
     
    CleanerProperties props = new CleanerProperties();
    props.setAdvancedXmlEscape(true);
    props.setTransResCharsToNCR(true);
    props.setTranslateSpecialEntities(true);
    props.setTransSpecialEntitiesToNCR(true);
   
    HtmlCleaner cleaner = new HtmlCleaner(props);
     
    TagNode node = cleaner.clean(new URL(lien));
    //System.out.println("Title: " + ((TagNode)(node.evaluateXPath("//title")[0])).getText());
View Full Code Here

    }
  }
 
  public static void parser_site_spe(String lien) throws MalformedURLException, IOException, XPatherException
  {
    CleanerProperties props = new CleanerProperties();
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setTransSpecialEntitiesToNCR(true);
    props.setOmitComments(true);
     
    HtmlCleaner cleaner = new HtmlCleaner(props);

    TagNode node = cleaner.clean(new URL(lien));
   
View Full Code Here

  public static void parser_final (String lien) throws MalformedURLException, IOException, XPatherException
  {       
    FileWriter writer = null;
    writer = new FileWriter("resultats_final", false);
     
    CleanerProperties props = new CleanerProperties();
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setTransSpecialEntitiesToNCR(true);
    props.setOmitComments(true);
     
    HtmlCleaner cleaner = new HtmlCleaner(props);
     
    int i=0;
    TagNode node = cleaner.clean(new URL(lien));
View Full Code Here

    @Override
    public String select(String text) {
        try {
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            TagNode tagNode = htmlCleaner.clean(text);
            Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
            Object result;
            try {
                result = xPathExpression.evaluate(document, XPathConstants.NODESET);
            } catch (XPathExpressionException e) {
                result = xPathExpression.evaluate(document, XPathConstants.STRING);
View Full Code Here

    public List<String> selectList(String text) {
        List<String> results = new ArrayList<String>();
        try {
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            TagNode tagNode = htmlCleaner.clean(text);
            Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
            Object result;
            try {
                result = xPathExpression.evaluate(document, XPathConstants.NODESET);
            } catch (XPathExpressionException e) {
                result = xPathExpression.evaluate(document, XPathConstants.STRING);
View Full Code Here

  /**
   * Charset name {@code "Windows-1251"}. Presumably the encoding assumed for HTML input when
   * none is given explicitly; it is not referenced in the visible excerpt, so confirm against
   * its callers.
   */
  public static final String DEFAULT_HTML_INPUT_ENCODING = "Windows-1251";
 
  public static List<TOCReference> parseHhc(InputStream hhcFile, Resources resources) throws IOException, ParserConfigurationException,  XPathExpressionException {
    HtmlCleaner htmlCleaner = new HtmlCleaner();
    CleanerProperties props = htmlCleaner.getProperties();
    TagNode node = htmlCleaner.clean(hhcFile);
    Document hhcDocument = new DomSerializer(props).createDOM(node);
    XPath xpath = XPathFactory.newInstance().newXPath();
    Node ulNode = (Node) xpath.evaluate("body/ul", hhcDocument
        .getDocumentElement(), XPathConstants.NODE);
View Full Code Here

    this.htmlCleaner = createHtmlCleaner();
  }

  private static HtmlCleaner createHtmlCleaner() {
    HtmlCleaner result = new HtmlCleaner();
    CleanerProperties cleanerProperties = result.getProperties();
    cleanerProperties.setOmitXmlDeclaration(true);
    cleanerProperties.setOmitDoctypeDeclaration(false);
    cleanerProperties.setRecognizeUnicodeChars(true);
    cleanerProperties.setTranslateSpecialEntities(false);
    cleanerProperties.setIgnoreQuestAndExclam(true);
    cleanerProperties.setUseEmptyElementTags(false);
    return result;
  }
View Full Code Here

         
          TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes()));
         
          //NewCode : Only use html cleaner for cleansing
          //use JAXP for full Xpath lib
          Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
         
 
          String extraRegex = extractRegexFromXpath(xpath);
 
          if (extraRegex != null)
View Full Code Here

TOP

Related Classes of org.htmlcleaner.CleanerProperties

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.