Package org.vietspider.html.renderer

Examples of org.vietspider.html.renderer.TextRenderer


    HTMLExtractor extractor  = new HTMLExtractor();
   
    NodePath nodePath  = pathParser.toPath("BODY");
    HTMLNode body = extractor.lookNode(doc.getRoot(), nodePath);
   
    TextRenderer renderer = new TextRenderer(body, TextRenderer.HANDLER);
    String value = renderer.getTextValue().toString();
    return value.trim().split("\n");
  }
View Full Code Here


  }
 
  public List<HTMLNode> removeLinks(HTMLNode root, LinkNodeChecker linkNodeChecker) {
    List<HTMLNode> values = new ArrayList<HTMLNode>();
   
    TextRenderer renderer = new TextRenderer(root, TextRenderer.RENDERER);
    StringBuilder builder = renderer.getTextValue();
   
    int start = 0;
    if(!checkFromStart) start = builder.indexOf("\n\n");
    int end = builder.indexOf("\n\n", start+2);
   
    while(end > -1) {
      List<HTMLNode> nodes = handle(linkNodeChecker, renderer.getNodePositions(start, end));
      if(nodes != null) values.addAll(nodes);
      start = end;
      end = builder.indexOf("\n\n", start+2);
    }
    List<HTMLNode> nodes = handle(linkNodeChecker, renderer.getNodePositions(start, builder.length()));
    if(nodes != null) values.addAll(nodes);
   
    return values;
  }
View Full Code Here

//    }
    return searchUpper(parent, names);
  }
 
  public boolean isValidText(HTMLNode node, int size) {
    TextRenderer textRenderer = new TextRenderer(node, null);
    TextCounter counter = new TextCounter();
//    System.out.println(textRenderer.getTextValue());
//    System.out.println(counter.count(textRenderer.getTextValue()));
    if(counter.countSentence(textRenderer.getTextValue()) > size) return false;
    return true;
  }
View Full Code Here

TOP

Related Classes of org.vietspider.html.renderer.TextRenderer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.