Package org.vietspider.html.path2

Examples of org.vietspider.html.path2.NodePathParser.toPath()


        if(pageNode != null) {
          pagePath = "TABLE[0]";
          extractPaths.add(pathParser.toPath(pageNode).toString());
        }
       
        String threadPath = pathParser.toPath(node).toString();
       
        HTMLNode userNode = searchUserNode(node);
        if(userNode != null) {
          userPath = pathParser.toPath(userNode).toString();
         
View Full Code Here


       
        String threadPath = pathParser.toPath(node).toString();
       
        HTMLNode userNode = searchUserNode(node);
        if(userNode != null) {
          userPath = pathParser.toPath(userNode).toString();
         
          String path = userPath.substring(threadPath.length());
          int index = path.indexOf('[');
          if(index > -1) {
            path = path.substring(0, index+1) + "*" + path.substring(index+2, path.length());
View Full Code Here

        if(userPath == null) return;
       
       
        HTMLNode postNode = searchContentNode(node, "post_message");
        if(postNode != null) {
          postPath = pathParser.toPath(postNode).toString();
         
          String path = postPath.substring(threadPath.length());
          int index = path.indexOf('[');
          if(index > -1) {
            path = path.substring(0, index+1) + "*" + path.substring(index+2, path.length());
View Full Code Here

  private HTMLNode searchTitleNode(HTMLNode root, HTMLNode node) {
    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
    String title = "title";
    try {
      NodePath nodePath  = pathParser.toPath("HEAD.TITLE");
      HTMLNode titleNode = extractor.lookNode(root, nodePath);
      if(titleNode.hasChildren()) {
        title  = titleNode.getChild(0).getTextValue();
      }
    } catch (Exception e) {
View Full Code Here

  private String [] build(List<NodeImpl> tokens) throws Exception {
    HTMLDocument doc = new HTMLParser2().createDocument(tokens);
    NodePathParser pathParser = new NodePathParser();
    HTMLExtractor extractor  = new HTMLExtractor();
   
    NodePath nodePath  = pathParser.toPath("BODY");
    HTMLNode body = extractor.lookNode(doc.getRoot(), nodePath);
   
    TextRenderer renderer = new TextRenderer(body, TextRenderer.HANDLER);
    String value = renderer.getTextValue().toString();
    return value.trim().split("\n");
View Full Code Here

   

    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();

    NodePath nodePath  = pathParser.toPath("BODY");
    return extractor.lookNode(document.getRoot(), nodePath);
  }
 
  private ContentRenderer createContentRenderer(HTMLNode body) {
    List<HTMLNode> contents = searchNodes(body, Name.A);
View Full Code Here

   

    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();

    NodePath nodePath  = pathParser.toPath("BODY");
    return extractor.lookNode(document.getRoot(), nodePath);
  }
 
}
View Full Code Here

      path  = path.substring(0, path.length() - 2);
    } else  if(path.endsWith("[")) {
      path  = path.substring(0, path.length() - 1);
    }
    try {
      NodePath nodePath = pathParser.toPath(path);
      return extractor.lookNode(explorer.getDocument().getRoot(), nodePath);
    } catch (Exception e) {
    }
    return null;
  }
View Full Code Here

 
  public NodePath findNodeByText(HTMLNode node, String start, String end) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodePathParser pathParser = new NodePathParser();
    TextHandler textHandler = new TextHandler();
    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);
    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);
    if(end == null || end.trim().length() == 0) {
      return startNode != null ?  pathParser.toPath(startNode) : pathParser.toPath(node);
    }
View Full Code Here

    TextHandler textHandler = new TextHandler();
    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);
    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);
    if(end == null || end.trim().length() == 0) {
      return startNode != null ?  pathParser.toPath(startNode) : pathParser.toPath(node);
    }
    end = textHandler.trim(end);
   
    HTMLNode endNode = textHandler.findByText(node, end, decoder);   
    if(endNode == null)
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.