Package org.vietspider.html.path2

Examples of org.vietspider.html.path2.DocumentExtractor


        if(document != null) handler.createTreeItem(tree, document);
        new AutoSelectDataNode2(document, url, handler, tree);
       
        if(paths.length < 1) {
          HTMLExtractor extractor  = new HTMLExtractor();
          NodePathParser pathParser = new NodePathParser();
          if(hyperlinkUtil == null) hyperlinkUtil = new HyperLinkUtil();
          HTMLNode header = null;
          HTMLNode body = null;
          try {
            NodePath nodePath  = pathParser.toPath("HEAD");
            header = extractor.lookNode(document.getRoot(), nodePath);
            nodePath  = pathParser.toPath("BODY");
            body = extractor.lookNode(document.getRoot(), nodePath);
          } catch (Exception e) {
            ClientLog.getInstance().setException(getShell(), e);
          }
         
View Full Code Here


    txtPath.setText(txt);
   
    treeAddButton.computeShowArea(item);
   
    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
   
    try {
      NodePath nodePath  = pathParser.toPath(txt);
      HTMLNode node = extractor.lookNode(document.getRoot(), nodePath);
      if(node == null) return;
      if(node.isNode(Name.CONTENT)
          || node.isNode(Name.COMMENT)
          || node.isNode(Name.UNKNOWN)) {
        browser.setText(node.getTextValue());
        return;
      }
     
      NodePath headerPath  = pathParser.toPath("HEAD");
      HTMLNode header = extractor.lookNode(document.getRoot(), headerPath);
   
      if(toolbar.isShowAll()) {
        if(hyperlinkUtil == null) hyperlinkUtil = new HyperLinkUtil();
        try {
          String address = toolbar.getText();
          URL home = new URL(address);
         
          hyperlinkUtil.createFullNormalLink(node, home);
          hyperlinkUtil.createFullImageLink(node, home);
          HashMap<String, String> map = new HashMap<String,String>();
          map.put("link","href");
          map.put("script","src");
          hyperlinkUtil.createFullLink(header, map, home, null);
        } catch(MalformedURLException me) {
        } catch (Exception e) {
          ClientLog.getInstance().setException(getShell(), e);
        }
      }
     
      StringBuilder builder = new StringBuilder();
      builder.append("<html>");
      builder.append("<head>");
      if(toolbar.isShowAll() && header != null && header.getChildren() != null) {
        for(HTMLNode ele : header.getChildren()){
          builder.append(ele.getTextValue()).append('\n');
        }
      }
//      String baseHref = HTMLExplorer.class.getResource("").toString();
//      builder.append("<base href=\""+baseHref+"\">");
      if(node.isNode(Name.BODY)) {
        HTMLNode body = null;
        try {
          nodePath  = pathParser.toPath("BODY");
          body = extractor.lookNode(document.getRoot(), nodePath);
        } catch (Exception e) {
          ClientLog.getInstance().setException(getShell(), e);
        }
       
View Full Code Here

  void viewItem(){
    TreeItem[] items = tree.getSelection();
    if( items == null || items.length  < 1) return;
    int x = getShell().getLocation().x+120, y = getShell().getLocation().y +130;
    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
    for(TreeItem item : items) {
      String pathIndex = handler.getConfig(item);
      try {
        NodeInfoViewer viewer = new NodeInfoViewer(getShell(), x, y);
        NodePath nodePath = pathParser.toPath(pathIndex);
        HTMLNode node = extractor.lookNode(document.getRoot(), nodePath);
        viewer.setNode(node);  
        x += 10;
        y += 10;
        nodeViewers.add(viewer);
View Full Code Here


  public List<String> traverseTree(int style, String[] paths) throws Exception {
//    String [] paths = cboPath.getItems();
    List<String> removePaths = new ArrayList<String>();
    NodePathParser pathParser = new NodePathParser();
    if(paths != null) {
      short selectType = PathConfirmDialog.YES;
      for(String path : paths) {
        NodePath nodePath = pathParser.toPath(path);
        selectType = handler.traverseTree(tree, nodePath, path, style, selectType);
        if(selectType == PathConfirmDialog.YES
            || selectType == PathConfirmDialog.YES_TO_ALL) {
          removePaths.add(path);
        }
View Full Code Here

      extractPaths.add(pathParser.toPath(list.get(i)).toString());
    }
  }
 
  private void searchDataNode(HTMLNode root) {*/
    NodePathParser pathParser = new NodePathParser();
    NodeIterator iterator = root.iterator();
  
    while(iterator.hasNext()) {
      HTMLNode node = iterator.next();
      if(!node.isNode(Name.DIV)) continue;
      Attributes attributes = node.getAttributes();
      Attribute attribute = attributes.get("id");
      if(attribute == null) continue;
      String value = attribute.getValue();
      if(value == null) continue;
     
      if("posts".equalsIgnoreCase(value)) {
        HTMLNode titleNode = searchTitleNode(root, node);
        if(titleNode != null) {
          titlePath = titleNode.getName()+"[0]";
          extractPaths.add(pathParser.toPath(titleNode).toString());
        }
       
        HTMLNode pageNode = searchPageNode(node);
        if(pageNode == null) pageNode = searchPageNode2(root);
        if(pageNode != null) {
          pagePath = "TABLE[0]";
          extractPaths.add(pathParser.toPath(pageNode).toString());
        }
       
        String threadPath = pathParser.toPath(node).toString();
       
        HTMLNode userNode = searchUserNode(node);
        if(userNode != null) {
          userPath = pathParser.toPath(userNode).toString();
         
          String path = userPath.substring(threadPath.length());
          int index = path.indexOf('[');
          if(index > -1) {
            path = path.substring(0, index+1) + "*" + path.substring(index+2, path.length());
          }
          if(titleNode != null && titleNode.isNode(Name.DIV)) {
            userPath = "DIV[1]" + path;
          } else {
            userPath = "DIV[0]" + path;
          }
          index = userPath.lastIndexOf("TD[");
          if(index > 0) {
            int end = userPath.indexOf(']', index);
            if(end > 0) {
              userPath = userPath.substring(0, index+3) + "i<2"+ userPath.substring(end);
            }
          }
        }
        if(userPath == null) return;
       
       
        HTMLNode postNode = searchContentNode(node, "post_message");
        if(postNode != null) {
          postPath = pathParser.toPath(postNode).toString();
         
          String path = postPath.substring(threadPath.length());
          int index = path.indexOf('[');
          if(index > -1) {
            path = path.substring(0, index+1) + "*" + path.substring(index+2, path.length());
View Full Code Here

    return upParent(node.getParent(), names);
  }
 
  private HTMLNode searchTitleNode(HTMLNode root, HTMLNode node) {
    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
    String title = "title";
    try {
      NodePath nodePath  = pathParser.toPath("HEAD.TITLE");
      HTMLNode titleNode = extractor.lookNode(root, nodePath);
      if(titleNode.hasChildren()) {
        title  = titleNode.getChild(0).getTextValue();
      }
    } catch (Exception e) {
View Full Code Here

    afters = build(tokens);
  }
 
  private String [] build(List<NodeImpl> tokens) throws Exception {
    HTMLDocument doc = new HTMLParser2().createDocument(tokens);
    NodePathParser pathParser = new NodePathParser();
    HTMLExtractor extractor  = new HTMLExtractor();
   
    NodePath nodePath  = pathParser.toPath("BODY");
    HTMLNode body = extractor.lookNode(doc.getRoot(), nodePath);
   
    TextRenderer renderer = new TextRenderer(body, TextRenderer.HANDLER);
    String value = renderer.getTextValue().toString();
    return value.trim().split("\n");
View Full Code Here

      chars =  java.text.Normalizer.normalize(new String(chars), Normalizer.Form.NFC).toCharArray();
      node.setValue(chars);             
   

    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();

    NodePath nodePath  = pathParser.toPath("BODY");
    return extractor.lookNode(document.getRoot(), nodePath);
  }
View Full Code Here

      chars =  java.text.Normalizer.normalize(new String(chars), Normalizer.Form.NFC).toCharArray();
      node.setValue(chars);             
   

    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();

    NodePath nodePath  = pathParser.toPath("BODY");
    return extractor.lookNode(document.getRoot(), nodePath);
  }
View Full Code Here

    if(popup != null) popup.dispose();
  }*/
 
  private HTMLNode createNode(String path) {
    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
    if(path.endsWith("[.")) {
      path  = path.substring(0, path.length() - 2);
    } else  if(path.endsWith("[")) {
      path  = path.substring(0, path.length() - 1);
    }
    try {
      NodePath nodePath = pathParser.toPath(path);
      return extractor.lookNode(explorer.getDocument().getRoot(), nodePath);
    } catch (Exception e) {
    }
    return null;
  }
View Full Code Here

TOP

Related Classes of org.vietspider.html.path2.DocumentExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.