Package org.vietspider.html

Examples of org.vietspider.html.HTMLNode


    }
  }

  public void searchNodes(NodeIterator iterator, List<HTMLNode> nodes, Name name) {
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(name)) nodes.add(n);
    }
  } 
View Full Code Here


  }


  public void autoSelect(HTMLDocument document, String url) throws Exception {
    ContentRegionSearcher2 searcher = new ContentRegionSearcher2();
    HTMLNode nodes = searcher.extractContent(document, url, false);

    NodePathParser pathParser = new NodePathParser();
    //    for(int i = 0; i < nodes.size(); i++) {
    NodePath path = pathParser.toPath(nodes);  
    if(path == null) return;
View Full Code Here

       
        if(paths.length < 1) {
          HTMLExtractor extractor  = new HTMLExtractor();
          NodePathParser pathParser = new NodePathParser();
          if(hyperlinkUtil == null) hyperlinkUtil = new HyperLinkUtil();
          HTMLNode header = null;
          HTMLNode body = null;
          try {
            NodePath nodePath  = pathParser.toPath("HEAD");
            header = extractor.lookNode(document.getRoot(), nodePath);
            nodePath  = pathParser.toPath("BODY");
            body = extractor.lookNode(document.getRoot(), nodePath);
View Full Code Here

    HTMLExtractor extractor  = new HTMLExtractor();
    NodePathParser pathParser = new NodePathParser();
   
    try {
      NodePath nodePath  = pathParser.toPath(txt);
      HTMLNode node = extractor.lookNode(document.getRoot(), nodePath);
      if(node == null) return;
      if(node.isNode(Name.CONTENT)
          || node.isNode(Name.COMMENT)
          || node.isNode(Name.UNKNOWN)) {
        browser.setText(node.getTextValue());
        return;
      }
     
      NodePath headerPath  = pathParser.toPath("HEAD");
      HTMLNode header = extractor.lookNode(document.getRoot(), headerPath);
   
      if(toolbar.isShowAll()) {
        if(hyperlinkUtil == null) hyperlinkUtil = new HyperLinkUtil();
        try {
          String address = toolbar.getText();
          URL home = new URL(address);
         
          hyperlinkUtil.createFullNormalLink(node, home);
          hyperlinkUtil.createFullImageLink(node, home);
          HashMap<String, String> map = new HashMap<String,String>();
          map.put("link","href");
          map.put("script","src");
          hyperlinkUtil.createFullLink(header, map, home, null);
        } catch(MalformedURLException me) {
        } catch (Exception e) {
          ClientLog.getInstance().setException(getShell(), e);
        }
      }
     
      StringBuilder builder = new StringBuilder();
      builder.append("<html>");
      builder.append("<head>");
      if(toolbar.isShowAll() && header != null && header.getChildren() != null) {
        for(HTMLNode ele : header.getChildren()){
          builder.append(ele.getTextValue()).append('\n');
        }
      }
//      String baseHref = HTMLExplorer.class.getResource("").toString();
//      builder.append("<base href=\""+baseHref+"\">");
      if(node.isNode(Name.BODY)) {
        HTMLNode body = null;
        try {
          nodePath  = pathParser.toPath("BODY");
          body = extractor.lookNode(document.getRoot(), nodePath);
        } catch (Exception e) {
          ClientLog.getInstance().setException(getShell(), e);
View Full Code Here

    for(TreeItem item : items) {
      String pathIndex = handler.getConfig(item);
      try {
        NodeInfoViewer viewer = new NodeInfoViewer(getShell(), x, y);
        NodePath nodePath = pathParser.toPath(pathIndex);
        HTMLNode node = extractor.lookNode(document.getRoot(), nodePath);
        viewer.setNode(node);  
        x += 10;
        y += 10;
        nodeViewers.add(viewer);
      } catch(Exception exp) {
View Full Code Here

//    System.out.println("==================");
//    System.out.println(value.indexOf(txt));
    if(value.indexOf(txt) < 0) return null;
    List<HTMLNode> children  = node.getChildrenNode();
    for(HTMLNode ele : children){
      HTMLNode newNode = findByText(ele, txt, decoder);
      if(newNode != null) return newNode;
    }
    return node;
  }
View Full Code Here

  public StringBuilder getTextContent(HTMLNode node){
    StringBuilder value = new StringBuilder();
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.CONTENT)) {
        value.append(n.getValue());   
      }   
    }
    /*if(node.getConfig().name() == Name.CONTENT){
      value.append(node.getValue());   
    }   
View Full Code Here

    URL url = new URL("http://java.sun.com/");
    HTMLDocument document = new HTMLParser2().createDocument(url.openStream(), "utf-8");
   
    NodeIterator iterator =  document.getRoot().iterator();
    while(iterator.hasNext()) {
      HTMLNode node = iterator.next();
      if(node.isNode(Name.SCRIPT)) {
        if(node.hasChildren() && node.getChildren().size() > 0) {
          System.out.println("===================================================");
          System.out.println(node.getChild(0).getTextValue());
        }
        continue;
      }
      Attributes attributes = node.getAttributes();
      for(int i = 0; i < attributes.size(); i++) {
        String value = attributes.get(i).getValue();
        if(attributes.get(i).getName().startsWith("on")
            || value.toLowerCase().startsWith("javascript")) {
          System.out.println("===================================================");
View Full Code Here

 
  public static synchronized void updateDocument(
      HTMLDocument document, List<String> jsDocWriters) throws Exception {

    List<HTMLNode> jsScripts = new ArrayList<HTMLNode>();
    HTMLNode root = document.getRoot();
    if(root.isNode(Name.SCRIPT)) {
      jsScripts.add(root);
    } else {
      searchScriptNode(root.iterator(), jsScripts);
    }
   
    for(int i = 0 ; i < jsScripts.size(); i++) {
      List<HTMLNode> childen = jsScripts.get(i).getChildren();
      if(childen.size() < 1) continue;
View Full Code Here

      replace(jsScripts.get(i), new HTMLParser2().createDocument(builder.toString()));
    }
  }
 
  private static void replace(HTMLNode script, HTMLDocument doc) {
    HTMLNode parent = script.getParent();
    if(parent == null) return;
    List<HTMLNode> childen = parent.getChildren();
    if(childen ==  null) return;
    for(int i = 0; i < childen.size(); i++) {
      if(childen.get(i) != script) continue;
      List<HTMLNode> values = getNodes(doc);
      if(values.size() > 0) {
        parent.setChild(i, values.get(0));
//        childen.set(i, values.get(0));
        for(int k = 1; k < values.size(); k++) {
          parent.addChild(i+k, values.get(k));
//          childen.add(i+k, values.get(k));
        }
      }
     
      return;
View Full Code Here

TOP

Related Classes of org.vietspider.html.HTMLNode

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.