Package org.vietspider.html.parser

Examples of org.vietspider.html.parser.NodeImpl


      nextPath = nextPath.substring(commonPath.length());
      nextPath = nextPath.substring(1, nextPath.indexOf('.', 1));
      int nextIdx = Integer.parseInt(nextPath);

      HTMLNode newCommonNode =
        new NodeImpl(commonNode.getValue(), commonNode.getName(), TypeToken.TAG);
      List<HTMLNode> children = commonNode.getChildren();
      for (i = idx; i < nextIdx; i++) {
        newCommonNode.addChild(children.get(i));
      }
      return newCommonNode;
    } catch (Exception e) {
      // LogService.getInstance().setMessage(e.toString());
      return root;
View Full Code Here


      // if(idx > 1) path = path.substring(2, idx);
      // if(path.charAt(0) == '.') path = path.substring(1);
      // if(path.trim().isEmpty()) return root;
      idx = Integer.parseInt(path);

      HTMLNode newCommonNode = new NodeImpl(commonNode.getValue(), commonNode.getName(),
          TypeToken.TAG);
      List<HTMLNode> children = commonNode.getChildren();
      for (i = idx; i < children.size(); i++) {
        newCommonNode.addChild(children.get(i));
        // children.get(i).setParent(newCommonNode);
      }
      return newCommonNode;
    } catch (Exception e) {
      StringBuilder builder = new StringBuilder();
View Full Code Here

  @Deprecated()
  public List<String> scanScriptLink(List<NodeImpl> tokens) {
    IdentifierAttributeHandler handler = new IdentifierAttributeHandler(null, null,  "*", "onclick");
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      handler.handle(token);
    }
   
    List<String> list = handler.getValues();
    handler = new IdentifierAttributeHandler(list, jsVerifier, "a", "href");
   
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      handler.handle(token);
    }
   
    return list;
  }
View Full Code Here

 
  public synchronized List<String> scanSiteLink(List<NodeImpl> tokens) {
    LinkAttributeHandler handler = new LinkAttributeHandler(null, siteLinkVerifier, linkAttributeMap);
//    MapAttributeHandler handler = new MapAttributeHandler(null, siteLinkVerifier, linkAttributeMap);
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      handler.handle(token);
    }
    return handler.getValues();
  }
View Full Code Here

    }
  }
 
  public  synchronized void createFullNormalLink(List<NodeImpl> tokens, URL home) {
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl nodeImpl = tokens.get(i);
      if(!nodeImpl.isTag()) continue;
      createFullSingleLink(nodeImpl, pageAttributeFullMap, home, normalLinkVerifier);
    }
  }
View Full Code Here

  public TextRendererBak (HTMLNode root, List<HTMLNode> contents, int type, boolean constain) {
    List<HTMLNode> tokens =  new ArrayList<HTMLNode>();
//    root.buildTokens(tokens);

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = (NodeImpl)tokens.get(i);
      switch (node.getName()) {
      case CONTENT:
        char [] chars = node.getValue();
        if(!isEmpty(chars)) {
          if(isValid(contents, node, constain)) {
            int start = builder.length();
            for(int k = 0; k < chars.length; k++) {
              builder.append(chars[k] == '\n' ? ' ' : chars[k]);
            }
            int end = builder.length();
            positions.add(new NodePosition(node, start, end));
          }
        }
        break;
      case IMG:
        positions.add(new NodePosition(node, -1, -1));
        break;
      case H1:
      case H2:
      case H3:
      case H4:
      case H5:
      case H6:
      case TR:
      case TABLE:
      case TD:
      case P:
      case DIV:
      case BR:
        if(!isEndWithNewLine(builder)) {
          builder.append('\n');
          if(type == RENDERER) builder.append('\n');
        }
        break;
      case SCRIPT:
      case STYLE:
        if(node.getType() == TypeToken.TAG) i++;
        break;
      default:
        if(builder.length() > 0) {
          char c = builder.charAt(builder.length()-1);
          if(!(Character.isWhitespace(c)
View Full Code Here

  }
 
  public List<String> scanScriptLink(List<NodeImpl> tokens) {
    List<String> values  = new ArrayList<String>();
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      getAttributes(token, values, "*", "onclick", null);
      values.addAll(getAttributes(token, null, "a", "href", jsVerifier));
    }
    return values;
  }
View Full Code Here

  }
 
  public synchronized List<String> getSiteLink(List<NodeImpl> tokens) {
    List<String> values  = new ArrayList<String>();
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      getAttributes(token, values, linkAttributeMap, siteLinkVerifier);      
    }
    return values;
  }
View Full Code Here

//    resources.add(new Resource("link", "href"));
//    resources.add(new Resource("script", "src"));
    downloadResources(address, tokens, resources);

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG || !token.isNode(Name.A)) continue;
      Attributes attributes = token.getAttributes();
      Attribute attribute = attributes.get("href");
      if(attribute == null) continue;
      String link  = attribute.getValue();
      if(link == null || link.trim().length() < 1) continue;
      link  = urlUtils.createURL(parent, link);
View Full Code Here

    builder.append('>');
  }

  public void downloadResources(String referer, List<NodeImpl> tokens, List<Resource> resources) {
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG) continue;
      for(Resource resource : resources) {
        if(!token.isNode(resource.tag))  continue;
        try {
          Attributes attributes = token.getAttributes();
          for(Attribute attribute : attributes) {
            if(!attribute.getName().equalsIgnoreCase(resource.attr))  continue;
            String rscName = toName(attribute.getValue());
            File file = new File(folder, rscName);
            String link = urlUtils.createURL(url, attribute.getValue());
View Full Code Here

TOP

Related Classes of org.vietspider.html.parser.NodeImpl

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.