Package org.vietspider.token.attribute

Examples of org.vietspider.token.attribute.Attributes


 
  private void createFullSingleLink(HTMLNode node,
      String nodeName, String attrName, URL home, ValueVerifier verifier)   {
    Attribute attr = null;
    if(node.isNode(nodeName) || (nodeName.length() == 1 || nodeName.charAt(0) == '*')) {
      Attributes attrs = node.getAttributes()
      int idx = attrs.indexOf(attrName);
      if(idx < 0return;
      attr = attrs.get(idx);
      String value = attr.getValue();
      if(verifier != null && !verifier.verify(value)) return;
      value  = urlCreator.createURL(home, value);     
      attr.setValue(value);     
      attrs.set(attr);
    }
  }
View Full Code Here


    NodeIterator iterator = root.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(!n.isNode(Name.META)) continue;
      if(n.isNode(Name.BODY)) break;
      Attributes attributes = n.getAttributes();
      Attribute attribute = attributes.get("http-equiv");
      if(attribute == null || attribute.getValue() == null) continue;

      if(!"content-type".equalsIgnoreCase(attribute.getValue().trim())) continue ;

      attribute = attributes.get("content");
      if(attribute == null) continue;
      String link = attribute.getValue();
      if(link == null) continue;
      int index = link.toLowerCase().indexOf("=");
View Full Code Here

    return list;
  }
 
  public Attribute getAttribute(HTMLNode node, String nodeName, String attrName){
    if(node.isNode(nodeName) || (nodeName.length() == 1 && nodeName.charAt(0) == '*')){
      Attributes attrs = node.getAttributes();  
      int idx = attrs.indexOf(attrName);
      if(idx > -1) return attrs.get(idx);
    }
    return null;
  }
View Full Code Here

    Set<String> keys = map.keySet();
    Iterator<String> iter = keys.iterator();
    while(iter.hasNext()){
      String key = iter.next();
      if(node.isNode(key) || (key.length() == 1 && key.charAt(0) == '*')){
        Attributes attrs = node.getAttributes();  
        int idx = attrs.indexOf(map.get(key));
        if(idx > -1) return attrs.get(idx);
      }
    }
    return null;
  }
View Full Code Here

    downloadResources(address, tokens, resources);

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG || !token.isNode(Name.A)) continue;
      Attributes attributes = token.getAttributes();
      Attribute attribute = attributes.get("href");
      if(attribute == null) continue;
      String link  = attribute.getValue();
      if(link == null || link.trim().length() < 1) continue;
      link  = urlUtils.createURL(parent, link);
      String subName = "";
      try {
        URL subUrl = new URL(link);
        String temp = subUrl.getPath();
        if(temp != null) subName += temp;
        temp = subUrl.getQuery();
        if(temp != null) subName += "_"+temp;
        subName = toName(subName);
      }catch (Exception e) {
      }
      if(subName == null || subName.trim().length() < 1) subName = toName(link);
      try{
        crawl(new URL(address), link, subName, level+1, depth);
      }catch (Exception e) {
        System.err.println(e);
      }
      attribute.setValue(subName+".html");
      attributes.set(attribute);
    }

   
    StringBuilder contentBuilder = new StringBuilder();
    for(int i = 0; i < tokens.size(); i++) {
View Full Code Here

      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG) continue;
      for(Resource resource : resources) {
        if(!token.isNode(resource.tag))  continue;
        try {
          Attributes attributes = token.getAttributes();
          for(Attribute attribute : attributes) {
            if(!attribute.getName().equalsIgnoreCase(resource.attr))  continue;
            String rscName = toName(attribute.getValue());
            File file = new File(folder, rscName);
            String link = urlUtils.createURL(url, attribute.getValue());
            System.out.println("downloading "+link+" ...");
            if(!file.exists()) loadResource(referer, link, file);
            attribute.setValue(rscName);
            attributes.set(attribute);
          }
        } catch (Exception e) {
          System.err.println(e);
        }
      }
View Full Code Here

    builder.append("style=\"width: 80px;\" type='text' ");
    builder.append(" src='resources/2D-47.gif'");
    node.setValue(builder.toString().toCharArray());
   
   
    Attributes attrs = AttributeParser.parse(node);
    for(Attribute attr : attrs){
      System.out.println(attr.getName()+" : "+attr.getValue());
    }
  }
View Full Code Here

public class RemoveAttribute {

  static void clean(HTMLNode node, Map<String, String[]> map){
    Iterator<String> iter = map.keySet().iterator();
    String key, value [];
    Attributes attributes = node.getAttributes();
    while(iter.hasNext()){
      key = iter.next();
      if(key.equals("*") || node.isNode(key)){
        value = map.get(key);
        for(String ele : value){
          if(attributes.contains(ele)){
            attributes.remove(ele);
          }
        }
      }
    }
    List<HTMLNode> children = node.getChildren();
View Full Code Here

   
    NodeIterator iterator = document.getRoot().iterator();
    while(iterator.hasNext()) {
      HTMLNode node = iterator.next();
      if(node.isNode(nodeName)) {
        Attributes attributes = node.getAttributes();
        Attribute attribute = attributes.get(attrName);
        if(attribute == null) continue;
        attrValues.add(attribute.getValue());
      }
    }
    return attrValues;
View Full Code Here

TOP

Related Classes of org.vietspider.token.attribute.Attributes

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.