Package org.vietspider.html.parser

Examples of org.vietspider.html.parser.NodeImpl$NodeIteratorImpl


//    resources.add(new Resource("link", "href"));
//    resources.add(new Resource("script", "src"));
    downloadResources(address, tokens, resources);

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG || !token.isNode(Name.A)) continue;
      Attributes attributes = token.getAttributes();
      Attribute attribute = attributes.get("href");
      if(attribute == null) continue;
      String link  = attribute.getValue();
      if(link == null || link.trim().length() < 1) continue;
      link  = urlUtils.createURL(parent, link);
View Full Code Here


    builder.append('>');
  }

  public void downloadResources(String referer, List<NodeImpl> tokens, List<Resource> resources) {
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl token = tokens.get(i);
      if(token.getType() != TypeToken.TAG) continue;
      for(Resource resource : resources) {
        if(!token.isNode(resource.tag))  continue;
        try {
          Attributes attributes = token.getAttributes();
          for(Attribute attribute : attributes) {
            if(!attribute.getName().equalsIgnoreCase(resource.attr))  continue;
            String rscName = toName(attribute.getValue());
            File file = new File(folder, rscName);
            String link = urlUtils.createURL(url, attribute.getValue());
View Full Code Here

  public List<Form> searchForm(List<NodeImpl>  tokens) {
    List<Form> forms = new ArrayList<Form>();
   
    Form form = null;
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
     
      if(node.isNode(Name.FORM)) {
//        System.out.println("== > thay "+ node + " : "+ node.isOpen()  );
        if(node.getType() == TypeToken.TAG) {
          if(form != null) forms.add(form);
          form = new Form();
          Attributes attributes = node.getAttributes();
          Attribute attribute = attributes.get(NAME_ATTR);
          if(attribute != null) form.setName(attribute.getValue());
          attribute = attributes.get("method");
          if(attribute != null) form.setMethod(attribute.getValue());
          attribute = attributes.get(ACTION);
          if(attribute != null) form.setAction(attribute.getValue());
        } else {
          if(form != null) forms.add(form);
          form = null;
        }
      } else if(node.isNode(Name.INPUT)
          || node.isNode(Name.TEXTAREA)
          || node.isNode(Name.SELECT)) {       
        if(form == null) continue;
       
        Attributes attributes = node.getAttributes();
        Attribute attribute = attributes.get(NAME_ATTR);
        if(attribute == null) attribute = attributes.get(ID_ATTR);;
        if(attribute == null) continue;
        String name  = attribute.getValue();
        if(name == null || name.trim().isEmpty()) continue;
View Full Code Here

      message = "Error: Can't parse tokens!";
      return ERROR;
    }

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(!node.isNode(Name.INPUT)) continue;
      String value = getAttribute(node, TYPE_ATTR);
      if(value == null) continue;
      if(value.equalsIgnoreCase(PASSWORD)) return LOGIN;
    }

    int i = 0;
    for(;i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
     
      if(node.isNode(Name.FORM)) {
        if(formName == null) break;       
        Attributes attrs = node.getAttributes();
        String name = getAttribute(attrs, NAME_ATTR);
        if(formName.equalsIgnoreCase(name)) break;
      }
    }

    HTMLNode form = null;
    List<HTMLNode> inputs = new ArrayList<HTMLNode>();

    for(; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(node.isNode(Name.FORM)) {
        if(node.isOpen()) {
          form = node;
        } else {
          break;
        }
      } else if(node.isNode(Name.INPUT)
          || node.isNode(Name.TEXTAREA)
          || node.isNode(Name.SELECT)) {       
        inputs.add(node);
      }
    }
   
    if(form == null || inputs.size() < 1) {
View Full Code Here

    char [] chars = CharsDecoder.decode(charset, data, 0, data.length);
    List<NodeImpl> tokens  = parser.createTokens(chars);
    if(tokens == null) return true;

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(!node.isNode(Name.INPUT)) continue;
      String value = getAttribute(node, TYPE_ATTR);
      if(value == null) continue;
      if(value.equalsIgnoreCase(PASSWORD)) return true;
    }
    return false;
View Full Code Here

      message = "Can't parse tokens!";
      return ERROR;
    }

    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(!node.isNode(Name.INPUT)) continue;
      String value = getAttribute(node, TYPE_ATTR);
      if(value == null) continue;
      if(value.equalsIgnoreCase(PASSWORD)) return LOGIN;
    }

    int i = 0;
    for(;i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(node.isNode(Name.FORM)) break;
    }

    HTMLNode form = null;
    List<HTMLNode> inputs = new ArrayList<HTMLNode>();

    for(; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(node.isNode(Name.FORM)) {
        if(node.isOpen()) {
          form = node;
        } else {
          break;
        }
      } else if(node.isNode(Name.INPUT)
          || node.isNode(Name.TEXTAREA)
          || node.isNode(Name.SELECT)) {       
        inputs.add(node);
      }
    }
    if(form == null || inputs.size() < 1) {
      message = "form not found";
View Full Code Here

          if(type == RENDERER) builder.append('\n');
        }
        break;
      case SCRIPT:
      case STYLE:
        NodeImpl nodeImpl = (NodeImpl) node;
        if(nodeImpl.getType() == TypeToken.TAG && iterator.hasNext()) iterator.next();
        break;
      default:
        if(builder.length() > 0) {
          char c = builder.charAt(builder.length()-1);
          if(!(Character.isWhitespace(c)
View Full Code Here

    if(start == -1) {
      throw new UnknownHostException("Not found login form. Please check login address: "+loginUrl);
    }

    for(; start > -1; start--) {
      NodeImpl node = tokens.get(start);
      if(node.isNode(Name.FORM)) break;
    }

    HTMLNode form = null;
    boolean md5 = false;
    List<HTMLNode> inputs = new ArrayList<HTMLNode>();
    String formValue = null;

    for(int i = start; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(node.isNode(Name.FORM)) {
        if(node.isOpen()) {
          if(!md5) {
            String value = new String(node.getValue());
            md5 = value.toLowerCase().indexOf("md5") > -1;
          }
          form = node;
          formValue = new String(form.getValue()).toLowerCase();
        } else {
          break;
        }
      } else if(node.isNode(Name.INPUT)) {
        if(!md5 && formValue != null) {
          md5 = formValue.indexOf("md5") > -1;
        }
        inputs.add(node);
      }
View Full Code Here

    return buffer.toString();
  }
 
  /**
   * Scans the token list for the first password input.
   *
   * @param tokens the token list to scan
   * @return the index in {@code tokens} of the first INPUT node whose
   *         TYPE_ATTR value equals PASSWORD (ignoring case), or -1 when no
   *         such node exists
   */
  private int searchPasswordField(List<NodeImpl> tokens) {
    for(int i = 0; i < tokens.size(); i++) {
      NodeImpl node = tokens.get(i);
      if(!node.isNode(Name.INPUT)) continue;
      String value = getAttribute(node, TYPE_ATTR);
      // No value was returned for TYPE_ATTR, so this input cannot match PASSWORD.
      if(value == null) continue;
      if(value.equalsIgnoreCase(PASSWORD)) return i;
    }
    return -1;
View Full Code Here

      case TD:
        separateBlock(builder, node, 2, wrappers);
        break;
      case SCRIPT:
      case STYLE:
        NodeImpl nodeImpl = (NodeImpl) node;
        if(nodeImpl.getType() == TypeToken.TAG && iterator.hasNext()) iterator.next();
        break;
      default:
        if(builder.length() > 0) {
          char c = builder.charAt(builder.length()-1);
          if(!(Character.isWhitespace(c)
View Full Code Here

TOP

Related Classes of org.vietspider.html.parser.NodeImpl$NodeIteratorImpl

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.