Package org.eweb4j.spiderman.task

Examples of org.eweb4j.spiderman.task.Task


   
    //记录已经访问过该url,下次不要重复访问它
    visitedUrls.add(nextUrl);
   
    //解析nextPage
    Task nextTask = new Task(nextUrl, task.url, task.site, 0);
    Model nextModel = new Model();
    List<Field> isAlsoParseInNextPageFields = target.getModel().getIsAlsoParseInNextPageFields();
    if (isAlsoParseInNextPageFields == null || isAlsoParseInNextPageFields.isEmpty())
      return ;
   
View Full Code Here


    if (this.site.db == null)
      return null;
   
    Collection<Task> validTasks = new ArrayList<Task>();
    for (String url : newUrls){
      Task newTask = new Task(url, task.url, site, 10);
      try {
        Target tgt = Util.isTargetUrl(newTask);
        boolean isFromSourceUrl = SourceUrlChecker.checkSourceUrl(site.getTargets().getSourceRules(), newTask.sourceUrl);
        //如果是目标url,但不是来自来源url,跳过
        if (tgt != null && !isFromSourceUrl){
View Full Code Here

    if (visitedUrls.contains(nextUrl)){
      return ;
    }
   
    //解析nextPage,找出里面的目标URL
    Task nextTask = new Task(nextUrl, task.url, task.site, 0);
   
    FetchRequest req = new FetchRequest();
    req.setUrl(nextUrl);
    FetchResult fr = task.site.fetcher.fetch(req);
    if (fr == null || fr.getPage() == null)
View Full Code Here

TOP

Related Classes of org.eweb4j.spiderman.task.Task

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.