// System.out.println("parse.next->"+page.getUrl());
Model mdl = rule.getNextPage();
if (mdl == null)
return ;
Target tgt = new Target();
tgt.setName(target.getName());
tgt.setModel(mdl);
// Parse the next-page Model to obtain the next URL
// System.out.println("page--!!!!!!----->"+page.getUrl());
Collection<String> nextUrls = UrlUtils.digUrls(page, task, rule, tgt, listener, finalFields);
// System.out.println("visitedUrls-->>>>>>>>>>>>!!!!!!!!!!!!!!" + visitedUrls);
// System.out.println("nextUrls-->>>>>>>>>>>>!!!!!!!!!!!!!!" + nextUrls);
if (nextUrls == null || nextUrls.isEmpty())
return ;
String nextUrl = new ArrayList<String>(nextUrls).get(0);
if (nextUrl == null || nextUrl.trim().length() == 0)
return ;
if (visitedUrls.contains(nextUrl)){
return ;
}
FetchRequest req = new FetchRequest();
req.setUrl(nextUrl);
FetchResult fr = task.site.fetcher.fetch(req);
if (fr == null || fr.getPage() == null)
return ;
// Record that this URL has been visited so it is not visited again next time
visitedUrls.add(nextUrl);
// Parse the next page
Task nextTask = new Task(nextUrl, task.url, task.site, 0);
Model nextModel = new Model();
List<Field> isAlsoParseInNextPageFields = target.getModel().getIsAlsoParseInNextPageFields();
if (isAlsoParseInNextPageFields == null || isAlsoParseInNextPageFields.isEmpty())
return ;
nextModel.getField().addAll(isAlsoParseInNextPageFields);
tgt.setModel(nextModel);
ModelParser parser = new ModelParser(nextTask, tgt, listener);
Page nextPageResult = fr.getPage();
List<Map<String, Object>> nextMaps = parser.parse(nextPageResult);
if (nextMaps == null)