CrawlDatum crawldatum = new CrawlDatum();
String url = item.datum.getUrl();
crawldatum.setUrl(url);
Request request = requestFactory.createRequest(url);
Response response = null;
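// Attempt the fetch up to retry + 1 times; a successful response breaks out of the loop.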
for (int i = 0; i <= retry; i++) {
    if (i > 0) {
        LogUtils.getLogger().info("retry " + i + " " + url);
    }
    try {
        response = request.getResponse(crawldatum);
        break;
    } catch (Exception ex) {
        // ignore and retry; if every attempt fails, response stays null
    }
}
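// Mark the datum as fetched and stamp the fetch time (note: this happens even if all attempts failed),
// then build the Page that carries the result.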
crawldatum.setStatus(CrawlDatum.STATUS_DB_FETCHED);
crawldatum.setFetchTime(System.currentTimeMillis());
Page page = new Page();
page.setUrl(url);
page.setFetchTime(crawldatum.getFetchTime());
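// If every attempt failed, report FETCH_FAILED for this page and move on to the next work item.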
if (response == null) {
    LogUtils.getLogger().info("failed " + url);
    HandlerUtils.sendMessage(handler, new Message(Fetcher.FETCH_FAILED, page), true);
    continue;
}
page.setResponse(response);
LogUtils.getLogger().info("fetch " + url);
String contentType = response.getContentType();
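// Optionally parse the fetched page; a parser is chosen by URL and content type, and parse failures are only logged.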
if (parsing) {
    try {
        Parser parser = parserFactory.createParser(url, contentType);
        if (parser != null) {
            ParseResult parseresult = parser.getParse(page);
            page.setParseResult(parseresult);
        }
    } catch (Exception ex) {
        LogUtils.getLogger().info("Exception", ex);
    }
}
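// Persist the fetch record and, if content storage is enabled, the raw response bytes;
// the segment writer's wrtieFetch/wrtieContent names are used as spelled in its API.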
if (needUpdateDb) {
    try {
        dbUpdater.getSegmentWriter().wrtieFetch(crawldatum);
        if (isContentStored) {
            Content content = new Content();
            content.setUrl(url);
            if (response.getContent() != null) {
                content.setContent(response.getContent());
            } else {
                content.setContent(new byte[0]);
            }
            content.setContentType(contentType);
            dbUpdater.getSegmentWriter().wrtieContent(content);