ClientConfiguration.getDefault().getAcceptCharset());
httpClient.setReferrer(referrer);
try {
spider.fireRetrievalStarted(uri);
HttpResponse cached = spider.retrieveFromCache(uri);
long ifModifiedSince = 0;
if (cached != null) {
ifModifiedSince = cached.getDate();
} else {
if (Spider.isOffline()) {
logger.warning("Could not retrieve " + uri + " from HTTP cache.");
return;
}
}
HttpResponse response;
if (Spider.isOffline()) {
response = cached;
logger.info(response.getStatusCode() + " " + response.getStatusMessage() + ": " + uri);
} else {
response = httpClient.doGet(uri, ifModifiedSince, spider.cookieStore);
logger.info(response.getStatusCode() + " " + response.getStatusMessage() + ": " + uri);
if (response.getStatusCode() == 304) {
response = cached;
}
}
if (!spider.isRunning()) {
return;
}
if (response.getStatusCode() == 200) {
if (response != cached && !Spider.isOffline() && spider.jxlDocument.isUseHTTPCache()) {
spider.storeInCache(uri, response);
}
String redirectionURI = null;
if (response.isRedirected()) {
redirectionURI = response.getRedirectionURL();
logger.fine(uri + ": redirected to " + redirectionURI);
if (spider.jxlDocument.getUri().equals(uri)) {
// Correct the starting URI
logger.fine("Resetting starting URI to " + redirectionURI);
spider.jxlDocument.setStartingURI(URI.create(redirectionURI));
}
}
ContentType contentType = new ContentType(response.getContentType());
if (!spider.jxlDocument.isAutoDetectInputEncoding()) {
contentType = contentType.derive(spider.jxlDocument.getInputEncoding());
}
Resource resource = new Resource(uri, redirectionURI, contentType, response.getContent(), level,
embedded);
if (spider.jxlDocument.isAutoDetectInputEncoding()) {
resource.scanForContentType();
}
spider.parse(resource);