if (now - lastFetchTime < politenessDelay) {
Thread.sleep(politenessDelay - (now - lastFetchTime));
}
lastFetchTime = (new Date()).getTime();
}
HttpResponse response = httpclient.execute(get);
entity = response.getEntity();
int statusCode = response.getStatusLine().getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
if (statusCode != HttpStatus.SC_NOT_FOUND) {
if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
Header header = response.getFirstHeader("Location");
if (header != null) {
String movedToUrl = header.getValue();
page.getWebURL().setURL(movedToUrl);
} else {
page.getWebURL().setURL(null);
}
return PageFetchStatus.Moved;
}
logger.info("Failed: " + response.getStatusLine().toString() + ", while fetching " + toFetchURL);
} else if (show404Pages) {
logger.info("Not Found: " + toFetchURL + " (Link found in doc#: "
+ page.getWebURL().getParentDocid() + ")");
}
return response.getStatusLine().getStatusCode();
}
String uri = get.getURI().toString();
if (!uri.equals(toFetchURL)) {
if (!URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) {
int newdocid = DocIDServer.getDocID(uri);
if (newdocid != -1) {
if (newdocid > 0) {
return PageFetchStatus.RedirectedPageIsSeen;
}
WebURL webURL = new WebURL();
webURL.setURL(uri);
webURL.setDocid(DocIDServer.getNewDocID(uri));
page.setWebURL(webURL);
}
}
}
if (entity != null) {
long size = entity.getContentLength();
if (size == -1) {
Header length = response.getLastHeader("Content-Length");
if (length == null) {
length = response.getLastHeader("Content-length");
}
if (length != null) {
size = Integer.parseInt(length.getValue());
} else {
size = -1;