// Read the HTTP status once; any response other than SC_OK ends the fetch inside this block.
int statusCode = response.getStatusLine().getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
    if (statusCode == HttpStatus.SC_NOT_FOUND) {
        // Missing pages are only reported when the show404Pages flag is set.
        if (show404Pages) {
            logger.info("Not Found: " + toFetchURL + " (Link found in doc#: "
                    + page.getWebURL().getParentDocid() + ")");
        }
    } else {
        boolean moved = statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                || statusCode == HttpStatus.SC_MOVED_TEMPORARILY;
        if (moved) {
            // Store the redirect target on the page's URL; it becomes null when the
            // response carries no Location header.
            Header locationHeader = response.getFirstHeader("Location");
            if (locationHeader == null) {
                page.getWebURL().setURL(null);
            } else {
                String target = locationHeader.getValue();
                page.getWebURL().setURL(target);
            }
            return PageFetchStatus.Moved;
        }
        logger.info("Failed: " + response.getStatusLine().toString() + ", while fetching " + toFetchURL);
    }
    // All remaining non-OK outcomes return the raw HTTP status code to the caller.
    return response.getStatusLine().getStatusCode();
}
// Compare the URI reported by the request object with the URL that was asked for;
// a mismatch (even after canonicalization) is treated as having landed on a different page.
String uri = get.getURI().toString();
if (!uri.equals(toFetchURL) && !URLCanonicalizer.getCanonicalURL(uri).equals(toFetchURL)) {
    int redirectDocId = DocIDServer.getDocID(uri);
    // A positive doc id means the target URL is already registered.
    if (redirectDocId > 0) {
        return PageFetchStatus.RedirectedPageIsSeen;
    }
    // NOTE(review): a doc id of -1 skips the re-registration below entirely; confirm
    // against DocIDServer what -1 and non-positive ids mean, this may be unintended.
    if (redirectDocId != -1) {
        // Re-point the page at the URI actually fetched, under a freshly issued doc id.
        WebURL redirectTarget = new WebURL();
        redirectTarget.setURL(uri);
        redirectTarget.setDocid(DocIDServer.getNewDocID(uri));
        page.setWebURL(redirectTarget);
    }
}
if (entity != null) {
// Work out the body size: use the entity's own content length, and when that is
// reported as -1 fall back to the Content-Length response header.
long size = entity.getContentLength();
if (size == -1) {
    Header length = response.getLastHeader("Content-Length");
    if (length == null) {
        length = response.getLastHeader("Content-length");
    }
    if (length != null) {
        // Parse as long: size is a long, and Integer.parseInt would throw
        // NumberFormatException for bodies larger than Integer.MAX_VALUE bytes,
        // which are exactly the ones the size limit below has to reject.
        size = Long.parseLong(length.getValue());
    }
    // With no header present, size stays -1 and the limit check below does not trigger.
}
if (size > MAX_DOWNLOAD_SIZE) {
    // Discard the body before reporting the page as too large.
    entity.consumeContent();
    return PageFetchStatus.PageTooBig;
}
boolean isBinary = false;
Header type = entity.getContentType();
if (type != null) {
String typeStr = type.getValue().toLowerCase();
if (typeStr.contains("image") || typeStr.contains("audio") || typeStr.contains("video")) {
isBinary = true;
if (ignoreIfBinary) {
return PageFetchStatus.PageIsBinary;
}