* @return the loaded entity in a Response object
* @throws IOException
*/
private Response loadInternal(final Request request, CacheStrategy cacheStrategy, final long maxFileSize, final boolean checkBlacklist) throws IOException {
// get the protocol of the next URL
final DigestURI url = request.url();
if (url.isFile() || url.isSMB()) cacheStrategy = CacheStrategy.NOCACHE; // load just from the file system
final String protocol = url.getProtocol();
final String host = url.getHost();
// check if we have the page in the cache
final CrawlProfile crawlProfile = this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
if (crawlProfile != null && cacheStrategy != CacheStrategy.NOCACHE) {
// we have passed a first test if caching is allowed
// now see if there is a cache entry
final ResponseHeader cachedResponse = (url.isLocal()) ? null : Cache.getResponseHeader(url.hash());
final byte[] content = (cachedResponse == null) ? null : Cache.getContent(url.hash());
if (cachedResponse != null && content != null) {
// yes we have the content
// create request header values and a response object because we need that
// in case that we want to return the cached content in the next step
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
final Response response = new Response(
request,
requestHeader,
cachedResponse,
"200",