}
// handle outgoing cookies
handleOutgoingCookies(requestHeader, host, ip);
prepareRequestHeader(conProp, requestHeader, hostlow);
ResponseHeader cachedResponseHeader = Cache.getResponseHeader(url.hash());
// why are files unzipped upon arrival? why not zip all files in cache?
// This follows from the following premises
// (a) no file shall be unzip-ed more than once to prevent unnecessary computing time
// (b) old cache entries shall be comparable with refill-entries to detect/distinguish case 3+4
// (c) the indexing mechanism needs files unzip-ed, a schedule could do that later
// case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped
// and the newly arrival would be zipped and would have to be unzipped upon load. But then the
// scheduler is superfluous. Therefore the only reminding case is
// (d) cached files shall be either all zipped or unzipped
// case d contradicts with a, because files need to be unzipped for indexing. Therefore
// the only remaining case is to unzip files right upon load. Thats what we do here.
// finally use existing cache if appropriate
// here we must decide weather or not to save the data
// to a cache
// we distinguish four CACHE STATE cases:
// 1. cache fill
// 2. cache fresh - no refill
// 3. cache stale - refill - necessary
// 4. cache stale - refill - superfluous
// in two of these cases we trigger a scheduler to handle newly arrived files:
// case 1 and case 3
if (cachedResponseHeader == null) {
if (log.isFinest()) log.logFinest(reqID + " page not in cache: fulfill request from web");
fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond);
} else {
final Request request = new Request(
null,
url,
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
"",
cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
0,
0,
0,
0);