modifyProxyHeaders(requestHeader, httpVer);
final String connectHost = hostPart(host, port, yAddress);
final String getUrl = "http://"+ connectHost + remotePath;
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
// send request
try {
client.GET(getUrl);
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader);
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
// determine if it's an internal error of the httpc
if (responseHeader.isEmpty()) {
throw new Exception(client.getHttpResponse().getStatusLine().toString());
}
final ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond);
// the cache does either not exist or is (supposed to be) stale
long sizeBeforeDelete = -1;
if (cachedResponseHeader != null) {
// delete the cache
ResponseHeader rh = Cache.getResponseHeader(url.hash());
if (rh != null && (sizeBeforeDelete = rh.getContentLength()) == 0) {
byte[] b = Cache.getContent(url.hash());
if (b != null) sizeBeforeDelete = b.length;
}
Cache.delete(url);
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
}
// reserver cache entry
final Request request = new Request(
null,
url,
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
"",
responseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
0,
0,
0,
sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);
// handle incoming cookies
handleIncomingCookies(responseHeader, host, ip);
// prepareResponseHeader(responseHeader, res.getHttpVer());
prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());
// sending the respond header back to the client
if (chunkedOut != null) {
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
}
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
HTTPDemon.sendRespondHeader(
conProp,
respond,
httpVer,
client.getHttpResponse().getStatusLine().getStatusCode(),
client.getHttpResponse().getStatusLine().toString(), // status text
responseHeader);
if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) {
final OutputStream outStream = chunkedOut != null ? chunkedOut : respond;
final Response response = new Response(
request,
requestHeader,
responseHeader,
Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
sb.crawler.defaultProxyProfile
);
final String storeError = response.shallStoreCacheForProxy();
final boolean storeHTCache = response.profile().storeHTCache();
final String supportError = TextParser.supports(response.url(), response.getMimeType());
if (
/*
* Now we store the response into the htcache directory if
* a) the response is cacheable AND
*/
(storeError == null) &&
/*
* b) the user has configured to use the htcache OR
* c) the content should be indexed
*/
((storeHTCache) || (supportError != null))
) {
// we don't write actually into a file, only to RAM, and schedule writing the file.
// int l = res.getResponseHeader().size();
int l = responseHeader.size();
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l);
final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {outStream, byteStream});
// FileUtils.copy(res.getDataAsStream(), toClientAndMemory);
client.writeTo(toClientAndMemory);
// cached bytes
byte[] cacheArray;
if (byteStream.size() > 0) {
cacheArray = byteStream.toByteArray();
} else {
cacheArray = null;
}
if (log.isFine()) log.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
if (sizeBeforeDelete == -1) {
// totally fresh file
response.setContent(cacheArray);
try {
Cache.store(response.url(), response.getResponseHeader(), cacheArray);
sb.toIndexer(response);
} catch (IOException e) {
log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
}
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
} else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) {
// before we came here we deleted a cache entry
cacheArray = null;
//cacheManager.push(cacheEntry); // unnecessary update
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REF_FAIL_HIT");
} else {
// before we came here we deleted a cache entry
response.setContent(cacheArray);
try {
Cache.store(response.url(), response.getResponseHeader(), cacheArray);
sb.toIndexer(response);
} catch (IOException e) {
log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
}
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
}
} else {
// no caching
if (log.isFine()) log.logFine(reqID +" "+ url.toString() + " not cached." +
" StoreError=" + ((storeError==null)?"None":storeError) +
" StoreHTCache=" + storeHTCache +
" SupportError=" + supportError);
// FileUtils.copy(res.getDataAsStream(), outStream);
client.writeTo(outStream);
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
}
if (chunkedOut != null) {
chunkedOut.finish();
chunkedOut.flush();
}
} // end hasBody
} catch(SocketException se) {
// if opened ...
// if(res != null) {
// // client cut proxy connection, abort download
// res.abort();
// }
client.finish();
handleProxyException(se,conProp,respond,url);
} finally {
// if opened ...
// if(res != null) {
// // ... close connection
// res.closeStream();
// }
client.finish();
}
} catch (final Exception e) {
handleProxyException(e,conProp,respond,url);
}
}