* @param requestHeader The header lines of the connection from the request
* @param respond the OutputStream to the client
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, net.yacy.cora.protocol.HeaderFramework, java.io.OutputStream)
*/
public static void doGet(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream respond) {
ByteCountOutputStream countedRespond = null;
try {
final int reqID = requestHeader.hashCode();
// remembering the starting time of the request
final Date requestDate = new Date(); // remember the time...
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime()));
if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction();
sb.proxyLastAccess = System.currentTimeMillis();
// using an ByteCount OutputStream to count the send bytes (needed for the logfile)
countedRespond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY");
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); // always starts with leading '/'
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given
final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
int pos=0;
int port=0;
DigestURI url = null;
try {
url = new DigestURI(HeaderFramework.getRequestURL(conProp));
if (log.isFine()) log.logFine(reqID +" GET "+ url);
if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader);
//redirector
if (redirectorEnabled){
synchronized(redirectorProcess){
redirectorWriter.println(url.toNormalform(false, true));
redirectorWriter.flush();
}
final String newUrl = redirectorReader.readLine();
if (!newUrl.equals("")) {
try {
url = new DigestURI(newUrl);
} catch(final MalformedURLException e){}//just keep the old one
}
if (log.isFinest()) log.logFinest(reqID +" using redirector to "+ url);
conProp.put(HeaderFramework.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort());
conProp.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath());
requestHeader.put(HeaderFramework.HOST, url.getHost()+":"+url.getPort());
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath());
}
} catch (final MalformedURLException e) {
final String errorMsg = "ERROR: internal error with url generation: host=" +
host + ", port=" + port + ", path=" + path + ", args=" + args;
log.logSevere(errorMsg);
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e);
return;
}
if ((pos = host.indexOf(':')) < 0) {
port = 80;
} else {
port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos);
}
// check the blacklist
// blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) {
log.logInfo("AGIS blocking of host '" + hostlow + "'");
HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
return;
}
// handle outgoing cookies
handleOutgoingCookies(requestHeader, host, ip);
prepareRequestHeader(conProp, requestHeader, hostlow);
final ResponseHeader cachedResponseHeader = Cache.getResponseHeader(url.hash());
// why are files unzipped upon arrival? why not zip all files in cache?
// This follows from the following premises
// (a) no file shall be unzip-ed more than once to prevent unnecessary computing time
// (b) old cache entries shall be comparable with refill-entries to detect/distinguish case 3+4
// (c) the indexing mechanism needs files unzip-ed, a schedule could do that later
// case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped
// and the newly arrival would be zipped and would have to be unzipped upon load. But then the
// scheduler is superfluous. Therefore the only reminding case is
// (d) cached files shall be either all zipped or unzipped
// case d contradicts with a, because files need to be unzipped for indexing. Therefore
// the only remaining case is to unzip files right upon load. Thats what we do here.
// finally use existing cache if appropriate
// here we must decide weather or not to save the data
// to a cache
// we distinguish four CACHE STATE cases:
// 1. cache fill
// 2. cache fresh - no refill
// 3. cache stale - refill - necessary
// 4. cache stale - refill - superfluous
// in two of these cases we trigger a scheduler to handle newly arrived files:
// case 1 and case 3
if (cachedResponseHeader == null) {
if (log.isFinest()) log.logFinest(reqID + " page not in cache: fulfill request from web");
fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond);
} else {
final Request request = new Request(
null,
url,
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
"",
cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
0,
0,
0,
0);
final Response response = new Response(
request,
requestHeader,
cachedResponseHeader,
"200 OK",
sb.crawler.defaultProxyProfile
);
final byte[] cacheContent = Cache.getContent(url.hash());
if (cacheContent != null && response.isFreshForProxy()) {
if (log.isFinest()) log.logFinest(reqID + " fulfill request from cache");
fulfillRequestFromCache(conProp, url, requestHeader, cachedResponseHeader, cacheContent, countedRespond);
} else {
if (log.isFinest()) log.logFinest(reqID + " fulfill request from web");
fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond);
}
}
} catch (final Exception e) {
try {
final String exTxt = e.getMessage();
if ((exTxt!=null)&&(exTxt.startsWith("Socket closed"))) {
forceConnectionClose(conProp);
} else if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
final String errorMsg = "Unexpected Error. " + e.getClass().getName() + ": " + e.getMessage();
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e);
log.logSevere(errorMsg);
} else {
forceConnectionClose(conProp);
}
} catch (final Exception ee) {
forceConnectionClose(conProp);
}
} finally {
try { if(countedRespond != null) countedRespond.flush(); else if(respond != null) respond.flush(); } catch (final Exception e) {}
if (countedRespond != null) countedRespond.finish();
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_END, Long.valueOf(System.currentTimeMillis()));
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE,(countedRespond != null) ? Long.toString(countedRespond.getCount()) : -1L);
logProxyAccess(conProp);
}
}