}
String requestUrl = reqsb.toString();
// parse the incoming request and make sure it's a valid page request
WebsiteData weblog = null;
WeblogPageRequest pageRequest = null;
try {
pageRequest = new WeblogPageRequest(request);
UserManager userMgr = RollerFactory.getRoller().getUserManager();
weblog = userMgr.getWebsiteByHandle(pageRequest.getWeblogHandle());
if(weblog == null) {
throw new Exception("no weblog named "+pageRequest.getWeblogHandle());
}
} catch(Exception ex) {
// bad url or couldn't obtain weblog
response.sendError(HttpServletResponse.SC_NOT_FOUND);
return;
}
// determine if this request came from a robot
if (robotPattern != null) {
// If the pattern is present, we check for whether the User-Agent matches,
// and set isRobot if so. Currently, all referral processing, including
// spam check, is skipped for robots identified in this way.
String userAgent = request.getHeader("User-Agent");
isRobot = (userAgent != null && userAgent.length() > 0 && robotPattern.matcher(userAgent).matches());
}
// validate the referrer
if (pageRequest != null && pageRequest.getWeblogHandle() != null && !isRobot) {
RollerContext rctx = RollerContext.getRollerContext();
// Base page URLs, with and without www.
String basePageUrlWWW =
rctx.getAbsoluteContextUrl(request)+"/page/"+weblog.getHandle();
String basePageUrl = basePageUrlWWW;
if ( basePageUrlWWW.startsWith("http://www.") ) {
// chop off the http://www.
basePageUrl = "http://"+basePageUrlWWW.substring(11);
}
// ignore referrers coming from the user's own blog
if (referrerUrl == null ||
(!referrerUrl.startsWith(basePageUrl) &&
!referrerUrl.startsWith(basePageUrlWWW))) {
String selfSiteFragment = "/page/"+weblog.getHandle();
// validate the referrer
if ( referrerUrl != null ) {
// ignore a referrer from the person's own blog
if (referrerUrl.indexOf(selfSiteFragment) != -1) {
referrerUrl = null;
ignoreReferrer = true;
} else {
// treat editor referral as direct
int lastSlash = requestUrl.indexOf("/", 8);
if (lastSlash == -1) lastSlash = requestUrl.length();
String requestSite = requestUrl.substring(0, lastSlash);
if (referrerUrl.matches(requestSite + ".*\\.do.*")) {
referrerUrl = null;
} else {
// If referer URL is blacklisted, throw it out
isRefSpammer = SpamChecker.checkReferrer(weblog, referrerUrl);
}
}
}
} else {
mLogger.debug("Ignoring referer = "+referrerUrl);
ignoreReferrer = true;
}
}
// pre-processing complete, let's finish the job
if (isRefSpammer) {
// spammers get a 403 Access Denied
response.sendError(HttpServletResponse.SC_FORBIDDEN);
return;
} else if(!isRobot && !ignoreReferrer) {
// referrer is valid, let's record it
try {
IncomingReferrer referrer = new IncomingReferrer();
referrer.setReferrerUrl(referrerUrl);
referrer.setRequestUrl(requestUrl);
referrer.setWeblogHandle(pageRequest.getWeblogHandle());
referrer.setWeblogAnchor(pageRequest.getWeblogAnchor());
referrer.setWeblogDateString(pageRequest.getWeblogDate());
ReferrerQueueManager refQueue =
RollerFactory.getRoller().getReferrerQueueManager();
refQueue.processReferrer(referrer);
} catch(Exception e) {