public void nutchPerform(ComponentContext tileContext,
HttpServletRequest request, HttpServletResponse response,
ServletContext servletContext) throws ServletException, IOException {
ServiceLocator locator = getServiceLocator(request);
NutchBean bean = locator.getNutchBean();
LOG.info("Cache request from " + request.getRemoteAddr());
Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
Integer.parseInt(request.getParameter("id")));
HitDetails details = bean.getDetails(hit);
String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();
Metadata metaData = bean.getParseData(details).getContentMeta();
String content = null;
String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
if (contentType.startsWith("text/html")) {
// FIXME: it would be better to emit the original byte sequence
// with the charset set to the value of 'CharEncoding',
// but I don't know how to emit a raw byte sequence from a JSP.
// out.getOutputStream().write(bean.getContent(details)) may work,
// but I'm not sure.
String encoding = (String) metaData.get("CharEncodingForConversion");
if (encoding != null) {
try {
content = new String(bean.getContent(details), encoding);
} catch (UnsupportedEncodingException e) {
//fallback to configured charset
content = new String(bean.getContent(details), locator
.getConfiguration().get("parser.character.encoding.default"));
}
} else {
//construct String with system default encoding
content = new String(bean.getContent(details));
}
}
// page content
request.setAttribute("content", content);