/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package ivory.server;
import ivory.core.RetrievalEnvironment;
import ivory.core.util.XMLTools;
import ivory.smrf.model.builder.MRFBuilder;
import ivory.smrf.model.importance.ConceptImportanceModel;
import ivory.smrf.retrieval.Accumulator;
import ivory.smrf.retrieval.QueryRunner;
import ivory.smrf.retrieval.ThreadedQueryRunner;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.servlet.Context;
import org.mortbay.jetty.servlet.ServletHolder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.collection.DocumentForwardIndex;
import edu.umd.cloud9.collection.Indexable;
/**
* @author Tamer Elsayed
* @author Jimmy Lin
*/
public class RetrievalServer {
private static final Logger sLogger = Logger.getLogger(RetrievalServer.class);
/*{
sLogger.setLevel(Level.INFO);
}*/
private QueryRunner mQueryRunner;
private RetrievalEnvironment mEnv=null;
private DocnoMapping mDocnoMapping;
private DocumentForwardIndex<Indexable> mForwardIndex;
private String mSid;
public void initialize(String sid, String config, FileSystem fs) {
System.out.println("$$ Initializing RetrievalServer for \"" + sid + "\"...");
sLogger.info("Initializing RetrievalServer for \"" + sid + "\"...");
mSid = sid;
Document d = null;
try {
d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(
fs.open(new Path(config)));
} catch (Exception e) {
e.printStackTrace();
}
String indexPath = null;
String findexPath = null;
NodeList servers = d.getElementsByTagName("server");
for (int i = 0; i < servers.getLength(); i++) {
Node node = servers.item(i);
// pick out the correct server id
String id = XMLTools.getAttributeValue(node, "id", null);
if (!id.equals(sid))
continue;
sLogger.info(" - sid: " + sid);
NodeList children = node.getChildNodes();
for (int j = 0; j < children.getLength(); j++) {
Node child = children.item(j);
if ("index".equals(child.getNodeName())) {
sLogger.info(" - index: " + child.getTextContent().trim());
indexPath = child.getTextContent().trim();
if(mEnv == null){
try {
mEnv = new RetrievalEnvironment(indexPath, fs);
mEnv.initialize(true);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException();
}
}
}
if ("findex".equals(child.getNodeName())) {
sLogger.info(" - findex: " + child.getTextContent().trim());
// initialize forward index
findexPath = child.getTextContent().trim();
}
if ("docscore".equals(child.getNodeName())) {
sLogger.info(" - docscore: " + child.getTextContent().trim());
String type = XMLTools.getAttributeValue(child, "type", "");
String provider = XMLTools.getAttributeValue(child, "provider", "");
String path = child.getTextContent();
if (type.equals("") || provider.equals("") || path.equals("")) {
throw new RuntimeException("Invalid docscore!");
}
System.out.println("$$ Loading docscore: type=" + type + ", provider=" +
provider + ", path="
+ path);
sLogger.info("Loading docscore: type=" + type + ", provider=" +
provider + ", path="
+ path);
if(mEnv == null){
try {
mEnv = new RetrievalEnvironment(indexPath, fs);
mEnv.initialize(true);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException();
}
}
mEnv.loadDocScore(type, provider, path);
}
if("importancemodel".equals(child.getNodeName())) {
sLogger.info(" - importancemodel: " + child.getTextContent().trim());
String importanceModelId = XMLTools.getAttributeValue(child, "id", null);
if(importanceModelId == null) {
throw new RuntimeException("Invalid importance model!");
}
ConceptImportanceModel importanceModel = null;
try {
importanceModel = ConceptImportanceModel.get(child);
}
catch(Exception e) {
throw new RuntimeException(e);
}
if(mEnv == null){
try {
mEnv = new RetrievalEnvironment(indexPath, fs);
mEnv.initialize(true);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException();
}
}
mEnv.addImportanceModel(importanceModelId, importanceModel);
}
}
if (indexPath == null) {
throw new RuntimeException("Error: must specify an index location!");
}
if (findexPath == null)
sLogger.warn("forward index not specified: will not be able to access documents.");
}
if(mEnv == null){
try {
mEnv = new RetrievalEnvironment(indexPath, fs);
mEnv.initialize(true);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException();
}
}
try {
Node modelNode = d.getElementsByTagName("model").item(0);
MRFBuilder builder = MRFBuilder.get(mEnv, modelNode.cloneNode(true));
// Set the default number of hits to 2000 because that's what we had in our
// official TREC 2009 web track runs; otherwise, IF merging approach
// will give slightly different results, so we won't be able to
// replicate results...
//mQueryRunner = new ThreadedQueryRunner(builder, null, 1, 2000);
int hits = Integer.parseInt(XMLTools.getAttributeValue(modelNode, "hits", 2000+""));
mQueryRunner = new ThreadedQueryRunner(builder, null, 1, hits);
// load docno/docid mapping
try {
mDocnoMapping = (DocnoMapping) Class.forName(
mEnv.readDocnoMappingClass()).newInstance();
mDocnoMapping.loadMapping(mEnv.getDocnoMappingData(), fs);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException("Error initializing DocnoMapping!");
}
// load document forward index
if (findexPath != null) {
FSDataInputStream in = fs.open(new Path(findexPath));
String indexClass = in.readUTF();
in.close();
try {
mForwardIndex = (DocumentForwardIndex<Indexable>) Class.forName(indexClass)
.newInstance();
mForwardIndex.loadIndex(new Path(findexPath), new Path(mEnv.getDocnoMappingData().toString()), fs);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException("Error initializing forward index!");
}
}
sLogger.info("RetrievalServer successfully initialized.");
} catch (Exception e) {
e.printStackTrace();
}
}
public void start(int port) {
sLogger.info("Staring server...");
Server server = new Server(port);
Context root = new Context(server, "/", Context.SESSIONS);
root.addServlet(
new ServletHolder(new QueryBrokerServlet(mQueryRunner, mEnv, mDocnoMapping)),
QueryBrokerServlet.ACTION);
root.addServlet(
new ServletHolder(new QueryDirectServlet(mQueryRunner, mEnv, mDocnoMapping)),
QueryDirectServlet.ACTION);
root.addServlet(new ServletHolder(new FetchDocnoServlet(mForwardIndex)),
FetchDocnoServlet.ACTION);
root.addServlet(new ServletHolder(new HomeServlet(mSid)), "/");
try {
server.start();
sLogger.info("Server successfully started!");
} catch (Exception e) {
sLogger.info("Server fails to start!");
e.printStackTrace();
}
}
public RetrievalServer() {
}
private static String join(String[] terms, String sep) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < terms.length; i++) {
sb.append(terms[i]);
if (i < terms.length - 1)
sb.append(sep);
}
return sb.toString();
}
public static class HomeServlet extends HttpServlet {
private static final long serialVersionUID = 7368950575963429946L;
private String mSid;
public HomeServlet(String sid) {
mSid = sid;
}
protected void doGet(HttpServletRequest httpServletRequest,
HttpServletResponse httpServletResponse) throws ServletException, IOException {
httpServletResponse.setContentType("text/html");
PrintWriter out = httpServletResponse.getWriter();
out.println("<html><head><title>Ivory Search Interface: " + mSid + "</title><head>");
out.println("<body>");
out.println("<h3>Run a query on " + mSid + ":</h3>");
out.println("<form method=\"post\" action=\"" + QueryDirectServlet.ACTION + "\">");
out.println("<input type=\"text\" name=\"" + QueryDirectServlet.QUERY_FIELD
+ "\" size=\"60\" />");
out.println("<input type=\"submit\" value=\"Run query!\" />");
out.println("</form>");
out.println("</p>");
out.print("</body></html>\n");
out.close();
}
}
public static class QueryDirectServlet extends HttpServlet {
public static final String ACTION = "/DirectQuery";
public static final String QUERY_FIELD = "query";
private static final long serialVersionUID = -5998786589277554550L;
private QueryRunner mQueryRunner = null;
private RetrievalEnvironment mEnv = null;
private DocnoMapping mDocnoMapping = null;
public QueryDirectServlet(QueryRunner queryRunner, RetrievalEnvironment env,
DocnoMapping mapping) {
mQueryRunner = queryRunner;
mEnv = env;
mDocnoMapping = mapping;
}
public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
IOException {
doPost(req, res);
}
public void doPost(HttpServletRequest req, HttpServletResponse res)
throws ServletException, IOException {
sLogger.info("Triggered servlet for direct querying");
res.setContentType("text/html");
PrintWriter out = res.getWriter();
String query = null;
if (req.getParameterValues(QUERY_FIELD) != null)
query = req.getParameterValues(QUERY_FIELD)[0];
sLogger.info("Raw query: " + query);
long startTime = System.currentTimeMillis();
String[] queryTokens = mEnv.tokenize(query);
sLogger.info("Tokenized query: " + join(queryTokens, " "));
// run the query
Accumulator[] results = mQueryRunner.runQuery(queryTokens);
long endTime = System.currentTimeMillis();
sLogger.info("query execution time (ms): " + (endTime - startTime));
StringBuffer sb = new StringBuffer();
sb.append("<html><head><title>Server Results</title></head>\n<body>");
sb.append("<ol>");
for (Accumulator a : results) {
sb.append("<li>docno " + a.docno + ", docid <a href="
+ FetchDocnoServlet.formatRequestURL(a.docno) + ">"
+ mDocnoMapping.getDocid(a.docno) + "</a> (" + a.score + ")</li>\n");
}
sb.append("</ol>");
sb.append("</body></html>\n");
out.print(sb.toString());
out.close();
}
}
public static class QueryBrokerServlet extends HttpServlet {
private static final long serialVersionUID = -5998786589277554550L;
public static final String ACTION = "/BrokerQuery";
public static final String QUERY_FIELD = "query";
private QueryRunner mQueryRunner = null;
private RetrievalEnvironment mEnv = null;
private DocnoMapping mDocnoMapping = null;
public QueryBrokerServlet(QueryRunner queryRunner, RetrievalEnvironment env,
DocnoMapping docnoMapping) {
mQueryRunner = queryRunner;
mEnv = env;
mDocnoMapping = docnoMapping;
}
public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
IOException {
doPost(req, res);
}
public void doPost(HttpServletRequest req, HttpServletResponse res)
throws ServletException, IOException {
sLogger.info("Broker triggered servlet for running queries");
res.setContentType("text/html");
String query = null;
if (req.getParameterValues(QUERY_FIELD) != null)
query = req.getParameterValues(QUERY_FIELD)[0];
long startTime = System.currentTimeMillis();
String[] queryTokens = mEnv.tokenize(query);
sLogger.info("Tokenized query: " + join(queryTokens, " "));
// run the query
Accumulator[] results = mQueryRunner.runQuery(queryTokens);
long endTime = System.currentTimeMillis();
sLogger.info("query execution time (ms): " + (endTime - startTime));
StringBuffer sb = new StringBuffer();
for (Accumulator a : results) {
sb.append(a.docno + "\t" + a.score + "\t" + mDocnoMapping.getDocid(a.docno) + "\t");
}
PrintWriter out = res.getWriter();
out.print(sb.toString().trim());
out.close();
}
}
public static class FetchDocnoServlet extends HttpServlet {
static final long serialVersionUID = 3986721097L;
public static final String ACTION = "/fetch_docno";
public static final String DOCNO = "docno";
private DocumentForwardIndex<Indexable> mForwardIndex;
public FetchDocnoServlet(DocumentForwardIndex<Indexable> findex) {
mForwardIndex = findex;
}
public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
IOException {
doPost(req, res);
}
public void doPost(HttpServletRequest req, HttpServletResponse res)
throws ServletException, IOException {
sLogger.info("triggered servlet for fetching document by docno");
if (mForwardIndex == null) {
res.setContentType("text/html");
PrintWriter out = res.getWriter();
out.print("<html><head><title>Service Unavailable</title><head>\n");
out.print("<body>\n");
out.print("<h3>No document access is available!</h3>\n");
out.print("</body></html>\n");
out.close();
}
int docno = 0;
try {
if (req.getParameterValues(DOCNO) != null)
docno = Integer.parseInt(req.getParameterValues(DOCNO)[0]);
Indexable doc = mForwardIndex.getDocument(docno);
if (doc != null) {
sLogger.info("fetched: " + doc.getDocid() + " = docno " + docno);
res.setContentType(doc.getDisplayContentType());
PrintWriter out = res.getWriter();
out.print(doc.getDisplayContent());
out.close();
} else {
throw new Exception();
}
} catch (Exception e) {
sLogger.info("trapped error fetching " + docno);
res.setContentType("text/html");
PrintWriter out = res.getWriter();
out.print("<html><head><title>Invalid docno!</title><head>\n");
out.print("<body>\n");
out.print("<h1>Error!</h1>\n");
out.print("<h3>Invalid doc: " + docno + "</h3>\n");
out.print("</body></html>\n");
out.close();
}
}
public static String formatRequestURL(int docno) {
return ACTION + "?" + DOCNO + "=" + new Integer(docno).toString();
}
}
}