Package ivory.server

Source Code of ivory.server.RetrievalServer

/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package ivory.server;

import ivory.core.RetrievalEnvironment;
import ivory.core.util.XMLTools;
import ivory.smrf.model.builder.MRFBuilder;
import ivory.smrf.model.importance.ConceptImportanceModel;
import ivory.smrf.retrieval.Accumulator;
import ivory.smrf.retrieval.QueryRunner;
import ivory.smrf.retrieval.ThreadedQueryRunner;

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.servlet.Context;
import org.mortbay.jetty.servlet.ServletHolder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;


import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.collection.DocumentForwardIndex;
import edu.umd.cloud9.collection.Indexable;

/**
* @author Tamer Elsayed
* @author Jimmy Lin
*/
public class RetrievalServer {
  // log4j logger for this class; the nested servlet classes log through it as well.
  private static final Logger sLogger = Logger.getLogger(RetrievalServer.class);
  /*{
    sLogger.setLevel(Level.INFO);
  }*/

  // Query runner; assigned in initialize() and handed to the query servlets in start().
  private QueryRunner mQueryRunner;
  // Retrieval environment; created in initialize() on first need (only while still null).
  private RetrievalEnvironment mEnv=null;
  // Docno/docid mapping; instantiated reflectively in initialize() from the class named by mEnv.
  private DocnoMapping mDocnoMapping;
  // Document forward index; stays null when the configuration has no <findex> element.
  private DocumentForwardIndex<Indexable> mForwardIndex;
  // Server id passed to initialize(); shown by HomeServlet.
  private String mSid;

  public void initialize(String sid, String config, FileSystem fs) {
    System.out.println("$$ Initializing RetrievalServer for \"" + sid + "\"...");
    sLogger.info("Initializing RetrievalServer for \"" + sid + "\"...");

    mSid = sid;

    Document d = null;
    try {
      d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(
          fs.open(new Path(config)));
    } catch (Exception e) {
      e.printStackTrace();
    }

    String indexPath = null;
    String findexPath = null;

    NodeList servers = d.getElementsByTagName("server");
    for (int i = 0; i < servers.getLength(); i++) {
      Node node = servers.item(i);

      // pick out the correct server id
      String id = XMLTools.getAttributeValue(node, "id", null);
      if (!id.equals(sid))
        continue;

      sLogger.info(" - sid: " + sid);

      NodeList children = node.getChildNodes();
      for (int j = 0; j < children.getLength(); j++) {
        Node child = children.item(j);
        if ("index".equals(child.getNodeName())) {
          sLogger.info(" - index: " + child.getTextContent().trim());

          indexPath = child.getTextContent().trim();
          if(mEnv == null){
            try {
              mEnv = new RetrievalEnvironment(indexPath, fs);
              mEnv.initialize(true);
            } catch (Exception e) {
              e.printStackTrace();
              throw new RuntimeException();
            }
          }
        }

        if ("findex".equals(child.getNodeName())) {
          sLogger.info(" - findex: " + child.getTextContent().trim());

          // initialize forward index
          findexPath = child.getTextContent().trim();
        }

        if ("docscore".equals(child.getNodeName())) {
          sLogger.info(" - docscore: " + child.getTextContent().trim());

          String type = XMLTools.getAttributeValue(child, "type", "");
          String provider = XMLTools.getAttributeValue(child, "provider", "");
          String path = child.getTextContent();

          if (type.equals("") || provider.equals("") || path.equals("")) {
            throw new RuntimeException("Invalid docscore!");
          }
          System.out.println("$$ Loading docscore: type=" + type + ", provider=" +
              provider + ", path="
              + path);
          sLogger.info("Loading docscore: type=" + type + ", provider=" +
              provider + ", path="
              + path);

          if(mEnv == null){
            try {
              mEnv = new RetrievalEnvironment(indexPath, fs);
              mEnv.initialize(true);
            } catch (Exception e) {
              e.printStackTrace();
              throw new RuntimeException();
            }
          }
          mEnv.loadDocScore(type, provider, path);
        }
       
        if("importancemodel".equals(child.getNodeName())) {
          sLogger.info(" - importancemodel: " + child.getTextContent().trim());
         
          String importanceModelId = XMLTools.getAttributeValue(child, "id", null);
          if(importanceModelId == null) {
            throw new RuntimeException("Invalid importance model!");
          }
         
          ConceptImportanceModel importanceModel = null;
          try {
            importanceModel = ConceptImportanceModel.get(child);
          }
          catch(Exception e) {
            throw new RuntimeException(e);
          }
         
          if(mEnv == null){
            try {
              mEnv = new RetrievalEnvironment(indexPath, fs);
              mEnv.initialize(true);
            } catch (Exception e) {
              e.printStackTrace();
              throw new RuntimeException();
            }
          }
          mEnv.addImportanceModel(importanceModelId, importanceModel);
        }

      }

      if (indexPath == null) {
        throw new RuntimeException("Error: must specify an index location!");
      }

      if (findexPath == null)
        sLogger.warn("forward index not specified: will not be able to access documents.");
    }
    if(mEnv == null){
      try {
        mEnv = new RetrievalEnvironment(indexPath, fs);
        mEnv.initialize(true);
      } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException();
      }
    }

    try {
      Node modelNode = d.getElementsByTagName("model").item(0);

      MRFBuilder builder = MRFBuilder.get(mEnv, modelNode.cloneNode(true));

     
      // Set the default number of hits to 2000 because that's what we had in our
      // official TREC 2009 web track runs; otherwise, IF merging approach
      // will give slightly different results, so we won't be able to
      // replicate results...
      //mQueryRunner = new ThreadedQueryRunner(builder, null, 1, 2000);
      int hits = Integer.parseInt(XMLTools.getAttributeValue(modelNode, "hits", 2000+""));
      mQueryRunner = new ThreadedQueryRunner(builder, null, 1, hits);

      // load docno/docid mapping
      try {
        mDocnoMapping = (DocnoMapping) Class.forName(
            mEnv.readDocnoMappingClass()).newInstance();

        mDocnoMapping.loadMapping(mEnv.getDocnoMappingData(), fs);
      } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Error initializing DocnoMapping!");
      }

      // load document forward index
      if (findexPath != null) {
        FSDataInputStream in = fs.open(new Path(findexPath));
        String indexClass = in.readUTF();
        in.close();

        try {
          mForwardIndex = (DocumentForwardIndex<Indexable>) Class.forName(indexClass)
          .newInstance();
          mForwardIndex.loadIndex(new Path(findexPath), new Path(mEnv.getDocnoMappingData().toString()), fs);
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error initializing forward index!");
        }

      }

      sLogger.info("RetrievalServer successfully initialized.");
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  public void start(int port) {
    sLogger.info("Staring server...");

    Server server = new Server(port);
    Context root = new Context(server, "/", Context.SESSIONS);
    root.addServlet(
        new ServletHolder(new QueryBrokerServlet(mQueryRunner, mEnv, mDocnoMapping)),
        QueryBrokerServlet.ACTION);
    root.addServlet(
        new ServletHolder(new QueryDirectServlet(mQueryRunner, mEnv, mDocnoMapping)),
        QueryDirectServlet.ACTION);
    root.addServlet(new ServletHolder(new FetchDocnoServlet(mForwardIndex)),
        FetchDocnoServlet.ACTION);
    root.addServlet(new ServletHolder(new HomeServlet(mSid)), "/");

    try {
      server.start();
      sLogger.info("Server successfully started!");
    } catch (Exception e) {
      sLogger.info("Server fails to start!");
      e.printStackTrace();
    }
  }

  /**
   * Creates an unconfigured server. No fields are set here; they are assigned by
   * {@link #initialize(String, String, FileSystem)}.
   */
  public RetrievalServer() {
  }

  private static String join(String[] terms, String sep) {
    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < terms.length; i++) {
      sb.append(terms[i]);
      if (i < terms.length - 1)
        sb.append(sep);
    }

    return sb.toString();
  }

  public static class HomeServlet extends HttpServlet {
    private static final long serialVersionUID = 7368950575963429946L;

    private String mSid;

    public HomeServlet(String sid) {
      mSid = sid;
    }

    protected void doGet(HttpServletRequest httpServletRequest,
        HttpServletResponse httpServletResponse) throws ServletException, IOException {
      httpServletResponse.setContentType("text/html");
      PrintWriter out = httpServletResponse.getWriter();

      out.println("<html><head><title>Ivory Search Interface: " + mSid + "</title><head>");
      out.println("<body>");
      out.println("<h3>Run a query on " + mSid + ":</h3>");
      out.println("<form method=\"post\" action=\"" + QueryDirectServlet.ACTION + "\">");
      out.println("<input type=\"text\" name=\"" + QueryDirectServlet.QUERY_FIELD
          + "\" size=\"60\" />");
      out.println("<input type=\"submit\" value=\"Run query!\" />");
      out.println("</form>");
      out.println("</p>");

      out.print("</body></html>\n");

      out.close();
    }
  }

  public static class QueryDirectServlet extends HttpServlet {
    public static final String ACTION = "/DirectQuery";
    public static final String QUERY_FIELD = "query";

    private static final long serialVersionUID = -5998786589277554550L;

    private QueryRunner mQueryRunner = null;
    private RetrievalEnvironment mEnv = null;
    private DocnoMapping mDocnoMapping = null;

    public QueryDirectServlet(QueryRunner queryRunner, RetrievalEnvironment env,
        DocnoMapping mapping) {
      mQueryRunner = queryRunner;
      mEnv = env;
      mDocnoMapping = mapping;
    }

    public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
    IOException {
      doPost(req, res);
    }

    public void doPost(HttpServletRequest req, HttpServletResponse res)
    throws ServletException, IOException {
      sLogger.info("Triggered servlet for direct querying");
      res.setContentType("text/html");
      PrintWriter out = res.getWriter();

      String query = null;
      if (req.getParameterValues(QUERY_FIELD) != null)
        query = req.getParameterValues(QUERY_FIELD)[0];

      sLogger.info("Raw query: " + query);

      long startTime = System.currentTimeMillis();

      String[] queryTokens = mEnv.tokenize(query);
      sLogger.info("Tokenized query: " + join(queryTokens, " "));

      // run the query
      Accumulator[] results = mQueryRunner.runQuery(queryTokens);
      long endTime = System.currentTimeMillis();

      sLogger.info("query execution time (ms): " + (endTime - startTime));

      StringBuffer sb = new StringBuffer();
      sb.append("<html><head><title>Server Results</title></head>\n<body>");

      sb.append("<ol>");
      for (Accumulator a : results) {
        sb.append("<li>docno " + a.docno + ", docid <a href="
            + FetchDocnoServlet.formatRequestURL(a.docno) + ">"
            + mDocnoMapping.getDocid(a.docno) + "</a> (" + a.score + ")</li>\n");
      }
      sb.append("</ol>");
      sb.append("</body></html>\n");

      out.print(sb.toString());

      out.close();
    }

  }

  public static class QueryBrokerServlet extends HttpServlet {
    private static final long serialVersionUID = -5998786589277554550L;

    public static final String ACTION = "/BrokerQuery";
    public static final String QUERY_FIELD = "query";

    private QueryRunner mQueryRunner = null;
    private RetrievalEnvironment mEnv = null;
    private DocnoMapping mDocnoMapping = null;

    public QueryBrokerServlet(QueryRunner queryRunner, RetrievalEnvironment env,
        DocnoMapping docnoMapping) {
      mQueryRunner = queryRunner;
      mEnv = env;
      mDocnoMapping = docnoMapping;
    }

    public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
    IOException {
      doPost(req, res);
    }

    public void doPost(HttpServletRequest req, HttpServletResponse res)
    throws ServletException, IOException {
      sLogger.info("Broker triggered servlet for running queries");
      res.setContentType("text/html");

      String query = null;
      if (req.getParameterValues(QUERY_FIELD) != null)
        query = req.getParameterValues(QUERY_FIELD)[0];

      long startTime = System.currentTimeMillis();

      String[] queryTokens = mEnv.tokenize(query);
      sLogger.info("Tokenized query: " + join(queryTokens, " "));

      // run the query
      Accumulator[] results = mQueryRunner.runQuery(queryTokens);
      long endTime = System.currentTimeMillis();

      sLogger.info("query execution time (ms): " + (endTime - startTime));

      StringBuffer sb = new StringBuffer();
      for (Accumulator a : results) {
        sb.append(a.docno + "\t" + a.score + "\t" + mDocnoMapping.getDocid(a.docno) + "\t");
      }
      PrintWriter out = res.getWriter();
      out.print(sb.toString().trim());
      out.close();
    }
  }

  public static class FetchDocnoServlet extends HttpServlet {
    static final long serialVersionUID = 3986721097L;

    public static final String ACTION = "/fetch_docno";
    public static final String DOCNO = "docno";

    private DocumentForwardIndex<Indexable> mForwardIndex;

    public FetchDocnoServlet(DocumentForwardIndex<Indexable> findex) {
      mForwardIndex = findex;
    }

    public void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException,
    IOException {
      doPost(req, res);
    }

    public void doPost(HttpServletRequest req, HttpServletResponse res)
    throws ServletException, IOException {
      sLogger.info("triggered servlet for fetching document by docno");

      if (mForwardIndex == null) {
        res.setContentType("text/html");

        PrintWriter out = res.getWriter();
        out.print("<html><head><title>Service Unavailable</title><head>\n");
        out.print("<body>\n");
        out.print("<h3>No document access is available!</h3>\n");
        out.print("</body></html>\n");
        out.close();
      }

      int docno = 0;
      try {
        if (req.getParameterValues(DOCNO) != null)
          docno = Integer.parseInt(req.getParameterValues(DOCNO)[0]);

        Indexable doc = mForwardIndex.getDocument(docno);

        if (doc != null) {
          sLogger.info("fetched: " + doc.getDocid() + " = docno " + docno);
          res.setContentType(doc.getDisplayContentType());

          PrintWriter out = res.getWriter();
          out.print(doc.getDisplayContent());
          out.close();
        } else {
          throw new Exception();
        }
      } catch (Exception e) {
        sLogger.info("trapped error fetching " + docno);
        res.setContentType("text/html");

        PrintWriter out = res.getWriter();
        out.print("<html><head><title>Invalid docno!</title><head>\n");
        out.print("<body>\n");
        out.print("<h1>Error!</h1>\n");
        out.print("<h3>Invalid doc: " + docno + "</h3>\n");
        out.print("</body></html>\n");
        out.close();
      }
    }

    public static String formatRequestURL(int docno) {
      return ACTION + "?" + DOCNO + "=" + new Integer(docno).toString();
    }
  }

}
TOP

Related Classes of ivory.server.RetrievalServer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.