Package org.apache.nutch.webapp.servlet

Source Code of org.apache.nutch.webapp.servlet.OpenSearchServlet

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.nutch.webapp.servlet;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.Hits;
import org.apache.nutch.searcher.NutchBean;
import org.apache.nutch.searcher.Query;
import org.apache.nutch.searcher.Summary;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

/**
* Present search results using A9's OpenSearch extensions to RSS, plus a few
* Nutch-specific extensions.
*/
public class OpenSearchServlet extends NutchHttpServlet {
  private static final int DEFAULT_NUMBER_OF_HITS = 10;

  private static final long serialVersionUID = 1L;

  private static final Map NS_MAP = new HashMap();

  static {
    NS_MAP.put("opensearch", "http://a9.com/-/spec/opensearchrss/1.0/");
    NS_MAP.put("nutch", "http://www.nutch.org/opensearchrss/1.0/");
  }

  private static final Set SKIP_DETAILS = new HashSet();
  static {
    SKIP_DETAILS.add("url"); // redundant with RSS link
    SKIP_DETAILS.add("title"); // redundant with RSS title
  }

  public void init(ServletConfig conf) throws ServletException {
    super.init(conf);
  }

  public void doGet(HttpServletRequest request, HttpServletResponse response)
      throws ServletException, IOException {

    NutchBean.LOG.info("query request from " + request.getRemoteAddr());

    // get parameters from request
    request.setCharacterEncoding("UTF-8");
    String queryString = request.getParameter("query");
    if (queryString == null) {
      queryString = "";
    }
    String urlQuery = URLEncoder.encode(queryString, "UTF-8");

    int start = 0; // first hit to display
    String startString = request.getParameter("start");
    if (startString != null) {
      start = Integer.parseInt(startString);
    }

    int hitsPerPage = DEFAULT_NUMBER_OF_HITS; // number of hits to display
    String hitsString = request.getParameter("hitsPerPage");
    if (hitsString != null) {
      hitsPerPage = Integer.parseInt(hitsString);
    }

    String sort = request.getParameter("sort");
    boolean reverse = sort != null
        && "true".equals(request.getParameter("reverse"));

    // De-Duplicate handling. Look for duplicates field and for how many
    // duplicates per results to return. Default duplicates field is 'site'
    // and duplicates per results default is '2'.
    String dedupField = request.getParameter("dedupField");
    if (dedupField == null || dedupField.length() == 0) {
      dedupField = "site";
    }
    int hitsPerDup = 2;
    String hitsPerDupString = request.getParameter("hitsPerDup");
    if (hitsPerDupString != null && hitsPerDupString.length() > 0) {
      hitsPerDup = Integer.parseInt(hitsPerDupString);
    } else {
      // If 'hitsPerSite' present, use that value.
      String hitsPerSiteString = request.getParameter("hitsPerSite");
      if (hitsPerSiteString != null && hitsPerSiteString.length() > 0) {
        hitsPerDup = Integer.parseInt(hitsPerSiteString);
      }
    }

    // Make up query string for use later drawing the 'rss' logo.
    String params = "&hitsPerPage="
        + hitsPerPage
        + (sort == null ? "" : "&sort=" + sort
            + (reverse ? "&reverse=true" : "")
            + (dedupField == null ? "" : "&dedupField=" + dedupField));

    Query query = Query.parse(queryString, getServiceLocator()
        .getConfiguration());
    NutchBean.LOG.info("query: " + queryString);

    // execute the query
    Hits hits;
    try {
      hits = getServiceLocator().getNutchBean().search(query,
          start + hitsPerPage, hitsPerDup, dedupField, sort, reverse);
    } catch (IOException e) {
      NutchBean.LOG.warn("Search Error", e);
      hits = new Hits(0, new Hit[0]);
    }

    NutchBean.LOG.info("total hits: " + hits.getTotal());

    // generate xml results
    int end = (int) Math.min(hits.getLength(), start + hitsPerPage);
    int length = end - start;

    Hit[] show = hits.getHits(start, end - start);
    HitDetails[] details = getServiceLocator().getNutchBean().getDetails(show);
    Summary[] summaries = getServiceLocator().getNutchBean().getSummary(
        details, query);

    String requestUrl = request.getRequestURL().toString();
    String base = requestUrl.substring(0, requestUrl.lastIndexOf('/'));

    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      factory.setNamespaceAware(true);
      Document doc = factory.newDocumentBuilder().newDocument();

      Element rss = addNode(doc, doc, "rss");
      addAttribute(doc, rss, "version", "2.0");
      addAttribute(doc, rss, "xmlns:opensearch", (String) NS_MAP
          .get("opensearch"));
      addAttribute(doc, rss, "xmlns:nutch", (String) NS_MAP.get("nutch"));

      Element channel = addNode(doc, rss, "channel");

      addNode(doc, channel, "title", "Nutch: " + queryString);
      addNode(doc, channel, "description", "Nutch search results for query: "
          + queryString);
      addNode(doc, channel, "link", base + "/search.jsp" + "?query=" + urlQuery
          + "&start=" + start + "&hitsPerDup=" + hitsPerDup + params);

      addNode(doc, channel, "opensearch", "totalResults", "" + hits.getTotal());
      addNode(doc, channel, "opensearch", "startIndex", "" + start);
      addNode(doc, channel, "opensearch", "itemsPerPage", "" + hitsPerPage);

      addNode(doc, channel, "nutch", "query", queryString);

      if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show
          || (!hits.totalIsExact() && (hits.getLength() > start + hitsPerPage))) {
        addNode(doc, channel, "nutch", "nextPage", requestUrl + "?query="
            + urlQuery + "&start=" + end + "&hitsPerDup=" + hitsPerDup + params);
      }

      if ((!hits.totalIsExact() && (hits.getLength() <= start + hitsPerPage))) {
        addNode(doc, channel, "nutch", "showAllHits", requestUrl + "?query="
            + urlQuery + "&hitsPerDup=" + 0 + params);
      }

      for (int i = 0; i < length; i++) {
        Hit hit = show[i];
        HitDetails detail = details[i];
        String title = detail.getValue("title");
        String url = detail.getValue("url");
        String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();

        if (title == null || title.equals("")) {
          // use url for docs w/o title
          title = url;
        }

        Element item = addNode(doc, channel, "item");

        addNode(doc, item, "title", title);
        addNode(doc, item, "description", summaries[i].toString());
        addNode(doc, item, "link", url);

        addNode(doc, item, "nutch", "site", hit.getDedupValue());

        addNode(doc, item, "nutch", "cache", base + "/cached.do?" + id);
        addNode(doc, item, "nutch", "explain", base + "/explain.do?" + id
            + "&query=" + urlQuery);

        if (hit.moreFromDupExcluded()) {
          addNode(doc, item, "nutch", "moreFromSite", requestUrl
              + "?query="
              + URLEncoder.encode("site:" + hit.getDedupValue() + " "
                  + queryString, "UTF-8") + "&hitsPerSite=" + 0 + params);
        }

        for (int j = 0; j < detail.getLength(); j++) { // add all from detail
          String field = detail.getField(j);
          if (!SKIP_DETAILS.contains(field)) {
            addNode(doc, item, "nutch", field, detail.getValue(j));
          }
        }
      }

      // dump DOM tree

      DOMSource source = new DOMSource(doc);
      TransformerFactory transFactory = TransformerFactory.newInstance();
      Transformer transformer = transFactory.newTransformer();
      transformer.setOutputProperty("indent", "yes");
      StreamResult result = new StreamResult(response.getOutputStream());
      response.setContentType("text/xml");
      transformer.transform(source, result);

    } catch (javax.xml.parsers.ParserConfigurationException e) {
      throw new ServletException(e);
    } catch (javax.xml.transform.TransformerException e) {
      throw new ServletException(e);
    }

  }

  private static Element addNode(Document doc, Node parent, String name) {
    Element child = doc.createElement(name);
    parent.appendChild(child);
    return child;
  }

  private static void addNode(Document doc, Node parent, String name,
      String text) {
    Element child = doc.createElement(name);
    child.appendChild(doc.createTextNode(text));
    parent.appendChild(child);
  }

  private static void addNode(Document doc, Node parent, String ns,
      String name, String text) {
    Element child = doc.createElementNS((String) NS_MAP.get(ns), ns + ":"
        + name);
    child.appendChild(doc.createTextNode(text));
    parent.appendChild(child);
  }

  private static void addAttribute(Document doc, Element node, String name,
      String value) {
    Attr attribute = doc.createAttribute(name);
    attribute.setValue(value);
    node.getAttributes().setNamedItem(attribute);
  }

}
TOP

Related Classes of org.apache.nutch.webapp.servlet.OpenSearchServlet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.