Package org.apache.nutch.webapp.controller

Source Code of org.apache.nutch.webapp.controller.ClusteringController

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.webapp.controller;

import java.io.IOException;

import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.nutch.clustering.ClusteringPresearchExtension;
import org.apache.nutch.clustering.Clusters;
import org.apache.nutch.clustering.HitsCluster;
import org.apache.nutch.clustering.OnlineClusterer;
import org.apache.nutch.clustering.OnlineClustererFactory;
import org.apache.nutch.plugin.PluginRuntimeException;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.Summary;
import org.apache.nutch.webapp.common.ServiceLocator;
import org.apache.nutch.webapp.common.Startable;
import org.apache.struts.tiles.ComponentContext;

public class ClusteringController extends NutchController implements Startable {

  public static final String REQ_ATTR_CLUSTERS = "clusters";

  static OnlineClusterer clusterer = null;

  public void nutchPerform(ComponentContext tileContext,
      HttpServletRequest request, HttpServletResponse response,
      ServletContext servletContext) throws ServletException, IOException {

    ServiceLocator locator = getServiceLocator(request);

    if (ClusteringPresearchExtension.isClusteringActive(locator)) {

      // display top N clusters and top Q documents inside them.
      int N = locator.getConfiguration().getInt(
          "extension.clustering.cluster-count", 10);
      int Q = locator.getConfiguration().getInt(
          "extension.clustering.cluster-top-documents-count", 3);
      int maxLabels = 2;

      HitDetails[] details = locator.getSearch().getDetails();
      Summary[] summaries = locator.getSearch().getSummaries();

      HitsCluster[] clusters = null;
      if (clusterer != null) {
        final long clusteringStart = System.currentTimeMillis();
        try {
          clusters = clusterer.clusterHits(details, Summary
              .toStrings(summaries));
          final long clusteringDuration = System.currentTimeMillis()
              - clusteringStart;
          LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");

        } catch (Exception e) {
          LOG.info("Could not do clustering???" + e);
          return;
        }
      }

      // set new limit if fever than N results
      N = Math.min(N, clusters.length);

      // set to request
      Clusters clusterResult = new Clusters(clusters, N, Q, maxLabels);
      request.setAttribute(REQ_ATTR_CLUSTERS, clusterResult);
    }
  }

  public void start(ServletContext servletContext) {
    ServiceLocator locator = getServiceLocator(servletContext);
    try {
      clusterer = new OnlineClustererFactory(locator.getConfiguration())
          .getOnlineClusterer();
    } catch (PluginRuntimeException e) {
      LOG.info("Could not initialize Clusterer, is the plugin enabled?");
      return;
    }
  }
}
TOP

Related Classes of org.apache.nutch.webapp.controller.ClusteringController

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.