Source Code of org.apache.nutch.searcher.LuceneQueryOptimizer$TimeExceeded

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.nutch.searcher;


import org.apache.lucene.search.*;
import org.apache.lucene.search.queries.PwaSortQuery;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.index.Term;
import org.apache.lucene.misc.ChainedFilter;
import org.apache.nutch.searcher.DistributedSearch;
import org.apache.nutch.global.Global;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;


import java.util.LinkedHashMap;
import java.util.Map;
import java.util.ArrayList;


import java.io.IOException;




/** Utility which converts certain query clauses into {@link QueryFilter}s and
 * caches these.  Only required clauses whose boost is zero are converted to
 * cached filters.  Range queries are converted to range filters.  This
 * accelerates query constraints like date, language, document format, etc.,
 * which do not affect ranking but might otherwise slow search considerably. */
class LuceneQueryOptimizer {


  /** Logger for this class, obtained from commons-logging. */
  public static final Log LOG = LogFactory.getLog(LuceneQueryOptimizer.class);
  
  // This thread provides a pseudo-clock service to all searching
  // threads, so that they can count elapsed time with less overhead than
  // repeatedly calling System.currentTimeMillis.
  // It stays null unless the constructor finds a positive tick count in the
  // configuration and calls initTimerThread.
  private TimerThread timerThread = null;


  private static class TimerThread extends Thread {
    private int tick;
    // NOTE: we can avoid explicit synchronization here for several reasons:
    // * updates to 32-bit-sized variables are atomic
    // * only single thread modifies this value
    // * use of volatile keyword ensures that it does not reside in
    //   a register, but in main memory (so that changes are visible to
    //   other threads).
    // * visibility of changes does not need to be instantanous, we can
    //   afford losing a tick or two.
    //
    // See section 17 of the Java Language Specification for details.
    public volatile int timeCounter = 0;


    boolean running = true;


    public TimerThread(int tick) {
      super("LQO timer thread");
      this.tick = tick;
      this.setDaemon(true);
    }


    public void run() {
      while(running) {
        timeCounter++;
        try {
          Thread.sleep(tick);
        } 
        catch (InterruptedException ie) 
        {
          // ignore
        };
      }
    }
  }


  private void initTimerThread(int p) {
    if (timerThread == null || !timerThread.isAlive()) {
      timerThread = new TimerThread(p);
      timerThread.start();
    }
  }
  
  private static class TimeExceeded extends RuntimeException {
    public long maxTime;
    private int maxDoc;
    
    public TimeExceeded(long maxTime, int maxDoc) {
      super("Exceeded search time: " + maxTime + " ms.");
      this.maxTime = maxTime;
      this.maxDoc = maxDoc;
    }
  }


  
  private static class LimitedCollector extends TopDocCollector {
    private int maxHits;
    private int maxTicks;
    private int startTicks;
    private TimerThread timer;
    private int curTicks;


    public LimitedCollector(int numHits, int maxHits, int maxTicks, TimerThread timer, boolean reverse) {
      super(numHits, reverse);
      this.maxHits = maxHits;
      this.maxTicks = maxTicks;
      if (timer != null) {
      this.timer = timer;
        this.startTicks = timer.timeCounter;
      }
    }


    public void collect(int doc, float score) {
      if (maxHits > 0 && getTotalHits() >= maxHits) {
        throw new LimitExceeded(doc);
      }
      if (timer != null) {
        curTicks = timer.timeCounter;
        // overflow check
        if (curTicks < startTicks) curTicks += Integer.MAX_VALUE;
        if (curTicks - startTicks > maxTicks) {
          throw new TimeExceeded(timer.tick * (curTicks - startTicks), doc);
        }
      }
      super.collect(doc, score);
    }
  }  
  
  private static class LimitExceeded extends RuntimeException {
    private int maxDoc;
    public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; }    
  }
  
  
  // Value of "searcher.filter.cache.threshold" (default 0.05), read in the constructor.
  private float threshold;
  // Hit-count limit read from Global.MAX_FULLTEXT_MATCHES_RANKED (default -1);
  // values <= 0 mean no hit-count limit.
  private int maxFulltextMatchesRanked;
  // Tick length handed to the timer thread, which sleeps this many
  // milliseconds per tick. Read from "searcher.max.time.tick_length"
  // (default 200); forced to 1000 when timeoutResponse is positive.
  private int tickLength;
  // Maximum number of ticks a search may last. Read from
  // "searcher.max.time.tick_count" (default -1); replaced by timeoutResponse
  // when that is positive. The timer thread is only started when this is > 0.
  private int maxTickCount;  
  // Value of Global.TIMEOUT_INDEX_SERVERS_RESPONSE (default -1).
  private int timeoutResponse;
  // NOTE(review): not read or written anywhere in the visible class -- confirm whether it is still needed.
  private String cacheType;
  
  
  /**
   * Construct an optimizer that caches and uses filters for required clauses
   * whose boost is zero.
   * 
   * @param cacheSize
   *          the number of QueryFilters to cache
   * @param threshold
   *          the fraction of documents which must contain a term
   */
  public LuceneQueryOptimizer(Configuration conf) {
    final int cacheSize = conf.getInt("searcher.filter.cache.size", 16);
    this.threshold = conf.getFloat("searcher.filter.cache.threshold", 0.05f);       
    this.tickLength = conf.getInt("searcher.max.time.tick_length", 200);
    this.maxTickCount = conf.getInt("searcher.max.time.tick_count", -1);
    this.maxFulltextMatchesRanked = conf.getInt(Global.MAX_FULLTEXT_MATCHES_RANKED, -1);
    this.timeoutResponse = conf.getInt(Global.TIMEOUT_INDEX_SERVERS_RESPONSE, -1);
    if (timeoutResponse>0) { 
      this.maxTickCount=timeoutResponse;
      this.tickLength=1000;
    }       
    if (this.maxTickCount > 0) {
      initTimerThread(this.tickLength);
    }       
  }


  public TopDocs optimize(BooleanQuery original, Searcher searcher, int numHits, String sortField, boolean reverse) throws IOException {
    BooleanQuery query = new BooleanQuery(); 
    Filter filter = null;


    BooleanClause[] clauses = original.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      BooleanClause c = clauses[i];
      if (c.isRequired() && c.getQuery().getBoost() == 0.0f) {   // boost is zero


          if (c.getQuery() instanceof TermQuery     // TermQuery
              && (searcher.docFreq(((TermQuery)c.getQuery()).getTerm()) / (float)searcher.maxDoc()) < threshold) { // beneath threshold
            query.add(c);                          
          }          
          else if (c.getQuery() instanceof RangeQuery) { // RangeQuery        
            query.add(c);             
          }       
      }
      else {
        query.add(c);                               // query it
      }
    }
    
    query.setFunctions(original.getFunctions());  
    if (sortField!=null) { // to sort result by sortField
      query.add(new PwaSortQuery(sortField,reverse), BooleanClause.Occur.MUST); 
    }
    
    // print query
    LOG.info("Query:"+query.toString());   
    
    // no hit limit
    if (this.maxFulltextMatchesRanked <= 0 && timerThread == null)  {
      return searcher.search(query, filter, numHits);
    }


    // hits limited in time or in count -- use a LimitedCollector
    LimitedCollector collector = new LimitedCollector(numHits, maxFulltextMatchesRanked, maxTickCount, timerThread, (sortField!=null) ? !reverse : reverse);
    LimitExceeded exceeded = null;
    TimeExceeded timeExceeded = null;
    try {
      searcher.search(query, filter, collector);
    } 
    catch (LimitExceeded le) {
      exceeded = le;
    }
    catch (TimeExceeded te) {
      timeExceeded = te;
    }
    TopDocs results = collector.topDocs();
    if (exceeded != null) {                     // limit was exceeded
      results.totalHits = (int)(results.totalHits*(searcher.maxDoc()/(float)exceeded.maxDoc)); // estimate totalHits
    } 
    else if (timeExceeded != null) {
      results.totalHits = (int)(results.totalHits * (searcher.maxDoc()/(float)timeExceeded.maxDoc));
    }
    return results;              
  }
  


  /** 
   * @param numHits number of top results
   * @param maxFulltextMatchesRanked number of matched documents for ranking
   */
  public TopDocs optimize(BooleanQuery original, Searcher searcher, int numHits, int maxFulltextMatchesRanked, String sortField, boolean reverse) throws IOException {
    if (maxFulltextMatchesRanked!=NutchBean.MATCHED_DOCS_CONST_IGNORE) {
      this.maxFulltextMatchesRanked=maxFulltextMatchesRanked;
    }
    return optimize(original, searcher, numHits, sortField, reverse);
  }


}
Source Code of org.apache.nutch.searcher.LuceneQueryOptimizer$TimeExceeded

Related Classes of org.apache.nutch.searcher.LuceneQueryOptimizer$TimeExceeded