Package org.apache.solr.handler.component

Source Code of org.apache.solr.handler.component.SpellCheckComponent$SpellCheckerListener

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.handler.component;

import java.io.IOException;
import java.io.StringReader;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.*;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A SearchComponent implementation which provides support for spell checking
* and suggestions using the Lucene contributed SpellChecker.
*
* <p>
* Refer to http://wiki.apache.org/solr/SpellCheckComponent for more details
* </p>
*
* @since solr 1.3
*/
public class SpellCheckComponent extends SearchComponent implements SolrCoreAware, SpellingParams {
  // Logger used by this component and by the nested SpellCheckerListener class.
  private static final Logger LOG = LoggerFactory.getLogger(SpellCheckComponent.class);

  // Value used for the SPELLCHECK_ONLY_MORE_POPULAR request parameter when the request does not supply it.
  public static final boolean DEFAULT_ONLY_MORE_POPULAR = false;

  /**
   * Base name for all spell checker query parameters. This name is also used to
   * register this component with SearchHandler.
   */
  public static final String COMPONENT_NAME = "spellcheck";

  // Configuration handed to init(NamedList); read again by inform(SolrCore) to create the spell checkers.
  @SuppressWarnings("unchecked")
  protected NamedList initParams;
 

  /**
   * Key is the dictionary, value is the SpellChecker for that dictionary name
   */
  protected Map<String, SolrSpellChecker> spellCheckers = new ConcurrentHashMap<String, SolrSpellChecker>();

  // Turns the query string into tokens when no SPELLCHECK_Q parameter is given; assigned in inform(SolrCore).
  protected QueryConverter queryConverter;

  @Override
  @SuppressWarnings("unchecked")
  public void init(NamedList args) {
    super.init(args);
    this.initParams = args;
  }

  @Override
  @SuppressWarnings("unchecked")
  public void prepare(ResponseBuilder rb) throws IOException {

    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
      return;
    }
    SolrSpellChecker spellChecker = getSpellChecker(params);
    if (params.getBool(SPELLCHECK_BUILD, false)) {
      spellChecker.build(rb.req.getCore(), rb.req.getSearcher());
      rb.rsp.add("command", "build");
    } else if (params.getBool(SPELLCHECK_RELOAD, false)) {
      spellChecker.reload(rb.req.getCore(), rb.req.getSearcher());
      rb.rsp.add("command", "reload");
    }
  }

  @Override
  @SuppressWarnings("unchecked")
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
      return;
    }
    boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
    String q = params.get(SPELLCHECK_Q);
    SolrSpellChecker spellChecker = getSpellChecker(params);
    Collection<Token> tokens = null;
    if (q != null) {
      //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
      tokens = getTokens(q, spellChecker.getQueryAnalyzer());
    } else {
      q = rb.getQueryString();
      if (q == null) {
        q = params.get(CommonParams.Q);
      }
      tokens = queryConverter.convert(q);
    }
    if (tokens != null && tokens.isEmpty() == false) {
      if (spellChecker != null) {
        int count = params.getInt(SPELLCHECK_COUNT, 1);
        boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR,
            DEFAULT_ONLY_MORE_POPULAR);
        boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS,
            false);
        NamedList response = new SimpleOrderedMap();
        IndexReader reader = rb.req.getSearcher().getReader();
        boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
        float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
        SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);
        SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
                accuracy, customParams);                      
        SpellingResult spellingResult = spellChecker.getSuggestions(options);
        if (spellingResult != null) {
          NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate);         
          if (collate) {           
            addCollationsToResponse(params, spellingResult, rb, q, suggestions);
          }
          response.add("suggestions", suggestions);
          rb.rsp.add("spellcheck", response);
        }

      } else {
        throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
            "Specified dictionary does not exist.");
      }
    }
  }
 
  @SuppressWarnings("unchecked")
  protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
      NamedList response) {
    int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
    int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
    int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
    boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
    boolean shard = params.getBool(ShardParams.IS_SHARD, false);

    SpellCheckCollator collator = new SpellCheckCollator();
    List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations);
    //by sorting here we guarantee a non-distributed request returns all
    //results in the same order as a distributed request would,
    //even in cases when the internal rank is the same.
    Collections.sort(collations);
   
    for (SpellCheckCollation collation : collations) {
      if (collationExtendedResults) {
        NamedList extendedResult = new NamedList();
        extendedResult.add("collationQuery", collation.getCollationQuery());
        extendedResult.add("hits", collation.getHits());
        extendedResult.add("misspellingsAndCorrections", collation.getMisspellingsAndCorrections());
        if(maxCollationTries>0 && shard)
        {
          extendedResult.add("collationInternalRank", collation.getInternalRank());
        }
        response.add("collation", extendedResult);
      } else {
        response.add("collation", collation.getCollationQuery());
        if(maxCollationTries>0 && shard)
        {
          response.add("collationInternalRank", collation.getInternalRank());
        }
      }
    }
  }

  /**
   * For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY, add
   * XXXX=YYYY as a param to the custom param list
   * @param params The original SolrParams
   * @return The new Params
   */
  protected SolrParams getCustomParams(String dictionary, SolrParams params, boolean shardRequest) {
    ModifiableSolrParams result = new ModifiableSolrParams();
    Iterator<String> iter = params.getParameterNamesIterator();
    String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
    while (iter.hasNext()){
      String nxt = iter.next();
      if (nxt.startsWith(prefix)){
        result.add(nxt.substring(prefix.length()), params.getParams(nxt));
      }
    }
    if(shardRequest)
    {
      result.add(ShardParams.IS_SHARD, "true");
    }
    return result;
  }


  @Override
  public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
    SolrParams params = rb.req.getParams();
    // Turn on spellcheck only only when retrieving fields
    if (!params.getBool(COMPONENT_NAME, false)) return;
    if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
      // fetch at least 5 suggestions from each shard
      int count = sreq.params.getInt(SPELLCHECK_COUNT, 1);
      if (count < 5count = 5;
      sreq.params.set(SPELLCHECK_COUNT, count);
      sreq.params.set("spellcheck", "true");
    } else  {
      sreq.params.set("spellcheck", "false");
    }
  }

  @Override
  @SuppressWarnings({"unchecked", "deprecation"})
  public void finishStage(ResponseBuilder rb) {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || rb.stage != ResponseBuilder.STAGE_GET_FIELDS)
      return;

    boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
    boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
    boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
    int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
    int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);

    String origQuery = params.get(SPELLCHECK_Q);
    if (origQuery == null) {
      origQuery = rb.getQueryString();
      if (origQuery == null) {
        origQuery = params.get(CommonParams.Q);
      }
    }

    int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
    float min = 0.5f;
    StringDistance sd = null;
    int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
    SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
    if (checker instanceof AbstractLuceneSpellChecker) {
      AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
      min = spellChecker.getAccuracy();
      sd = spellChecker.getStringDistance();
    }
    if (sd == null)
      sd = new LevensteinDistance();

    Collection<Token> tokens = null;
    try {
      tokens = getTokens(origQuery, checker.getQueryAnalyzer());
    } catch (IOException e) {
      LOG.error("Could not get tokens (this should never happen)", e);
    }

    // original token -> corresponding Suggestion object (keep track of start,end)
    Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
    // original token string -> summed up frequency
    Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
    // original token string -> # of shards reporting it as misspelled
    Map<String, Integer> origVsShards = new HashMap<String, Integer>();
    // original token string -> set of alternatives
    // must preserve order because collation algorithm can only work in-order
    Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
    // alternative string -> corresponding SuggestWord object
    Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
    Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
   
    int totalNumberShardResponses = 0;
    for (ShardRequest sreq : rb.finished) {
      for (ShardResponse srsp : sreq.responses) {
        NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
        LOG.info(srsp.getShard() + " " + nl);
        if (nl != null) {
          totalNumberShardResponses++;
          SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
          for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
            origVsSuggestion.put(suggestion.getToken(), suggestion);
            HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
            if (suggested == null) {
              suggested = new HashSet<String>();
              origVsSuggested.put(suggestion.getToken(), suggested);
            }

            // sum up original frequency         
            int origFreq = 0;
            Integer o = origVsFreq.get(suggestion.getToken());
            if (o != nullorigFreq += o;
            origFreq += suggestion.getOriginalFrequency();
            origVsFreq.put(suggestion.getToken(), origFreq);
           
            //# shards reporting
            Integer origShards = origVsShards.get(suggestion.getToken());
            if(origShards==null) {
              origVsShards.put(suggestion.getToken(), 1);
            } else {
              origVsShards.put(suggestion.getToken(), ++origShards);
            }           

            // find best suggestions
            for (int i = 0; i < suggestion.getNumFound(); i++) {
              String alternative = suggestion.getAlternatives().get(i);
              suggested.add(alternative);
              SuggestWord sug = suggestedVsWord.get(alternative);
              if (sug == null)  {
                sug = new SuggestWord();
                suggestedVsWord.put(alternative, sug);
              }
              sug.string = alternative;
              // alternative frequency is present only for extendedResults=true
              if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
                Integer freq = suggestion.getAlternativeFrequencies().get(i);
                if (freq != null) sug.freq += freq;
              }
            }
          }
          NamedList suggestions = (NamedList) nl.get("suggestions");
          if(suggestions != null) {
            List<Object> collationList = suggestions.getAll("collation");
            List<Object> collationRankList = suggestions.getAll("collationInternalRank");
            int i=0;
            if(collationList != null) {
              for(Object o : collationList)
              {
                if(o instanceof String)
                {
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) o);
                  if(collationRankList!= null && collationRankList.size()>0)
                  {
                    coll.setInternalRank((Integer) collationRankList.get(i));
                    i++;
                  }
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                } else
                {
                  NamedList expandedCollation = (NamedList) o;                 
                  SpellCheckCollation coll = new SpellCheckCollation();
                  coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
                  coll.setHits((Integer) expandedCollation.get("hits"));
                  if(maxCollationTries>0)
                  {
                    coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
                  }
                  coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
                  SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
                  if(priorColl != null)
                  {
                    coll.setHits(coll.getHits() + priorColl.getHits());
                    coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
                  }
                  collations.put(coll.getCollationQuery(), coll);
                }
              }
            }
          }
        }
      }
    }

    // all shard responses have been collected
    // create token and get top suggestions
    SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
    for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
      String original = entry.getKey();
     
      //Only use this suggestion if all shards reported it as misspelled.
      Integer numShards = origVsShards.get(original);
      if(numShards<totalNumberShardResponses) {
        continue;
      }
     
      HashSet<String> suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
        }
      }

      // create token
      SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());

      // get top 'count' suggestions out of 'sugQueue.size()' candidates
      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
      // skip the first sugQueue.size() - count elements
      for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
      // now collect the top 'count' responses
      for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--)  {
        suggestions[k] = sugQueue.pop();
      }

      if (extendedResults) {
        Integer o = origVsFreq.get(original);
        if (o != null) result.addFrequency(token, o);
        for (SuggestWord word : suggestions)
          result.add(token, word.string, word.freq);
      } else {
        List<String> words = new ArrayList<String>(sugQueue.size());
        for (SuggestWord word : suggestions) words.add(word.string);
        result.add(token, words);
      }
    }
   
    NamedList response = new SimpleOrderedMap();
    NamedList suggestions = toNamedList(false, result, origQuery, extendedResults, collate);
    if (collate) {
      SpellCheckCollation[] sortedCollations = collations.values().toArray(new SpellCheckCollation[collations.size()]);
      Arrays.sort(sortedCollations);
      int i = 0;
      while (i < maxCollations && i < sortedCollations.length) {
        SpellCheckCollation collation = sortedCollations[i];
        i++;
        if (collationExtendedResults) {
          NamedList extendedResult = new NamedList();
          extendedResult.add("collationQuery", collation.getCollationQuery());
          extendedResult.add("hits", collation.getHits());
          extendedResult.add("misspellingsAndCorrections", collation
              .getMisspellingsAndCorrections());
          suggestions.add("collation", extendedResult);
        } else {
          suggestions.add("collation", collation.getCollationQuery());
        }
      }
    }
   
    response.add("suggestions", suggestions);
    rb.rsp.add("spellcheck", response);
  }

  private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
    TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
    ts.reset();
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
   
    while (ts.incrementToken()){
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    ts.end();
    ts.close();
    return result;
  }

  protected SolrSpellChecker getSpellChecker(SolrParams params) {
    return spellCheckers.get(getDictionaryName(params));
  }

  private String getDictionaryName(SolrParams params) {
    String dictName = params.get(SPELLCHECK_DICT);
    if (dictName == null) {
      dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
    }
    return dictName;
  }

  /**
   * Looks up a spell checker by dictionary name.
   *
   * @param name the dictionary name the spell checker was registered under
   * @return the spellchecker registered to a given name, or null when no spell
   *         checker is registered under it
   */
  public SolrSpellChecker getSpellChecker(String name) {
    return spellCheckers.get(name);
  }

  protected NamedList toNamedList(boolean shardRequest, SpellingResult spellingResult, String origQuery, boolean extendedResults, boolean collate) {
    NamedList result = new NamedList();
    Map<Token, LinkedHashMap<String, Integer>> suggestions = spellingResult.getSuggestions();
    boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo();
    boolean isCorrectlySpelled = false;
   
    int numSuggestions = 0;
    for(LinkedHashMap<String, Integer> theSuggestion : suggestions.values())
    {
      if(theSuggestion.size()>0)
      {
        numSuggestions++;
      }
    }
   
    // will be flipped to false if any of the suggestions are not in the index and hasFreqInfo is true
    if(numSuggestions > 0) {
      isCorrectlySpelled = true;
    }
   
    for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
      Token inputToken = entry.getKey();
      Map<String, Integer> theSuggestions = entry.getValue();
      if (theSuggestions != null && (theSuggestions.size()>0 || shardRequest)) {
        SimpleOrderedMap suggestionList = new SimpleOrderedMap();
        suggestionList.add("numFound", theSuggestions.size());
        suggestionList.add("startOffset", inputToken.startOffset());
        suggestionList.add("endOffset", inputToken.endOffset());

        // Logical structure of normal (non-extended) results:
        // "suggestion":["alt1","alt2"]
        //
        // Logical structure of the extended results:
        // "suggestion":[
        //     {"word":"alt1","freq":7},
        //     {"word":"alt2","freq":4}
        // ]
        if (extendedResults && hasFreqInfo) {
          suggestionList.add("origFreq", spellingResult.getTokenFrequency(inputToken));

          ArrayList<SimpleOrderedMap> sugs = new ArrayList<SimpleOrderedMap>();
          suggestionList.add("suggestion", sugs);
          for (Map.Entry<String, Integer> suggEntry : theSuggestions.entrySet()) {
            SimpleOrderedMap sugEntry = new SimpleOrderedMap();
            sugEntry.add("word",suggEntry.getKey());
            sugEntry.add("freq",suggEntry.getValue());
            sugs.add(sugEntry);
          }
        } else {
          suggestionList.add("suggestion", theSuggestions.keySet());
        }

        if (hasFreqInfo) {
          isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
        }
        result.add(new String(inputToken.buffer(), 0, inputToken.length()), suggestionList);
      }
    }
    if (hasFreqInfo) {
      result.add("correctlySpelled", isCorrectlySpelled);
    } else if(extendedResults && suggestions.size() == 0) { // if the word is misspelled, its added to suggestions with freqinfo
      result.add("correctlySpelled", true);
    }
    return result;
  }

  public void inform(SolrCore core) {
    if (initParams != null) {
      LOG.info("Initializing spell checkers");
      boolean hasDefault = false;
      for (int i = 0; i < initParams.size(); i++) {
        if (initParams.getName(i).equals("spellchecker")) {
          NamedList spellchecker = (NamedList) initParams.getVal(i);
          String className = (String) spellchecker.get("classname");
          if (className == null)
            className = IndexBasedSpellChecker.class.getName();
          SolrResourceLoader loader = core.getResourceLoader();
          SolrSpellChecker checker = (SolrSpellChecker) loader.newInstance(className);
          if (checker != null) {
            String dictionary = checker.init(spellchecker, core);
            if (dictionary != null) {
              boolean isDefault = dictionary.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME);
              if (isDefault == true && hasDefault == false){
                hasDefault = true;
              } else if (isDefault == true && hasDefault == true){
                throw new RuntimeException("More than one dictionary is missing name.");
              }
              spellCheckers.put(dictionary, checker);
            } else {
              if (hasDefault == false){
                spellCheckers.put(SolrSpellChecker.DEFAULT_DICTIONARY_NAME, checker);
                hasDefault = true;
              } else {
                throw new RuntimeException("More than one dictionary is missing name.");
              }
            }
            // Register event listeners for this SpellChecker
            core.registerFirstSearcherListener(new SpellCheckerListener(core, checker, false, false));
            boolean buildOnCommit = Boolean.parseBoolean((String) spellchecker.get("buildOnCommit"));
            boolean buildOnOptimize = Boolean.parseBoolean((String) spellchecker.get("buildOnOptimize"));
            if (buildOnCommit || buildOnOptimize)   {
              LOG.info("Registering newSearcher listener for spellchecker: " + checker.getDictionaryName());
              core.registerNewSearcherListener(new SpellCheckerListener(core, checker, buildOnCommit, buildOnOptimize));
            }
          } else {
            throw new RuntimeException("Can't load spell checker: " + className);
          }
        }
     }

      Map<String, QueryConverter> queryConverters = new HashMap<String, QueryConverter>();
      core.initPlugins(queryConverters,QueryConverter.class);

      //ensure that there is at least one query converter defined
      if (queryConverters.size() == 0) {
        LOG.warn("No queryConverter defined, using default converter");
        queryConverters.put("queryConverter", new SpellingQueryConverter());
      }

      //there should only be one
      if (queryConverters.size() == 1) {
        queryConverter = queryConverters.values().iterator().next();
        IndexSchema schema = core.getSchema();
        String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType");
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion)
                : fieldType.getQueryAnalyzer();
        //TODO: There's got to be a better way!  Where's Spring when you need it?
        queryConverter.setAnalyzer(analyzer);
      }
    }
  }

  private static class SpellCheckerListener implements SolrEventListener {
    private final SolrCore core;
    private final SolrSpellChecker checker;
    private final boolean buildOnCommit;
    private final boolean buildOnOptimize;

    public SpellCheckerListener(SolrCore core, SolrSpellChecker checker, boolean buildOnCommit, boolean buildOnOptimize) {
      this.core = core;
      this.checker = checker;
      this.buildOnCommit = buildOnCommit;
      this.buildOnOptimize = buildOnOptimize;
    }

    public void init(NamedList args) {
    }

    public void newSearcher(SolrIndexSearcher newSearcher,
                            SolrIndexSearcher currentSearcher) {
      if (currentSearcher == null) {
        // firstSearcher event
        try {
          LOG.info("Loading spell index for spellchecker: "
                  + checker.getDictionaryName());
          checker.reload(core, newSearcher);
        } catch (IOException e) {
          log.error( "Exception in reloading spell check index for spellchecker: " + checker.getDictionaryName(), e);
        }
      } else {
        // newSearcher event
        if (buildOnCommit)  {
          buildSpellIndex(newSearcher);
        } else if (buildOnOptimize) {
          if (newSearcher.getIndexReader().getSequentialSubReaders().length == 1)  {
            buildSpellIndex(newSearcher);
          } else  {
            LOG.info("Index is not optimized therefore skipping building spell check index for: " + checker.getDictionaryName());
          }
        }
      }

    }

    private void buildSpellIndex(SolrIndexSearcher newSearcher) {
      try {
        LOG.info("Building spell index for spell checker: " + checker.getDictionaryName());
        checker.build(core, newSearcher);
      } catch (Exception e) {
        log.error(
                "Exception in building spell check index for spellchecker: " + checker.getDictionaryName(), e);
      }
    }

    public void postCommit() {
    }
  }

  /**
   * Exposes the registered spell checkers.
   *
   * @return an unmodifiable view of the map from dictionary name to spell checker
   */
  public Map<String, SolrSpellChecker> getSpellCheckers() {
    return Collections.unmodifiableMap(spellCheckers);
  }

  // ///////////////////////////////////////////
  // / SolrInfoMBean
  // //////////////////////////////////////////

  /**
   * @return a short, fixed description of this component
   */
  @Override
  public String getDescription() {
    return "A Spell Checker component";
  }

  /**
   * @return the fixed revision keyword string recorded in this source file
   */
  @Override
  public String getVersion() {
    return "$Revision: 1201265 $";
  }

  /**
   * @return the fixed id keyword string recorded in this source file
   */
  @Override
  public String getSourceId() {
    return "$Id: SpellCheckComponent.java 1201265 2011-11-12 14:09:28Z mikemccand $";
  }

  /**
   * @return the fixed repository URL keyword string recorded in this source file
   */
  @Override
  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_3_5/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java $";
  }

}
TOP

Related Classes of org.apache.solr.handler.component.SpellCheckComponent$SpellCheckerListener

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.