}
}
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
float min = 0.5f;
StringDistance sd = null;
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
SolrSpellChecker checker = getSpellChecker(rb.req.getParams());
if (checker instanceof AbstractLuceneSpellChecker) {
AbstractLuceneSpellChecker spellChecker = (AbstractLuceneSpellChecker) checker;
min = spellChecker.getAccuracy();
sd = spellChecker.getStringDistance();
}
if (sd == null)
sd = new LevensteinDistance();
Collection<Token> tokens = null;
try {
tokens = getTokens(origQuery, checker.getQueryAnalyzer());
} catch (IOException e) {
LOG.error("Could not get tokens (this should never happen)", e);
}
// original token -> corresponding Suggestion object (keep track of start,end)
Map<String, SpellCheckResponse.Suggestion> origVsSuggestion = new HashMap<String, SpellCheckResponse.Suggestion>();
// original token string -> summed up frequency
Map<String, Integer> origVsFreq = new HashMap<String, Integer>();
// original token string -> # of shards reporting it as misspelled
Map<String, Integer> origVsShards = new HashMap<String, Integer>();
// original token string -> set of alternatives
// must preserve order because collation algorithm can only work in-order
Map<String, HashSet<String>> origVsSuggested = new LinkedHashMap<String, HashSet<String>>();
// alternative string -> corresponding SuggestWord object
Map<String, SuggestWord> suggestedVsWord = new HashMap<String, SuggestWord>();
Map<String, SpellCheckCollation> collations = new HashMap<String, SpellCheckCollation>();
int totalNumberShardResponses = 0;
for (ShardRequest sreq : rb.finished) {
for (ShardResponse srsp : sreq.responses) {
NamedList nl = (NamedList) srsp.getSolrResponse().getResponse().get("spellcheck");
LOG.info(srsp.getShard() + " " + nl);
if (nl != null) {
totalNumberShardResponses++;
SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
origVsSuggestion.put(suggestion.getToken(), suggestion);
HashSet<String> suggested = origVsSuggested.get(suggestion.getToken());
if (suggested == null) {
suggested = new HashSet<String>();
origVsSuggested.put(suggestion.getToken(), suggested);
}
// sum up original frequency
int origFreq = 0;
Integer o = origVsFreq.get(suggestion.getToken());
if (o != null) origFreq += o;
origFreq += suggestion.getOriginalFrequency();
origVsFreq.put(suggestion.getToken(), origFreq);
// count the number of shards reporting this token as misspelled
Integer origShards = origVsShards.get(suggestion.getToken());
if(origShards==null) {
origVsShards.put(suggestion.getToken(), 1);
} else {
origVsShards.put(suggestion.getToken(), ++origShards);
}
// find best suggestions
for (int i = 0; i < suggestion.getNumFound(); i++) {
String alternative = suggestion.getAlternatives().get(i);
suggested.add(alternative);
SuggestWord sug = suggestedVsWord.get(alternative);
if (sug == null) {
sug = new SuggestWord();
suggestedVsWord.put(alternative, sug);
}
sug.string = alternative;
// alternative frequency is present only for extendedResults=true
if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
Integer freq = suggestion.getAlternativeFrequencies().get(i);
if (freq != null) sug.freq += freq;
}
}
}
NamedList suggestions = (NamedList) nl.get("suggestions");
if(suggestions != null) {
List<Object> collationList = suggestions.getAll("collation");
List<Object> collationRankList = suggestions.getAll("collationInternalRank");
int i=0;
if(collationList != null) {
for(Object o : collationList)
{
if(o instanceof String)
{
SpellCheckCollation coll = new SpellCheckCollation();
coll.setCollationQuery((String) o);
if(collationRankList!= null && collationRankList.size()>0)
{
coll.setInternalRank((Integer) collationRankList.get(i));
i++;
}
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
if(priorColl != null)
{
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
}
collations.put(coll.getCollationQuery(), coll);
} else
{
NamedList expandedCollation = (NamedList) o;
SpellCheckCollation coll = new SpellCheckCollation();
coll.setCollationQuery((String) expandedCollation.get("collationQuery"));
coll.setHits((Integer) expandedCollation.get("hits"));
if(maxCollationTries>0)
{
coll.setInternalRank((Integer) expandedCollation.get("collationInternalRank"));
}
coll.setMisspellingsAndCorrections((NamedList) expandedCollation.get("misspellingsAndCorrections"));
SpellCheckCollation priorColl = collations.get(coll.getCollationQuery());
if(priorColl != null)
{
coll.setHits(coll.getHits() + priorColl.getHits());
coll.setInternalRank(Math.max(coll.getInternalRank(),priorColl.getInternalRank()));
}
collations.put(coll.getCollationQuery(), coll);
}
}
}
}
}
}
}
// all shard responses have been collected
// create token and get top suggestions
SpellingResult result = new SpellingResult(tokens); //todo: investigate, why does it need tokens beforehand?
for (Map.Entry<String, HashSet<String>> entry : origVsSuggested.entrySet()) {
String original = entry.getKey();
//Only use this suggestion if all shards reported it as misspelled.
Integer numShards = origVsShards.get(original);
if(numShards<totalNumberShardResponses) {
continue;
}
HashSet<String> suggested = entry.getValue();
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
for (String suggestion : suggested) {
SuggestWord sug = suggestedVsWord.get(suggestion);
sug.score = sd.getDistance(original, sug.string);
if (sug.score < min) continue;
sugQueue.insertWithOverflow(sug);
if (sugQueue.size() == numSug) {
// once the queue is full, use the score at the top of the queue as the new minimum
min = sugQueue.top().score;