*/
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
float accuracy, final CharsRef spare) throws IOException {
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
Terms terms = MultiFields.getTerms(ir, term.field());
if (terms == null) {
return Collections.emptyList();
}
FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
BytesRef queryTerm = new BytesRef(term.text());
BytesRef candidateTerm;
ScoreTerm st = new ScoreTerm();
BoostAttribute boostAtt =
e.attributes().addAttribute(BoostAttribute.class);
while ((candidateTerm = e.next()) != null) {
final float boost = boostAtt.getBoost();
// ignore uncompetitive hits
if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
continue;
// ignore exact match of the same term
if (queryTerm.bytesEquals(candidateTerm))
continue;
int df = e.docFreq();
// check docFreq if required
if (df <= docfreq)
continue;
final float score;
final String termAsString;
if (distance == INTERNAL_LEVENSHTEIN) {
// delay creating strings until the end
termAsString = null;
// undo FuzzyTermsEnum's scale factor for a real scaled lev score
score = boost / e.getScaleFactor() + e.getMinSimilarity();
} else {
UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
termAsString = spare.toString();
score = distance.getDistance(term.text(), termAsString);
}
if (score < accuracy)
continue;
// add new entry in PQ
st.term = BytesRef.deepCopyOf(candidateTerm);
st.boost = boost;
st.docfreq = df;
st.termAsString = termAsString;
st.score = score;
stQueue.offer(st);
// possibly drop entries from queue
st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
}
return stQueue;
}