} else {
finalLastToken = BytesRef.deepCopyOf(lastTokenFragment);
}
assert finalLastToken.offset == 0;
CharsRef spare = new CharsRef();
// complete top-N
MinResult<Long> completions[] = null;
try {
// Because we store multiple models in one FST
// (1gram, 2gram, 3gram), we must restrict the
// search so that it only considers the current
// model. For highest order model, this is not
// necessary since all completions in the FST
// must be from this model, but for lower order
// models we have to filter out the higher order
// ones:
// Must do num+seen.size() for queue depth because we may
// reject up to seen.size() paths in acceptResult():
Util.TopNSearcher<Long> searcher = new Util.TopNSearcher<Long>(fst, num, num+seen.size(), weightComparator) {
BytesRef scratchBytes = new BytesRef();
@Override
protected void addIfCompetitive(Util.FSTPath<Long> path) {
if (path.arc.label != separator) {
//System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc);
super.addIfCompetitive(path);
} else {
//System.out.println(" prevent path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc);
}
}
@Override
protected boolean acceptResult(IntsRef input, Long output) {
Util.toBytesRef(input, scratchBytes);
finalLastToken.grow(finalLastToken.length + scratchBytes.length);
int lenSav = finalLastToken.length;
finalLastToken.append(scratchBytes);
//System.out.println(" accept? input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false));
boolean ret = seen.contains(finalLastToken) == false;
finalLastToken.length = lenSav;
return ret;
}
};
// since this search is initialized with a single start node
// it is okay to start with an empty input path here
searcher.addStartPaths(arc, prefixOutput, true, new IntsRef());
completions = searcher.search();
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
int prefixLength = token.length;
BytesRef suffix = new BytesRef(8);
//System.out.println(" " + completions.length + " completions");
nextCompletion:
for (MinResult<Long> completion : completions) {
token.length = prefixLength;
// append suffix
Util.toBytesRef(completion.input, suffix);
token.append(suffix);
//System.out.println(" completion " + token.utf8ToString());
// Skip this path if a higher-order model already
// saw/predicted its last token:
BytesRef lastToken = token;
for(int i=token.length-1;i>=0;i--) {
if (token.bytes[token.offset+i] == separator) {
assert token.length-i-1 > 0;
lastToken = new BytesRef(token.bytes, token.offset+i+1, token.length-i-1);
break;
}
}
if (seen.contains(lastToken)) {
//System.out.println(" skip dup " + lastToken.utf8ToString());
continue nextCompletion;
}
seen.add(BytesRef.deepCopyOf(lastToken));
spare.grow(token.length);
UnicodeUtil.UTF8toUTF16(token, spare);
LookupResult result = new LookupResult(spare.toString(), (long) (Long.MAX_VALUE * backoff * ((double) decodeWeight(completion.output)) / contextCount));
results.add(result);
assert results.size() == seen.size();
//System.out.println(" add result=" + result);
}
backoff *= ALPHA;