final BytesRef utf8Key = new BytesRef(key);
try {
Automaton lookupAutomaton = toLookupAutomaton(key);
final CharsRef spare = new CharsRef();
//System.out.println(" now intersect exactFirst=" + exactFirst);
// Intersect automaton w/ suggest wFST and get all
// prefix starting nodes & their outputs:
//final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst);
//System.out.println(" prefixPaths: " + prefixPaths.size());
BytesReader bytesReader = fst.getBytesReader();
FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<Pair<Long,BytesRef>>();
final List<LookupResult> results = new ArrayList<LookupResult>();
List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(lookupAutomaton, fst);
if (exactFirst) {
int count = 0;
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
// This node has END_BYTE arc leaving, meaning it's an
// "exact" match:
count++;
}
}
// Searcher just to find the single exact only
// match, if present:
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator);
// NOTE: we could almost get away with only using
// the first start node. The only catch is if
// maxSurfaceFormsPerAnalyzedForm had kicked in and
// pruned our exact match from one of these nodes
// ...:
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
// This node has END_BYTE arc leaving, meaning it's an
// "exact" match:
searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input);
}
}
MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
// NOTE: this is rather inefficient: we enumerate
// every matching "exactly the same analyzed form"
// path, and then do linear scan to see if one of
// these exactly matches the input. It should be
// possible (though hairy) to do something similar
// to getByOutput, since the surface form is encoded
// into the FST output, so we more efficiently hone
// in on the exact surface-form match. Still, I
// suspect very little time is spent in this linear
// search: it's bounded by how many prefix start
// nodes we have and the
// maxSurfaceFormsPerAnalyzedForm:
for(MinResult<Pair<Long,BytesRef>> completion : completions) {
if (utf8Key.bytesEquals(completion.output.output2)) {
spare.grow(completion.output.output2.length);
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1)));
break;
}
}
if (results.size() == num) {
// That was quick:
return results;
}
}
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
num - results.size(),
num * maxAnalyzedPathsForOneInput,
weightComparator) {
private final Set<BytesRef> seen = new HashSet<BytesRef>();
/**
 * Decides whether a candidate completion is kept by this searcher.
 *
 * A candidate is rejected when its surface form (output.output2) was
 * already offered to this searcher, or when exactFirst is set and the
 * surface form is byte-for-byte equal to the lookup key.
 *
 * @param input  the input path of the candidate; not consulted here
 * @param output the candidate's output pair; output2 is compared against
 *               previously seen surface forms and against the lookup key
 * @return true to keep the candidate, false to drop it
 */
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
  final BytesRef surfaceForm = output.output2;

  // Dedup: when the input analyzes to a graph the same surface form can
  // show up more than once. Set.add reports false if it was already
  // recorded, and records it otherwise.
  if (!seen.add(surfaceForm)) {
    return false;
  }

  // In exactFirst mode, a path whose surface form equals the key must be
  // dropped here, otherwise it would be reported twice.
  if (exactFirst && utf8Key.bytesEquals(surfaceForm)) {
    // An exact match at this point should already have been collected by
    // the earlier exact-match search:
    assert results.size() == 1;
    return false;
  }

  return true;
}
};
prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
searcher.addStartPaths(path.fstNode, path.output, true, path.input);
}
MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
for(MinResult<Pair<Long,BytesRef>> completion : completions) {
spare.grow(completion.output.output2.length);
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
// TODO: for fuzzy case would be nice to return
// how many edits were required
//System.out.println(" result=" + result);