// Builds one FST entry per key of workingSet. Keys are added in the order produced by
// CharsRef.getUTF16SortedAsUTF8Comparator(). Each output is encoded as
//   vInt(count << 1 | flag) followed by `count` vInt ords,
// where the low flag bit is 0 when the entry's includeOrig is set and 1 otherwise.
// TODO: are we using the best sharing options?
org.apache.lucene.util.fst.Builder<BytesRef> builder =
    new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);

// Buffer and writer are reused across keys; the buffer is grown and re-wrapped per key.
BytesRef scratch = new BytesRef(64);
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();

// Only allocated when dedup is requested; null means every ord is written as-is.
final Set<Integer> dedupSet = dedup ? new HashSet<Integer>() : null;

// Temporary home for the header vInt while the ords are shifted right (sized for the
// same 5-byte worst case used for the buffer estimate below).
final byte[] spare = new byte[5];

Set<CharsRef> keys = workingSet.keySet();
CharsRef[] sortedKeys = keys.toArray(new CharsRef[keys.size()]);
Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());

final IntsRef scratchIntsRef = new IntsRef();

//System.out.println("fmap.build");
for (final CharsRef key : sortedKeys) {
  final MapEntry entry = workingSet.get(key);
  final int ordCount = entry.ords.size();

  // Worst case: 5 bytes for the header plus 5 bytes for each ord.
  scratch.grow(5 + ordCount * 5);
  scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.length);
  assert scratch.offset == 0;

  // The ords go in first: how many survive dedup is only known once they are written.
  int written = 0;
  for (int ordIdx = 0; ordIdx < ordCount; ordIdx++) {
    // fetch (and box) once
    final Integer ord = entry.ords.get(ordIdx);
    // Set.add reports false when this ord was already emitted for the current key.
    if (dedupSet != null && !dedupSet.add(ord)) {
      continue;
    }
    scratchOutput.writeVInt(ord);
    written++;
  }

  // Append the header (count + includeOrig flag) after the ords and measure its length.
  final int ordsEnd = scratchOutput.getPosition();
  final int header = written << 1 | (entry.includeOrig ? 0 : 1);
  scratchOutput.writeVInt(header);
  final int headerLen = scratchOutput.getPosition() - ordsEnd;

  // Rotate the header to the front of the byte[]: stash it, slide the ords right by
  // headerLen, then drop the header in at index 0.
  System.arraycopy(scratch.bytes, ordsEnd, spare, 0, headerLen);
  System.arraycopy(scratch.bytes, 0, scratch.bytes, headerLen, ordsEnd);
  System.arraycopy(spare, 0, scratch.bytes, 0, headerLen);

  // The dedup state is per key, so reset it before moving on.
  if (dedupSet != null) {
    dedupSet.clear();
  }

  scratch.length = scratchOutput.getPosition() - scratch.offset;
  //System.out.println("  add input=" + key + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + written);
  // scratch is reused on the next iteration, so the builder is handed its own copy.
  builder.add(Util.toUTF32(key, scratchIntsRef), BytesRef.deepCopyOf(scratch));
}

FST<BytesRef> fst = builder.finish();