package dovetaildb.bagindex;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Iterator;
import dovetaildb.bagindex.BlueSteelBagIndex.PostingNode;
import dovetaildb.bytes.AbstractBytes;
import dovetaildb.bytes.ArrayBytes;
import dovetaildb.bytes.Bytes;
/**
 * File-backed variant of {@link BlueSteelBagIndex}. It keeps two
 * {@code BytesInterface} stores, {@code header} and {@code data}, and a flag
 * saying whether a store should be forced after it has been written.
 *
 * NOTE(review): in this copy of the file the lines that held only closing
 * braces are absent, so definitions run straight into one another.
 */
public class FsBlueSteelBagIndex extends BlueSteelBagIndex {
private static final long serialVersionUID = -8987250800532074302L;
// Backing stores; declared transient, so Java serialization skips them.
// They are assigned in setHomedir and cleared in close.
transient BytesInterface data, header;
// When true, force() is called on a store after it is written
// (see setHomedir, writeHeader and setNewTokenTable).
final boolean sync;
/**
 * Records the sync flag only; the stores are opened later by setHomedir.
 *
 * @param sync whether stores should be forced after writes
 */
public FsBlueSteelBagIndex(boolean sync) {
this.sync = sync;
/**
 * Returns a debug description listing homeDir, topRevNum, nextDocId and sync.
 * NOTE(review): homeDir, topRevNum and nextDocId are not declared in this
 * file; presumably inherited from BlueSteelBagIndex -- confirm.
 */
public String toString() {
return "FsBlueSteelBagIndex("+homeDir+", rev="+topRevNum+", nextdoc="+nextDocId+", sync="+sync+")";
/**
 * Returns topRevNum. In this class setNewTokenTable stores the head pointer
 * of the most recently written token table in that field.
 */
public long getCurrentRevNum() {
return topRevNum;
/** Returns the homeDir field (the directory prefix used by openAsChannel). */
public String getHomedir() {
return homeDir;
/**
 * Opens the file named {@code filename} under {@code homeDir} in "rw" mode
 * and returns its channel.
 *
 * NOTE(review): File, RandomAccessFile and FileNotFoundException are used
 * here but no java.io imports are visible in this copy -- confirm the import
 * list.
 *
 * @param filename file name, appended to homeDir with the platform separator
 * @return the channel of the opened RandomAccessFile
 * @throws RuntimeException wrapping the FileNotFoundException raised when the
 *         file cannot be opened
 */
private FileChannel openAsChannel(String filename) {
File file = new File(homeDir + File.separatorChar + filename);
try {
RandomAccessFile raf = new RandomAccessFile(file, "rw"); // think about "rwd" too?
return raf.getChannel();
} catch (FileNotFoundException e) {
// Rethrown unchecked, with the original exception kept as the cause.
throw new RuntimeException(e);
/**
 * Opens the "ddbhdr" and "ddbdat" stores and, when the header store is empty,
 * seeds the header with 32 bytes and writes an empty root token table to the
 * data store.
 *
 * NOTE(review): this copy looks truncated. Nothing visible assigns the
 * homeDir field (which openAsChannel reads), nothing visible follows the
 * "does not exist" test, and closing braces are absent so the exact nesting
 * of the statements below cannot be read off -- confirm against upstream.
 *
 * @param homeDir directory that holds the index files
 * @throws RuntimeException wrapping any IOException raised in the try block
 */
public void setHomedir(String homeDir) {
File homeDirFile = new File(homeDir);
if (! homeDirFile.exists()) {
try {
header = new MappedBytesInterface(openAsChannel("ddbhdr"));
data = new MappedBytesInterface(openAsChannel("ddbdat"));
if (header.getSize() == 0) {
// 32 bytes -- presumably room for the four longs handled by
// writeHeader/parseHeader; confirm how BytesInterface indexes longs.
header.appendBytes(new byte[32]);
// Empty root table: empty fixed prefix and no token records.
MemoryTokenTable emptyRoot =
new MemoryTokenTable(ArrayBytes.EMPTY_BYTES,
new ArrayList<TokenRec>());
FsTokenTable.write(data, emptyRoot);
if (sync) data.force();
} catch (IOException e) {
throw new RuntimeException(e);
// Value expected in header slot 0; parseHeader rejects any other value.
final long MAGIC_NUMBER = 4016171645340062763L;
/**
 * Reads four longs from the header store: slot 0 must equal MAGIC_NUMBER,
 * slot 1 is the format version (values above 1 are rejected), slot 2 is
 * loaded into nextDocId and slot 3 into topRevNum.
 *
 * NOTE(review): whether the getLong argument is a long index or a byte offset
 * depends on BytesInterface, which is not visible here -- confirm.
 *
 * @throws RuntimeException if the magic number does not match or the version
 *         is greater than 1
 */
private void parseHeader() {
long firstWord = header.getLong(0);
if (firstWord != MAGIC_NUMBER) {
throw new RuntimeException("Unknown header file in "+homeDir);
long ver = header.getLong(1);
if (ver > 1) throw new RuntimeException("File format version is newer than this code supports");
this.nextDocId = header.getLong(2);
this.topRevNum = header.getLong(3);
/**
 * Writes the four header slots in the layout parseHeader reads back:
 * MAGIC_NUMBER, version 1, nextDocId, topRevNum. Forces the header store
 * afterwards when sync is set.
 */
private void writeHeader() {
header.putLong(0, MAGIC_NUMBER);
header.putLong(1, 1);
header.putLong(2, nextDocId);
header.putLong(3, topRevNum);
if (sync) header.force();
/**
 * {@link Bytes} view over a region of a {@code BytesInterface}: {@code len}
 * bytes starting at absolute offset {@code pos}. The fields are mutable, and
 * subBytes/reinit/copyInto re-point a view in place instead of allocating.
 *
 * NOTE(review): closing braces are absent in this copy, and two assignments
 * have lost their left-hand side (flagged below).
 */
static final class FsBytes extends AbstractBytes {
BytesInterface data;
long pos;
int len;
// NOTE(review): the target of "= data" is missing on the next line;
// presumably it assigned the data field -- confirm against upstream.
public FsBytes(BytesInterface data, long pos, int len) { = data;
this.pos = pos;
this.len = len;
/** Returns the byte at {@code pos + position}, masked to 0..255. */
public int get(int position) {
return 0xFF & data.getByte(pos+position);
/** Returns the current length of the view. */
public int getLength() { return len; }
/**
 * Narrows THIS view in place: advances pos by {@code start}, sets len to
 * {@code count}, and returns this same instance. No new object is created,
 * so the receiver no longer covers its previous range afterwards.
 */
public Bytes subBytes(int start, int count) {
pos += start;
len = count;
return this;
// Priority passed to / compared against the other operand in compareToParts.
private static final int MY_CMP_PRIORITY = 5;
/**
 * Compares a slice of this view with a slice of {@code other}.
 * As used below, o1/l1 are the offset and length within this view and o2/l2
 * the offset and length within {@code other}.
 *
 * If {@code otherCmpPriority} is higher than MY_CMP_PRIORITY the comparison
 * is delegated to {@code other} with the operands swapped and the result
 * negated. Otherwise the first min(l1, l2) bytes are compared through
 * data.cmp, and a tie is broken by l1 - l2.
 */
public int compareToParts(Bytes other, int o1, int o2, int l1, int l2, int otherCmpPriority) {
if (otherCmpPriority > MY_CMP_PRIORITY) {
return -other.compareToParts(this, o2, o1, l2, l1, MY_CMP_PRIORITY);
} else {
byte[] bytes;
// Use the backing array directly for ArrayBytes; otherwise ask for a byte[].
if (other instanceof ArrayBytes) {
bytes = ((ArrayBytes)other).getBackingBytes();
} else {
bytes = other.getBytes();
int minlen = Math.min(l1, l2);
int diff = data.cmp(pos+o1, minlen, bytes, o2);
if (diff != 0) return diff;
return l1 - l2;
/** Returns a new FsBytes with the same data, pos and len. */
public Bytes copy() {
return new FsBytes(data, pos, len);
/**
 * Copies this view into {@code other} when it is an FsBytes (returning
 * {@code other}, or this when both are the same instance); otherwise returns
 * a fresh ArrayBytes holding this view's bytes and leaves {@code other}
 * untouched.
 */
public Bytes copyInto(Bytes other) {
if (other instanceof FsBytes) {
if (other == this) return this;
// NOTE(review): the target of "= data" is missing on the next line;
// presumably oBytes.data -- confirm against upstream.
FsBytes oBytes = (FsBytes)other; = data;
oBytes.len = len;
oBytes.pos = pos;
return other;
} else {
return new ArrayBytes(this.getBytes());
/** Re-points this view at {@code numBytes} bytes starting at {@code position}. */
public void reinit(long position, int numBytes) {
this.pos = position;
this.len = numBytes;
/** Debug form: the flattened content followed by the start offset. */
public String toString() {
return "FsBytes("+flatten()+"@"+pos+")";
/**
 * PostingNode whose fields are decoded from a {@code BytesInterface}.
 * parse() shows the record layout that is read: a variable-length long whose
 * low bit flags the presence of a push and whose remaining bits are the doc
 * id, a variable-length token length, the token bytes, and (only when the
 * flag is set) a push position and a push count.
 *
 * NOTE(review): closing braces are absent in this copy and several
 * statements have lost tokens (flagged below); whether writePush, which
 * follows, belongs to this class cannot be read off the text.
 */
static final class FsPostingNode extends PostingNode {
BytesInterface data;
// Read cursor handed to data.getVLong; its position field is also read and set directly.
final VarPosition vp = new VarPosition(0);
// Offset at which the current record starts (set by init and parse).
long startPos;
// Position and count of the attached push; both 0 when parse finds no push flag.
long pushPos, pushCt;
/** Copy constructor: duplicates the other node's fields; the token is copied via copy(). */
private FsPostingNode(FsPostingNode other) {
docId = other.docId;
token = other.token.copy();
vp.position = other.vp.position;
// NOTE(review): the right-hand side is missing on the next line;
// presumably other.data -- confirm against upstream.
data =;
startPos = other.startPos;
pushPos = other.pushPos;
pushCt = other.pushCt;
/**
 * Creates a node over {@code data} with an empty FsBytes token.
 * NOTE(review): {@code pos} is not used in the visible body and the target of
 * "= data" is missing; presumably the data field was assigned and init(pos)
 * and/or parse() were called here -- confirm against upstream.
 */
public FsPostingNode(BytesInterface data, long pos) {
this.token = new FsBytes(data, 0, 0); = data;
/** Points both the read cursor and startPos at {@code pos}. */
private void init(long pos) {
vp.position = pos;
startPos = pos;
/** Returns a new node built with the copy constructor. */
public FsPostingNode copy() {
return new FsPostingNode(this);
/**
 * Copies this node's state into {@code other} when it is an FsPostingNode
 * and returns it; otherwise (null or another type) returns copy().
 */
public PostingNode copyInto(PostingNode other) {
if (other == null || !(other instanceof FsPostingNode)) return copy();
FsPostingNode fsOther = (FsPostingNode)other;
fsOther.docId = docId;
// NOTE(review): the target of "= data" is missing on the next line;
// presumably fsOther.data -- confirm against upstream.
fsOther.token = token.copyInto(fsOther.token); = data;
fsOther.pushCt = pushCt;
fsOther.pushPos = pushPos;
fsOther.startPos = startPos;
fsOther.vp.position = vp.position;
return fsOther;
/**
 * Decodes one record at the cursor into this node's fields and leaves the
 * cursor after the record.
 */
private void parse() {
startPos = vp.position;
long v1 = data.getVLong(vp);
// Low bit of v1 is the "has push" flag; the remaining bits are the doc id.
docId = v1 >> 1;
long v2 = data.getVLong(vp);
int numBytes = (int)v2;
// System.out.println("P "+vp.position+":"+numBytes+"@ "+data);
// Re-point the existing token view instead of allocating a new one.
((FsBytes)token).reinit(vp.position, numBytes);
//token = new FsBytes(data, vp.position, numBytes);
vp.position += numBytes;
if ( (v1 & 1) == 1) {
pushPos = data.getVLong(vp);
pushCt = data.getVLong(vp);
} else {
pushPos = 0;
pushCt = 0;
/**
 * For a node that is not already an FsPostingNode and that has a push,
 * writes that push through writePush with a count of {@code ct - 1}.
 * The visible code returns {@code node} itself in every case; the value
 * returned by writePush is only assigned to a local.
 * NOTE(review): a statement attaching the written push to the node may be
 * missing from this copy -- confirm against upstream.
 */
public static PostingNode writeDependentPushes(BytesInterface data, PostingNode node, long ct) {
if (node instanceof FsPostingNode) return node;
SegmentPush push = node.getPush();
if (push == null) return node;
push = writePush(data, push, ct - 1);
return node;
/**
 * Returns {@code node} unchanged when it is already an FsPostingNode;
 * otherwise computes the encoded first word (doc id shifted left by one, low
 * bit set when the node has a push), notes the current end of {@code data} as
 * the record start, and returns a new FsPostingNode at that offset.
 * NOTE(review): no append calls are visible between these steps, so the
 * statements that actually emit the record appear to be missing from this
 * copy -- confirm against upstream.
 */
public static FsPostingNode write(BytesInterface data, PostingNode node) {
if (node instanceof FsPostingNode) return (FsPostingNode)node;
SegmentPush push = node.getPush();
long v1 = node.getDocId() << 1;
if (push != null) {
v1 |= 1;
long startPos = data.getSize();
Bytes token = node.getToken();
// System.out.println("W "+data.getSize()+":"+token.getLength()+" ["+token+"] @ "+data);
if (push != null) {
return new FsPostingNode(data, startPos);
/** Returns pushCt + 1. */
public long getCount() {
return pushCt+1;
/**
 * Returns this in the visible code.
 * NOTE(review): given the name, statements that move the node may be missing
 * from this copy -- confirm against upstream.
 */
public PostingNode destructiveDown() {
return this;
/** Returns pushCt. */
public long getPushCount() {
return pushCt;
/**
 * Returns null when pushPos is 0; otherwise a SegmentPush led by a new
 * FsPostingNode created at pushPos, with count pushCt.
 */
public SegmentPush getPush() {
if (pushPos == 0) return null;
FsPostingNode start = new FsPostingNode(data, pushPos);
return new SegmentPush(start, pushCt);
/**
 * Returns a new FsPostingNode at pushPos when {@code other} is null or not an
 * FsPostingNode; otherwise returns {@code other} cast to FsPostingNode.
 * NOTE(review): the visible code does not modify fsOther before returning it;
 * a re-initialisation at pushPos may be missing from this copy -- confirm.
 */
public PostingNode copyPushLeadInto(PostingNode other) {
if (other == null || ! (other instanceof FsPostingNode)) {
return new FsPostingNode(data, pushPos);
} else {
FsPostingNode fsOther = (FsPostingNode)other;
return fsOther;
/** Debug form: record start, doc id, token and push position. */
public String toString() {
return "FsPostingNode @"+startPos+" dId:"+docId+" tok:"+token+" psh@"+pushPos;
/**
 * Returns this in the visible code.
 * NOTE(review): given the name, a call that advances to the next record
 * (e.g. parse()) may be missing from this copy -- confirm against upstream.
 */
public FsPostingNode destructiveNext() {
return this;
/**
 * Writes a SegmentPush to {@code data} and returns a SegmentPush led by the
 * first written FsPostingNode with count {@code ct}. A push whose lead node
 * is already an FsPostingNode is returned unchanged.
 *
 * The visible code makes two passes: the first runs writeDependentPushes on
 * every node (cast to MemoryPostingNode), the second writes each node with
 * FsPostingNode.write and remembers the first result as the lead node.
 *
 * NOTE(review): both loop variables have lost their initialiser in this copy
 * (presumably i.next()), and nothing visible adds to depWrittenNodes before
 * it is wrapped in a new SegmentPush -- confirm against upstream.
 *
 * @param data store to append to
 * @param push push to write
 * @param ct   count recorded on the returned SegmentPush
 */
public static SegmentPush writePush(BytesInterface data, SegmentPush push, long ct) {
if (push.leadNode instanceof FsPostingNode) return push;
ArrayList<MemoryPostingNode> depWrittenNodes = new ArrayList<MemoryPostingNode>();
// Pass 1: write the pushes that hang off each node.
for(Iterator<PostingNode> i = push.iterator(); i.hasNext();) {
MemoryPostingNode node = (MemoryPostingNode);
node = (MemoryPostingNode)FsPostingNode.writeDependentPushes(data, node, node.getCount());
push = new SegmentPush(depWrittenNodes, push.getCount());
FsPostingNode leadNode = null;
// Pass 2: write the nodes themselves; the first one written becomes the lead.
for(Iterator<PostingNode> i = push.iterator(); i.hasNext();) {
PostingNode node =;
FsPostingNode written = FsPostingNode.write(data, node);
if (leadNode == null) {
leadNode = written;
return new SegmentPush(leadNode, ct);
/**
 * TokenRec whose sub table and summary push are read from a
 * {@code BytesInterface}.
 */
static class FsTokenRec extends TokenRec {
/**
 * Builds the record from stored positions: the sub table is null when
 * {@code subTablePos} is 0, otherwise an FsTokenTable at that position; the
 * push is led by an FsPostingNode at {@code pushPos} with count
 * {@code pushCt}.
 * NOTE(review): the start of the first statement is missing in this copy;
 * presumably a call passing {@code token} plus the two arguments below to
 * the superclass constructor -- confirm against upstream.
 */
public FsTokenRec(BytesInterface data, byte token, long subTablePos, long pushPos, long pushCt) {
(subTablePos == 0) ? null : new FsTokenTable(data, subTablePos),
new SegmentPush(new FsPostingNode(data, pushPos), pushCt));
/** Returns the tokenTable field cast to FsTokenTable. */
public FsTokenTable getFsTokenTable() {
return (FsTokenTable)tokenTable;
/**
 * Does not actually write the TokenRec; just writes the summary
 * push as well as all pushes under sub token tables (if any).
 */
// Writes rec's summary push via writePush and, when rec has a token table,
// that table via FsTokenTable.write; then returns an FsTokenRec built from
// the resulting positions (sub table position 0 when there is no table).
// The cast below relies on writePush returning a push led by an FsPostingNode.
public static FsTokenRec write(BytesInterface data, TokenRec rec) {
SegmentPush push = rec.getSegmentPush();
SegmentPush writtenPush = writePush(data, push, push.getCount());
TokenTable tbl = rec.getTokenTable();
FsTokenTable newTbl = null;
if (tbl != null) {
newTbl = FsTokenTable.write(data, tbl);
long pushStart = ((FsPostingNode)writtenPush.leadNode).startPos;
return new FsTokenRec(data, rec.token,
(newTbl == null) ? 0 : newTbl.tableHeadPtr,
pushStart, writtenPush.getCount());
/**
 * Iterator that decodes TokenRec entries from a {@code BytesInterface}.
 * The constructor reads a variable-length record count at {@code ptr}; each
 * next() then reads one byte followed by three variable-length longs.
 */
static class FsTokenRecIterator implements Iterator<TokenRec> {
private BytesInterface data;
// Read cursor; advanced by getVLong and by the explicit position++ in next().
private final VarPosition itrPtr;
// Record count read from the stream; hasNext() tests it against 0.
private int numRecs;
// NOTE(review): the target of "= data" is missing on the next line;
// presumably the data field -- confirm against upstream.
FsTokenRecIterator(BytesInterface data, long ptr) { = data;
this.itrPtr = new VarPosition(ptr);
this.numRecs = (int)data.getVLong(itrPtr);
/** Returns true while numRecs is greater than 0. */
public boolean hasNext() {
return numRecs > 0;
/**
 * Decodes the next record: token byte, sub table position, push position
 * and push count, in that order.
 * NOTE(review): nothing visible decrements numRecs, so hasNext() would never
 * change as written; a decrement is presumably missing from this copy --
 * confirm against upstream.
 */
public TokenRec next() {
byte ch = data.getByte(itrPtr.position++);
long subTblPos = data.getVLong(itrPtr);
long pushPos = data.getVLong(itrPtr);
long pushCt = data.getVLong(itrPtr);
return new FsTokenRec(data, ch, subTblPos, pushPos, pushCt);
/** Removal is not supported; always throws a bare RuntimeException. */
public void remove() {
throw new RuntimeException();
/**
 * TokenTable read from a {@code BytesInterface}. As decoded by the
 * constructor, a table starts at tableHeadPtr with a variable-length prefix
 * length followed by the prefix bytes; the records start right after the
 * prefix, at recHeadPtr.
 */
static class FsTokenTable extends TokenTable {
final long tableHeadPtr, recHeadPtr;
final BytesInterface data;
FsBytes fixedPrefix;
// NOTE(review): the target of "= data" is missing on the next line;
// presumably the data field -- confirm against upstream.
FsTokenTable(BytesInterface data, long tableHeadPtr) { = data;
this.tableHeadPtr = tableHeadPtr;
VarPosition vp = new VarPosition(tableHeadPtr);
int prefixLength = (int)data.getVLong(vp);
// The prefix is a view into data, not a copy.
fixedPrefix = new FsBytes(data, vp.position, prefixLength);
recHeadPtr = vp.position + prefixLength;
/** Returns the prefix view created by the constructor. */
public Bytes getFixedPrefix() { return fixedPrefix; }
/** Returns a new FsTokenRecIterator starting at recHeadPtr. */
public Iterator<TokenRec> getTokenRecs() {
return new FsTokenRecIterator(data, recHeadPtr);
/**
 * Returns {@code table} unchanged when it is already an FsTokenTable.
 * Otherwise first writes every record's pushes and sub tables through
 * FsTokenRec.write, then notes the current end of {@code data} as the table
 * start and returns an FsTokenTable at that offset.
 * NOTE(review): the loop variable has lost its initialiser (presumably
 * i.next()), and only the sub table pointer append is visible in the second
 * loop -- prefix, numRecs and push are read but never written in the visible
 * code, so further append calls are presumably missing from this copy.
 * Confirm against upstream.
 */
public static FsTokenTable write(BytesInterface data, TokenTable table) {
if (table instanceof FsTokenTable) return (FsTokenTable)table;
ArrayList<FsTokenRec> buffer = new ArrayList<FsTokenRec>();
for(Iterator<TokenRec> i = table.getTokenRecs(); i.hasNext();) {
TokenRec rec =;
buffer.add(FsTokenRec.write(data, rec));
long tblStart = data.getSize();
Bytes prefix = table.getFixedPrefix();
int numRecs = buffer.size();
for(FsTokenRec rec : buffer) {
FsTokenTable subTable = rec.getFsTokenTable();
// 0 stands for "no sub table", matching the check in the FsTokenRec constructor.
data.appendVLong((subTable==null) ? 0 : subTable.tableHeadPtr);
SegmentPush push = rec.getSegmentPush();
return new FsTokenTable(data, tblStart);
/**
 * Returns an FsTokenTable whose head pointer is {@code revNum}; in this class
 * a revision number is used directly as an offset into the data store.
 */
protected TokenTable getRootTokenTable(long revNum) {
return new FsTokenTable(data, revNum);
/**
 * Writes {@code newTokenTable} to the data store, forces the store when sync
 * is set, and stores the written table's head pointer in topRevNum.
 * NOTE(review): the header store is not rewritten here in the visible code;
 * confirm whether writeHeader is expected to be called by the caller.
 */
protected void setNewTokenTable(TokenTable newTokenTable) {
long newHead = FsTokenTable.write(data, newTokenTable).tableHeadPtr;
if (sync) data.force();
topRevNum = newHead;
/**
 * In-memory node used by rebuild(): an optional array of child tables indexed
 * by byte value and an optional buffer of posting nodes. It is a non-static
 * inner class and reads the enclosing index's {@code data} store and
 * {@code targetPostingListLength}.
 *
 * NOTE(review): closing braces are absent in this copy, nothing visible
 * initialises subTables or summaryBuffer, and writeAsPush has statements
 * after an unconditional throw (presumably block-commented upstream).
 */
private class RebuiltTokenTable {
RebuiltTokenTable[] subTables;
ArrayList<PostingNode> summaryBuffer;
/**
 * Throws RuntimeException("Not Implemented") as its first statement.
 * NOTE(review): the statements after the throw cannot run; they write each
 * buffered node with FsPostingNode.write, sum the node counts and wrap the
 * first node in a SegmentPush. Presumably they were inside a block comment
 * whose delimiters were lost -- confirm against upstream.
 */
SegmentPush writeAsPush() {
// TODO : this should be made into a list of node lists
// where each level is one tier higher than the previous
// fortunately, it isn't called right at the moment (I believe)
// so throwing an exception:
throw new RuntimeException("Not Implemented");
ArrayList<PostingNode> buffer = summaryBuffer;
int len = buffer.size();
long ct=0;
for(int i=0; i<len; i++) {
PostingNode node = buffer.get(i);
ct += node.getCount();
FsPostingNode fsNode = FsPostingNode.write(data, node);
buffer.set(i, fsNode);
SegmentPush push = new SegmentPush(buffer.get(0), ct);
return push;
/**
 * Records one (docId, term) pair. When child tables exist and the term has a
 * byte at {@code termIdx}, the call is passed on to the child for that byte
 * with termIdx + 1. When a summary buffer exists, the pair is buffered as a
 * MemoryPostingNode holding the term from termIdx onwards while the buffer is
 * below targetPostingListLength; otherwise writeAsPush() is called and a
 * summary node is built from its result.
 * NOTE(review): summaryNode is not used after it is created in the visible
 * code, and the nesting of the two top-level ifs cannot be read off this
 * copy -- confirm against upstream.
 */
void add(long docId, Bytes term, int termIdx) {
if (subTables != null && termIdx < term.getLength()) {
int b = term.get(termIdx);
subTables[b].add(docId, term, termIdx + 1);
if (summaryBuffer != null) {
if (summaryBuffer.size() < targetPostingListLength) {
summaryBuffer.add(new MemoryPostingNode(docId, term.subBytes(termIdx)));
} else {
SegmentPush push = writeAsPush();
MemoryPostingNode summaryNode = new MemoryPostingNode(push, docId, term);
/**
 * Builds one TokenRec per child table (token byte = child index, sub table
 * written recursively when the child has children of its own, push from the
 * child's writeAsPush) and writes them as a MemoryTokenTable with a null
 * prefix.
 */
FsTokenTable writeTokenTables() {
ArrayList<TokenRec> recs = new ArrayList<TokenRec>();
for(int i=0; i < subTables.length; i++) {
RebuiltTokenTable subRebuilt = subTables[i];
FsTokenTable subWritten = null;
if (subRebuilt.subTables != null) {
subWritten = subRebuilt.writeTokenTables();
SegmentPush push = subRebuilt.writeAsPush();
recs.add(new TokenRec((byte)i, subWritten, push));
return FsTokenTable.write(data, new MemoryTokenTable(null, recs));
/**
 * Feeds every edit's doc id and term into a fresh RebuiltTokenTable, then
 * returns topRevNum.
 * NOTE(review): the loop variable has lost its initialiser in this copy
 * (presumably edits.next()), and nothing visible writes the rebuilt table or
 * updates topRevNum before it is returned -- confirm against upstream.
 *
 * @param edits edits to replay
 * @return the current topRevNum
 */
public long rebuild(Iterator<EditRec> edits) {
RebuiltTokenTable rebuiltTable = new RebuiltTokenTable();
while(edits.hasNext()) {
EditRec edit =;
rebuiltTable.add(edit.getDocId(), edit.getTerm(), 0);
return topRevNum;
/**
 * Drops the references to both stores by setting them to null. The visible
 * code does not call any close or force method on them first.
 */
public void close() {
data = header = null; // needed?
/** Returns the sync flag given to the constructor. */
public boolean isSynced() {
return sync;