package dovetaildb.bagindex;
import gnu.trove.TIntObjectHashMap;
import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import dovetaildb.fileaccessor.FileOfLongs;
import dovetaildb.fileaccessor.OffsetValueFilePair;
import dovetaildb.fileaccessor.PagedFile;
import dovetaildb.scan.AbstractScanner;
import dovetaildb.scan.Scanner;
import dovetaildb.scan.ScannerUtil;
import dovetaildb.util.MutableInt;
import dovetaildb.util.Pair;
import dovetaildb.util.Util;
public class RawBagIndex extends BagIndex {
/** clear this bit for the actual page offset; if the bit is set, the page is a ranged value page. Declared as a long so masking unsigned 32-bit page pointers works without sign extension. */
private static final long RANGE_MASK = 0x80000000L;
/** doc deletion file: 4 byte doc id, 4 byte page pointer */
FileOfLongs ddf;
/** postinglist offset file: 4 byte term pointer, 4 byte page pointer */
FileOfLongs pof;
/**
 * The postinglist value file; different page formats:
 *   00 <val:4><docid:4>,...                    (mixed posting list, docid order)
 *   01 <prevpage:4>,<nextpage:4>,<docid:4>,... (single term posting list, docid order)
 *   10 <revDeletedIn:4><docid:4>,...           (deletion page, docid order)
 */
PagedFile pvf;
/** document field data: list of term pointers */
OffsetValueFilePair dfPair;
/** value field data */
OffsetValueFilePair vfPair;
long maxDocId;
static final class IntScanner extends AbstractScanner {
private final IntBuffer ib;
private final PagedFile pvf;
private final int firstPage;
private int nextPage, offset;
private long docId;
IntScanner(PagedFile pvf, int page) {
this.pvf = pvf;
firstPage = page;
ib = pvf.getIntBuffer();
setupNewPage(page);
}
private void setupNewPage(int page) {
offset = pvf.getIntOffestForPage(page);
nextPage = ib.get(offset+1);
offset += 2;
docId = ib.get(offset) & 0xFFFFFFFFL; // long literal mask: read the int as unsigned
}
public long doc() { return docId; }
public boolean next() {
docId = ib.get(offset) & 0xFFFFFFFFL; // unsigned read; a docId of 0 terminates the page
if (docId == 0) {
if (nextPage == firstPage) return false;
setupNewPage(nextPage);
}
if (docId == 0) return false;
offset++;
return true;
}
}
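// Usage sketch for the scanner contract (illustrative only; doc() is only
// meaningful after next() has returned true):
//   Scanner s = new IntScanner(pvf, page);
//   while (s.next()) { /* consume s.doc() */ }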
static abstract class LongScanner extends AbstractScanner {
protected final LongBuffer lb;
protected int offset;
protected long docId, data;
LongScanner(PagedFile pvf, int page) {
offset = pvf.getLongOffestForPage(page);
lb = pvf.getLongBuffer();
}
public long doc() { return docId; }
public boolean next() {
long val = lb.get(offset++);
docId = bottomUInt(val);
data = topUInt(val);
return docId != 0;
}
}
static final class DelScanner extends AbstractScanner {
final PagedFile pvf;
DelPageScanner pageScanner;
final LongBuffer delPages;
int delPagesIndex;
final int delPagesMax;
final long revNum;
public DelScanner(PagedFile pvf, LongBuffer delPages, long revNum) {
this.pvf = pvf;
this.delPages = delPages;
this.revNum = revNum;
delPagesIndex = 0;
delPagesMax = delPages.limit();
newPage();
}
private void newPage() {
if (delPagesIndex >= delPagesMax) return;
long pair = delPages.get(delPagesIndex++);
int pageNum = (int)bottomUInt(pair);
pageScanner = new DelPageScanner(pvf, pageNum, revNum);
}
public long doc() {
return pageScanner.doc();
}
public boolean next() {
if (pageScanner == null) return false; // empty deletion file
// advance within the current page; on exhaustion move to the next
// deletion page (calling next() a second time here would skip a record)
while (!pageScanner.next()) {
if (delPagesIndex >= delPagesMax) return false;
newPage();
}
return true;
}
}
static final class DelPageScanner extends LongScanner {
private final long revNum;
DelPageScanner(PagedFile pvf, int page, long revNum) {
super(pvf, page);
this.revNum = revNum;
}
public boolean next() {
while(super.next()) {
if (data <= revNum) return true;
}
return false;
}
}
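// Visibility example for the filter above: a doc with revDeletedIn == 7 is
// reported as deleted for any revNum >= 7 and ignored for older revisions.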
static final class RangeScanner extends LongScanner {
private final long revNum;
private final byte[] minTermValue, maxTermValue;
private final int minCmp, maxCmp;
private final OffsetValueFilePair vfPair;
RangeScanner(PagedFile pvf, int page, OffsetValueFilePair vfPair,
byte[] minTermValue, boolean minIsExclusive,
byte[] maxTermValue, boolean maxIsExclusive,
long revNum) {
super(pvf, page);
this.vfPair = vfPair;
this.minTermValue = minTermValue;
this.maxTermValue = maxTermValue;
// a record is skipped when cmp(bound, term) crosses these thresholds:
// an inclusive min skips only terms strictly below it (cmp > 0, so minCmp=1);
// an exclusive min also skips equal terms (cmp >= 0, so minCmp=0); the max
// bound mirrors this on the negative side
this.minCmp = minIsExclusive ? 0 : 1;
this.maxCmp = maxIsExclusive ? 0 : -1;
this.revNum = revNum;
}
public boolean next() {
while(super.next()) {
if (minTermValue != null && vfPair.cmp(minTermValue, data) >= minCmp) continue;
if (maxTermValue != null && vfPair.cmp(maxTermValue, data) <= maxCmp) continue;
if (docId > revNum) return false; // docid order: every later record is newer still
return true;
}
return false;
}
}
private void deleteInRev(long[] deletions, long revNum) {
int oldNumRecs = (int) ddf.getNumRecs();
long numRecs = oldNumRecs + deletions.length;
ddf.setNumRecs(numRecs);
LongBuffer lb = ddf.getLongBuffer();
int delIdx = deletions.length - 1;
int readIdx = oldNumRecs - 1;
int writeIdx = (int) (numRecs - 1);
// merge from the back so each existing record is moved at most once;
// guard readIdx so deletions sorting below all existing records don't underflow
while(delIdx >= 0) {
long targetCmpVal = topUInt(deletions[delIdx]);
while(readIdx >= 0 && topUInt(lb.get(readIdx)) > targetCmpVal) {
lb.put(writeIdx--, lb.get(readIdx--));
}
lb.put(writeIdx--, deletions[delIdx--]);
}
}
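// Worked example of the backward merge above: existing records keyed by
// doc id [2, 5, 9] merged with sorted deletions keyed [4, 7] produce
// [2, 4, 5, 7, 9], copying each pre-existing record at most once.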
static class TermInDocRec implements Comparable<TermInDocRec> {
byte[] term;
long docId;
long termIdx;
public TermInDocRec(byte[] term, long termIdx, long docId) {
this.term = term;
this.termIdx = termIdx;
this.docId = docId;
}
public int compareTo(TermInDocRec other) {
int cmp = Util.compareBytes(this.term, other.term);
if (cmp == 0) {
// compare, don't subtract: the difference of two longs can overflow an int
cmp = Long.compare(this.docId, other.docId);
}
return cmp;
}
}
public long commitNewRev(long[] deletions, Collection<Pair<byte[][],byte[][]>> inserts) {
int numInserts = inserts.size();
long revNum = maxDocId + numInserts;
if (numInserts == 0) {
// commits with only deletions still occupy a docId slot, so insert an
// empty doc (empty arrays, not nulls, so the loop below needn't special-case)
inserts.add(new Pair<byte[][],byte[][]>(new byte[0][], new byte[0][]));
revNum++;
}
Arrays.sort(deletions);
deleteInRev(deletions, revNum);
ArrayList<TermInDocRec> termToId = new ArrayList<TermInDocRec>();
for(Pair<byte[][],byte[][]> docPair : inserts) {
maxDocId++; // doc ids start at 1; 0 is the page terminator value
byte[][] indexTerms = docPair.getLeft();
byte[][] storeTerms = docPair.getRight();
// write to dfPair for each doc (both sets of terms)
byte[] dvfBuffer = new byte[4 * (indexTerms.length+storeTerms.length)];
int dvfIdx = 0;
for(byte[][] terms : new byte[][][]{indexTerms,storeTerms}) {
for(byte[] term : terms) {
long valIdx = vfPair.append(term);
Util.leUIntToBytes(valIdx, dvfBuffer, dvfIdx);
dvfIdx += 4;
if (terms == indexTerms) {
// only index terms become searchable posting entries
termToId.add(new TermInDocRec(term, valIdx, maxDocId));
}
}
}
dfPair.append(dvfBuffer);
}
Collections.sort(termToId);
int termToIdSize = termToId.size();
for(int i=0; i < termToIdSize; i++) {
TermInDocRec rec = termToId.get(i);
appendTermDoc(rec.term, rec.termIdx, rec.docId);
}
return maxDocId;
}
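// Usage sketch (hypothetical document; index terms are searchable, store
// terms are written to the value file but never indexed):
//   byte[][] indexTerms = { "name=bob".getBytes() };
//   byte[][] storeTerms = { "{\"name\":\"bob\"}".getBytes() };
//   ArrayList<Pair<byte[][],byte[][]>> batch = new ArrayList<Pair<byte[][],byte[][]>>();
//   batch.add(new Pair<byte[][],byte[][]>(indexTerms, storeTerms));
//   long rev = index.commitNewRev(new long[0], batch); // batch must be mutable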
private void appendTermDoc(byte[] term, long termIdx, long docId) {
int termIndex = lookupTermIndex(term);
long page = bottomUInt(pof.get(termIndex));
boolean isRanged = (page & RANGE_MASK) == RANGE_MASK;
page = page & ~RANGE_MASK;
if (!isRanged) {
appendToLinkedPages(page, docId);
} else {
appendToRangedPage(term, page, termIdx, docId);
}
}
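// Dispatch example: a pof page pointer of 0x80000012 has the RANGE_MASK bit
// set, so page 0x12 is a ranged value page; 0x00000012 is the head page of a
// linked single-term posting list.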
private void appendToLinkedPages(long page, long docId) {
IntBuffer ib = pvf.getIntBuffer();
int offset = pvf.getIntOffestForPage((int)page);
// page header is <prev_page:4><next_page:4>; the list is circular, so the
// first page's prev pointer names the current last page
long prevPage = ib.get(offset) & 0xFFFFFFFFL;
int ppOffset = pvf.getIntOffestForPage((int)prevPage) + 2;
// reserve the final slot as a 0 terminator so scanners can detect the page end
int maxOffset = pvf.getIntOffestForPage((int)prevPage + 1) - 1;
for(; ppOffset < maxOffset; ppOffset++) {
int val = ib.get(ppOffset);
if (val == 0) {
ib.put(ppOffset, (int)docId);
return;
}
}
// (page full); append a new last page
int newPage = pvf.newPageIndex();
int i = pvf.getIntOffestForPage(newPage);
// IntBuffer indices count ints, not bytes
ib.put(i+0, (int)prevPage);
ib.put(i+1, (int)page);
ib.put(i+2, (int)docId);
ib.put(i+3, 0);
// now, repoint: the first page's prev pointer and the old last page's
// next pointer both name the new page
ib.put(offset, newPage);
ib.put(pvf.getIntOffestForPage((int)prevPage) + 1, newPage);
}
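// Link structure after appending a page to a full list:
//   first.prev -> newLast, newLast.prev -> oldLast,
//   oldLast.next -> newLast, newLast.next -> first (circular both ways)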
private final ReadWriteLocker pageLocker = new ReadWriteLocker();
public void appendToRangedPage(byte[] term, long page, long termIdx, long docId) {
LongBuffer lb = pvf.getLongBuffer();
int offset = pvf.getLongOffestForPage((int)page);
// the final slot is reserved for the 0 terminator
int maxOffset = pvf.getLongOffestForPage((int)page+1) - 1;
long rec = termIdx << 32 | docId;
for(; offset < maxOffset; offset++) {
long val = lb.get(offset);
if (bottomUInt(val) == 0) { // terminating position
lb.put(offset, rec);
lb.put(offset+1, 0);
return;
}
}
// All full; split page:
ArrayList<TermInDocRec> terms = new ArrayList<TermInDocRec>();
offset = pvf.getLongOffestForPage((int)page);
for(; offset < maxOffset; offset++) {
long val = lb.get(offset);
long curTermIdx = topUInt(val);
byte[] curTerm = vfPair.valueAt((int)curTermIdx);
terms.add(new TermInDocRec(curTerm, curTermIdx, bottomUInt(val)));
}
// records are stored in insertion order; the split decision needs term order
Collections.sort(terms);
int cutIndex = (terms.size() * 4) / 10;
if (Util.compareBytes(terms.get(cutIndex).term, terms.get(0).term) == 0) {
// Initial segment is worthy of a single-value posting list
extractSinglePostingList(termIdx);
} else if (Util.compareBytes(terms.get(cutIndex).term, terms.get(terms.size()-1).term) == 0) {
// Final segment is worthy of a single-value posting list
extractSinglePostingList(termIdx);
} else {
// split the range in half:
ArrayList<TermInDocRec> subTerms1 = new ArrayList<TermInDocRec>(terms.subList(0, cutIndex+1));
ArrayList<TermInDocRec> subTerms2 = new ArrayList<TermInDocRec>(terms.subList(cutIndex+1, terms.size()));
// TODO: write subTerms1/subTerms2 to two new ranged pages and repoint
// the pof entries; until then the recursion below assumes the
// extract/split step above freed space
}
// recurse once to do the actual insertion
appendTermDoc(term, termIdx, docId);
// pageLocker.lockForWrite((int)page);
// try {
// int highPage = pvf.newPageIndex();
//
// } finally {
// pageLocker.unlock((int)page);
// }
}
private void extractSinglePostingList(long termIdx) {
// TODO: unimplemented stub; should move all records for termIdx out of the
// ranged page into a dedicated linked-page posting list (format 01)
}
static final class ReadWriteLocker {
final TIntObjectHashMap<MutableInt> map = new TIntObjectHashMap<MutableInt>();
public void lockForRead(int i) {
MutableInt val;
synchronized(map) {
if (! map.containsKey(i)) {
map.put(i, new MutableInt(1));
return;
}
val = map.get(i);
}
synchronized(val) {
while (val.get() == -1) {
try {
val.wait();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
val.incr();
}
}
public void lockForWrite(int i) {
MutableInt val;
synchronized(map) {
if (! map.containsKey(i)) {
map.put(i, new MutableInt(-1));
return;
}
val = map.get(i);
}
synchronized(val) {
// wait until no readers (positive count) and no writer (-1) remain
while (val.get() != 0) {
try {
val.wait();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
val.set(-1);
}
}
public void unlock(int i) {
MutableInt val;
synchronized(map) {
val = map.get(i);
}
synchronized(val) {
int curVal = val.get();
if (curVal == -1) {
val.set(0);
val.notifyAll(); // several readers may be waiting
} else {
val.decr();
if (curVal == 1)
val.notifyAll(); // last reader out; wake any waiting writers
}
}
}
}
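// Usage sketch for the per-page locker (mirrors the commented-out block in
// appendToRangedPage, where it is intended to be wired in):
//   pageLocker.lockForWrite(pageNum);
//   try { /* mutate the page */ } finally { pageLocker.unlock(pageNum); }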
private static long topUInt(long l) {
return l >>> 32;
}
private static long bottomUInt(long l) {
return l & 0xFFFFFFFFL; // long literal: a bare 0xFFFFFFFF sign-extends to -1 and masks nothing
}
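// Example: for a ranged-page record rec = (5L << 32) | 42L (termIdx 5,
// docId 42), topUInt(rec) == 5 and bottomUInt(rec) == 42.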
int lookupTermIndex(byte[] termValue1) {
LongBuffer lb = pof.getLongBuffer();
int low = 0;
int high = (int)pof.getNumRecs() - 1;
while( low <= high ) {
int mid = (low + high) >>> 1;
long midPair = lb.get(mid);
long midTermId = topUInt(midPair);
int cmp = vfPair.cmp(termValue1, midTermId);
if (cmp < 0) {
high = mid - 1;
} else if (cmp > 0) {
low = mid + 1; // must advance past mid, or the loop never terminates
} else {
return mid;
}
}
// no exact match: return the last index whose term sorts below termValue1,
// since a ranged page starting there may still cover the term
return (high < 0) ? 0 : high;
}
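// Example: with pof terms ["b", "f", "m"] and termValue1 = "g", the search
// ends with high at index 1 ("f"), whose ranged page may cover "g".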
Scanner[] scanRange(byte[] termValue1, byte[] termValue2,
boolean term1IsExclusive, boolean term2IsExclusive,
long revNum) {
LongBuffer lb = pof.getLongBuffer();
int low = lookupTermIndex(termValue1);
// because of concurrent insertions, the target may have moved ahead; that's why we keep checking the lower bound
ArrayList<Scanner> scanners = new ArrayList<Scanner>();
int lbLimit = (int)pof.getNumRecs(); // LongBuffer limits count longs, not bytes
while(low < lbLimit) {
long pair = lb.get(low++);
long termId = topUInt(pair);
long page = bottomUInt(pair);
boolean isRanged = (page & RANGE_MASK) == RANGE_MASK;
page &= ~RANGE_MASK; // clear the flag bit to recover the real page number
int cmp1 = vfPair.cmp(termValue1, termId);
int cmp2 = vfPair.cmp(termValue2, termId);
// cmp3 compares term2 to the next iteration's term; past the last entry
// there is no next term, so the upper bound must still be enforced (-1)
int cmp3 = (low >= lbLimit) ? -1 :
vfPair.cmp(termValue2, topUInt(lb.get(low)));
if (cmp1 == 0 && term1IsExclusive && !isRanged) continue;
if (cmp2 < 0) break; // this term already sorts past the upper bound
if (cmp2 == 0 && term2IsExclusive) break;
Scanner scanner;
if (isRanged) {
byte[] minTerm = (cmp1 > 0 || (cmp1 == 0 && term1IsExclusive)) ? termValue1 : null;
byte[] maxTerm = (cmp3 < 0) ? termValue2 : null;
scanner = new RangeScanner(pvf, (int)page, vfPair,
minTerm, term1IsExclusive,
maxTerm, term2IsExclusive,
revNum);
} else {
scanner = new IntScanner(pvf, (int)page);
}
scanners.add(scanner);
}
return scanners.toArray(new Scanner[scanners.size()]);
}
@Override
public void close() {
pof.close();
pvf.close();
ddf.close();
dfPair.close();
vfPair.close();
}
@Override
public Scanner fetchDeletions(long revNum) {
return new DelScanner(pvf, ddf.getLongBuffer(), revNum);
}
@Override
public BagIndexDoc fetchDoc(long docId) {
// TODO Auto-generated method stub
return null;
}
@Override
public Scanner fetchRange(byte[] term1, byte[] term2, boolean isExclusive1, boolean isExclusive2, long revNum) {
Scanner[] scanners = scanRange(term1, term2, isExclusive1, isExclusive2, revNum);
return ScannerUtil.disjunctiveScanner(scanners);
}
@Override
public Scanner fetchTd(byte[] term, long revNum) {
Scanner[] scanners = scanRange(term, term, false, false, revNum);
// a term may resolve to zero or several pages; disjunctiveScanner handles both
return ScannerUtil.disjunctiveScanner(scanners);
}
@Override
public String getHomedir() {
// TODO Auto-generated method stub
return null;
}
@Override
public BagIndexDoc refetchDoc(BagIndexDoc doc, long docId) {
// TODO Auto-generated method stub
return null;
}
@Override
public void setHomedir(String homeDir) {
// TODO Auto-generated method stub
}
}