Source code of dovetaildb.bagindex.RawBagIndex

package dovetaildb.bagindex;

import gnu.trove.TIntObjectHashMap;

import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;

import dovetaildb.fileaccessor.FileOfLongs;
import dovetaildb.fileaccessor.OffsetValueFilePair;
import dovetaildb.fileaccessor.PagedFile;
import dovetaildb.scan.AbstractScanner;
import dovetaildb.scan.Scanner;
import dovetaildb.scan.ScannerUtil;
import dovetaildb.util.MutableInt;
import dovetaildb.util.Pair;
import dovetaildb.util.Util;

public class RawBagIndex extends BagIndex {
 
  /** If this bit is set in a page pointer, the page is a ranged value page; clear the bit to get the actual page offset. (Declared as a long so masking an unsigned 32-bit value does not sign-extend.) */
  private static final long RANGE_MASK = 0x80000000L;
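  // Illustrative example (not in the original source): a pof page pointer of
  // 0x80000005L denotes ranged page 5, since (0x80000005L & RANGE_MASK) != 0
  // and (0x80000005L & ~RANGE_MASK) == 5.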

  /** doc deletion file: one long per record; doc id in the top 4 bytes, deletion-page pointer in the bottom 4 */
  FileOfLongs         ddf;
 
  /** postinglist offset file: one long per record; term pointer in the top 4 bytes, page pointer in the bottom 4 */
  FileOfLongs         pof;
 
  /** 
   * A postinglist value file; different page formats:
   *  00  <val:4><docid:4>,...  (mixed posting list, docid order)
   *  01  <prevpage:4>,<nextpage:4>,<docid:4>,...  (single term posting list, docid order)
   *  10  <revDeletedIn:4><docid:4>,...  (deletion page, docid order)
   */
  PagedFile           pvf;
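  // Illustrative example (not in the original source): a single-term "01"
  // page holding docs 4 and 9 in a one-page circular list lays out as ints
  //   [prevPage, nextPage, 4, 9, 0, ...]
  // where prevPage == nextPage == this page and the 0 terminates the doc id
  // run; this is exactly the layout IntScanner below walks.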
 
  /** document field data: list of term pointers */
  OffsetValueFilePair dfPair;

  /** value field data */
  OffsetValueFilePair vfPair;

  long                maxDocId;
 
 
  static final class IntScanner extends AbstractScanner {
    private final IntBuffer ib;
    private final PagedFile pvf;
    private final int firstPage;
    private int nextPage, offset;
    private long docId;
    IntScanner(PagedFile pvf, int page) {
      this.pvf = pvf;
      firstPage = page;
      ib = pvf.getIntBuffer();
      setupNewPage(page);
    }
    private void setupNewPage(int page) {
      offset = pvf.getIntOffestForPage(page);
      nextPage = ib.get(offset+1);
      offset += 2;
      docId = ib.get(offset) & 0xFFFFFFFFL; // long mask; the int literal 0xffffffff would sign-extend
    }
    public long doc() {  return docId; }
    public boolean next() {
      docId = ib.get(offset) & 0xFFFFFFFFL;
      if (docId == 0) {
        if (nextPage == firstPage) return false;
        setupNewPage(nextPage);
      }
      if (docId == 0) return false;
      offset++;
      return true;
    }
  }
 
  static abstract class LongScanner extends AbstractScanner {
    protected final LongBuffer lb;
    protected int offset;
    protected long docId, data;
    LongScanner(PagedFile pvf, int page) {
      offset = pvf.getLongOffestForPage(page);
      lb = pvf.getLongBuffer();
    }
    public long doc() {  return docId; }
    public boolean next() {
      long val = lb.get(offset++);
      docId = bottomUInt(val);
      data = topUInt(val);
      return docId != 0;
    }
  }

  static final class DelScanner extends AbstractScanner {
    final PagedFile pvf;
    DelPageScanner pageScanner;
    final LongBuffer delPages;
    int delPagesIndex, docId;
    final int delPagesMax;
    final long revNum;
    public DelScanner(PagedFile pvf, LongBuffer delPages, long revNum) {
      this.pvf = pvf;
      this.delPages = delPages;
      this.revNum = revNum;
      delPagesIndex = 0;
      delPagesMax = delPages.limit();
      newPage();
    }
    private void newPage() {
      if (delPagesIndex >= delPagesMax) return;
      long pair = delPages.get(delPagesIndex++);
      int pageNum = (int)bottomUInt(pair);
      pageScanner = new DelPageScanner(pvf, pageNum, revNum);
    }
    public long doc() {
      return pageScanner.doc();
    }
    public boolean next() {
      if (pageScanner == null) return false; // no deletion pages at all
      if (pageScanner.next()) return true;
      newPage(); // current page exhausted; advance to the next deletion page
      return pageScanner.next();
    }
   
  }
 
  static final class DelPageScanner extends LongScanner {
    private final long revNum;
    DelPageScanner(PagedFile pvf, int page, long revNum) {
      super(pvf, page);
      this.revNum = revNum;
    }
    public boolean next() {
      while(super.next()) {
        if (data <= revNum) return true;
      }
      return false;
    }   
  }
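  // For example, with revNum=10 a deletion record whose top half (revDeletedIn)
  // is 8 or 10 is reported, while one stamped 12 is skipped as not yet visible
  // at that revision.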
 
  static final class RangeScanner extends LongScanner {
    private final long revNum;
    private final byte[] minTermValue, maxTermValue;
    private final int minCmp, maxCmp;
    private final OffsetValueFilePair vfPair;
    RangeScanner(PagedFile pvf, int page, OffsetValueFilePair vfPair,
        byte[] minTermValue, boolean minIsExclusive,
        byte[] maxTermValue, boolean maxIsExclusive,
        long revNum) {
      super(pvf, page);
      this.vfPair = vfPair;
      this.minTermValue = minTermValue;
      this.maxTermValue = maxTermValue;
      // next() skips a record when cmp(bound, term) crosses these thresholds;
      // an exclusive bound must also reject equality, so its threshold is looser
      this.minCmp = minIsExclusive ?  0 : 1;
      this.maxCmp = maxIsExclusive ?  0 : -1;
      this.revNum = revNum;
    }
    public boolean next() {
      while(super.next()) {
        // skip records outside [min,max], with equality handled per the
        // exclusivity thresholds set in the constructor
        if (minTermValue != null)
          if (vfPair.cmp(minTermValue, data) >= minCmp) continue;
        if (maxTermValue != null)
          if (vfPair.cmp(maxTermValue, data) <= maxCmp) continue;
        // records are appended in revision order, so once a doc id passes
        // revNum everything that follows is invisible to this revision too
        if (docId > revNum) return false;
        return true;
      }
      return false;
    }
  }
 
  private void deleteInRev(long[] deletions, long revNum) {
    // Merge the new deletion records (sorted ascending by doc id in the top
    // half) into ddf in place, shifting existing records toward the tail so
    // no temporary buffer is needed.
    int oldNumRecs = (int) ddf.getNumRecs();
    long numRecs = oldNumRecs + deletions.length;
    ddf.setNumRecs(numRecs);
    LongBuffer lb = ddf.getLongBuffer();
    int delIdx = deletions.length - 1;
    int readIdx = oldNumRecs - 1;
    int writeIdx = (int) (numRecs - 1);
    while(delIdx >= 0) {
      long targetCmpVal = topUInt(deletions[delIdx]);
      // peek before consuming, so a record smaller than the target is left
      // in place for the next (smaller) deletion rather than lost
      while(readIdx >= 0 && topUInt(lb.get(readIdx)) > targetCmpVal) {
        lb.put(writeIdx--, lb.get(readIdx--));
      }
      lb.put(writeIdx--, deletions[delIdx--]);
    }
  }
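  // For example (hypothetical values): merging sorted deletions {d1, d2} into
  // existing records {a, b, c} where top(a) < top(d1) < top(b) < top(d2) < top(c)
  // shifts from the tail and yields [a, d1, b, d2, c] with no temporary buffer.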
 
  static class TermInDocRec implements Comparable<TermInDocRec> {
    byte[] term;
    long docId;
    long termIdx;
    public TermInDocRec(byte[] term, long termIdx, long docId) {
      this.term = term;
      this.termIdx = termIdx;
      this.docId = docId;
    }
    public int compareTo(TermInDocRec other) {
      int cmp = Util.compareBytes(this.term, other.term);
      if (cmp == 0) {
        // compare rather than subtract: casting a long difference to int can overflow
        cmp = (this.docId < other.docId) ? -1 : (this.docId > other.docId ? 1 : 0);
      }
      return cmp;
    }
  }
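  // Records order by term bytes first, then by doc id, e.g.
  //   ("a", docId=2) < ("a", docId=5) < ("b", docId=1)
  // which is the order commitNewRev relies on when appending postings term by term.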

  public long commitNewRev(long[] deletions, Collection<Pair<byte[][],byte[][]>> inserts) {
    long oldMaxDocId = maxDocId;
    int numInserts = inserts.size();
    long revNum = maxDocId + numInserts;
    if (numInserts == 0) {
      // a commit containing only deletions must still occupy a docId slot,
      // so insert an empty placeholder document (empty arrays, not nulls,
      // so the loop below can take their lengths)
      inserts.add(new Pair<byte[][],byte[][]>(new byte[0][], new byte[0][]));
      revNum++;
    }
   
    Arrays.sort(deletions);
    deleteInRev(deletions, revNum);
   
    ArrayList<TermInDocRec> termToId = new ArrayList<TermInDocRec>();
    for(Pair<byte[][],byte[][]> docPair : inserts) {

      byte[][] indexTerms = docPair.getLeft();
      byte[][] storeTerms = docPair.getRight();
     
      // write to dfPair for each doc (both sets of terms)
     
      byte[] dvfBuffer = new byte[4 * (indexTerms.length+storeTerms.length)];
      int dvfIdx = 0;
      for(byte[][] terms : new byte[][][]{indexTerms,storeTerms}) {
        for(byte[] term : terms) {
          long valIdx = vfPair.append(term);
          Util.leUIntToBytes(valIdx, dvfBuffer, dvfIdx);
          dvfIdx += 4;
          if (terms == indexTerms) { // postings are built for indexed terms only
            termToId.add(new TermInDocRec(term, valIdx, maxDocId));
          }
        }
      }
      dfPair.append(dvfBuffer);
      maxDocId++;
    }
    Collections.sort(termToId);
    int termToIdSize = termToId.size();
    for(int i=0; i < termToIdSize; i++) {
      TermInDocRec rec = termToId.get(i);
      appendTermDoc(rec.term, rec.termIdx, rec.docId);
    }
    return maxDocId;
  }
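  // Sketch of a call (hypothetical values; "index" stands for an opened
  // RawBagIndex): committing one document whose indexed terms are {"a","b"}
  // with no stored terms appends both values to vfPair, one 8-byte
  // term-pointer record to dfPair, and two (term, doc) postings via
  // appendTermDoc:
  //
  //   byte[][] indexed = { "a".getBytes(), "b".getBytes() };
  //   byte[][] stored  = new byte[0][];
  //   ArrayList<Pair<byte[][],byte[][]>> docs = new ArrayList<Pair<byte[][],byte[][]>>();
  //   docs.add(new Pair<byte[][],byte[][]>(indexed, stored));
  //   long rev = index.commitNewRev(new long[0], docs);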
 
  private void appendTermDoc(byte[] term, long termIdx, long docId) {
    int termIndex = lookupTermIndex(term);
    long page = bottomUInt(pof.get(termIndex));
    boolean isRanged = (page & RANGE_MASK) != 0;
    page = page & ~RANGE_MASK;
    if (!isRanged) {
      appendToLinkedPages(page, docId);
    } else {
      appendToRangedPage(term, page, termIdx, docId);
    }
  }

  private void appendToLinkedPages(long page, long docId) {
    IntBuffer ib = pvf.getIntBuffer();
    int offset = pvf.getIntOffestForPage((int)page);
    // page header: prev_page:4, next_page:4 (the first page's prev points at the last page)
    long prevPage = ib.get(offset) & 0xFFFFFFFFL;
    int ppOffset = pvf.getIntOffestForPage((int)prevPage)+2;
    int maxOffset = pvf.getIntOffestForPage((int)prevPage+1);
    for(; ppOffset < maxOffset; ppOffset++) {
      int val = ib.get(ppOffset);
      if (val == 0) {
        ib.put(ppOffset, (int)docId);
        return;
      }
    }
    // (page full); chain in a new last page
    int newPage = pvf.newPageIndex();
    int i = pvf.getIntOffestForPage(newPage);
    // IntBuffer offsets count ints, not bytes
    ib.put(i,   (int)prevPage); // prev: the old last page
    ib.put(i+1, (int)page);     // next: back around to the first page
    ib.put(i+2, (int)docId);
    ib.put(i+3, 0);             // terminator
    // now, repoint the neighbors at the new last page
    ib.put(offset, newPage);                                   // first page's prev
    ib.put(pvf.getIntOffestForPage((int)prevPage)+1, newPage); // old last page's next
  }
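  // For example (assuming fresh pages are zero-filled): appending doc 9 to a
  // term whose last page holds [prev, next, 4, 7, 0, ...] overwrites the 0
  // terminator to give [prev, next, 4, 7, 9, ...]; a new page is chained in
  // only when no zero slot remains.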

  private final ReadWriteLocker pageLocker = new ReadWriteLocker();
 
  public void appendToRangedPage(byte[] term, long page, long termIdx, long docId) {
    LongBuffer lb = pvf.getLongBuffer();
    int offset    = pvf.getLongOffestForPage((int)page);
    int maxOffset = pvf.getLongOffestForPage((int)page+1) - 1; // reserve room for the terminator
    long rec = termIdx << 32 | docId;
    for(; offset < maxOffset; offset++) {
      long val = lb.get(offset);
      if (bottomUInt(val) == 0) { // terminating position
        lb.put(offset, rec);
        lb.put(offset+1, 0);
        return;
      }
    }

    // All full; split page:
    ArrayList<TermInDocRec> terms = new ArrayList<TermInDocRec>();
    offset = pvf.getLongOffestForPage((int)page);
    for(; offset < maxOffset; offset++) {
      long val = lb.get(offset);
      long curTermIdx = topUInt(val);
      byte[] curTerm = vfPair.valueAt((int)curTermIdx);
      terms.add(new TermInDocRec(curTerm, curTermIdx, bottomUInt(val)));
    }
    Collections.sort(terms); // the cut-point comparisons below assume term order
    int cutIndex = (terms.size() * 4) / 10;

    if (Util.compareBytes(terms.get(cutIndex).term, terms.get(0).term) == 0) {
      // Initial segment is worthy of a single-value posting list
      extractSinglePostingList(termIdx);
    } else if (Util.compareBytes(terms.get(cutIndex).term, terms.get(terms.size()-1).term) == 0) {
      // Final segment is worthy of a single-value posting list
      extractSinglePostingList(termIdx);
    } else {
      // split the range in half:
      ArrayList<TermInDocRec> subTerms1 = new ArrayList<TermInDocRec>(terms.subList(0, cutIndex+1));
      ArrayList<TermInDocRec> subTerms2 = new ArrayList<TermInDocRec>(terms.subList(cutIndex+1, terms.size()));
      // TODO: write subTerms1/subTerms2 back out as two ranged pages
    }
    // recurse once to do the actual insertion
    appendTermDoc(term, termIdx, docId);

//    pageLocker.lockForWrite((int)page);
//    try {
//      int highPage = pvf.newPageIndex();
//
//    } finally {
//      pageLocker.unlock((int)page);
//    }
  }

  private void extractSinglePostingList(long termIdx) {
    // TODO: not yet implemented in this source
  }
 
  static final class ReadWriteLocker {
    final TIntObjectHashMap<MutableInt> map = new TIntObjectHashMap<MutableInt>();
    public void lockForRead(int i) {
      MutableInt val;
      synchronized(map) {
        if (! map.containsKey(i)) {
          map.put(i, new MutableInt(1));
          return;
        }
        val = map.get(i);
      }
      synchronized(val)  {
        // wait while a writer (-1) holds the lock, then join the readers
        while (val.get() == -1) {
          try {
            val.wait();
          } catch (InterruptedException e) {
            throw new RuntimeException(e);
          }
        }
        val.incr();
      }
    }
    public void lockForWrite(int i) {
      MutableInt val;
      synchronized(map) {
        if (! map.containsKey(i)) {
          map.put(i, new MutableInt(-1));
          return;
        }
        val = map.get(i);
      }
      synchronized(val)  {
        // wait until neither readers (>0) nor another writer (-1) hold the lock
        while (val.get() != 0) {
          try {
            val.wait();
          } catch (InterruptedException e) {
            throw new RuntimeException(e);
          }
        }
        val.set(-1);
      }
    }
    public void unlock(int i) {
      MutableInt val;
      synchronized(map) {
        val = map.get(i);
      }
      synchronized(val)  {
        int curVal = val.get();
        if (curVal == -1) {
          val.set(0);
          val.notifyAll(); // wake all waiters; several readers may proceed
        } else {
          val.decr();
          if (curVal == 1)
            val.notifyAll(); // last reader released; a waiting writer may proceed
        }
      }
    }
  }
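  // Intended usage, per the commented-out call site in appendToRangedPage
  // (a sketch; "page" is whatever page index is being mutated):
  //
  //   pageLocker.lockForWrite(page);
  //   try {
  //     // mutate the page
  //   } finally {
  //     pageLocker.unlock(page);
  //   }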
 
 
  private static long topUInt(long l) {
    return l >>> 32;
  }
 
  private static long bottomUInt(long l) {
    return l & 0xFFFFFFFFL; // long mask; the int literal 0xFFFFFFFF sign-extends to -1L and masks nothing
  }
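  // For example, appendToRangedPage packs a term pointer and a doc id into
  // one long as (termIdx << 32 | docId), so with termIdx=3 and docId=7:
  //
  //   long rec = (3L << 32) | 7L;
  //   assert topUInt(rec) == 3L;     // term pointer
  //   assert bottomUInt(rec) == 7L;  // doc id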
 
  int lookupTermIndex(byte[] termValue1) {
    LongBuffer lb = pof.getLongBuffer();
    int low = 0;
    int high = (int)pof.getNumRecs() - 1;
    while( low <= high ) {
      int mid = ( low + high ) >>> 1;
      long midPair   = lb.get(mid);
      long midTermId = topUInt(midPair);
      int cmp = vfPair.cmp(termValue1, midTermId);
      if (cmp < 0) {
        high = mid - 1;
      } else if (cmp > 0) {
        low = mid + 1; // "low = mid" here would loop forever once low == high
      } else {
        return mid; // exact match
      }
    }
    return low; // insertion point when no exact match
  }
 
  Scanner[] scanRange(byte[] termValue1, byte[] termValue2,
      boolean term1IsExclusive, boolean term2IsExclusive,
      long revNum) {
    LongBuffer lb = pof.getLongBuffer();
    int low = lookupTermIndex(termValue1);
    // because of concurrent insertions the target may have moved ahead,
    // so we keep checking the lower bound as we walk forward
    ArrayList<Scanner> scanners = new ArrayList<Scanner>();
    int lbLimit = (int)pof.getNumRecs(); // record count; LongBuffer.limit() already counts longs, not bytes
    while(low < lbLimit) {
      long pair = lb.get(low++);
      long termId = topUInt(pair);
      long pageBits = bottomUInt(pair);
      boolean isRanged = (pageBits & RANGE_MASK) != 0;
      int page = (int)(pageBits & ~RANGE_MASK); // clear the tag bit to get the real page
      int cmp1 = vfPair.cmp(termValue1, termId);
      int cmp2 = vfPair.cmp(termValue2, termId);
      // cmp3 compares term2 to the next iteration's term
      int cmp3 = (low >= lbLimit) ? 1 :
        vfPair.cmp(termValue2, topUInt(lb.get(low)));
      if (cmp1 == 0) {
        if (term1IsExclusive && !isRanged) continue;
      }
      if (cmp2 <= 0) {
        if (cmp2 < 0) break; // this page's first term is already past the upper bound
        if (term2IsExclusive) break;
      }
      Scanner scanner;
      if (isRanged) {
        byte[] minTerm = (cmp1 > 0) ? termValue1 : null;
        byte[] maxTerm = (cmp3 < 0) ? termValue2 : null;
        scanner = new RangeScanner(pvf, page, vfPair,
            minTerm, term1IsExclusive,
            maxTerm, term2IsExclusive,
            revNum);
      } else {
        scanner = new IntScanner(pvf, page);
      }
      scanners.add(scanner);
    }
    return scanners.toArray(new Scanner[scanners.size()]);
  }

  @Override
  public void close() {
    pof.close();
    pvf.close();
    ddf.close();
    dfPair.close();
    vfPair.close();
  }
 
  @Override
  public Scanner fetchDeletions(long revNum) {
    return new DelScanner(pvf, ddf.getLongBuffer(), revNum);
  }

  @Override
  public BagIndexDoc fetchDoc(long docId) {
    // TODO Auto-generated method stub
    return null;
  }

  @Override
  public Scanner fetchRange(byte[] term1, byte[] term2, boolean isExclusive1, boolean isExclusive2, long revNum) {
    Scanner[] scanners = scanRange(term1, term2, isExclusive1, isExclusive2, revNum);
    return ScannerUtil.disjunctiveScanner(scanners);
  }

  @Override
  public Scanner fetchTd(byte[] term, long revNum) {
    Scanner[] scanners = scanRange(term, term, false, false, revNum);
    // scanRange can yield zero scanners (term absent) or several, so merge
    // them rather than indexing scanners[0] unconditionally
    return ScannerUtil.disjunctiveScanner(scanners);
  }

  @Override
  public String getHomedir() {
    // TODO Auto-generated method stub
    return null;
  }

  @Override
  public BagIndexDoc refetchDoc(BagIndexDoc doc, long docId) {
    // TODO Auto-generated method stub
    return null;
  }

  @Override
  public void setHomedir(String homeDir) {
    // TODO Auto-generated method stub
   
  }


}