Package dovetaildb.bagindex

Source Code of dovetaildb.bagindex.BlueSteelBagIndex

package dovetaildb.bagindex;

import gnu.trove.TLongLongHashMap;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import dovetaildb.bytes.ArrayBytes;
import dovetaildb.bytes.Bytes;
import dovetaildb.bytes.CompoundBytes;
import dovetaildb.bytes.SlicedBytes;
import dovetaildb.querynode.AbstractRangeQueryNode;
import dovetaildb.querynode.FilteredRangeQueryNode;
import dovetaildb.querynode.LiteralRangeQueryNode;
import dovetaildb.querynode.OrderedOrQueryNode;
import dovetaildb.querynode.QueryNode;
import dovetaildb.querynode.RangeQueryNode;
import dovetaildb.util.PriorityQueue;
import dovetaildb.util.Util;

public abstract class BlueSteelBagIndex extends BagIndex {

 
  /*
   * Term tree alternative:
   *
   * TokenTable:
   *   Fixed prefix token (optional)
   *   An ordered list of Tokens
   *  
   * TokenRec:
   *   1 byte of term content
   *   offset pointer to sub TokenTable
   *   SegmentPush (pointer to doc id list + count)
   *  
   * TermRec is either a DocRec or a SegmentPush.
   *
   * DocRec:
   *   Doc id delta
   *   Partial term
   *     (not including term tree already traversed and
   *      not including suffix covered by subterms, if any)
   * SegmentPush is:
   *   doc id list offset pointer
   *   document count
   */


  public static class TokenTable {
    private Bytes fixedPrefix;
    private ArrayList<TokenRec> tokenRecs;
    TokenTable(Bytes fixedPrefix, ArrayList<TokenRec> tokenRecs) {
      this.fixedPrefix = fixedPrefix;
      this.tokenRecs = tokenRecs;
    }
    public TokenTable(byte prefixByte, ArrayList<TokenRec> tokenRecs) {
      this(new ArrayBytes(new byte[]{prefixByte}), tokenRecs);
    }
    public Bytes getFixedPrefix() { return fixedPrefix; }
    public ArrayList<TokenRec> getTokenRecs() { return tokenRecs; }
    public String toString() {
      String s="\nTokenTable("+fixedPrefix+"):\n";
      for(TokenRec r:tokenRecs) {
        s += r+"\n";
      }
      return s;
    }
  }
  public static class TokenRec {
    private byte token;
    private TokenTable tokenTable;
    private SegmentPush segmentPush;
    public TokenRec(byte token, TokenTable tokenTable, SegmentPush segmentPush) {
      this.token = token;
      this.tokenTable = tokenTable;
      this.segmentPush = segmentPush;
    }
    public byte getToken() { return token; }
    public TokenTable getTokenTable() { return tokenTable; }
    public SegmentPush getSegmentPush() { return segmentPush; }
    public String toString() {
      return "('"+((char)token)+"') summary: "+segmentPush+"\n"+tokenTable;
    }
  }
  public static class PostingNode {
    private long docId;
    private Bytes token;
    private SegmentPush push; // optional
   
    // must have either this (for nonmaterialized):
    public PostingNode next;
    // or this (for materialized):
    private long idxPos;

    public PostingNode(long idxPos) {
      this.idxPos = idxPos;
    }
    public PostingNode(long docId, Bytes token) {
      this.docId = docId;
      this.token = token;
    }
    public PostingNode(SegmentPush push, long docId, Bytes token) {
      this(docId, token);
      this.push = push;
    }
    public long getDocId() { return docId; }
    public Bytes getToken() { return token; }
    public long getCount() {
      if (push == null) return 1;
      else return push.count + 1;
    }
    public SegmentPush getPush() { return push; }
    public String toString() {
      return "PostingNode dId:"+docId+" tok:"+token+" psh:"+push;
    }
    public void copyInto(PostingNode prev) {
      prev.docId = docId;
      prev.idxPos = idxPos;
      prev.push = push;
      prev.token = token;
    }
    public void setIdxPos(int idxPos) {
      this.idxPos = idxPos;
    }
    public PostingNode next() {
      return next;
    }
    public int compareTo(EditRec e) {
      long ret = getDocId() - e.docId;
      if (ret == 0) return token.compareTo(e.term);
      else return (ret > 0) ? 1 : -1;
    }
   
  }
  public static class SegmentPush {
    public long count;
    public PostingNode leadNode;
    public long idxPos;
    public List<PostingNode> nodes;
    public SegmentPush(List<PostingNode> output) {
      nodes = output;
      idxPos = 0;
      count = 0;
      PostingNode lastNode = null;
      for (PostingNode node : nodes) {
        count += node.getCount();
        if (lastNode != null) lastNode.next = node;
        lastNode = node;
      }
    }
    public SegmentPush(PostingNode start, long count) {
      this.idxPos = start.idxPos;
      this.leadNode = start;
      this.count = count;
      this.nodes = null;
    }
    public SegmentPush() {
      idxPos = 0;
      count = 0;
      this.nodes = new ArrayList<PostingNode>();
    }
    public String toString() {
      if (nodes != null) {
        return "SegmentPush"+nodes;
      } else if (leadNode != null) {
        return "SegmentPush(lead="+leadNode+",ct="+count+")";
      } else {
        return "SegmentPush@"+this.idxPos;
      }
    }
    public long getIdxPos() { return idxPos; }
    public long getCount() { return count; }
    static class NodesItr  implements CopyableIterator<PostingNode> {
      int nextIdx = 0;
      public List<PostingNode> nodes;
      NodesItr() {}
      NodesItr(List<PostingNode> nodes) {
        this.nodes = nodes;
      }
      public CopyableIterator<PostingNode> copyInto(CopyableIterator<PostingNode> targetItr) {
        NodesItr target;
        if (targetItr == null || !(targetItr instanceof NodesItr)) {
          target = new NodesItr();
        } else {
          target = (NodesItr)targetItr;
        }
        target.nextIdx = nextIdx;
        target.nodes = nodes;
        return target;
       
      }
      public boolean hasNext() {
        return nextIdx < nodes.size();
      }
      public PostingNode next() {
        return nodes.get(nextIdx++);
      }
      public void remove() { throw new RuntimeException(); }
    }
    static class SegPushItr implements CopyableIterator<PostingNode> {
      PostingNode node;
      long remaining;
      public SegPushItr() {}
      public SegPushItr(PostingNode node, long remaining) {
        this.node = node;
        this.remaining = remaining;
      }
      public boolean hasNext() {
//        return remaining > 0;
        return node != null;
      }
      public PostingNode next() {
        PostingNode ret = node;
        remaining -= node.getCount();
        if (remaining > 0) node = node.next();
        else node = null;
        return ret;
       
//        if (node==null) node = leadNode;
//        else node = node.next();
//        remaining -= node.getCount();
//        return node;
      }
      public void remove() {
        throw new RuntimeException();
      }
      public CopyableIterator<PostingNode> copyInto(CopyableIterator<PostingNode> targetItr) {
        SegPushItr target;
        if (targetItr == null || !(targetItr instanceof SegPushItr)) {
          target = new SegPushItr();
        } else {
          target = (SegPushItr)targetItr;
        }
        target.node = node;
        target.remaining = remaining;
        return target;
      }
    }
    public CopyableIterator<PostingNode> iterator() {
      if (nodes != null) {
        return new NodesItr(nodes);
      } else {
        return new SegPushItr(leadNode, count);
      }
    }
  }
 
  public static ArrayList<EditRec> popTermBytes(ArrayList<EditRec> editBuffer) {
    ArrayList<EditRec> newTermBytes = new ArrayList<EditRec>();
    for(EditRec edit : editBuffer) {
      Bytes term = edit.term;
      term = term.subBytes(1, term.getLength()-1);
      newTermBytes.add(new EditRec(edit.docId, term, edit.isDeletion));
    }
    return newTermBytes;
  }

  static abstract class MergeIterator<A,B> {
    Iterator<A> a;
    Iterator<B> b;
    A curA;
    B curB;
    boolean hasLeft, hasRight;
    public MergeIterator() {}
    public MergeIterator(Iterator<A> a, Iterator<B> b) {
      this.a = a;
      this.b = b;
      hasLeft = hasRight = true;
    }
    public A getLeft()  { return hasLeft  ? curA : null; }
    public B getRight() { return hasRight ? curB : null; }
    public void next() {
      if (hasLeft) {
        curA = (a.hasNext()) ? a.next() : null;
      }
      if (hasRight) {
        curB = (b.hasNext()) ? b.next() : null;
      }
      if (curA != null && curB != null) {
        int cmp = compare(curA, curB);
        hasLeft  = (cmp <= 0);
        hasRight = (cmp >= 0);
      } else {
        hasLeft  = curA != null;
        hasRight = curB != null;
      }
    }
    public abstract int compare(A a, B b);
  }
  static final class TokenEditMergeIterator extends MergeIterator<TokenRec, EditRec> {
    TokenEditMergeIterator(Iterator<TokenRec> a, Iterator<EditRec> b) {
      super(a, b);
    }
    @Override
    public int compare(TokenRec a, EditRec b) {
      int tok  = 0xFF & a.getToken();
      int edit = 0xFF & b.term.get(0);
      return tok - edit;
    }
  }
  /**
   * Merge of posting nodes with edits; ordering is delegated to
   * {@code PostingNode.compareTo(EditRec)}.
   */
  static final class NodeEditMergeIterator extends MergeIterator<PostingNode, EditRec> {
    NodeEditMergeIterator(Iterator<PostingNode> a, Iterator<EditRec> b) {
      super(a, b);
    }
    @Override
    public int compare(PostingNode a, EditRec b) {
      return a.compareTo(b);
    }
  }
  /**
   * Applies a batch of edits to a token table and returns the resulting table.
   *
   * Edits are merged against the table's token records by leading term byte.
   * Consecutive edits sharing the same leading byte are buffered; when the
   * byte changes (or the edits run out) the buffered group is stripped of
   * that byte via {@code popTermBytes}, applied recursively to the sub-table
   * of the co-occurring record, and spliced into that record's segment push.
   * Records with no co-occurring edit are copied to the output unchanged.
   *
   * @param edits edits to apply; must be ordered by term, then by doc id
   * @param table the table to apply them to
   * @return {@code table} itself when {@code edits} is empty; null when
   *         {@code table} is null and edits are present; otherwise a new
   *         table built with a null fixed prefix
   */
  public static TokenTable applyEditsToTokenTable(Collection<EditRec> edits, TokenTable table) {
    // prereq: edits are ordered by term, then by doc id
    if (edits.isEmpty()) return table;
    if (table == null) return null;
    /*
    Bytes fixedPrefix = table.getFixedPrefix();
    if (fixedPrefix != null) {
      // ensure all items match
      boolean allMatch = true;
      if (! allMatch) {
        // split table
      }
    }
    */
    TokenEditMergeIterator merge = new TokenEditMergeIterator(
        table.getTokenRecs().iterator(),
        edits.iterator());
    // Record that co-occurred with the first edit of the current group, if any.
    TokenRec parent = null;
    ArrayList<EditRec> editBuffer = new ArrayList<EditRec>();
    byte editBufferByte = 0;
    ArrayList<TokenRec> output = new ArrayList<TokenRec>();
    while(true) {
      merge.next();
      TokenRec rec = merge.getLeft();
      EditRec    edit = merge.getRight();
     
     
      // save away rec if co-occurs with edit,
      // otherwise write.
     
      // NOTE(review): compares the raw result of term.get(0) with a signed byte;
      // whether this matches for values >= 0x80 depends on Bytes.get -- confirm.
      if ( edit == null || edit.term.get(0) != editBufferByte) {
        // cut a new tree if (1) edit byte changes or
        // (2) edit is missing
        if (! editBuffer.isEmpty()) {
          ArrayList<EditRec> subTokenTableEdits = popTermBytes(editBuffer);
          SegmentPush push;
          TokenTable subTable;
          if (parent == null) {
            // No existing record for this byte: start from an empty push and no sub-table.
            subTable = null;
            push = new SegmentPush(new ArrayList<PostingNode>());
          } else {
            subTable = parent.getTokenTable();
            push = parent.getSegmentPush();
          }
          subTable = applyEditsToTokenTable(subTokenTableEdits, subTable);
          // The recursive call needs term order; the splice below needs id order.
          EditRec.sortById(subTokenTableEdits);
          SegmentPush termList = spliceEditsIntoSegmentPush(subTokenTableEdits, push);
          output.add(new TokenRec(editBufferByte, subTable, termList));
          editBuffer = new ArrayList<EditRec>();
          parent = null;
        }
      }
      if (edit != null) {
        editBufferByte = (byte)edit.term.get(0);
        editBuffer.add(edit);
        if (rec != null) parent = rec;
      } else {
        // No edit at this step: pass the record through, or stop when both sides are exhausted.
        if (rec != null) output.add(rec);
        else break;
      }
    }
    return new TokenTable(null, output);
  }

  public static class AdjacencyRec {
    LinkedList<PostingNode> node1, node2;
    AdjacencyRec left, right;
    long ct1, ct2;
    SegmentPush newPush = null;
    boolean dirty = false;
    public AdjacencyRec(PostingNode node2, AdjacencyRec left) {
      this.node2 = new LinkedList<PostingNode>();
      this.node2.add(node2);
      this.left = left;
      if (left == null) {
        this.node1 = null;
      } else {
        left.right = this;
        this.node1 = left.node2;
        ct1 = node1.get(0).getCount();
      }
      ct2 = node2.getCount();
    }
    public AdjacencyRec() {
    }
    public AdjacencyRec(PostingNode firstNode, PostingNode secondNode) {
      this.node1 = new LinkedList<PostingNode>();
      this.node2 = new LinkedList<PostingNode>();
      node1.add(firstNode);
      node2.add(secondNode);
      this.left = this.right = null;
      this.ct1 = firstNode.getCount();
      this.ct2 = secondNode.getCount();
    }
  }
 
 
  public static SegmentPush balanceSegmentPush(SegmentPush segment, int threshold) {
    long ct = segment.count;
    if (ct <= threshold) return segment;
    PriorityQueue queue = new PriorityQueue(500) {
      @Override
      public int compare(Object a, Object b) {
        AdjacencyRec r1 = (AdjacencyRec)a;
        AdjacencyRec r2 = (AdjacencyRec)b;
        return (int)((r1.ct1+r2.ct2) - (r2.ct1+r2.ct2));
      }
    };
    Iterator<PostingNode> itr = segment.iterator();
    PostingNode firstNode = itr.next();
    PostingNode secondNode = itr.next();
    AdjacencyRec firstAdj = new AdjacencyRec(firstNode, secondNode);
    queue.insertAndGrow(firstAdj);
    AdjacencyRec prevAdj = firstAdj;
    while(itr.hasNext()) {
      PostingNode curNode = itr.next();
      AdjacencyRec adj = new AdjacencyRec(curNode, prevAdj);
      queue.insertAndGrow(adj);
      prevAdj = adj;
    }
    while(queue.size >= threshold) {
      AdjacencyRec rec = (AdjacencyRec)queue.pop();
      if (rec.dirty) {
        rec.dirty = false;
        queue.insert(rec);
      } else {
        rec.node1.addAll(rec.node2);
        if (rec.left != null) {
          rec.left.right = rec.right;
          rec.left.ct2 += rec.ct2;
          rec.left.dirty = true;
        } else {
          firstAdj = rec.right;
        }
        if (rec.right != null) {
          rec.right.left = rec.left;
          rec.right.node1 = rec.node1;
          rec.right.ct1 += rec.ct1;
          rec.right.dirty = true;
        }
      }
    }
    AdjacencyRec adj = firstAdj;
    SegmentAccumulator accum = new SegmentAccumulator();
    while(true) {
      accum.addManyAsBalancedNode(adj.node1, threshold);
      if (adj.right == null) {
        accum.addManyAsBalancedNode(adj.node2, threshold);
        break;
      }
      adj = adj.right;
    }
    return new SegmentPush(accum.output);
  }

  /** An iterator whose position can be duplicated into another iterator instance. */
  static interface CopyableIterator<T> extends Iterator<T> {
    /**
     * If possible, copy the state of <this> into <target>
     * (and return <target>).  If not possible allocate a
     * copy of <this> and return the newly allocated copy.
     *
     * Callers must use the returned reference: it is not guaranteed to be
     * the {@code target} that was passed in.
     */
    public CopyableIterator<T> copyInto(CopyableIterator<T> target);
  }
 
  /**
   * Traversal cursor over nested segment pushes that keeps its parent frames
   * as a linked chain of {@code TraversalStackDelme} objects.
   *
   * NOTE(review): nothing in the visible source references this class and the
   * name suggests it is slated for deletion ("Delme"); presumably superseded
   * by {@code TraversalStack} -- confirm before removing.
   */
  static final class TraversalStackDelme {
    TraversalStackDelme parent;
    CopyableIterator<PostingNode> pointer;
    PostingNode current;
    // Postings still to be consumed at this level; decremented by each node's count.
    long cap;
   
    public TraversalStackDelme() {}
    public TraversalStackDelme(SegmentPush push) {
      parent = null;
      pointer = push.iterator();
      current = null;
      cap = push.count;
    }
    /** Copies all four fields into {@code o}; the iterator reference is shared, not duplicated. */
    public void copyInto(TraversalStackDelme o) {
      o.parent = parent;
      o.pointer = pointer;
      o.current = current;
      o.cap = cap;
    }
    /** True when an edit is given and the current node sorts at or after it. */
    public boolean isAt(EditRec edit) {
      return (edit==null) ? false : current.compareTo(edit) >= 0;
    }
    public void next() {
      current = pointer.next();
      cap -= current.getCount();
    }
    /** Restores this cursor to the state saved in its parent frame. */
    public void up() {
      parent.copyInto(this);
    }
    /** Descends into the push attached to the current node. */
    public void down() {
      long childCap = current.push.count;
      if (cap >= 0) {
        // Save the current state as a new parent frame before descending.
        TraversalStackDelme newParent = new TraversalStackDelme();
        copyInto(newParent);
        parent = newParent;
      } else { // transfer deficit to subsection
        childCap += (cap+1);
      }
      CopyableIterator<PostingNode> childPointer = current.push.iterator();
     
      cap = childCap;
      pointer = childPointer;
      current = null;
    }
    /**
     * Moves to the next node, descending into every non-empty push met on
     * the way. Returns false only when this level is used up and there is
     * no parent to return to.
     */
    public boolean nextOrDown() {
      if (cap <= 0) {
        if (parent == null) {
          return false;
        } else {
          up();
        }
        return true;
      }
      next();
      while(current.getPush() != null && current.getPush().getCount()>0) {
        down();
        next();
      };
      return true;
    }
    /**
     * Moves to the next node, descending into a non-empty push only when the
     * node is at or past {@code edit}. When this level is used up, pops to
     * the parent (or clears {@code current} at the root).
     */
    public boolean nextToward(EditRec edit) {
      // returns true if this is a summary node visit
      if (cap <= 0) {
        if (parent == null) {
          current = null;
        } else {
          up();
        }
        return true;
      }
      next();
      while(
        current.getPush() != null &&
        current.getPush().getCount()>0 &&
        isAt(edit))
      {
        down();
        next();
      };
      return false;
    }
    /** True when the current node has the same doc id and token as the edit. */
    public boolean isEqual(EditRec edit) {
      return (current.docId == edit.docId &&
          current.token.equals(edit.term));
    }
  }

  // Checkpoint state of a StackRec: NONE = no checkpoint; UNCHANGED = checkpointed
  // and not modified since; CHANGED = modified after the checkpoint, with the
  // checkpointed state saved aside.
  enum CheckpointStatus {NONE, UNCHANGED, CHANGED};
  static class StackRec {
    private PostingNode current, chkCurrent;
    private CopyableIterator<PostingNode> pointer, chkPointer;
    private long cap, chkCap;
    private CheckpointStatus checkpoint = CheckpointStatus.NONE;
    StackRec() {}
    void set(CopyableIterator<PostingNode> pointer, long cap) {
      dirty();
      this.pointer = pointer;
      this.cap = cap;
      this.current = null;
    }
    void checkpoint() {
      if (checkpoint == CheckpointStatus.NONE) {
        checkpoint = CheckpointStatus.UNCHANGED;
      }
    }
    private boolean uncheckpoint() {
      boolean wasCheckpointed = (checkpoint != CheckpointStatus.NONE);
      checkpoint = CheckpointStatus.NONE;
      chkPointer = null; // allow gc
      return wasCheckpointed;
    }
    public boolean rewindToCheckpoint() {
      if (checkpoint == CheckpointStatus.CHANGED) {
        current = chkCurrent;
        chkPointer.copyInto(pointer);
        cap = chkCap;
      }
      if (checkpoint == CheckpointStatus.NONE) return false;
      checkpoint = CheckpointStatus.NONE;
      chkPointer = null; // allow gc
      return true;
    }
    private void dirty() {
      if (checkpoint == CheckpointStatus.UNCHANGED) {
        chkCurrent = current;
        pointer.copyInto(chkPointer);
        chkCap = cap;
        checkpoint = CheckpointStatus.CHANGED;
      }
    }
    public void next() {
      dirty();
      current = pointer.next();
      cap -= current.getCount();
    }
    public void clearCurrent() {
      dirty();
      current = null;
    }
  }
  static final class TraversalStack {
    StackRec[] primary;
    int bottom = -1;
    int chkBottom = -1;
   
    public TraversalStack() {
      primary = new StackRec[12];
      for(int i=0; i< primary.length; i++) {
        primary[i] = new StackRec();
      }
    }
    public TraversalStack(CopyableIterator<PostingNode> pointer, long cap) {
      this();
      bottom = 0;
      primary[0].set(pointer, cap);
    }
    public TraversalStack(SegmentPush push) {
      this(push.iterator(), push.count);
    }
//    public void copyInto(TraversalStack o) {
//      o.parent = parent;
//      o.pointer = pointer;
//      o.current = current;
//      o.cap = cap;
//    }
    public boolean isAt(EditRec edit) {
      return (edit==null) ? false : primary[bottom].current.compareTo(edit) >= 0;
    }
    public void next() {
      primary[bottom].next();
    }
    public void up() {
      bottom--;
      if (chkBottom != -1) {
        primary[bottom].checkpoint();
      }
    }
    public PostingNode getCurrent() {
      return primary[bottom].current;
    }
    private void pushAndCheck() {
      bottom++;
      if (bottom == primary.length) {
        StackRec[] orig = primary;
        primary = new StackRec[primary.length*2];
        System.arraycopy(orig, 0, primary,    0, bottom);
      }
      if (chkBottom != -1) {
        primary[bottom].checkpoint();
      }
    }
    public void down() {
      StackRec rec = primary[bottom];
      long curCap = rec.cap;
      SegmentPush push = rec.current.push;
      long childCap = push.count;
      if (curCap >= 0) {
        pushAndCheck();
        rec = primary[bottom];
//        TraversalStack newParent = new TraversalStack();
//        copyInto(newParent);
//        parent = newParent;
      } else { // transfer deficit to subsection
        childCap += (curCap+1);
      }
      CopyableIterator<PostingNode> childPointer = push.iterator();
     
      rec.set(childPointer, childCap);
    }
    public boolean nextOrDown() {
      StackRec rec = primary[bottom];
      if (rec.cap <= 0) {
        if (bottom == 0) {
          return false;
        } else {
          up();
        }
        return true;
      }
      next();
      while(true) {
        SegmentPush push = primary[bottom].current.getPush();
        if (push == null || push.getCount()<=0) break;
        down();
        next();
      };
      return true;
    }
    public void checkpoint() {
      if (chkBottom != -1) uncheckpoint();
      chkBottom = bottom;
      primary[bottom].checkpoint();
    }
    public void uncheckpoint() {
      if (chkBottom == -1) throw new RuntimeException("Illegal state; no checkpoint defined");
      chkBottom = -1;
      for(int i=bottom; i>=0; i--) {
        if (! primary[i].uncheckpoint()) break;
      }
      for(int i=bottom+1; i<primary.length; i++) {
        if (! primary[i].uncheckpoint()) break;
      }
    }
    public void rewindToCheckpoint() {
      if (chkBottom == -1) throw new RuntimeException("Illegal state; no checkpoint defined");
      bottom = chkBottom;
      chkBottom = -1;
      for(int i=bottom; i>=0; i--) {
        if (! primary[i].rewindToCheckpoint()) break;
      }
      for(int i=bottom+1; i<primary.length; i++) {
        if (! primary[i].uncheckpoint()) break;
      }
    }
    public boolean nextToward(EditRec edit) {
      StackRec rec = primary[bottom];
      // returns true if this is a summary node visit
      if (rec.cap <= 0) {
        if (bottom == 0) {
          rec.clearCurrent();
        } else {
          up();
        }
        return true;
      }
      next();
      while(true) {
        SegmentPush push = primary[bottom].current.getPush();
        if (push == null || push.getCount()<=0 || ! isAt(edit)) break;
        down();
        next();
      }
      return false;
    }
    public boolean isEqual(EditRec edit) {
      PostingNode current = primary[bottom].current;
      return (current.docId == edit.docId &&
          current.token.equals(edit.term));
    }
  }

  static class SegmentAccumulator {
    PostingNode leadNode, lastNode;
    long ct;
    ArrayList<PostingNode> output = new ArrayList<PostingNode>();
    public void register(PostingNode node) {
      lastNode = node;
      if (ct==0) leadNode = node;
      ct += node.getCount();
    }
    public void addManyAsBalancedNode(LinkedList<PostingNode> node1, int leafSize) {
      if (node1.size() == 1) {
        register(node1.get(0));
        cut();
      } else {
        for(PostingNode n : node1) {
          SegmentPush push = n.getPush();
          if (push != null) {
            for(Iterator<PostingNode> i=push.iterator(); i.hasNext();) {
              register(i.next());
            }
          }
          register(n);
        }
        cut();
        PostingNode last = peekLast();
        last.push = balanceSegmentPush(last.getPush(), leafSize);
      }
    }
    public PostingNode peekLast() {
      return output.get(output.size()-1);
    }
    public void cut(long docId, Bytes token) {
      SegmentPush push = (ct > 0) ? new SegmentPush(leadNode, ct) : null;
      output.add(new PostingNode(push, docId, token));
      ct = 0;
    }
    public void cut() {
      if (ct > 1) {
        SegmentPush push = new SegmentPush(leadNode, ct - 1);
        output.add(new PostingNode(push, lastNode.docId, lastNode.token));
      } else if (ct == 1) {
        output.add(new PostingNode(leadNode.docId, leadNode.token));
      }
      ct = 0;
    }
  }

  /**
   * Walks a segment and a list of edits side by side and builds a new segment
   * containing the surviving nodes plus the inserted edits.
   *
   * @param edits   edits to apply; callers in this file sort them by id first
   * @param segment the existing segment to traverse
   * @return a new list-backed segment built from the accumulated output
   * @throws RuntimeException ("data moved") when a deletion is still pending
   *         after the traversal has ended
   */
  public static SegmentPush spliceEditsIntoSegmentPush(List<EditRec> edits, SegmentPush segment) {
    SegmentAccumulator accum = new SegmentAccumulator();
    TraversalStack stack = new TraversalStack(segment);
    Iterator<EditRec> editItr = edits.iterator();
    EditRec edit = editItr.hasNext() ? editItr.next() : null;
    do {
      boolean isSummaryVisit = stack.nextToward(edit);
      PostingNode node = stack.getCurrent();
      // A null current node means the root level has been used up.
      if (node == null) break;
      boolean deleteCur = false;
      // Consume every edit the current node has reached or passed.
      while (edit != null && stack.isAt(edit)) {
        if (edit.isDeletion) {
          deleteCur = true;
        } else {
          accum.cut(edit.docId, edit.term);
        }
        edit = editItr.hasNext() ? editItr.next() : null;
      }
      if (isSummaryVisit) {
        // Returning to a summary node: close the pending run, re-emitting
        // the node itself unless it was deleted.
        if (deleteCur) {
          accum.cut();
        } else {
          accum.cut(node.docId, node.token);
        }
      } else {
        // Ordinary node: drop it (closing the run) or add it to the run.
        if (deleteCur) {
          accum.cut();
        } else {
          accum.register(node);
        }
      }
    } while (stack.getCurrent() != null);
    // Remaining edits lie past the end of the segment; only insertions make sense there.
    while (edit != null) {
      if (edit.isDeletion) throw new RuntimeException("data moved");
      accum.cut(edit.docId, edit.term);
      edit = editItr.hasNext() ? editItr.next() : null;
    }
    return new SegmentPush(accum.output);
  }

  /** Supplies the root token table for the given revision number; implemented by subclasses. */
  protected abstract TokenTable getRootTokenTable(long revNum);
  /** Receives the token table produced by {@code commitNewRev}; implemented by subclasses. */
  protected abstract void setNewTokenTable(TokenTable newTokenTable);

  // Value returned by getHomedir() / set by setHomedir().
  protected String homeDir;
  // Value returned by getCurrentRevNum(); this class never assigns it itself.
  protected long topRevNum;
  // Next real doc id to hand out for a placeholder (negative) id in commitNewRev.
  protected long nextDocId = 1;

  /** Returns {@code topRevNum}. */
  @Override
  public long getCurrentRevNum() {
    return topRevNum;
  }

  /** Returns the home directory string last set through {@code setHomedir}. */
  @Override
  public String getHomedir() {
    return homeDir;
  }

  /** Stores the home directory string; no other action is taken here. */
  @Override
  public void setHomedir(String homeDir) {
    this.homeDir = homeDir;
  }
 
  class BlueSteelPostingListQuery extends AbstractRangeQueryNode {
    TraversalStack traversal;
    CompoundBytes termBuffer;
    public BlueSteelPostingListQuery(Bytes prefix, byte suffix, SegmentPush segmentPush) {
      this(new ArrayBytes(Util.appendByte(prefix.getBytes(), suffix)), segmentPush);
    }
    public BlueSteelPostingListQuery(Bytes prefix, SegmentPush segmentPush) {
      super(prefix, ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, false, false);
      termBuffer = new CompoundBytes(prefix, null);
      traversal = new TraversalStack(segmentPush);
      traversal.next();
      traversal.checkpoint();
    }
    public void adjustPrefix(Bytes prefix) {
      throw new RuntimeException("Query adjustment not supported");
    }
    public void adjustSuffixMax(Bytes newMax, boolean isExclusive) {
      throw new RuntimeException("Query adjustment not supported");
    }
    public void adjustSuffixMin(Bytes newMin, boolean isExclusive) {
      throw new RuntimeException("Query adjustment not supported");
    }
    public long doc() {
      return traversal.getCurrent().docId;
    }
    public boolean next() {
      long curDocId = (traversal.getCurrent()!=null) ? traversal.getCurrent().docId : -1;
      do {
        boolean nxt = traversal.nextOrDown();
        if (!nxt) return false;
      } while(traversal.getCurrent().docId == curDocId);
      traversal.checkpoint();
      return true;
    }
    public NextStatus nextTerm() {
      long curDocId = traversal.getCurrent().docId;
      boolean nxt = traversal.nextOrDown();
      if (!nxt) return NextStatus.AT_END;
       if (traversal.getCurrent().docId == curDocId) {
         return NextStatus.NEXT_TERM;
       } else {
         traversal.checkpoint();
         return NextStatus.NEXT_DOC;
       }
    }
    public void resetTerms() {
      traversal.rewindToCheckpoint();
    }
    public boolean skipTo(long target) {
      do {
        traversal.nextToward(new EditRec(target, ArrayBytes.EMPTY_BYTES, false));
        if (traversal.getCurrent() == null) return false;
      } while(traversal.getCurrent().docId < target);
      traversal.checkpoint();
      return true;
    }
    public Bytes term() {
      termBuffer.setSuffix(traversal.getCurrent().token);
      return termBuffer;
    }
  }
 
  /**
   * Builds a query node over the terms that start with {@code prefix} and
   * whose remainder lies between {@code term1} and {@code term2}.
   *
   * The prefix is first followed byte by byte through the token tables of
   * the given revision. The remaining range is then resolved against the
   * table reached, producing one posting-list query per matching token
   * record, filtered at the range boundaries.
   *
   * @param prefix       bytes every matching term starts with
   * @param term1        lower bound of the remainder; null means unbounded
   * @param term2        upper bound of the remainder; null means unbounded
   * @param isExclusive1 whether the lower bound is exclusive
   * @param isExclusive2 whether the upper bound is exclusive
   * @param revNum       revision whose root token table is queried
   * @return a query node, or null when nothing can match
   */
  @Override
  public RangeQueryNode getRange(Bytes prefix, Bytes term1, Bytes term2,
      boolean isExclusive1, boolean isExclusive2, long revNum) {
    TokenTable table = getRootTokenTable(revNum);
    int len = prefix.getLength();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for(int i=0; i<len; i++) {
      // NOTE(review): if no record has token >= prefix.get(i), this inner loop
      // ends without a match and the next prefix byte is checked against the
      // same table -- confirm that is intended rather than returning null.
      for(TokenRec rec : table.tokenRecs) {
        // NOTE(review): signed comparison here, whereas the scan further down
        // masks the token with 0xFF -- confirm what Bytes.get returns.
        if (rec.token >= prefix.get(i)) {
          if (rec.token > prefix.get(i)) return null;
          if (rec.tokenTable==null) {
            // No deeper table: answer from this record's postings and let the
            // filter enforce the rest of the prefix and the range.
            Bytes pre = new SlicedBytes(prefix, 0, i+1);
            pre = new ArrayBytes(pre.getBytes()); // optimize
            RangeQueryNode ret = new BlueSteelPostingListQuery(pre, rec.segmentPush);
            return FilteredRangeQueryNode.make(ret, prefix, term1, term2, isExclusive1, isExclusive2);
          }
          table = rec.tokenTable;
          bos.write(rec.token);
          break;
        }
      }
    }
    // NOTE(review): the bytes collected in bos equal the prefix bytes already
    // matched, so this appends them to prefix a second time -- confirm intent.
    if (bos.size() > 0) {
      prefix = new CompoundBytes(prefix, new ArrayBytes(bos.toByteArray())).flatten();
    }
   
    if (table == null) return null;
    // First byte of each bound, used to select token records: top for the
    // lower bound, bottom for the upper bound.
    int top,bottom;
    if (term1 == null) {
      top = 0;
      isExclusive1 = false;
    } else if (term1.getLength() > 0) {
      top = term1.get(0);
    } else {
      top = 0;
    }
    if (term2 == null) {
      bottom = 255;
      isExclusive2 = false;
    } else if (term2.getLength() > 0) {
      bottom = term2.get(0);
    } else { //specified, but is empty (must be a very narrow range!)
      bottom = 0;
    }
    Iterator<TokenRec> itr = table.getTokenRecs().iterator();
    TokenRec rec;
    // Skip records whose token lies below the lower bound's first byte.
    do {
      if (! itr.hasNext()) return null;
      rec = itr.next();
    } while((0xFF & rec.token) < top);
    if (top==bottom) {
      RangeQueryNode q = new BlueSteelPostingListQuery(prefix, rec.token, rec.segmentPush);
      return FilteredRangeQueryNode.make(q, null, term1, term2, isExclusive1, isExclusive2);
    }
    List<RangeQueryNode> queries = new ArrayList<RangeQueryNode>();
    if (rec.token == top) {
      RangeQueryNode q = new BlueSteelPostingListQuery(prefix, rec.token, rec.segmentPush);
      q = FilteredRangeQueryNode.make(q, null, term1, null, isExclusive1, false);
      queries.add(q);
    }
    // NOTE(review): rec is not advanced after the boundary query above, so a
    // record with token == top appears to be added again here, unfiltered --
    // confirm. The comparisons below are also on the signed token.
    while(rec.token < bottom) {
      queries.add(new BlueSteelPostingListQuery(prefix, rec.token, rec.segmentPush));
      if (! itr.hasNext()) break;
      rec = itr.next();
    }
    if (rec.token == bottom) {
      RangeQueryNode q = new BlueSteelPostingListQuery(prefix, rec.token, rec.segmentPush);
      q = FilteredRangeQueryNode.make(q, null, null, term2, false, isExclusive2);
      queries.add(q);
    }
    return new OrderedOrQueryNode(queries, prefix, term1, term2, isExclusive1, isExclusive2);
  }

  /**
   * Looks up a single term by delegating to {@code getRange} with the term
   * as prefix and empty, inclusive bounds on both sides.
   */
  @Override
  public QueryNode getTerm(Bytes term, long revNum) {
    return getRange(term, ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, false, false, revNum);
  }
 
  @Override
  public long commitNewRev(Collection<EditRec> edits) {
    TLongLongHashMap idMap = new TLongLongHashMap();
    for(EditRec edit : edits) {
      long docId = edit.docId;
      if (docId < 0) {
        if (idMap.contains(docId)) {
          edit.docId = idMap.get(docId);
        } else {
          long newDocId = nextDocId++;
          idMap.put(docId, newDocId);
          edit.docId = newDocId;
        }
      }
    }
    TokenTable newTokenTable = applyEditsToTokenTable(edits, getRootTokenTable(topRevNum));
    setNewTokenTable(newTokenTable);
    return topRevNum;
  }
}

TOP

Related Classes of dovetaildb.bagindex.BlueSteelBagIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.