// Package dovetaildb.bagindex
//
// Source code of dovetaildb.bagindex.BlueSteelBagIndex

package dovetaildb.bagindex;


import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.logging.Level;

import dovetaildb.bytes.ArrayBytes;
import dovetaildb.bytes.Bytes;
import dovetaildb.bytes.CompoundBytes;
import dovetaildb.bytes.SlicedBytes;
import dovetaildb.querynode.AbstractQueryNode;
import dovetaildb.querynode.FilterLiteralsQueryNode;
import dovetaildb.querynode.FilteredQueryNode;
import dovetaildb.querynode.FlatOrQueryNode;
import dovetaildb.querynode.OrderedOrQueryNode;
import dovetaildb.querynode.QueryNode;
import dovetaildb.util.Util;

public abstract class BlueSteelBagIndex extends BagIndex {

 
  /*
   * Term tree alternative:
   *
   * TokenTable:
   *   Fixed prefix token (optional)
   *   An ordered list of Tokens
   *  
   * TokenRec:
   *   1 byte of term content
   *   offset pointer to sub TokenTable
   *   SegmentPush (pointer to doc id list + count)
   *  
   * TermRec is either a DocRec or a SegmentPush.
   *
   * DocRec:
   *   Doc id delta
   *   Partial term
   *     (not including term tree already traversed and
   *      not including suffix covered by subterms, if any)
   * SegmentPush is:
   *   doc id list offset pointer
   *   document count
   */


  private static final long serialVersionUID = 763109748243653125L;

  protected String homeDir;     // index home directory (see get/setHomedir)
  protected long topRevNum;     // most recently committed revision number
  protected long nextDocId = 1; // next doc id to assign
  protected BalancingPolicy policy = new ProbabilisticBalancingPolicy(10);
  protected int targetPostingListLength = 10;
  protected int termTableDepth = 1; // maximum trie depth for newly grown token tables

  /** Returns the root token table for the given revision. */
  protected abstract TokenTable getRootTokenTable(long revNum);
  /** Installs a freshly built root token table. */
  protected abstract void setNewTokenTable(TokenTable newTokenTable);

  /** Strategy deciding when a posting-list segment should be rebalanced. */
  public static interface BalancingPolicy extends Serializable {
    /** Desired number of top-level entries per posting-list segment. */
    public int getTargetLength();
    /** Returns true if the given segment is unbalanced enough to warrant a rebuild. */
    public boolean decide(SegmentPush push);
  }

  /**
   * BalancingPolicy that requests a rebuild when the average deviation of
   * per-node counts from an ideal even split exceeds a tolerance, optionally
   * perturbed by random jitter.
   */
  public static class ProbabilisticBalancingPolicy implements BalancingPolicy {

    final int targetLength;
    final float randomness;          // width of the random jitter added to the decision
    final float inbalanceTolerance;  // allowed bumpiness before a rebuild is requested
    final Random rand = new Random(734673462); // fixed seed keeps decisions reproducible

    private static final long serialVersionUID = -761827523790404571L;

    public ProbabilisticBalancingPolicy(int targetLength) {
      this(targetLength, 0.5f, 0.0f);
    }
    public ProbabilisticBalancingPolicy(int targetLength, float inbalanceTolerance, float randomness) {
      this.targetLength = targetLength;
      this.randomness = randomness;
      this.inbalanceTolerance = inbalanceTolerance;
    }
    public int getTargetLength() { return targetLength; }
    /** Walks the push's top-level nodes measuring how far each node's count
     * strays from the ideal subcount; returns true when the average deviation
     * (plus jitter) exceeds the tolerance. Iterates a copied lead node so the
     * caller's chain is not consumed. */
    public boolean decide(SegmentPush push) {
      long totalCount = push.getCount();
      float targetSubcount = (totalCount < targetLength) ? 1.0f : totalCount / (float)targetLength;
      int count = 0;
      float delta = 0;
      PostingNode node = push.leadNode.copy();
      while(true) {
        long curCt = node.getCount();
        totalCount -= curCt;
        delta += Math.abs(curCt - targetSubcount);
        count++;
        if (totalCount <= 0) break;
        node = node.destructiveNext();
      }
      float averageDelta = delta / (float)count;
      float bumpinessFactor = averageDelta / targetLength;
      float randomFactor = (rand.nextFloat()*randomness - (randomness/2));
      return (bumpinessFactor + randomFactor > inbalanceTolerance);
    }
  }
 
  /**
   * One byte-trie level of the term index: an optional fixed prefix plus an
   * ordered list of TokenRecs, one per distinct next byte.
   */
  public static abstract class TokenTable {
    public abstract Bytes getFixedPrefix();
    public abstract Iterator<TokenRec> getTokenRecs();
    /** null return normally means that there are no results; but
     * a null return with a resulting empty SlicedBytes means
     * that all results apply. */
    public TokenRec descend(SlicedBytes bytes) {
      Bytes fixedPrefix = getFixedPrefix();
      int prefixLen = 0;
      if (fixedPrefix != null) {
        if (! fixedPrefix.isPrefixOf(bytes)) {
          return null;
        }
        prefixLen = fixedPrefix.getLength();
      }
      int bytesLen = bytes.getLength();
      if (prefixLen >= bytesLen) return null;
      // next byte of the sought term, after the shared prefix
      int ch = bytes.get(prefixLen);
      for(Iterator<TokenRec> i=getTokenRecs(); i.hasNext(); ) {
        TokenRec rec = i.next();
        if ((rec.token & 0xFF) == ch) {
          if (rec.tokenTable == null || prefixLen+1 == bytesLen) {
            bytes.subBytes(prefixLen + 1); // set the slice position
            return rec;
          } else {
            return rec.tokenTable.descend(bytes);
          }
        }
      }
      return null;
    }
    /** Aggregates token/term/top-level counts over this table and its subtables. */
    public Map<String,Object> getMetrics(int detailLevel) {
      int numTokens = 0;
      int termCt = 0;
      int topLevelCt = 0;
      List<Map<String,Object>> detail = new ArrayList<Map<String,Object>>();
      for(Iterator<TokenRec> i=getTokenRecs(); i.hasNext(); ) {
        TokenRec rec = i.next();
        numTokens++;
        SegmentPush push = rec.getSegmentPush();
        termCt += push.getCount();
        topLevelCt += push.getTopLevelCount();
        TokenTable sub = rec.getTokenTable();
        if (sub != null) {
          Map<String,Object> submetrics = sub.getMetrics(detailLevel);
          // NOTE(review): this records THIS table's prefix on the submetrics, not the
          // subtable's; also getFixedPrefix() may be null (see descend) — confirm intent.
          submetrics.put("prefix", getFixedPrefix().getAsString());
          detail.add(submetrics);
        }
      }
      return Util.literalSMap().p("numTokens", numTokens).p("termCt", termCt).p("topLevelCt",topLevelCt).p("detail", detail);
    }
  }
 
  public static class MemoryTokenTable extends TokenTable {
    private Bytes fixedPrefix;
    private ArrayList<TokenRec> tokenRecs;
    MemoryTokenTable(Bytes fixedPrefix, ArrayList<TokenRec> tokenRecs) {
      if (tokenRecs.size() > 256) {
        throw new RuntimeException("TokenTable is too big ("+tokenRecs.size()+")");
      }
      this.fixedPrefix = fixedPrefix;
      this.tokenRecs = tokenRecs;
    }
    public Bytes getFixedPrefix() { return fixedPrefix; }
    public Iterator<TokenRec> getTokenRecs() { return tokenRecs.iterator(); }
    public String toString() {
      String s="\nTokenTable("+fixedPrefix+"):\n";
      for(TokenRec r:tokenRecs) {
        s += r+"\n";
      }
      return s;
    }
  }
  public static class TokenRec {
    protected byte token;
    protected TokenTable tokenTable;
    protected SegmentPush segmentPush;
    public TokenRec(byte token, TokenTable tokenTable, SegmentPush segmentPush) {
      this.token = token;
      this.tokenTable = tokenTable;
      this.segmentPush = segmentPush;
    }
    public byte getToken() { return token; }
    public TokenTable getTokenTable() { return tokenTable; }
    public SegmentPush getSegmentPush() { return segmentPush; }
    public String toString() {
      return "('"+((char)token)+"') summary: "+segmentPush+"\n"+tokenTable;
    }
  }
  public static abstract class PostingNode {
    public long docId;
    public Bytes token;
    public final long getDocId() { return docId; }
    public final Bytes getToken() { return token; }
    public abstract long getCount();
    public abstract PostingNode destructiveNext();
    public abstract PostingNode destructiveDown();
    public abstract long getPushCount();
    public SegmentPush getPush() {
      PostingNode copy = this.copy();
      copy = copy.destructiveDown();
      return new SegmentPush(copy, getPushCount());
    }
    public int compareTo(long docId, Bytes term) {
      long ret = getDocId() - docId;
      if (ret == 0) return token.compareTo(term);
      else return (ret > 0) ? 1 : -1;
    }
    public int compareTo(EditRec e) {
      return compareTo(e.docId, e.term);
    }
    public void getTextDisplay(Writer w, String prefix, long ct) {
      SegmentPush push = getPush();
      long pushCt = getCount() - 1;
      if (ct <= pushCt) {
        pushCt = ct;
        ct = 0;
      } else {
        ct -= pushCt;
      }
      if (push != null && pushCt > 0) {
        push.getTextDisplay(w, prefix+" ", pushCt);
      }
      if (ct > 0) {
        try {
          w.write(prefix+getDocId()+" '"+Util.makePrintable(getToken().getBytes())+"'\n");
        } catch (IOException e) { throw new RuntimeException(e); }
        ct--;
        PostingNode next = destructiveNext();
        if (next != null && ct > 0) {
          next.getTextDisplay(w, prefix, ct);
        }
      }
    }
    public abstract PostingNode copy();
    public PostingNode copyInto(PostingNode other) {
      return copy();
    }
    public PostingNode copyPushLeadInto(PostingNode other) {
      return getPush().leadNode.copy();
    }
  }
  public static final class MemoryPostingNode extends PostingNode implements Cloneable {
    private SegmentPush push; // optional
    private PostingNode next;

    public MemoryPostingNode(long docId, Bytes token) {
      this.docId = docId;
      if (token != null) token = token.flatten();
      this.token = token;
    }
    public MemoryPostingNode(SegmentPush push, long docId, Bytes token) {
      this(docId, token);
      this.push = push;
    }
    private MemoryPostingNode(PostingNode node, SegmentPush push) {
      this(push, node.getDocId(), node.getToken());
    }
    public static MemoryPostingNode make(PostingNode node) {
      SegmentPush push = node.getPush();
      if (push != null) {
        push = push.copy();
      }
      return new MemoryPostingNode(node, push);
    }
    public long getCount() {
      if (push == null) return 1;
      else return push.count + 1;
    }
    public SegmentPush getPush() { return push; }
    public void setPush(SegmentPush push) {
      this.push = push;
    }
    public PostingNode destructiveDown() {
      return push.leadNode;
    }
    public long getPushCount() {
      if (push == null) return 0;
      else return push.getCount();
    }
    public PostingNode destructiveNext() {
      return next;
    }
    public void setNext(PostingNode node) {
      next = node;
    }
    public int compareTo(EditRec e) {
      long ret = getDocId() - e.docId;
      if (ret == 0) return token.compareTo(e.term);
      else return (ret > 0) ? 1 : -1;
    }
    public String toString() {
      return "MemPostingNode dId:"+docId+" tok:"+token+" psh:"+push;
    }
    @Override
    public MemoryPostingNode copy() {
      try {
        MemoryPostingNode copy = (MemoryPostingNode)clone();
        copy.token = token.copy();
        return copy;
      } catch (CloneNotSupportedException e) {
        throw new RuntimeException(e);
      }
    }
    @Override
    public PostingNode copyInto(PostingNode other) {
      return copy();
//      if (other == null || !(other instanceof MemoryPostingNode)) return copy();
//      MemoryPostingNode memOther = (MemoryPostingNode)other;
//      memOther.docId = docId;
//      memOther.token = token.copyInto(memOther.token);
      // NEEDS push & next
//      return memOther;
    }
    public PostingNode next() {
      return next;
    }
  }
  public static class PostingNodeIterator implements Iterator<PostingNode> {
    private PostingNode node;
    private long count;
    public PostingNodeIterator(PostingNode lead, long count) {
      this.node = lead;
      this.count = count;
    }
    @Override
    public boolean hasNext() {
      return count > 0;
    }
    @Override
    public PostingNode next() {
      PostingNode cur = node;
      count -= cur.getCount();
      node = node.copy().destructiveNext();
      return cur;
    }
    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }
  /** A pointer to the lead node of a posting chain plus the total entry count it covers. */
  public static final class SegmentPush {
    public long count;
    public PostingNode leadNode;
    public SegmentPush(ArrayList<MemoryPostingNode> nodes, long count) {
      this(nodes);
      this.count = count; // caller-supplied count overrides the computed one
    }
    /** Chains the given nodes into a singly-linked list, summing their counts. */
    public SegmentPush(List<MemoryPostingNode> output) {
      if (output.size() > 1) { // TODO consider removing
        if (output.get(0).docId == output.get(1).docId) {
          if (output.get(0).token.compareTo(output.get(1).token) == 0) {
            assert false; // duplicate (docId, token) at the head — invariant violation
          }
        }
      }
      count = 0;
      MemoryPostingNode lastNode = null;
      for (MemoryPostingNode node : output) {
        count += node.getCount();
        if (lastNode == null) {
          leadNode = node;
        } else {
          lastNode.setNext(node);
        }
        lastNode = node;
      }
    }
    public SegmentPush(PostingNode start, long count) {
      this.leadNode = start;
      this.count = count;
    }
    public SegmentPush() {
      count = 0;
    }
    /** Debug aid: dumps the structure to stdout. */
    public void getTextDisplay() {
      OutputStreamWriter w = new OutputStreamWriter(System.out);
      getTextDisplay(w, "", Long.MAX_VALUE);
      try {
        w.flush();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
    public void getTextDisplay(Writer w, String prefix, long ct) {
      if (ct > count) ct = count;
      leadNode.getTextDisplay(w, prefix, ct);
      try {
        w.write(prefix+"SegPsh ct="+ct+"\n");
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
    public String toString() {
      return "SegmentPush(ct="+count+",lead="+leadNode+")";
    }
    public long getCount() { return count; }
    /** Counts only the top-level nodes, not entries inside nested pushes. */
    public long getTopLevelCount() {
      Iterator<PostingNode> i = new PostingNodeIterator(leadNode.copy(), count);
      long ctr = 0;
      while(i.hasNext()) {
        i.next();
        ctr++;
      }
      return ctr;
    }
    /** Copies the lead node; the remainder of the chain is shared with the original. */
    public SegmentPush copy() {
      return new SegmentPush((leadNode==null) ? null : leadNode.copy(), count);
    }
    public Iterator<PostingNode> iterator() {
      return new PostingNodeIterator(leadNode.copy(), count);
    }
  }

  public static ArrayList<EditRec> popTermBytes(ArrayList<EditRec> editBuffer) {
    ArrayList<EditRec> newTermBytes = new ArrayList<EditRec>();
    for(EditRec edit : editBuffer) {
      Bytes term = edit.term;
      int newLength = term.getLength() - 1;
      term = new SlicedBytes(term, 1, newLength);
      newTermBytes.add(new EditRec(edit.docId, term, edit.isDeletion));
    }
    return newTermBytes;
  }

  /**
   * Merges two sorted iterators. After each next(), getLeft()/getRight()
   * expose the element(s) currently at the merge front — both sides at once
   * when they compare equal, or null for the side that is behind/exhausted.
   */
  static abstract class MergeIterator<A,B> {
    Iterator<A> a;
    Iterator<B> b;
    A curA;
    B curB;
    boolean hasLeft, hasRight; // which side(s) were emitted and must advance next round
    public MergeIterator() {}
    public MergeIterator(Iterator<A> a, Iterator<B> b) {
      this.a = a;
      this.b = b;
      hasLeft = hasRight = true;
    }
    public A getLeft()  { return hasLeft  ? curA : null; }
    public B getRight() { return hasRight ? curB : null; }
    /** Advances whichever side(s) were consumed and recomputes the front. */
    public void next() {
      if (hasLeft) {
        curA = (a.hasNext()) ? a.next() : null;
      }
      if (hasRight) {
        curB = (b.hasNext()) ? b.next() : null;
      }
      if (curA != null && curB != null) {
        int cmp = compare(curA, curB);
        hasLeft  = (cmp <= 0);
        hasRight = (cmp >= 0);
      } else {
        // one or both streams exhausted: drain whatever remains
        hasLeft  = curA != null;
        hasRight = curB != null;
      }
    }
    public abstract int compare(A a, B b);
  }
  static final class TokenEditMergeIterator extends MergeIterator<TokenRec, EditRec> {
    TokenEditMergeIterator(Iterator<TokenRec> a, Iterator<EditRec> b) {
      super(a, b);
    }
    @Override
    public int compare(TokenRec a, EditRec b) {
      int tok  = 0xFF & a.getToken();
      int edit = (b.term.getLength() == 0) ? -1 : 0xFF & b.term.get(0);
      return tok - edit;
    }
  }
  /** Merges posting nodes with edits using PostingNode's (docId, term) ordering. */
  static final class NodeEditMergeIterator extends MergeIterator<PostingNode, EditRec> {
    NodeEditMergeIterator(Iterator<PostingNode> a, Iterator<EditRec> b) {
      super(a, b);
    }
    @Override
    public int compare(PostingNode a, EditRec b) {
      return a.compareTo(b);
    }
  }
  /**
   * Merges a term-ordered edit stream into a token table, recursing one byte
   * of term at a time and rebalancing each modified posting list.
   * Returns a new MemoryTokenTable; the input table is not mutated.
   */
  public static TokenTable applyEditsToTokenTable(Collection<EditRec> edits, TokenTable table, BalancingPolicy policy, int termTableDepth) {
    // prereq: edits are ordered by term, then by doc id
    TokenEditMergeIterator merge = new TokenEditMergeIterator(
        table.getTokenRecs(),
        edits.iterator());
    TokenRec parent = null;
    // edits whose terms all start with editBufferByte, awaiting a cut
    ArrayList<EditRec> editBuffer = new ArrayList<EditRec>();
    byte editBufferByte = 0;
    ArrayList<TokenRec> output = new ArrayList<TokenRec>();
    while(true) {
      merge.next();
      TokenRec rec = merge.getLeft();
      EditRec edit = merge.getRight();

      if (edit != null && edit.term.getLength() == 0) {
        // very short term, already captured in parent's summary list
        if (rec == null) continue;
        else edit = null;
      }

      // save away rec if co-occurs with edit,
      // otherwise write.

      if ( edit == null || ((byte)edit.term.get(0)) != editBufferByte) {
        // cut a new tree if (1) edit byte changes or
        // (2) edit is missing
        if (! editBuffer.isEmpty()) {
          ArrayList<EditRec> subTokenTableEdits = popTermBytes(editBuffer);
          SegmentPush push;
          TokenTable subTable;
          if (parent == null) {
            // no existing record for this byte: start from empty
            subTable = null;
            push = new SegmentPush(new ArrayList<MemoryPostingNode>());
          } else {
            subTable = parent.getTokenTable();
            push = parent.getSegmentPush();
          }
          if (subTable == null) {
            if (termTableDepth > 0) {
              // still within the trie depth limit: grow a new subtable level
              Bytes newPrefix = new CompoundBytes(table.getFixedPrefix(), ArrayBytes.SINGLE_BYTE_OBJECTS[editBufferByte & 0xff]);
              subTable = new MemoryTokenTable(newPrefix, new ArrayList<TokenRec>());
            }
          }
          if (subTable != null) {
            subTable = applyEditsToTokenTable(subTokenTableEdits, subTable, policy, termTableDepth-1);
          }
          EditRec.sortById(subTokenTableEdits);
//          verifySegmentPush(push);
          SegmentPush termList = spliceEditsIntoSegmentPush(subTokenTableEdits, push);
//          verifySegmentPush(termList);
          long prevCt = termList.getCount();
          termList = BlueSteelBagIndex.balanceSegmentPush(termList, policy);
//          termList.getTextDisplay();
          long newCount = termList.getCount();
          // balancing must preserve the total entry count
          assert newCount == prevCt;
//          assert verifySegmentPush(termList);
          if (termList.getCount() > 0) {
            output.add(new TokenRec(editBufferByte, subTable, termList));
          }
          editBuffer = new ArrayList<EditRec>();
          parent = null;
        }
      }
      if (edit != null) {
        editBufferByte = (byte)edit.term.get(0);
        editBuffer.add(edit);
        if (rec != null) parent = rec;
      } else {
        // no edit touches this record: carry it through unchanged
        if (rec != null) output.add(rec);
        else break;
      }
    }
    return new MemoryTokenTable(table.getFixedPrefix(), output);
  }

  /**
   * A pair of adjacent posting-node runs in a doubly-linked chain of pairs.
   * NOTE(review): not referenced anywhere in this file's visible code —
   * looks like leftover balancing scaffolding; confirm before relying on it.
   */
  public static class AdjacencyRec {
    LinkedList<PostingNode> node1, node2; // left and right runs of the pair
    AdjacencyRec left, right;             // neighboring pairs in the chain
    long ct1, ct2;                        // entry counts of the head of node1/node2
    SegmentPush newPush = null;
    boolean dirty = false;
    /** Appends a new pair whose left run is the previous pair's right run. */
    public AdjacencyRec(PostingNode node2, AdjacencyRec left) {
      this.node2 = new LinkedList<PostingNode>();
      this.node2.add(node2);
      this.left = left;
      if (left == null) {
        this.node1 = null;
      } else {
        left.right = this;
        this.node1 = left.node2;
        ct1 = node1.get(0).getCount();
      }
      ct2 = node2.getCount();
    }
    public AdjacencyRec() {
    }
    /** Builds a standalone pair from two nodes. */
    public AdjacencyRec(PostingNode firstNode, PostingNode secondNode) {
      this.node1 = new LinkedList<PostingNode>();
      this.node2 = new LinkedList<PostingNode>();
      this.left = this.right = null;
      node1.add(firstNode);
      ct1 = firstNode.getCount();
      node2.add(secondNode);
      ct2 = secondNode.getCount();
    }
  }
 
  /**
   * Rewrites a segment into groups of roughly targetCt entries, splitting
   * over-full nested pushes per the policy. Total entry count is preserved
   * (asserted by callers).
   */
  public static SegmentPush balanceSegmentPush(SegmentPush segment, BalancingPolicy policy) {
    /*
    long totalCount = segment.count;
    if (totalCount <= 1) return segment;
    long targetCt = totalCount / target;
    if (targetCt <= 0) targetCt = 1;
    long maxCt = (targetCt * 3) / 2;
    */

    int targetLength = policy.getTargetLength();
    long totalCount = segment.count;
    long targetCt = totalCount / targetLength;
    if (targetCt <= 1) return segment;
    // on the leaf lists, always put the longer counts at the bottom:
    if (targetCt < targetLength) targetCt = targetLength;

    SegmentAccumulator accum = new SegmentAccumulator(policy);
    TraversalStack traversal = new TraversalStack(segment);
    boolean isSum = traversal.nextContainingNode();
    assert !isSum;
    boolean isSummaryVisit = false;
    while(true) {
      PostingNode node = traversal.getCurrent();
      if (node == null) break;
      if (isSummaryVisit) {
        // re-create summary nodes without their push so entries aren't double-counted
        node = new MemoryPostingNode(null, node.getDocId(), node.getToken());
//        accum.register(new MemoryPostingNode(null, node.getDocId(), node.getToken()));
      }
      long curCt = node.getCount();
      if (curCt > 1) {
        if (policy.decide(node.getPush())) {
          // too big, split it and retry loop
          traversal.down();
          isSum = traversal.nextContainingNode();
          assert !isSum;
          continue;
        }
      }
      long proposedCt = accum.ct + curCt;
      if (Math.abs(accum.ct  -targetCt) <
        Math.abs(proposedCt-targetCt)) {
        // we're closer to target before adding this item
        // so cut what we've got and retry the current node
        if (accum.ct > 0) {
          accum.cut();
          continue;
        }
      }
      accum.registerAndLink(MemoryPostingNode.make(node.copy()));
      isSummaryVisit = traversal.nextContainingNode();
    }
    return new SegmentPush(accum.getOutput());
  }

  /**
   * A stack-based cursor over the nested structure of a SegmentPush.
   * primary[0..bottom] holds one frame per nesting level; bottom indexes the
   * frame currently being iterated. Each frame carries a remaining entry
   * budget ("cap"); a non-positive cap marks the frame as exhausted.
   */
  static final class TraversalStack implements Cloneable {
    BlueSteelStackRec[] primary;
    int bottom = -1;

    public TraversalStack() {
      primary = new BlueSteelStackRec[12];
      for(int i=0; i< primary.length; i++) {
        primary[i] = new BlueSteelStackRec();
      }
    }
    public TraversalStack(PostingNode pointer, long cap) {
      this();
      bottom = 0;
      primary[0].set(pointer, cap);
    }
    public TraversalStack(SegmentPush push) {
      this(push.leadNode, push.count);
    }
    /** True if the cursor is at or past the given (docId, term). */
    public boolean isAt(long docId, Bytes term) {
      return primary[bottom].getCurrent().compareTo(docId, term) >= 0;
    }
    public int cmpTo(long docId, Bytes term) {
      PostingNode cur = primary[bottom].getCurrent();
      if (cur == null) return 1; // (i am at the end of all results)
      return cur.compareTo(docId, term);
    }
    public long cmpTo(long docId) {
      PostingNode cur = primary[bottom].getCurrent();
      if (cur == null) return 1; // (i am at the end of all results)
      // NOTE(review): long subtraction could overflow for extreme doc ids
      return cur.docId - docId;
    }
    public boolean isAt(EditRec edit) {
      return (edit==null) ? false : primary[bottom].getCurrent().compareTo(edit) >= 0;
    }
    /** Advances within the current frame only. */
    public void next() {
      primary[bottom].next();
    }
    /** returns false if there is no next value */
    public boolean up() {
      while (true) {
        primary[bottom].clearCurrent();
        if (bottom <= 0) return false;
        bottom--;
        // skip ancestor frames whose budget is already spent
        if (primary[bottom].cap >= 0) return true;
      }
    }
    public PostingNode getCurrent() {
      PostingNode node =  primary[bottom].getCurrent();
      return node;
    }
    /** Advances bottom, doubling the frame array if depth outgrows it. */
    private void pushAndCheck() {
      bottom++;
      if (bottom == primary.length) {
        BlueSteelStackRec[] orig = primary;
        primary = new BlueSteelStackRec[primary.length*2];
        System.arraycopy(orig, 0, primary,    0, bottom);
        for(int i=bottom; i< primary.length; i++) {
          primary[i] = new BlueSteelStackRec();
        }
      }
    }
    /** Descends into the current node's push, carrying any cap deficit down. */
    public void down() {
      BlueSteelStackRec rec = primary[bottom];
      PostingNode current = rec.getCurrent();
      long curCap = rec.cap;
      long childCap = current.getPushCount();
      rec.hasPrev = false;
      pushAndCheck();
      rec = primary[bottom];
      if (curCap < 0) { // transfer deficit to subsection
        childCap += (curCap+1);
      }
      rec.setAsPushFrom(current, childCap);
    }

    public boolean nextContainingNode() {
      /** returns true on a summary node visit */
      BlueSteelStackRec rec = primary[bottom];
      if (rec.cap <= 0) {
        up();
        return true;
      }
      next();
      if (rec.cap < 0) {
        down();
        return nextContainingNode();
      }
      return false;
    }
    public boolean nextOrDown() {
      /** returns true if at a valid position after advancing */
      BlueSteelStackRec rec = primary[bottom];
      if (rec.cap <= 0) {
        return up();
      }
      next();
      downAsFarAsPossible();
      return true;
    }
    /** Descends into nested pushes until reaching a node with no (non-empty) push. */
    public void downAsFarAsPossible() {
      while(true) {
        SegmentPush push = primary[bottom].getCurrent().getPush();
        if (push == null || push.getCount()<=0) break;
        down();
        next();
      };
    }
    /** Descends while staying at or before (seekDoc, seekTerm); optionally
     * reverts to the original depth when the target is overshot. */
    private boolean downUntilBefore(long seekDoc, Bytes seekTerm, boolean revertOnFail) {
      int origBottom = this.bottom;
      while(true) {
        if (primary[bottom].getCurrent().getPushCount() <= 0) {
          break;
        }
        down();
        next();
        if (cmpTo(seekDoc, seekTerm) <= 0) return true;
      };
      if (revertOnFail) {
        while(bottom > origBottom) {
          up();
        }
        return false;
      } else {
        return true;
      }

    }
    public void seek(long seekDoc, Bytes seekTerm) {
      /*
       * Lower docs are left or down, higher docs are right or up
       * So, (1) back up by resetting posting list and going up & repeat as needed
       * (2) next() until we're there
       */
      int cmp = cmpTo(seekDoc, seekTerm);
      if (cmp > 0) {
        BlueSteelStackRec rec = primary[bottom];
        // Note that prev becomes null when going over a segment push, so this is safe:
        if (rec.hasPrev && rec.prev.compareTo(seekDoc, seekTerm) < 0) {
          return;
        }
        while (true) {
          boolean revertOnFail = (bottom > 0);
          rewindCurrentFrame();
          next();
          if (cmpTo(seekDoc, seekTerm) <= 0) break;
          if (downUntilBefore(seekDoc, seekTerm, revertOnFail)) break;
          up();
        }
        cmp = cmpTo(seekDoc, seekTerm);
      }
      while (cmp < 0) {
        nextToward(seekDoc, seekTerm);
        if (primary[bottom].getCurrent() == null) break;
        cmp = cmpTo(seekDoc, seekTerm);
        if (cmp >= 0) break;
      }
    }

    /** Doc-id-only variant of seek(long, Bytes). */
    public void seek(long seekDoc) {
      /*
       * Lower docs are left or down, higher docs are right or up
       * So, (1) back up by resetting posting list and going up & repeat as needed
       * (2) next() until we're there
       */
      long cmp = cmpTo(seekDoc);
      if (cmp > 0) {
        BlueSteelStackRec rec = primary[bottom];
        // Note that prev becomes null when going over a segment push, so this is safe:
        if (rec.hasPrev && rec.prev.docId < seekDoc) {
          return;
        }
        while (true) {
          boolean revertOnFail = (bottom > 0);
          rewindCurrentFrame();
          next();
          if (cmpTo(seekDoc) <= 0) break;
          if (downUntilBefore(seekDoc, ArrayBytes.EMPTY_BYTES, revertOnFail)) break;
          up();
        }
        cmp = cmpTo(seekDoc);
      }
      while (cmp < 0) {
        nextToward(seekDoc, ArrayBytes.EMPTY_BYTES);
        if (primary[bottom].getCurrent() == null) break;
        cmp = cmpTo(seekDoc);
        if (cmp >= 0) break;
      }
    }

    public boolean nextToward(long docId, Bytes term) {
      // returns true if this is a summary node visit
      BlueSteelStackRec rec = primary[bottom];
      if (rec.cap <= 0) {
        up();
        return true;
      }
      next();
      while(true) {
        BlueSteelStackRec bottomRec = primary[bottom];
        PostingNode cur = bottomRec.getCurrent();
        if (cur.getPushCount() == 0) break;
        if (bottomRec.cap >= 0 && ! isAt(docId, term)) break;
        down();
        next();
      }
      return false;
    }
    public boolean isEqual(EditRec edit) {
      PostingNode current = primary[bottom].getCurrent();
      return (current.getDocId() == edit.docId &&
          current.getToken().equals(edit.term));
    }
    /** Debug aid: prints the current node indented by stack depth. */
    public void printPosition() {
      for(int i=0; i<bottom; i++) {
        System.out.print("| ");
      }
      System.out.println(this.getCurrent());
    }
    public long getFirstDocOfCurrentFrame() {
      return primary[bottom].initial.getDocId();
    }
    public void rewindCurrentFrame() {
      primary[bottom].rewindCurrentFrame();
    }
    public void clearCurrent() {
      primary[bottom].clearCurrent();
    }
    public String toString() {
      StringBuffer s = new StringBuffer("TraversalStack(");
      for(int i=0; i<=bottom; i++) {
        s.append(primary[i].toString());
        if (i<bottom) s.append(",");
      }
      s.append(")");
      return s.toString();
    }
    /** Deep copy: every frame is copied. */
    public TraversalStack copy() {
      TraversalStack copy = new TraversalStack();
      copy.bottom = bottom;
      copy.primary = new BlueSteelStackRec[primary.length];
      for(int i=0; i<primary.length; i++) {
        copy.primary[i] = primary[i].copy();
      }
      return copy;
    }
    /** Current doc id, or Long.MAX_VALUE when exhausted. */
    public long getCurrentDocId() {
      PostingNode cur = primary[bottom].getCurrent();
      if (cur == null) return Long.MAX_VALUE;
      return cur.docId;
    }
  }

  static boolean verifySegmentPush(SegmentPush push) {
    push = push.copy();
    TraversalStack stack = new TraversalStack(push.copy());
    long lastDocId = Long.MIN_VALUE;
    Bytes lastTerm = new ArrayBytes(new byte[]{});
    while(stack.nextOrDown()) {
      stack.printPosition();
      long docId = stack.getCurrent().getDocId();
      if (docId < lastDocId) {
        throw new RuntimeException("DOC ID ERROR: cur="+docId+" prev="+lastDocId);
      } else if (docId == lastDocId) {
        assert ! stack.getCurrent().getToken().equals(lastTerm);
      }
      lastDocId = docId;
      lastTerm = stack.getCurrent().getToken().copyInto(lastTerm);
    }
    return true;
  }
 
  static boolean verifyTokenTable(TokenTable table) {
    for(Iterator<TokenRec> recItr = table.getTokenRecs(); recItr.hasNext();) {
      TokenRec rec = recItr.next();
      TokenTable subTable = rec.tokenTable;
      System.out.println((char)rec.token+" - subtables:");
      if (subTable != null) {
        if (! verifyTokenTable(subTable)) return false;
      }
      System.out.println((char)rec.token+" - summary list:");
      if (! verifySegmentPush(rec.segmentPush)) return false;
    }
    return true;
  }
 
  static final class SegmentAccumulator {
    protected PostingNode leadNode, lastNode;
    protected BalancingPolicy policy;
    protected int numElements;
    protected long ct;
    protected long lastDocId = Long.MIN_VALUE;
    protected ArrayList<MemoryPostingNode> output = new ArrayList<MemoryPostingNode>();

    public SegmentAccumulator(BalancingPolicy policy) {
      this.policy = policy;
    }
    public ArrayList<MemoryPostingNode> getOutput() {
      cut();
      return output;
    }
    public void registerAndLink(MemoryPostingNode node) {
      if (ct > 0) {
        ((MemoryPostingNode)lastNode).setNext(node);
      }
      register(node);
    }
    public void register(PostingNode node) {
      lastNode = node;
      if (ct==0) leadNode = node;
      ct += node.getCount();
      numElements++;
      long newDocId = node.getDocId();
      if (newDocId < lastDocId) {
        throw new RuntimeException();
      } else {
        lastDocId = newDocId;
      }
    }
    public MemoryPostingNode peekLast() {
      return output.get(output.size()-1);
    }
    protected void add(MemoryPostingNode head) {
      if (policy != null && numElements > policy.getTargetLength()*2) {
        SegmentPush newPush = balanceSegmentPush(head.getPush(), policy);
        head.setPush(newPush);
      }
      output.add(head);
      ct = 0;
      numElements = 0;
    }
    public void cut(long docId, Bytes token) {
      SegmentPush push = (ct > 0) ? new SegmentPush(leadNode, ct) : null;
      add(new MemoryPostingNode(push, docId, token));
    }
    public void cut() {
      if (numElements > 1) {
        SegmentPush push = new SegmentPush(leadNode, ct - 1);
        add(new MemoryPostingNode(push, lastNode.getDocId(), lastNode.getToken()));
      } else if (numElements == 1) {
        add(new MemoryPostingNode(leadNode.getPush(), leadNode.getDocId(), leadNode.getToken()));
      }
    }
  }

  /**
   * Applies id-ordered edits (insertions and deletions) to a segment,
   * producing a new SegmentPush. Throws "data moved" if a deletion targets
   * an entry past the end of the segment.
   */
  public static SegmentPush spliceEditsIntoSegmentPush(List<EditRec> edits, SegmentPush segment) {
    SegmentAccumulator accum = new SegmentAccumulator(null);
    TraversalStack stack = new TraversalStack(segment);
    Iterator<EditRec> editItr = edits.iterator();
    EditRec edit = editItr.hasNext() ? editItr.next() : null;
    do {

      boolean isSummaryVisit;
      if (edit != null) {
        isSummaryVisit = stack.nextToward(edit.docId, edit.term);
      } else {
        // no edits left: drain the remainder of the segment
        isSummaryVisit = stack.nextToward(Long.MAX_VALUE, ArrayBytes.EMPTY_BYTES);
      }
      PostingNode node = stack.getCurrent();
      if (node == null) break;
      node = node.copy();
      boolean deleteCur = false;
      // consume every edit at or before the current cursor position
      while (edit != null && stack.isAt(edit)) {
        if (edit.isDeletion) {
          deleteCur = true;
        } else {
          accum.cut(edit.docId, edit.term);
        }
        edit = editItr.hasNext() ? editItr.next() : null;
      }
      if (isSummaryVisit) {
        if (deleteCur) {
          accum.cut();
        } else {
          accum.cut(node.getDocId(), node.getToken());
        }
      } else {
        if (deleteCur) {
          accum.cut();
        } else {
          accum.register(node);
        }
      }
    } while (stack.getCurrent() != null);
    // remaining edits fall past the end of the segment; they must be insertions
    while (edit != null) {
      if (edit.isDeletion) {
        throw new RuntimeException("data moved");
      }
      accum.cut(edit.docId, edit.term);
      edit = editItr.hasNext() ? editItr.next() : null;
    }
    ArrayList<MemoryPostingNode> nodes = accum.getOutput();
    return new SegmentPush(nodes);
  }

  /** Returns the most recently committed revision number. */
  @Override
  public long getCurrentRevNum() {
    return topRevNum;
  }

  /** Returns the index's home directory (may be null until set). */
  @Override
  public String getHomedir() {
    return homeDir;
  }

  /** Sets the index's home directory. */
  @Override
  public void setHomedir(String homeDir) {
    this.homeDir = homeDir;
  }

 
  class BlueSteelPostingListQuery extends AbstractQueryNode implements Cloneable {
   
    protected final TraversalStack traversal; // cursor over this term's posting list
    protected CompoundBytes termBuffer;       // fixed prefix + current term suffix
    protected SlicedBytes seekBuffer = new SlicedBytes(ArrayBytes.EMPTY_BYTES, 0, 0); // scratch slice reused by seek()
    protected TokenRec tokenRec;              // token-table entry this query iterates
    protected long docId = -1;                // cached current doc id (MAX_VALUE at end)
   
    /** Wraps an existing traversal already positioned mid-stream. */
    public BlueSteelPostingListQuery(TraversalStack traversal, CompoundBytes termBuffer, TokenRec tokenRec) {
      this.traversal = traversal;
      this.termBuffer = termBuffer;
      this.tokenRec = tokenRec;
      this.docId = traversal.getCurrentDocId();
    }
    /** Starts a fresh traversal over the token's posting list. */
    public BlueSteelPostingListQuery(Bytes prefix, TokenRec tokenRec) {
      traversal = new TraversalStack(tokenRec.getSegmentPush());
      init(prefix, tokenRec);
    }
    /** Positions the traversal on its first entry; throws if the list is empty. */
    protected void init(Bytes prefix, TokenRec tokenRec) {
      this.tokenRec = tokenRec;
      termBuffer = new CompoundBytes(prefix, null);
      if (! traversal.nextOrDown()) {
        throw new RuntimeException();
      }
      docId = traversal.getCurrentDocId();
    }
    /** Current doc id (Long.MAX_VALUE once exhausted). */
    public long doc() {
      assert docId == traversal.getCurrentDocId();
      return docId;
//      PostingNode node = traversal.getCurrent();
//      if (node == null) return Long.MAX_VALUE;
//      return node.docId;
    }
    public boolean next() {
      long oldDocId = docId;
      do {
        boolean nxt = traversal.nextOrDown();
        if (!nxt) {
          traversal.clearCurrent();
          docId = Long.MAX_VALUE;
          return false;
        }
        docId = traversal.getCurrentDocId();
      } while(docId == oldDocId);
      return true;
    }
    public NextStatus nextTerm() {
      long oldDocId = docId;
      boolean nxt = traversal.nextOrDown();
      if (!nxt) {
        traversal.clearCurrent();
        docId = Long.MAX_VALUE;
        return NextStatus.AT_END;
      }
      docId = traversal.getCurrentDocId();
      if (docId == oldDocId) {
        return NextStatus.NEXT_TERM;
      } else {
        return NextStatus.NEXT_DOC;
      }
    }
    public void seek(long seekDoc, Bytes seekTerm) {
      int seekTermLen = seekTerm.getLength();
      Bytes myPrefix = termBuffer.getPrefix();
      int myPrefixLen = myPrefix.getLength();

      if (myPrefix.isPrefixOf(seekTerm)) {
        seekBuffer.reInitialize(seekTerm, myPrefixLen, seekTermLen - myPrefixLen);
        seekTerm = seekBuffer;
      } else {
        if (seekTerm.compareTo(myPrefix) > 0) {
          // snap to the next doc
          seekTerm = ArrayBytes.EMPTY_BYTES;
          seekDoc++;
        } else {
          seekBuffer.reInitialize(seekTerm, myPrefixLen, seekTermLen - myPrefixLen);
          seekTerm = seekBuffer;
        }
      }
     
      /*
      if (myPrefix.isPrefixOf(seekTerm)) {
        seekBuffer.reInitialize(seekTerm, myPrefixLen, seekTermLen - myPrefixLen);
        seekTerm = seekBuffer;
      } else {
        if (seekTerm.compareTo(myPrefix) > 0) {
          // snap to the next doc
          seekTerm = ArrayBytes.EMPTY_BYTES;
          seekDoc++;
        }
      }
      */
        /*
      if (! myPrefix.isPrefixOf(seekTerm)) {
        seekTerm = ArrayBytes.EMPTY_BYTES;
        seekTermLen = 0;
        if (seekTerm.compareTo(myPrefix) > 0) {
          // snap to the next doc
          seekDoc++;
        }
      }
      if (seekTermLen > myPrefixLen) {
        seekBuffer.reInitialize(seekTerm, myPrefixLen, seekTermLen - myPrefixLen);
        seekTerm = seekBuffer;
      } else {
        seekTerm = ArrayBytes.EMPTY_BYTES;
      }
*/
      traversal.seek(seekDoc, seekTerm);
      docId = traversal.getCurrentDocId();
    }
    public Bytes term() {
      if (traversal.getCurrent() == null) {
        throw new RuntimeException();
      }
      Bytes token = traversal.getCurrent().token;
      termBuffer.setSuffix(token);
      return termBuffer;
    }
    public QueryNode specialize(Range range) {
      Bytes prefix = range.prefix;
      int prefixLen = prefix.getLength();
      Bytes myPrefix = termBuffer.getPrefix();
      int myPrefixLen = myPrefix.getLength();
      if (prefixLen <= myPrefixLen) {
        if (! range.containsPrefix(myPrefix)) return null;
        return this;
      }// TODO can exclude something in the else too
      if (tokenRec.tokenTable == null) {
        return this;
      }
      SlicedBytes slice = new SlicedBytes(prefix, 0);
      TokenRec subRec = tokenRec.tokenTable.descend(slice);
      if (subRec == null) {
        if (slice.getLength() > 0) {
          return null; // no results apply
        } else {
          return this; // all results apply
        }
      } else if (subRec == tokenRec) {
        // no descent; just return ourself
        return this;
      }
      return queryNodeFromDescent(subRec, slice, tokenRec.tokenTable, range);
    }
   
    public QueryNode copy() {
      CompoundBytes termBufferCopy = new CompoundBytes(termBuffer.getPrefix(), termBuffer.getSuffix());
      return new BlueSteelPostingListQuery(traversal.copy(), termBufferCopy, tokenRec);
    }
    public String toString() {
      return "BSBIQN(cur="+traversal.getCurrent()+": depth="+traversal.bottom+")";
    }
    @Override
    public long nextValidDocId(long docId) {
      traversal.seek(docId);
      this.docId = traversal.getCurrentDocId();
      return this.docId;
    }
  }
 
  @Override
  public QueryNode getRange(Range range, long revNum) {
    TokenTable table = getRootTokenTable(revNum);
    SlicedBytes slice = new SlicedBytes(range.prefix, 0);
    TokenRec rec = table.descend(slice);
    return queryNodeFromDescent(rec, slice, table, range);
  }
  private QueryNode queryNodeFromDescent(TokenRec rec, SlicedBytes slice, TokenTable table, Range range) {
    boolean matchesAll = slice.getLength() == 0;
    if (rec == null) {
      if (! matchesAll) {
        return null;
      }
    } else {
      table = rec.getTokenTable();
    }
    range.propagatePrefixIntoRange(slice.getSlicePosition());
    range.prefix = range.prefix.flatten();
    if (table == null || (matchesAll && rec != null)) {
      QueryNode n = new BlueSteelPostingListQuery(range.prefix, rec);
      return FilteredQueryNode.make(n, range);
    } else {
      return makeMatching(table, range);
    }
  }

  public QueryNode makeMatching(TokenTable table, Range range) {
    ArrayList<QueryNode> nodes = new ArrayList<QueryNode>();
    int top,bottom;
    if (range.minSuffix == null) {
      top = 0;
      range.isMinIncluded = true;
    } else if (range.minSuffix.getLength() > 0) {
      top = range.minSuffix.get(0);
    } else {
      top = 0;
    }
    if (range.maxSuffix == null) {
      bottom = 255;
      range.isMaxIncluded = true;
    } else if (range.maxSuffix.getLength() > 0) {
      bottom = range.maxSuffix.get(0);
    } else { //specified, but is empty (must be a very narrow range!)
      bottom = 0;
    }
    for(Iterator<TokenRec> recItr = table.getTokenRecs(); recItr.hasNext();) {
      TokenRec rec = recItr.next();
      int tok = rec.token & 0xFF;
      if (top <= tok && tok <= bottom) {
        Bytes subPrefix = CompoundBytes.make(range.prefix, ArrayBytes.SINGLE_BYTE_OBJECTS[tok]);
        QueryNode n = new BlueSteelPostingListQuery(subPrefix, rec);
        if (top == tok || tok == bottom) {
          n = FilteredQueryNode.make(n, range);
        }
        if (n != null) nodes.add(n);
      }
    }
    //return OrderedOrQueryNode.make(nodes);
    return FlatOrQueryNode.make(nodes);
  }

  @Override
  public QueryNode getTerms(List<Bytes> terms, long revNum) {
    if (terms.size() == 0) return null;
    Range range = new Range();
    Bytes minTerm = null;
    Bytes maxTerm = null;
    for(Bytes term : terms) {
      if (minTerm == null || minTerm.compareTo(term) > 0) {
        minTerm = term;
      }
      if (maxTerm == null || maxTerm.compareTo(term) < 0) {
        maxTerm = term;
      }
    }
    if (minTerm == maxTerm && minTerm != null) {
      maxTerm = minTerm.copy();
    }
    range.setBoundsAndExtractPrefix(minTerm, maxTerm);
    QueryNode node = getRange(range, revNum);
    if (node == null) return null;
    return FilterLiteralsQueryNode.make(node, terms);
  }
 
  @Override
  public long commitNewRev(Collection<EditRec> edits) {
    if (edits.isEmpty()) return topRevNum;
    Util.logger.log(Level.FINEST, "About to commit "+edits.size()+" term changes");
    nextDocId = BagIndexUtil.assignIds(edits, nextDocId);
    TokenTable root = getRootTokenTable(topRevNum);
    TokenTable newTokenTable = applyEditsToTokenTable(edits, root, policy, termTableDepth-1);
//    assert verifyTokenTable(newTokenTable);
    setNewTokenTable(newTokenTable);
    return topRevNum;
  }
 
  public void setTermTableDepth(int depth) {
    this.termTableDepth = depth;
  }
 
  @Override
  public Map<String,Object> getMetrics(int detailLevel) {
    return Util.literalSMap().p("home", homeDir).p("topRev", topRevNum).p("tbl", getRootTokenTable(topRevNum).getMetrics(detailLevel));
  }
}

TOP

Related Classes of dovetaildb.bagindex.BlueSteelBagIndex$TraversalStack

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.