Package dovetaildb.dbservice

Source Code of dovetaildb.dbservice.DbServiceUtil$OptionsIter

package dovetaildb.dbservice;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import dovetaildb.apiservice.ApiException;
import dovetaildb.bagindex.BagIndex;
import dovetaildb.bagindex.EditRec;
import dovetaildb.bytes.ArrayBytes;
import dovetaildb.bytes.Bytes;
import dovetaildb.bytes.CompoundBytes;
import dovetaildb.iter.Iter;
import dovetaildb.iter.LiteralIter;
import dovetaildb.iter.MergeIter;
import dovetaildb.querynode.AndNotQueryNode;
import dovetaildb.querynode.AndQueryNode;
import dovetaildb.querynode.OrderedOrQueryNode;
import dovetaildb.querynode.QueryNode;
import dovetaildb.querynode.QueryNodeTemplate;
import dovetaildb.querynode.RangeQueryNode;
import dovetaildb.scriptbridge.ScriptFunction;
import dovetaildb.util.Base64;

public class DbServiceUtil {

  public static Object reduceIter(Iter i, ScriptFunction reduceFn) {
    Object[] buffer = new Object[256];
    Object[] params = new Object[256];
    int num = 0;
    int paramsBase = 0;
    while( (num = i.pullAvailable(buffer, true)) != -1) {
      int remaining = 256 - paramsBase;
      int numToCopy = (remaining > num) ? num : remaining;
      System.arraycopy(buffer, 0, params, paramsBase, numToCopy);
      paramsBase += numToCopy;
      if (paramsBase == 256) {
        params[0] = reduceFn.call(params);
        paramsBase = 1;
      }
    }
    return reduceFn.call(Arrays.copyOf(params, paramsBase));
  }
 
  private static class OptionsIter extends WrappingIter {
    int offset, limit;
    ScriptFunction map, filter;
    OptionsIter(Iter subIter, int offset, int limit,
        ScriptFunction map, ScriptFunction filter) {
      super(subIter);
      this.offset = offset;
      this.limit = limit;
      this.map = map;
      this.filter = filter;
    }
    public int pullAvailable(Object[] buffer, boolean block) {
      int ret = subIter.pullAvailable(buffer, block);
      if (ret <= 0) return ret;
      while(offset > 0) {
        if (ret > offset) {
          ret -= offset;
          offset = 0;
          System.arraycopy(buffer, offset, buffer, 0, ret);
          break;
        }
        offset -= ret;
        ret = subIter.pullAvailable(buffer, block);
      }
      if (ret > limit) {
        ret = limit;
        limit = 0;
      } else {
        limit -= ret;
      }
      if (map != null) {
        Object[] params = new Object[]{null};
        for(int i=0; i<ret; i++) {
          params[0] = buffer[i];
          buffer[i] = map.call(params);
        }
        return ret;
      }
      if (filter != null) {
        int toIndex = 0;
        Object[] params = new Object[]{null};
        for(int fromIndex=0; fromIndex < ret; fromIndex++) {
          if (((Boolean)filter.call(params)).booleanValue()) {
            buffer[toIndex++] = buffer[fromIndex];
          }
        }
        ret = toIndex;
      }
      return ret;
    }
    public void adjustMax(String name, Object newMax, boolean isExclusive) {
      subIter.adjustMax(name, newMax, isExclusive);
     
    }
    public void adjustMin(String name, Object newMin, boolean isExclusive) {
      subIter.adjustMin(name, newMin, isExclusive);
    }
  }

  /**
   * Handles "map", "filter", "reduce", "offset", and "limit"
   * @param i
   * @param options
   * @return
   */
  public static Iter applyPostprecessingOptions(Iter i, Map<String,Object> options) {
    if (options == null) return i;
    int offset = 0;
    int limit = Integer.MAX_VALUE;
    ScriptFunction mapFn    = (ScriptFunction) options.get("map");
    ScriptFunction filterFn = (ScriptFunction) options.get("filter");
    i = new OptionsIter(i, offset, limit, mapFn, filterFn);
    ScriptFunction reduceFn = (ScriptFunction) options.get("reduce");
    if (reduceFn != null) {
      Object result = reduceIter(i, reduceFn);
      i = new LiteralIter(new Object[]{result});
    }
    return i;
  }

  // map,reduce,sort,offset,limit,bookmark,diversity
  public static Iter mergeItersUsingOptions(Map<String, Object> options, List<Iter> subIters) {
    return new MergeIter(subIters);
  }

 
  // Operator symbols that may appear as the first element of a query list.
  // The range operators are written as interval brackets: a round bracket
  // marks an exclusive bound and a square bracket an inclusive one.
  public static final String OP_AND = "&";
  public static final String OP_OR  = "|";
  public static final String OP_NOT = "!";
  public static final String OP_ANY = "*";
  public static final String OP_AS  = "$";
  public static final String OP_LT  = "<";
  public static final String OP_GT  = ">";
  public static final String OP_LE  = "<=";
  public static final String OP_GE  = ">=";
  public static final String OP_RG_EE = "()";
  public static final String OP_RG_EI = "(]";
  public static final String OP_RG_IE = "[)";
  public static final String OP_RG_II = "[]";
 
  /*
   * String.hashCode() values of the operator symbols above, in the same order.
   * They are written as integer literals because they are used as case labels
   * in the switch statements of applyQueryToBagIndex, which requires
   * compile-time constants.
   *
   * Got the hash code literals from jython like so:
   * >>> import java
   * >>> for s in ['&','|','!','*','$','<','>','<=','>=','()','(]','[)','[]']: print java.lang.String(s).hashCode()
   * ...  
   */
  public static final int OP_HASH_AND = 38;
  public static final int OP_HASH_OR  = 124;
  public static final int OP_HASH_NOT = 33;
  public static final int OP_HASH_ANY = 42;
  public static final int OP_HASH_AS  = 36;
  public static final int OP_HASH_LT  = 60;
  public static final int OP_HASH_GT  = 62;
  public static final int OP_HASH_LE  = 1921;
  public static final int OP_HASH_GE  = 1983;
  public static final int OP_HASH_BETWEEN_EE = 1281;
  public static final int OP_HASH_BETWEEN_EI = 1333;
  public static final int OP_HASH_BETWEEN_IE = 2862;
  public static final int OP_HASH_BETWEEN_II = 2914;

  public static final HashSet<Integer> SYMBOLS = new HashSet<Integer>();
  static {
    SYMBOLS.add(OP_HASH_AND);
    SYMBOLS.add(OP_HASH_OR);
    SYMBOLS.add(OP_HASH_NOT);
    SYMBOLS.add(OP_HASH_ANY);
    SYMBOLS.add(OP_HASH_AS);
    SYMBOLS.add(OP_HASH_LT);
    SYMBOLS.add(OP_HASH_GT);
    SYMBOLS.add(OP_HASH_LE);
    SYMBOLS.add(OP_HASH_GE);
    SYMBOLS.add(OP_HASH_BETWEEN_EE);
    SYMBOLS.add(OP_HASH_BETWEEN_EI);
    SYMBOLS.add(OP_HASH_BETWEEN_IE);
    SYMBOLS.add(OP_HASH_BETWEEN_II);
  }
 
  // Single-byte markers used when encoding values into index terms:
  //   's' precedes the UTF-8 bytes of a string value
  //   'l' stands for null
  //   't' / 'f' stand for boolean true / false
  //   ':' follows a map key
  //   '[' opens a list and '{' opens a map
  // (Numbers use an 'n' marker that is written inline by sencode.)
  public static final ArrayBytes HEADER_BYTE_S = new ArrayBytes(new byte[]{'s'});
  public static final ArrayBytes HEADER_BYTE_L = new ArrayBytes(new byte[]{'l'});
  public static final ArrayBytes HEADER_BYTE_T = new ArrayBytes(new byte[]{'t'});
  public static final ArrayBytes HEADER_BYTE_F = new ArrayBytes(new byte[]{'f'});
  public static final ArrayBytes HEADER_BYTE_COLON = new ArrayBytes(new byte[]{':'});
  public static final ArrayBytes HEADER_BYTE_LISTOPEN = new ArrayBytes(new byte[]{'['});
  public static final ArrayBytes HEADER_BYTE_MAPOPEN  = new ArrayBytes(new byte[]{'{'});

  public static Bytes sencodeMapKey(String key) {
    try {
      return new ArrayBytes(((String)key).getBytes("utf-8"));
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
  }
 
  public static void sencodeMulti(Bytes prefix, Object val, ArrayList<EditRec> buffer, long docId, boolean idDel) {
    if (val instanceof Map) {
      Map map = (Map)val;
      prefix = new CompoundBytes(prefix, HEADER_BYTE_MAPOPEN);
      buffer.add(new EditRec(docId, prefix, idDel));
      for(Object entryObj : map.entrySet()) {
        Map.Entry entry = (Map.Entry) entryObj;
        String key = (String)entry.getKey();
        Bytes sub = new CompoundBytes(prefix, sencodeMapKey(key));
        sub = new CompoundBytes(sub, HEADER_BYTE_COLON);
        sencodeMulti(sub, entry.getValue(), buffer, docId, idDel);
      }
    } else if (val instanceof List) {
      List list = (List)val;
      prefix = new CompoundBytes(prefix, HEADER_BYTE_LISTOPEN);
      buffer.add(new EditRec(docId, prefix, idDel));
      for(Object subVal : list) {
        sencodeMulti(prefix, subVal, buffer, docId, idDel);
      }
    } else {
      buffer.add(new EditRec(docId, new CompoundBytes(prefix,sencode(val)), false));
    }
  }
 
  public static Bytes sencode(Object val) {
    if (val instanceof Number) {
      long bits = Double.doubleToLongBits(((Number)val).doubleValue());
      // Invert the negation flag itself to put positives above negatives:
      if ((bits & 0x8000000000000000L) != 0) {
        // if it's a negative, invert the other bits so that a bytewise
        // lexiographic sort puts big negatives below small negatives
        bits ^= 0xFFFFFFFFFFFFFFFFL;
      } else {
        bits ^= 0x8000000000000000L;
      }
      return new ArrayBytes(new byte[] {
          'n',
          (byte)((bits >>> 8 * 7) & 0xFF),
          (byte)((bits >>> 8 * 6) & 0xFF),
          (byte)((bits >>> 8 * 5) & 0xFF),
          (byte)((bits >>> 8 * 4) & 0xFF),
          (byte)((bits >>> 8 * 3) & 0xFF),
          (byte)((bits >>> 8 * 2) & 0xFF),
          (byte)((bits >>> 8 * 1) & 0xFF),
          (byte)((bits) & 0xFF)});
    } else if (val instanceof String) {
      try {
        Bytes valBytes = new ArrayBytes(((String)val).getBytes("utf-8"));
        return new CompoundBytes(HEADER_BYTE_S, valBytes);
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
      }
    } else if (val == null) {
      return HEADER_BYTE_L;
    } else if (val instanceof Boolean) {
      if (((Boolean)val).booleanValue()) return HEADER_BYTE_T;
      else return HEADER_BYTE_F;
    } else {
      throw new ApiException("UnencodableValue","Result of type \""+val.getClass().getName()+"\" cannot be encoded in JSON (must be a String, Number, Boolean, HashMap, or ArrayList)");
    }
  }

  public static QueryNodeTemplate applyPatternToBagIndex(Object pattern, BagIndex index, long revNum) {
    QueryNodeTemplate templ = applyPatternToBagIndex(ArrayBytes.EMPTY_BYTES, pattern, index, revNum);
    if (templ.varMappings.isEmpty()) {
      templ.varMappings.put("", index.getRange(ArrayBytes.EMPTY_BYTES, null, null, false, false, revNum));
    }
    return templ;
  }
  public static QueryNodeTemplate applyPatternToBagIndex(Bytes prefix, Object pattern, BagIndex index, long revNum) {
    Map<String, QueryNode> vars = new HashMap<String, QueryNode>();
    QueryNode queryNode;
    if (pattern instanceof Map) {
      Map map = (Map)pattern;
      prefix = new CompoundBytes(prefix, HEADER_BYTE_MAPOPEN);
      ArrayList<QueryNode> nodes = new ArrayList<QueryNode>();
      for(Object entryObj : map.entrySet()) {
        Map.Entry entry = (Map.Entry) entryObj;
        String key = (String)entry.getKey();
        Object value = entry.getValue();
        Bytes curPrefix = new CompoundBytes(prefix, sencodeMapKey(key));
        curPrefix = new CompoundBytes(curPrefix, HEADER_BYTE_COLON);
        QueryNodeTemplate templ = applyPatternToBagIndex(curPrefix, value, index, revNum);
        if (templ.queryNode != null) {
          nodes.add(templ.queryNode);
          vars.putAll(templ.varMappings);
        }
      }
      if (nodes.isEmpty()) {
        queryNode = index.getTerm(prefix, revNum);
      } else {
        queryNode = AndQueryNode.make(nodes);
      }
    } else if (pattern instanceof List) {
      List list = (List)pattern;
      if (list.size() > 0 && SYMBOLS.contains(list.get(0).hashCode())) {
        return applyQueryToBagIndex(prefix, list, index, revNum);
      } else if (list.size() > 1) {
        throw new RuntimeException("malformed list structure in query: "+pattern);
      } else {
        prefix = new CompoundBytes(prefix, HEADER_BYTE_LISTOPEN);
        if (list.isEmpty()) {
          queryNode = index.getTerm(prefix, revNum);
        } else {
          QueryNodeTemplate templ = applyPatternToBagIndex(prefix, list.get(0), index, revNum);
          queryNode = templ.queryNode;
          vars.putAll(templ.varMappings);
        }
      }
    } else {
      queryNode = index.getTerm(new CompoundBytes(prefix,sencode(pattern)), revNum);
    }
    return new QueryNodeTemplate(queryNode, vars);
  }

  public static QueryNodeTemplate applyQueryToBagIndex(Bytes prefix, List query, BagIndex index, long revNum) {
    Map<String, QueryNode> vars = new HashMap<String, QueryNode>();
    QueryNode queryNode;
    ArrayList<QueryNode> clauses;
    if (query == null) {
      queryNode = index.getRange(ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, false, false, revNum);
    } else {
      int opHash = query.get(0).hashCode();
      int numArgs = query.size();
      switch(opHash) {
      case DbServiceUtil.OP_HASH_AS:
        queryNode = index.getRange(prefix, null, null, false, false, revNum);
        if (query.size() > 2) {
          throw new RuntimeException("Not yet supported");
//          QueryNodeTemplate subNode=applyQueryToBagIndex(prefix, (List)query.get(2), index, revNum);
//          vars.put((String)query.get(1), ExternalTermQueryNode.make(subNode.queryNode, queryNode));
//          queryNode = subNode.queryNode;
//          vars = subNode.varMappings;
        } else {
          vars.put((String)query.get(1), queryNode);
        }
        break;
      case DbServiceUtil.OP_HASH_OR:
      case DbServiceUtil.OP_HASH_AND:
        clauses = new ArrayList<QueryNode>(numArgs-1);
        for(int i=1; i<numArgs; i++) {
          QueryNodeTemplate node=applyPatternToBagIndex(prefix, query.get(i), index, revNum);
          if (node.queryNode != null) clauses.add(node.queryNode);
          vars.putAll(node.varMappings);
        }
        if (opHash == DbServiceUtil.OP_HASH_OR) {
          queryNode = new OrderedOrQueryNode(clauses, null, null, null, false, false);
        } else {
          queryNode = AndQueryNode.make(clauses);
        }
        break;
      case DbServiceUtil.OP_HASH_NOT:
        QueryNode matchesSoFar = index.getRange(prefix, null, null, false, false, revNum);
        clauses = new ArrayList<QueryNode>(numArgs-1);
        for(int i=1; i<numArgs; i++) {
          QueryNodeTemplate node=applyPatternToBagIndex(prefix, query.get(i), index, revNum);
          if (node.queryNode != null) clauses.add(node.queryNode);
        }
        QueryNode negativeMatches = OrderedOrQueryNode.make(clauses);
        queryNode = AndNotQueryNode.make(matchesSoFar, negativeMatches);
        break;
      default:
        Bytes term1 = null;
        Bytes term2 = null;
        boolean isExclusive1 = false;
        boolean isExclusive2 = false;
        switch(opHash) {
        case DbServiceUtil.OP_HASH_AS:
        case DbServiceUtil.OP_HASH_ANY:
          break;
        case DbServiceUtil.OP_HASH_GT:
          isExclusive1 = true;
        case DbServiceUtil.OP_HASH_GE:
          term1 = sencode(query.get(1));
          break;
        case DbServiceUtil.OP_HASH_LT:
          isExclusive2 = true;
        case DbServiceUtil.OP_HASH_LE:
          term2 = sencode(query.get(1));
          break;
        case DbServiceUtil.OP_HASH_BETWEEN_EE:
          isExclusive2 = true;
        case DbServiceUtil.OP_HASH_BETWEEN_EI:
          isExclusive1 = true;
        case DbServiceUtil.OP_HASH_BETWEEN_II:
          term1 = sencode(query.get(1));
          term2 = sencode(query.get(2));
          break;
        case DbServiceUtil.OP_HASH_BETWEEN_IE:
          isExclusive2 = true;
          term1 = sencode(query.get(1));
          term2 = sencode(query.get(2));
        default:
          throw new ApiException("QueryFormatError", "Unknown query operator: \""+query.get(0)+"\"");
        }
        queryNode = index.getRange(prefix, term1, term2, isExclusive1, isExclusive2, revNum);
      }
    }
    return new QueryNodeTemplate(queryNode, vars);
  }

  /*
  public static boolean applyPatternToObject(Object pattern, Object obj) {
    if (pattern instanceof Map) {
      if (!(obj instanceof Map)) return false;
      Map patternMap = (Map)pattern;
      Map objMap = (Map)obj;
      if (! objMap.keySet().containsAll(patternMap.keySet())) return false;
      for(Object patternEntryObj : patternMap.entrySet()) {
        Map.Entry entry = (Map.Entry) patternEntryObj;
        String key = (String)entry.getKey();
        Object subPattern = entry.getValue();
        if (! applyPatternToObject(subPattern, objMap.get(key))) return false;
      }
      return true;
    } else if (pattern instanceof List) {
      List list = (List)pattern;
      if (list.size() > 0 && SYMBOLS.contains(list.get(0).hashCode())) {
        return applyQueryToObject(list, obj);
      } else if (list.size() > 1) {
        throw new RuntimeException("malformed list structure in query: "+pattern);
      } else {
        if (!(obj instanceof List)) return false;
        if (list.isEmpty()) return true;
        Object subPattern = list.get(0);
        List listObj = (List)obj;
        boolean matches = false;
        for(Object subObj : listObj) {
          if (applyPatternToObject(subPattern, subObj)) {
            matches = true;
            break;
          }
        }
        return matches;
      }
    } else {
//      queryNode = index.getTerm(new CompoundBytes(prefix,sencode(pattern)), revNum);
    }
  }
*/

  // last parameter is scoring (and is optional)
  // default atomic constraint score->1, "AND"/"OR"->sum (even distribution)
  // bounds propagation:
  //  (bound)->min->(bound to every clause)
  //  (bound)->sum->(bound to every clause according to bound-(sum of other clause weights))
  //  (bound)->max->(no additional bounding)
  // Note that you can use bounds not only to drop entire posting lists but to find occurrence combinations which are not productive
  //   or to think of it differently, the logic for traversing an "AND"/"OR" clause could take the score under consideration and artificially bump the doc_id before checking every clause
  // grouping is done elsewhere
 
  // primitives:
  // RANGE X Y X_EXCL Y_EXCL
  // EXCLUSIVE_MIN X
  // EXCLUSIVE_MAX X
  // MIN X
  // MAX X
  // NOT C

  // Sorting is done first with double values, then with an optional user-specified comparator, then by ID
  // These 3 values also constitute a bookmark
 
  // ["AND", [
  //          ["HAS_OTHER_THAN", "name", ["phil"]],   (default score is one)
  //          ["MIN", "age", 14, ["SCORE_LINEAR_INTERPOLATE",[14,30,70],[0.0,0.8,1.0]]]
  //         ], ["SCORE_SUM", 3, 4] ]

  // Overreliance on JSON?  Really, it's just a store that groups a bunch of term strings together to make an object:
  //   (has age/)
  //   (hasany type/person, type/place, type/org)
  //   (min age/ 12)
  //   (range <prefix> <min> <max>)
  // or can i get away with finding some way to do a mapping?
  // A value indicated by path (numeric keys indicate a list, string keys a dict)
  // How do I indicate an empty list or dict? (I can't!)
  // but then i can't range search multivalue fields, right? type/*/
 
  // DDB core: dump sets of byte strings as documents into bags;
  // query using range(fieldname, prefix, minSuffix(optional), maxSuffix(optional))
  // complex query using some query syntax:
  //   ["AND", ["RANGE_MIN", "age", to_bin(14)], ["NOT", ["IN", "name", ["phil"]]]]

  // fulltext handling: term/rockies
  // What's the API? 
  //  - ddb.insert(["",..]), ddb.update(<conjunctive term list>,["",..]), ddb.remove([<op>,<arg>,..]), ddb.query([<op>,<arg>,..])
  //  - low level: commit([docid1,..], [["",..],..])
  //    fetchRange(revNum, t1, t2, excl1, excl2, [k1,k2],[0.1,0.5])  During linear interpolation, all keys must be of the same length 
  //
  // How does this integrate with scripting?  Are there commit hooks? 
  // Scoring/sorting? reduce?
 
 
 
}
TOP

Related Classes of dovetaildb.dbservice.DbServiceUtil$OptionsIter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.