package dovetaildb.dbservice;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import dovetaildb.apiservice.ApiException;
import dovetaildb.bagindex.BagIndex;
import dovetaildb.bagindex.EditRec;
import dovetaildb.bytes.ArrayBytes;
import dovetaildb.bytes.Bytes;
import dovetaildb.bytes.CompoundBytes;
import dovetaildb.iter.Iter;
import dovetaildb.iter.LiteralIter;
import dovetaildb.iter.MergeIter;
import dovetaildb.querynode.AndNotQueryNode;
import dovetaildb.querynode.AndQueryNode;
import dovetaildb.querynode.OrderedOrQueryNode;
import dovetaildb.querynode.QueryNode;
import dovetaildb.querynode.QueryNodeTemplate;
import dovetaildb.scriptbridge.ScriptFunction;
public class DbServiceUtil {
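/**
 * Folds every value produced by an Iter through a script "reduce" function.
 * Values are gathered into batches of up to 256 arguments; each full batch is
 * collapsed into a single running value which is carried over as the first
 * argument of the next batch, so the reduce function must accept a variable
 * number of arguments and tolerate partial results being fed back in.
 *
 * A hedged usage sketch (the function and iterator here are illustrative, not
 * part of any API):
 *
 *   ScriptFunction sum = ...;   // e.g. a script function summing its arguments
 *   Iter values = ...;          // yields the numbers to total
 *   Object total = DbServiceUtil.reduceIter(values, sum);
 */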
public static Object reduceIter(Iter i, ScriptFunction reduceFn) {
Object[] buffer = new Object[256];
Object[] params = new Object[256];
int num = 0;
int paramsBase = 0;
while( (num = i.pullAvailable(buffer, true)) != -1) {
int srcPos = 0;
while (srcPos < num) {
int remaining = 256 - paramsBase;
int numToCopy = (remaining > num - srcPos) ? (num - srcPos) : remaining;
System.arraycopy(buffer, srcPos, params, paramsBase, numToCopy);
srcPos += numToCopy;
paramsBase += numToCopy;
if (paramsBase == 256) {
// Collapse the full batch into a single running value and keep it
// as the first argument of the next batch.
params[0] = reduceFn.call(params);
paramsBase = 1;
}
}
}
return reduceFn.call(Arrays.copyOf(params, paramsBase));
}
private static class OptionsIter extends WrappingIter {
int offset, limit;
ScriptFunction map, filter;
OptionsIter(Iter subIter, int offset, int limit,
ScriptFunction map, ScriptFunction filter) {
super(subIter);
this.offset = offset;
this.limit = limit;
this.map = map;
this.filter = filter;
}
public int pullAvailable(Object[] buffer, boolean block) {
int ret = subIter.pullAvailable(buffer, block);
if (ret <= 0) return ret;
// Skip past the requested offset, pulling further batches if needed.
while(offset > 0) {
if (ret > offset) {
System.arraycopy(buffer, offset, buffer, 0, ret - offset);
ret -= offset;
offset = 0;
break;
}
offset -= ret;
ret = subIter.pullAvailable(buffer, block);
if (ret <= 0) return ret;
}
if (ret > limit) {
ret = limit;
limit = 0;
} else {
limit -= ret;
}
if (map != null) {
Object[] params = new Object[]{null};
for(int i=0; i<ret; i++) {
params[0] = buffer[i];
buffer[i] = map.call(params);
}
}
if (filter != null) {
int toIndex = 0;
Object[] params = new Object[]{null};
for(int fromIndex=0; fromIndex < ret; fromIndex++) {
params[0] = buffer[fromIndex];
if (((Boolean)filter.call(params)).booleanValue()) {
buffer[toIndex++] = buffer[fromIndex];
}
}
ret = toIndex;
}
return ret;
}
public void adjustMax(String name, Object newMax, boolean isExclusive) {
subIter.adjustMax(name, newMax, isExclusive);
}
public void adjustMin(String name, Object newMin, boolean isExclusive) {
subIter.adjustMin(name, newMin, isExclusive);
}
}
/**
 * Applies the post-processing options "offset", "limit", "map", "filter",
 * and "reduce" to a result iterator.
 * @param i the raw result iterator
 * @param options option map supplied by the caller, or null for none
 * @return an iterator over the post-processed results (a single-element
 *         iterator when "reduce" is present)
 */
public static Iter applyPostprecessingOptions(Iter i, Map<String,Object> options) {
if (options == null) return i;
// "offset" and "limit" are assumed to arrive as Numbers in the options map.
Number offsetNum = (Number) options.get("offset");
Number limitNum = (Number) options.get("limit");
int offset = (offsetNum == null) ? 0 : offsetNum.intValue();
int limit = (limitNum == null) ? Integer.MAX_VALUE : limitNum.intValue();
ScriptFunction mapFn = (ScriptFunction) options.get("map");
ScriptFunction filterFn = (ScriptFunction) options.get("filter");
i = new OptionsIter(i, offset, limit, mapFn, filterFn);
ScriptFunction reduceFn = (ScriptFunction) options.get("reduce");
if (reduceFn != null) {
Object result = reduceIter(i, reduceFn);
i = new LiteralIter(new Object[]{result});
}
return i;
}
// TODO: only merging is implemented here; the map, reduce, sort, offset,
// limit, bookmark, and diversity options are not yet applied.
public static Iter mergeItersUsingOptions(Map<String, Object> options, List<Iter> subIters) {
return new MergeIter(subIters);
}
public static final String OP_AND = "&";
public static final String OP_OR = "|";
public static final String OP_NOT = "!";
public static final String OP_ANY = "*";
public static final String OP_AS = "$";
public static final String OP_LT = "<";
public static final String OP_GT = ">";
public static final String OP_LE = "<=";
public static final String OP_GE = ">=";
public static final String OP_RG_EE = "()";
public static final String OP_RG_EI = "(]";
public static final String OP_RG_IE = "[)";
public static final String OP_RG_II = "[]";
/*
* Got the hash code literals from jython like so:
* >>> import java
* >>> for s in ['&','|','!','*','$','<','>','<=','>=','()','(]','[)','[]']: print java.lang.String(s).hashCode()
* ...
*/
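// The same constants can be regenerated in plain Java (sketch, e.g. from a
// scratch main() or a unit test):
//
//   String[] ops = {"&","|","!","*","$","<",">","<=",">=","()","(]","[)","[]"};
//   for (String op : ops) System.out.println(op + " -> " + op.hashCode());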
public static final int OP_HASH_AND = 38;
public static final int OP_HASH_OR = 124;
public static final int OP_HASH_NOT = 33;
public static final int OP_HASH_ANY = 42;
public static final int OP_HASH_AS = 36;
public static final int OP_HASH_LT = 60;
public static final int OP_HASH_GT = 62;
public static final int OP_HASH_LE = 1921;
public static final int OP_HASH_GE = 1983;
public static final int OP_HASH_BETWEEN_EE = 1281;
public static final int OP_HASH_BETWEEN_EI = 1333;
public static final int OP_HASH_BETWEEN_IE = 2862;
public static final int OP_HASH_BETWEEN_II = 2914;
public static final HashSet<Integer> SYMBOLS = new HashSet<Integer>();
static {
SYMBOLS.add(OP_HASH_AND);
SYMBOLS.add(OP_HASH_OR);
SYMBOLS.add(OP_HASH_NOT);
SYMBOLS.add(OP_HASH_ANY);
SYMBOLS.add(OP_HASH_AS);
SYMBOLS.add(OP_HASH_LT);
SYMBOLS.add(OP_HASH_GT);
SYMBOLS.add(OP_HASH_LE);
SYMBOLS.add(OP_HASH_GE);
SYMBOLS.add(OP_HASH_BETWEEN_EE);
SYMBOLS.add(OP_HASH_BETWEEN_EI);
SYMBOLS.add(OP_HASH_BETWEEN_IE);
SYMBOLS.add(OP_HASH_BETWEEN_II);
}
public static final ArrayBytes HEADER_BYTE_S = new ArrayBytes(new byte[]{'s'});
public static final ArrayBytes HEADER_BYTE_L = new ArrayBytes(new byte[]{'l'});
public static final ArrayBytes HEADER_BYTE_T = new ArrayBytes(new byte[]{'t'});
public static final ArrayBytes HEADER_BYTE_F = new ArrayBytes(new byte[]{'f'});
public static final ArrayBytes HEADER_BYTE_COLON = new ArrayBytes(new byte[]{':'});
public static final ArrayBytes HEADER_BYTE_LISTOPEN = new ArrayBytes(new byte[]{'['});
public static final ArrayBytes HEADER_BYTE_MAPOPEN = new ArrayBytes(new byte[]{'{'});
public static Bytes sencodeMapKey(String key) {
try {
return new ArrayBytes(key.getBytes("utf-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
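// Illustration of the flattened terms sencodeMulti emits (the document and
// its values are hypothetical; tag bytes come from the HEADER_BYTE_* constants
// above, and the top-level prefix is assumed to be empty).  For the document
// {"name":"phil","tags":["a"]} the buffer receives edits whose keys look like:
//   '{'                          -- the enclosing map itself
//   '{' "name" ':' 's' "phil"    -- scalar string value under key "name"
//   '{' "tags" ':' '['           -- the list under key "tags"
//   '{' "tags" ':' '[' 's' "a"   -- one list element
// so pattern matching can be answered by prefix/term scans over these keys.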
public static void sencodeMulti(Bytes prefix, Object val, ArrayList<EditRec> buffer, long docId, boolean idDel) {
if (val instanceof Map) {
Map map = (Map)val;
prefix = new CompoundBytes(prefix, HEADER_BYTE_MAPOPEN);
buffer.add(new EditRec(docId, prefix, idDel));
for(Object entryObj : map.entrySet()) {
Map.Entry entry = (Map.Entry) entryObj;
String key = (String)entry.getKey();
Bytes sub = new CompoundBytes(prefix, sencodeMapKey(key));
sub = new CompoundBytes(sub, HEADER_BYTE_COLON);
sencodeMulti(sub, entry.getValue(), buffer, docId, idDel);
}
} else if (val instanceof List) {
List list = (List)val;
prefix = new CompoundBytes(prefix, HEADER_BYTE_LISTOPEN);
buffer.add(new EditRec(docId, prefix, idDel));
for(Object subVal : list) {
sencodeMulti(prefix, subVal, buffer, docId, idDel);
}
} else {
// Propagate the deletion flag here too, matching the map and list branches above.
buffer.add(new EditRec(docId, new CompoundBytes(prefix,sencode(val)), idDel));
}
}
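/**
 * Encodes a scalar value into a tagged byte string whose unsigned bytewise
 * order matches the value order within each type: numbers are tagged 'n' and
 * written big-endian after an order-preserving bit transform, strings are
 * tagged 's' followed by their UTF-8 bytes, and null/true/false map to the
 * single tag bytes 'l', 't', and 'f'.
 *
 * The numeric transform in isolation (a sketch with a hypothetical helper,
 * shown only to illustrate why byte order matches numeric order):
 *
 *   static long orderPreservingBits(double d) {
 *       long bits = Double.doubleToLongBits(d);
 *       // negatives: flip every bit, so large negatives sort lowest;
 *       // positives: flip only the sign bit, so they sort above negatives
 *       return (bits < 0) ? ~bits : (bits ^ 0x8000000000000000L);
 *   }
 *
 * The unsigned 64-bit order of the results then matches the numeric order of
 * the inputs, e.g. -2.0, -1.0, 0.0, 1.5 encode to strictly increasing keys.
 */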
public static Bytes sencode(Object val) {
if (val instanceof Number) {
long bits = Double.doubleToLongBits(((Number)val).doubleValue());
if ((bits & 0x8000000000000000L) != 0) {
// Negative: invert all bits so that a bytewise lexicographic sort
// puts big negatives below small negatives.
bits ^= 0xFFFFFFFFFFFFFFFFL;
} else {
// Positive: invert just the sign bit to put positives above negatives.
bits ^= 0x8000000000000000L;
}
return new ArrayBytes(new byte[] {
'n',
(byte)((bits >>> 8 * 7) & 0xFF),
(byte)((bits >>> 8 * 6) & 0xFF),
(byte)((bits >>> 8 * 5) & 0xFF),
(byte)((bits >>> 8 * 4) & 0xFF),
(byte)((bits >>> 8 * 3) & 0xFF),
(byte)((bits >>> 8 * 2) & 0xFF),
(byte)((bits >>> 8 * 1) & 0xFF),
(byte)((bits) & 0xFF)});
} else if (val instanceof String) {
try {
Bytes valBytes = new ArrayBytes(((String)val).getBytes("utf-8"));
return new CompoundBytes(HEADER_BYTE_S, valBytes);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
} else if (val == null) {
return HEADER_BYTE_L;
} else if (val instanceof Boolean) {
if (((Boolean)val).booleanValue()) return HEADER_BYTE_T;
else return HEADER_BYTE_F;
} else {
throw new ApiException("UnencodableValue","Result of type \""+val.getClass().getName()+"\" cannot be encoded in JSON (must be a String, Number, Boolean, HashMap, or ArrayList)");
}
}
public static QueryNodeTemplate applyPatternToBagIndex(Object pattern, BagIndex index, long revNum) {
QueryNodeTemplate templ = applyPatternToBagIndex(ArrayBytes.EMPTY_BYTES, pattern, index, revNum);
if (templ.varMappings.isEmpty()) {
templ.varMappings.put("", index.getRange(ArrayBytes.EMPTY_BYTES, null, null, false, false, revNum));
}
return templ;
}
public static QueryNodeTemplate applyPatternToBagIndex(Bytes prefix, Object pattern, BagIndex index, long revNum) {
Map<String, QueryNode> vars = new HashMap<String, QueryNode>();
QueryNode queryNode;
if (pattern instanceof Map) {
Map map = (Map)pattern;
prefix = new CompoundBytes(prefix, HEADER_BYTE_MAPOPEN);
ArrayList<QueryNode> nodes = new ArrayList<QueryNode>();
for(Object entryObj : map.entrySet()) {
Map.Entry entry = (Map.Entry) entryObj;
String key = (String)entry.getKey();
Object value = entry.getValue();
Bytes curPrefix = new CompoundBytes(prefix, sencodeMapKey(key));
curPrefix = new CompoundBytes(curPrefix, HEADER_BYTE_COLON);
QueryNodeTemplate templ = applyPatternToBagIndex(curPrefix, value, index, revNum);
if (templ.queryNode != null) {
nodes.add(templ.queryNode);
vars.putAll(templ.varMappings);
}
}
if (nodes.isEmpty()) {
queryNode = index.getTerm(prefix, revNum);
} else {
queryNode = AndQueryNode.make(nodes);
}
} else if (pattern instanceof List) {
List list = (List)pattern;
if (list.size() > 0 && SYMBOLS.contains(list.get(0).hashCode())) {
return applyQueryToBagIndex(prefix, list, index, revNum);
} else if (list.size() > 1) {
throw new RuntimeException("malformed list structure in query: "+pattern);
} else {
prefix = new CompoundBytes(prefix, HEADER_BYTE_LISTOPEN);
if (list.isEmpty()) {
queryNode = index.getTerm(prefix, revNum);
} else {
QueryNodeTemplate templ = applyPatternToBagIndex(prefix, list.get(0), index, revNum);
queryNode = templ.queryNode;
vars.putAll(templ.varMappings);
}
}
} else {
queryNode = index.getTerm(new CompoundBytes(prefix,sencode(pattern)), revNum);
}
return new QueryNodeTemplate(queryNode, vars);
}
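// Illustrative query shapes handled below (field names and literals are
// placeholders; the operator strings are the OP_* constants above):
//   ["&", {"type": "person"}, {"age": [">=", 21]}]   conjunction of sub-patterns
//   ["|", {"name": "phil"}, {"name": "phyllis"}]     disjunction
//   ["!", {"name": "phil"}]                          everything under the prefix except matches
//   ["[]", 10, 20]                                   inclusive range on the current prefix
//   ["$", "x"]                                       bind everything under the prefix to variable "x"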
public static QueryNodeTemplate applyQueryToBagIndex(Bytes prefix, List query, BagIndex index, long revNum) {
Map<String, QueryNode> vars = new HashMap<String, QueryNode>();
QueryNode queryNode;
ArrayList<QueryNode> clauses;
if (query == null) {
queryNode = index.getRange(ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, false, false, revNum);
} else {
int opHash = query.get(0).hashCode();
int numArgs = query.size();
switch(opHash) {
case DbServiceUtil.OP_HASH_AS:
queryNode = index.getRange(prefix, null, null, false, false, revNum);
if (query.size() > 2) {
throw new RuntimeException("Not yet supported");
// QueryNodeTemplate subNode=applyQueryToBagIndex(prefix, (List)query.get(2), index, revNum);
// vars.put((String)query.get(1), ExternalTermQueryNode.make(subNode.queryNode, queryNode));
// queryNode = subNode.queryNode;
// vars = subNode.varMappings;
} else {
vars.put((String)query.get(1), queryNode);
}
break;
case DbServiceUtil.OP_HASH_OR:
case DbServiceUtil.OP_HASH_AND:
clauses = new ArrayList<QueryNode>(numArgs-1);
for(int i=1; i<numArgs; i++) {
QueryNodeTemplate node=applyPatternToBagIndex(prefix, query.get(i), index, revNum);
if (node.queryNode != null) clauses.add(node.queryNode);
vars.putAll(node.varMappings);
}
if (opHash == DbServiceUtil.OP_HASH_OR) {
queryNode = new OrderedOrQueryNode(clauses, null, null, null, false, false);
} else {
queryNode = AndQueryNode.make(clauses);
}
break;
case DbServiceUtil.OP_HASH_NOT:
QueryNode matchesSoFar = index.getRange(prefix, null, null, false, false, revNum);
clauses = new ArrayList<QueryNode>(numArgs-1);
for(int i=1; i<numArgs; i++) {
QueryNodeTemplate node=applyPatternToBagIndex(prefix, query.get(i), index, revNum);
if (node.queryNode != null) clauses.add(node.queryNode);
}
QueryNode negativeMatches = OrderedOrQueryNode.make(clauses);
queryNode = AndNotQueryNode.make(matchesSoFar, negativeMatches);
break;
default:
Bytes term1 = null;
Bytes term2 = null;
boolean isExclusive1 = false;
boolean isExclusive2 = false;
switch(opHash) {
case DbServiceUtil.OP_HASH_AS:
case DbServiceUtil.OP_HASH_ANY:
break;
case DbServiceUtil.OP_HASH_GT:
isExclusive1 = true;
// falls through
case DbServiceUtil.OP_HASH_GE:
term1 = sencode(query.get(1));
break;
case DbServiceUtil.OP_HASH_LT:
isExclusive2 = true;
// falls through
case DbServiceUtil.OP_HASH_LE:
term2 = sencode(query.get(1));
break;
case DbServiceUtil.OP_HASH_BETWEEN_EE:
isExclusive2 = true;
// falls through
case DbServiceUtil.OP_HASH_BETWEEN_EI:
isExclusive1 = true;
// falls through
case DbServiceUtil.OP_HASH_BETWEEN_II:
term1 = sencode(query.get(1));
term2 = sencode(query.get(2));
break;
case DbServiceUtil.OP_HASH_BETWEEN_IE:
isExclusive2 = true;
term1 = sencode(query.get(1));
term2 = sencode(query.get(2));
break;
default:
throw new ApiException("QueryFormatError", "Unknown query operator: \""+query.get(0)+"\"");
}
queryNode = index.getRange(prefix, term1, term2, isExclusive1, isExclusive2, revNum);
}
}
return new QueryNodeTemplate(queryNode, vars);
}
/*
public static boolean applyPatternToObject(Object pattern, Object obj) {
if (pattern instanceof Map) {
if (!(obj instanceof Map)) return false;
Map patternMap = (Map)pattern;
Map objMap = (Map)obj;
if (! objMap.keySet().containsAll(patternMap.keySet())) return false;
for(Object patternEntryObj : patternMap.entrySet()) {
Map.Entry entry = (Map.Entry) patternEntryObj;
String key = (String)entry.getKey();
Object subPattern = entry.getValue();
if (! applyPatternToObject(subPattern, objMap.get(key))) return false;
}
return true;
} else if (pattern instanceof List) {
List list = (List)pattern;
if (list.size() > 0 && SYMBOLS.contains(list.get(0).hashCode())) {
return applyQueryToObject(list, obj);
} else if (list.size() > 1) {
throw new RuntimeException("malformed list structure in query: "+pattern);
} else {
if (!(obj instanceof List)) return false;
if (list.isEmpty()) return true;
Object subPattern = list.get(0);
List listObj = (List)obj;
boolean matches = false;
for(Object subObj : listObj) {
if (applyPatternToObject(subPattern, subObj)) {
matches = true;
break;
}
}
return matches;
}
} else {
// queryNode = index.getTerm(new CompoundBytes(prefix,sencode(pattern)), revNum);
}
}
*/
// last parameter is scoring (and is optional)
// default atomic constraint score->1, "AND"/"OR"->sum (even distribution)
// bounds propagation:
// (bound)->min->(bound to every clause)
// (bound)->sum->(bound to every clause according to bound-(sum of other clause weights))
// (bound)->max->(no additional bounding)
// Note that you can use bounds not only to drop entire posting lists but to find occurrence combinations which are not productive
// or, to think of it differently, the logic for traversing an "AND"/"OR" clause could take the score into consideration and artificially bump the doc_id before checking every clause
// grouping is done elsewhere
// primitives:
// RANGE X Y X_EXCL Y_EXCL
// EXCLUSIVE_MIN X
// EXCLUSIVE_MAX X
// MIN X
// MAX X
// NOT C
// Sorting is done first with double values, then with an optional user-specified comparator, then by ID
// These 3 values also constitute a bookmark
// ["AND", [
// ["HAS_OTHER_THAN", "name", ["phil"]], (default score is one)
// ["MIN", "age", 14, ["SCORE_LINEAR_INTERPOLATE",[14,30,70],[0.0,0.8,1.0]]]
// ], ["SCORE_SUM", 3, 4] ]
// Overreliance on JSON? Really, it's just a store that groups a bunch of term strings together to make an object:
// (has age/)
// (hasany type/person, type/place, type/org)
// (min age/ 12)
// (range <prefix> <min> <max>)
// or can I get away with finding some way to do a mapping?
// A value indicated by path (numeric keys indicate a list, string keys a dict)
// How do I indicate an empty list or dict? (I can't!)
// But then I can't range-search multivalue fields, right? type/*/
// DDB core: dump sets of byte strings as documents into bags;
// query using range(fieldname, prefix, minSuffix(optional), maxSuffix(optional))
// complex query using some query syntax:
// ["AND", ["RANGE_MIN", "age", to_bin(14)], ["NOT", ["IN", "name", ["phil"]]]]
// fulltext handling: term/rockies
// What's the API?
// - ddb.insert(["",..]), ddb.update(<conjunctive term list>,["",..]), ddb.remove([<op>,<arg>,..]), ddb.query([<op>,<arg>,..])
// - low level: commit([docid1,..], [["",..],..])
// fetchRange(revNum, t1, t2, excl1, excl2, [k1,k2],[0.1,0.5]) During linear interpolation, all keys must be of the same length
//
// How does this integrate with scripting? Are there commit hooks?
// Scoring/sorting? reduce?
}