package dovetaildb.dbservice;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import dovetaildb.apiservice.ApiException;
import dovetaildb.bagindex.BagIndex;
import dovetaildb.bagindex.BagIndexUtil;
import dovetaildb.bytes.ArrayBytes;
import dovetaildb.iter.Iter;
import dovetaildb.iter.LiteralIter;
import dovetaildb.iter.MergeIter;
import dovetaildb.querynode.AndQueryNode;
import dovetaildb.querynode.OrderedOrQueryNode;
import dovetaildb.querynode.QueryNode;
import dovetaildb.scan.Scanner;
import dovetaildb.scan.ScannerUtil;
import dovetaildb.scriptbridge.ScriptFunction;
public class CopyOfDbServiceUtilDelme {
/**
 * Folds every item produced by an iterator into a single value by calling a
 * reduce function on batches of at most 256 arguments.
 * <p>
 * Items are accumulated into a 256-slot argument array. Each time the array
 * fills up, the reduce function is called on it and its result becomes the
 * first argument of the next batch. When the iterator reports -1 the reduce
 * function is called one final time on whatever is pending (possibly an
 * empty array if the iterator produced nothing).
 *
 * @param i iterator to drain; pulled in blocking mode until it returns -1
 * @param reduceFn function that collapses an argument array into one value
 * @return the result of the final reduce call
 */
public static Object reduceIter(Iter i, ScriptFunction reduceFn) {
    final int batchSize = 256;
    Object[] buffer = new Object[batchSize];
    Object[] params = new Object[batchSize];
    int paramsBase = 0;
    int num;
    while ((num = i.pullAvailable(buffer, true)) != -1) {
        // Consume the whole pulled batch. A single pull may not fit in the
        // free part of params, in which case we reduce and keep copying
        // instead of dropping the overflow.
        int consumed = 0;
        while (consumed < num) {
            int numToCopy = Math.min(batchSize - paramsBase, num - consumed);
            System.arraycopy(buffer, consumed, params, paramsBase, numToCopy);
            consumed += numToCopy;
            paramsBase += numToCopy;
            if (paramsBase == batchSize) {
                // Collapse the full batch; the partial result seeds the next one.
                params[0] = reduceFn.call(params);
                paramsBase = 1;
            }
        }
    }
    return reduceFn.call(Arrays.copyOf(params, paramsBase));
}
/**
 * Iterator wrapper that applies the "offset", "limit", "map" and "filter"
 * post-processing options to the batches pulled from the wrapped iterator.
 */
private static class OptionsIter extends WrappingIter {
    /** Number of leading items that still have to be skipped. */
    int offset;
    /** Number of items that may still be handed out. */
    int limit;
    /** Optional per-item transformation and predicate; either may be null. */
    ScriptFunction map, filter;

    OptionsIter(Iter subIter, int offset, int limit,
            ScriptFunction map, ScriptFunction filter) {
        super(subIter);
        this.offset = offset;
        this.limit = limit;
        this.map = map;
        this.filter = filter;
    }

    /**
     * Pulls a batch from the wrapped iterator, drops items covered by the
     * remaining offset, truncates to the remaining limit and then applies
     * the map function or, when no map function is set, the filter function.
     *
     * @param buffer receives the resulting items starting at index 0
     * @param block passed through unchanged to the wrapped iterator
     * @return the number of items placed in buffer, or the wrapped
     *         iterator's non-positive return value when it yields nothing;
     *         -1 once the limit has been used up
     */
    public int pullAvailable(Object[] buffer, boolean block) {
        if (limit <= 0) {
            // Nothing more may be emitted: report -1 (the value reduceIter
            // stops on) instead of draining the wrapped iterator.
            return -1;
        }
        int ret = subIter.pullAvailable(buffer, block);
        // Skip leading items. Stop as soon as the wrapped iterator yields
        // nothing, otherwise a -1 would be subtracted from offset and the
        // loop would keep pulling.
        while (ret > 0 && offset > 0) {
            if (ret > offset) {
                ret -= offset;
                // Shift the surviving tail to the front; offset must still
                // hold the skip count when it is used as the source index.
                System.arraycopy(buffer, offset, buffer, 0, ret);
                offset = 0;
            } else {
                offset -= ret;
                ret = subIter.pullAvailable(buffer, block);
            }
        }
        if (ret <= 0) {
            return ret;
        }
        if (ret > limit) {
            ret = limit;
            limit = 0;
        } else {
            limit -= ret;
        }
        if (map != null) {
            Object[] params = new Object[1];
            for (int i = 0; i < ret; i++) {
                params[0] = buffer[i];
                buffer[i] = map.call(params);
            }
            // NOTE(review): a filter function is not applied when a map
            // function is present; confirm whether that is intended.
            return ret;
        }
        if (filter != null) {
            // NOTE(review): the limit above is counted before filtering, so
            // fewer than "limit" items may be emitted overall; confirm.
            Object[] params = new Object[1];
            int toIndex = 0;
            for (int fromIndex = 0; fromIndex < ret; fromIndex++) {
                // Hand the candidate item to the predicate.
                params[0] = buffer[fromIndex];
                if (((Boolean) filter.call(params)).booleanValue()) {
                    buffer[toIndex++] = buffer[fromIndex];
                }
            }
            ret = toIndex;
        }
        return ret;
    }
}
/**
 * Handles the "map", "filter", "reduce", "offset", and "limit" options.
 * <p>
 * The iterator is wrapped in an OptionsIter configured from the options.
 * When a "reduce" function is present, the wrapped iterator is consumed
 * immediately via {@link #reduceIter} and an iterator over the single
 * reduced value is returned instead.
 *
 * @param i the iterator to post-process
 * @param options option map; may be null, in which case {@code i} is
 *        returned untouched. "map", "filter" and "reduce" must be
 *        ScriptFunction values; "offset" and "limit" must be Number values
 *        (negative numbers are treated as 0). Missing entries fall back to
 *        no function, offset 0 and no limit.
 * @return the post-processed iterator
 * @throws ClassCastException if an option value has an unexpected type
 */
public static Iter applyPostprecessingOptions(Iter i, Map<String,Object> options) {
    if (options == null) return i;
    // Read offset/limit from the options instead of always using defaults.
    Number offsetOpt = (Number) options.get("offset");
    Number limitOpt = (Number) options.get("limit");
    int offset = (offsetOpt == null) ? 0 : Math.max(0, offsetOpt.intValue());
    int limit = (limitOpt == null) ? Integer.MAX_VALUE : Math.max(0, limitOpt.intValue());
    ScriptFunction mapFn = (ScriptFunction) options.get("map");
    ScriptFunction filterFn = (ScriptFunction) options.get("filter");
    i = new OptionsIter(i, offset, limit, mapFn, filterFn);
    ScriptFunction reduceFn = (ScriptFunction) options.get("reduce");
    if (reduceFn != null) {
        // Reducing drains the iterator right here, not lazily.
        Object result = reduceIter(i, reduceFn);
        i = new LiteralIter(new Object[]{result});
    }
    return i;
}
// Operator names as they appear in the first element of a query list.
public static final String OP_AND = "AND";
public static final String OP_OR = "OR";
public static final String OP_NOT = "NOT";
public static final String OP_EQ = "=";
public static final String OP_NE = "!=";
public static final String OP_LT = "<";
public static final String OP_GT = ">";
public static final String OP_LE = "<=";
public static final String OP_GE = ">=";
// Range operators; the II/IE/EI/EE suffixes select an inclusive (I) or
// exclusive (E) lower and upper bound, in that order.
public static final String OP_RG = "BETWEEN";
public static final String OP_RG_II = "BETWEEN_II";
public static final String OP_RG_IE = "BETWEEN_IE";
public static final String OP_RG_EI = "BETWEEN_EI";
public static final String OP_RG_EE = "BETWEEN_EE";
/*
 * String.hashCode() values of the operator names above, precomputed so they
 * can be used as switch case labels. The query methods in this class switch
 * on query.get(0).hashCode(), so any object whose hash code equals one of
 * these literals selects the corresponding branch.
 *
 * Got the hash code literals from jython like so:
 * >>> import java
 * >>> for s in ['AND','OR','NOT','=','!=','<','>','<=','>=','BETWEEN','BETWEEN_II','BETWEEN_IE','BETWEEN_EI','BETWEEN_EE']: print java.lang.String(s).hashCode()
 * ...
 */
public static final int OP_HASH_AND = 64951;
public static final int OP_HASH_OR = 2531;
public static final int OP_HASH_NOT = 77491;
public static final int OP_HASH_EQ = 61;
public static final int OP_HASH_NE = 1084;
public static final int OP_HASH_LT = 60;
public static final int OP_HASH_GT = 62;
public static final int OP_HASH_LE = 1921;
public static final int OP_HASH_GE = 1983;
public static final int OP_HASH_BETWEEN = 501348328;
public static final int OP_HASH_BETWEEN_II = 2066844887;
public static final int OP_HASH_BETWEEN_IE = 2066844883;
public static final int OP_HASH_BETWEEN_EI = 2066844763;
public static final int OP_HASH_BETWEEN_EE = 2066844759;
// map,reduce,sort,offset,limit,bookmark,diversity
/**
 * Combines several iterators into one by wrapping them in a MergeIter.
 *
 * @param options accepted for interface symmetry; not consulted here
 * @param subIters the iterators to merge
 * @return a MergeIter built over {@code subIters}
 */
public static Iter mergeItersUsingOptions(Map<String, Object> options, List<Iter> subIters) {
    Iter merged = new MergeIter(subIters);
    return merged;
}
public static QueryNode applyPatternToBagIndex(Object pattern, BagIndex index, TermEncoder encoder, long revNum) throws UnencodableValueException {
if (pattern instanceof Map) {
Map map = (Map)pattern;
for(Map.Entry entry : map.entrySet()) {
String key = (String)entry.getKey();
Object value = entry.getValue();
QueryNode node = applyQueryToBagIndex()
}
} else if (pattern instanceof List) {
}
}
/**
 * Translates a query list into a query node over a bag index.
 * <p>
 * The first element of the list selects the operator (matched by hash
 * code against the OP_HASH_* constants):
 * <ul>
 * <li>"AND" / "OR": every following element is a nested query list.</li>
 * <li>"NOT": not supported.</li>
 * <li>"=" / "!=": element 1 is the field name, the remaining elements are
 *     the values whose encoded terms are looked up.</li>
 * <li>"&lt;", "&lt;=", "&gt;", "&gt;=": element 1 is the field name,
 *     element 2 the bound.</li>
 * <li>"BETWEEN" and its _II/_IE/_EI/_EE variants: element 1 is the field
 *     name, elements 2 and 3 the lower and upper bound.</li>
 * </ul>
 *
 * @param query the query list, or null to select the full range
 * @param index the bag index to query
 * @param encoder encoder used to turn field values into terms
 * @param revNum revision number passed through to the index
 * @return the query node for the query
 * @throws UnencodableValueException declared for the term encoding calls
 * @throws ApiException ("QueryFormatError") for an unknown operator
 * @throws RuntimeException for the unsupported "NOT" operator
 */
public static QueryNode applyQueryToBagIndex(List query, BagIndex index, TermEncoder encoder, long revNum) throws UnencodableValueException {
    if (query == null) {
        return index.getRange(ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, ArrayBytes.EMPTY_BYTES, false, false, revNum);
    }
    int opHash = query.get(0).hashCode();
    int numArgs = query.size();
    String fieldName;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_OR:
    case CopyOfDbServiceUtilDelme.OP_HASH_AND:
        ArrayList<QueryNode> clauses = new ArrayList<QueryNode>(numArgs-1);
        // Sub-queries occupy elements 1..numArgs-1; element 0 is the operator.
        for(int i=1; i<numArgs; i++) {
            QueryNode node = applyQueryToBagIndex((List)query.get(i), index, encoder, revNum);
            if (node != null) clauses.add(node);
        }
        if (opHash == CopyOfDbServiceUtilDelme.OP_HASH_OR) {
            return new OrderedOrQueryNode(clauses, null, null, null, false, false);
        } else {
            return AndQueryNode.make(clauses);
        }
    case CopyOfDbServiceUtilDelme.OP_HASH_NOT:
        throw new RuntimeException("Not supported");
    case CopyOfDbServiceUtilDelme.OP_HASH_EQ:
    case CopyOfDbServiceUtilDelme.OP_HASH_NE:
        // NOTE(review): "!=" is dispatched exactly like "="; no negation is
        // applied here. Confirm whether that is intended.
        fieldName = (String)query.get(1);
        byte[][] terms = new byte[numArgs-2][];
        for(int i=2; i<numArgs; i++) {
            terms[i-2] = TermEncoderUtil.encodeWholeTerm(encoder, fieldName, query.get(i));
        }
        return index.getAllTerms(terms, revNum);
    }
    // Everything else is a range operator over a single field.
    fieldName = (String)query.get(1);
    byte[] term1 = null;
    byte[] term2 = null;
    boolean isExclusive1 = false;
    boolean isExclusive2 = false;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_GT:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_GE:
        term1 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_LT:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_LE:
        term2 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EE:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EI:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN:
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_II:
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_IE:
        isExclusive2 = true;
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        // Without this break the case fell into default and always threw.
        break;
    default:
        throw new ApiException("QueryFormatError", "Unknown query operator: \""+query.get(0)+"\"");
    }
    // NOTE(review): the null-query branch uses index.getRange while this
    // uses index.fetchRange; confirm fetchRange yields a QueryNode.
    return index.fetchRange(term1, term2, isExclusive1, isExclusive2, revNum);
}
/**
 * Checks an in-memory entry against a query list.
 * <p>
 * Only the "AND" operator is evaluated: the entry matches when it matches
 * every nested query list. Every other operator, including "=", ends in a
 * RuntimeException. Equality is not implemented yet because it needs any
 * index-time processing to have been applied to the entry first (open
 * question: should buffered data look more like an in-memory BagIndex?).
 *
 * @param entry the entry to test
 * @param query the query list; element 0 is the operator
 * @return true when every nested query of an "AND" matches
 * @throws RuntimeException for any operator other than "AND"
 */
public static boolean matchesQuery(Map<String, Object> entry, List query) {
    Object operator = query.get(0);
    int operatorHash = operator.hashCode();
    int elementCount = query.size();
    if (operatorHash == OP_HASH_AND) {
        int position = 1;
        while (position < elementCount) {
            List subQuery = (List) query.get(position);
            if (!matchesQuery(entry, subQuery)) {
                return false;
            }
            position++;
        }
        return true;
    }
    throw new RuntimeException("Invalid operation "+query.get(0));
}
/**
 * Scanner-based translation of a query list into a scan over a bag index.
 * <p>
 * The first element of the list selects the operator (matched by hash
 * code against the OP_HASH_* constants). "AND"/"OR" combine the scanners
 * of the nested query lists, "NOT" inverts the scanner of the single
 * nested query, "="/"!=" look up the encoded values of a field, and the
 * comparison and BETWEEN operators become a range fetch on a field.
 *
 * @param query the query list, or null to scan everything that is not
 *        deleted
 * @param index the bag index to scan
 * @param encoder encoder used to turn field values into terms
 * @param revNum revision number passed through to the index
 * @return the scanner for the query
 * @throws UnencodableValueException declared for the term encoding calls
 * @throws ApiException ("QueryFormatError") for an unknown operator
 */
public static Scanner OLDapplyQueryToBagIndex(List query, BagIndex index, TermEncoder encoder, long revNum) throws UnencodableValueException {
    if (query == null) {
        return ScannerUtil.andNotScanner(index.fetchAll(revNum), index.fetchDeletions(revNum));
    }
    int opHash = query.get(0).hashCode();
    int numArgs = query.size();
    String fieldName;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_OR:
    case CopyOfDbServiceUtilDelme.OP_HASH_AND:
        Scanner[] clauses = new Scanner[numArgs-1];
        for(int i=0; i<clauses.length; i++) {
            // Recurse into this Scanner-returning method; the QueryNode
            // variant cannot be stored in a Scanner array.
            clauses[i] = OLDapplyQueryToBagIndex((List)query.get(i+1), index, encoder, revNum);
        }
        // Return the combined scanner. Previously the result was discarded
        // and control fell into the NOT case.
        if (opHash == CopyOfDbServiceUtilDelme.OP_HASH_OR) {
            return ScannerUtil.disjunctiveScanner(clauses);
        } else {
            return ScannerUtil.conjunctiveScanner(clauses);
        }
    case CopyOfDbServiceUtilDelme.OP_HASH_NOT:
        Scanner inner = OLDapplyQueryToBagIndex((List)query.get(1), index, encoder, revNum);
        return ScannerUtil.inverseScanner(inner);
    case CopyOfDbServiceUtilDelme.OP_HASH_EQ:
    case CopyOfDbServiceUtilDelme.OP_HASH_NE:
        // NOTE(review): "!=" is dispatched exactly like "="; no negation is
        // applied here. Confirm whether that is intended.
        fieldName = (String)query.get(1);
        byte[][] terms = new byte[numArgs-2][];
        for(int i=2; i<numArgs; i++) {
            terms[i-2] = TermEncoderUtil.encodeWholeTerm(encoder, fieldName, query.get(i));
        }
        return BagIndexUtil.disjunctiveScannerFromTerms(terms, index, revNum);
    }
    // Everything else is a range operator over a single field.
    fieldName = (String)query.get(1);
    byte[] term1 = null;
    byte[] term2 = null;
    boolean isExclusive1 = false;
    boolean isExclusive2 = false;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_GT:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_GE:
        term1 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_LT:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_LE:
        term2 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EE:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EI:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN:
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_II:
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_IE:
        isExclusive2 = true;
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        // Without this break the case fell into default and always threw.
        break;
    default:
        throw new ApiException("QueryFormatError", "Unknown query operator: \""+query.get(0)+"\"");
    }
    return index.fetchRange(term1, term2, isExclusive1, isExclusive2, revNum);
}
// last parameter is scoring (and is optional)
// default atomic constraint score->1, "AND"/"OR"->sum (even distribution)
// bounds propagation:
// (bound)->min->(bound to every clause)
// (bound)->sum->(bound to every clause according to bound-(sum of other clause weights))
// (bound)->max->(no additional bounding)
// Note that you can use bounds not only to drop entire posting lists but also to find occurrence combinations which are not productive.
// Or, to think of it differently, the logic for traversing an "AND"/"OR" clause could take the score under consideration and artificially bump the doc_id before checking every clause.
// grouping is done elsewhere
// primitives:
// RANGE X Y X_EXCL Y_EXCL
// EXCLUSIVE_MIN X
// EXCLUSIVE_MAX X
// MIN X
// MAX X
// NOT C
// Sorting is done first with double values, then with an optional user-specified comparator, then by ID
// These 3 values also constitute a bookmark
// ["AND", [
// ["HAS_OTHER_THAN", "name", ["phil"]], (default score is one)
// ["MIN", "age", 14, ["SCORE_LINEAR_INTERPOLATE",[14,30,70],[0.0,0.8,1.0]]]
// ], ["SCORE_SUM", 3, 4] ]
// Overreliance on JSON? Really, it's just a store that groups a bunch of term strings together to make an object:
// (has age/)
// (hasany type/person, type/place, type/org)
// (min age/ 12)
// (range <prefix> <min> <max>)
// or can i get away with finding some way to do a mapping?
// A value indicated by path (numeric keys indicate a list, string keys a dict)
// How do I indicate an empty list or dict? (I can't!)
// but then i can't range search multivalue fields, right? type/*/
// DDB core: dump sets of byte strings as documents into bags;
// query using range(fieldname, prefix, minSuffix(optional), maxSuffix(optional))
// complex query using some query syntax:
// ["AND", ["RANGE_MIN", "age", to_bin(14)], ["NOT", ["IN", "name", ["phil"]]]]
// fulltext handling: term/rockies
// What's the API?
// - ddb.insert(["",..]), ddb.update(<conjunctive term list>,["",..]), ddb.remove([<op>,<arg>,..]), ddb.query([<op>,<arg>,..])
// - low level: commit([docid1,..], [["",..],..])
// fetchRange(revNum, t1, t2, excl1, excl2, [k1,k2],[0.1,0.5]) During linear interpolation, all keys must be of the same length
//
// How does this integrate with scripting? Are there commit hooks?
// Scoring/sorting? reduce?
/**
 * Translates a query list into a scanner over a bag index.
 * <p>
 * The first element of the list selects the operator (matched by hash
 * code against the OP_HASH_* constants). "AND"/"OR" combine the scanners
 * of the nested query lists, "NOT" inverts the scanner of the single
 * nested query, "="/"!=" look up the encoded values of a field, and the
 * comparison and BETWEEN operators become a range fetch on a field.
 *
 * @param query the query list, or null to scan everything that is not
 *        deleted
 * @param index the bag index to scan
 * @param encoder encoder used to turn field values into terms
 * @param revNum revision number passed through to the index
 * @return the scanner for the query
 * @throws UnencodableValueException declared for the term encoding calls
 * @throws ApiException ("QueryFormatError") for an unknown operator
 */
public static Scanner applyNgQueryToBagIndex(List query, BagIndex index, TermEncoder encoder, long revNum) throws UnencodableValueException {
    if (query == null) {
        return ScannerUtil.andNotScanner(index.fetchAll(revNum), index.fetchDeletions(revNum));
    }
    int opHash = query.get(0).hashCode();
    int numArgs = query.size();
    String fieldName;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_OR:
    case CopyOfDbServiceUtilDelme.OP_HASH_AND:
        Scanner[] clauses = new Scanner[numArgs-1];
        for(int i=0; i<clauses.length; i++) {
            // Recurse into this Scanner-returning method; the QueryNode
            // variant cannot be stored in a Scanner array.
            clauses[i] = applyNgQueryToBagIndex((List)query.get(i+1), index, encoder, revNum);
        }
        // Return the combined scanner. Previously the result was discarded
        // and control fell into the NOT case.
        if (opHash == CopyOfDbServiceUtilDelme.OP_HASH_OR) {
            return ScannerUtil.disjunctiveScanner(clauses);
        } else {
            return ScannerUtil.conjunctiveScanner(clauses);
        }
    case CopyOfDbServiceUtilDelme.OP_HASH_NOT:
        Scanner inner = applyNgQueryToBagIndex((List)query.get(1), index, encoder, revNum);
        return ScannerUtil.inverseScanner(inner);
    case CopyOfDbServiceUtilDelme.OP_HASH_EQ:
    case CopyOfDbServiceUtilDelme.OP_HASH_NE:
        // NOTE(review): "!=" is dispatched exactly like "="; no negation is
        // applied here. Confirm whether that is intended.
        fieldName = (String)query.get(1);
        byte[][] terms = new byte[numArgs-2][];
        for(int i=2; i<numArgs; i++) {
            terms[i-2] = TermEncoderUtil.encodeWholeTerm(encoder, fieldName, query.get(i));
        }
        return BagIndexUtil.disjunctiveScannerFromTerms(terms, index, revNum);
    }
    // Everything else is a range operator over a single field.
    fieldName = (String)query.get(1);
    byte[] term1 = null;
    byte[] term2 = null;
    boolean isExclusive1 = false;
    boolean isExclusive2 = false;
    switch(opHash) {
    case CopyOfDbServiceUtilDelme.OP_HASH_GT:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_GE:
        term1 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_LT:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_LE:
        term2 = encoder.encode(fieldName, query.get(2));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EE:
        isExclusive2 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_EI:
        isExclusive1 = true;
        // fall through
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN:
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_II:
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        break;
    case CopyOfDbServiceUtilDelme.OP_HASH_BETWEEN_IE:
        isExclusive2 = true;
        term1 = encoder.encode(fieldName, query.get(2));
        term2 = encoder.encode(fieldName, query.get(3));
        // Without this break the case fell into default and always threw.
        break;
    default:
        throw new ApiException("QueryFormatError", "Unknown query operator: \""+query.get(0)+"\"");
    }
    return index.fetchRange(term1, term2, isExclusive1, isExclusive2, revNum);
}
}