package sizzle.compiler;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.log4j.Logger;
import org.scannotation.AnnotationDB;
import org.scannotation.ClasspathUrlFinder;
import sizzle.aggregators.AggregatorSpec;
import sizzle.aggregators.IntSumAggregator;
import sizzle.functions.FunctionSpec;
import sizzle.parser.syntaxtree.Operand;
import sizzle.types.SizzleAny;
import sizzle.types.SizzleArray;
import sizzle.types.SizzleBool;
import sizzle.types.SizzleBytes;
import sizzle.types.SizzleFingerprint;
import sizzle.types.SizzleFloat;
import sizzle.types.SizzleFunction;
import sizzle.types.SizzleInt;
import sizzle.types.SizzleMap;
import sizzle.types.SizzleName;
import sizzle.types.SizzleScalar;
import sizzle.types.SizzleString;
import sizzle.types.SizzleTable;
import sizzle.types.SizzleTime;
import sizzle.types.SizzleTuple;
import sizzle.types.SizzleType;
import sizzle.types.SizzleVarargs;
/**
 * Compile-time symbol table: tracks named types, global and local variables,
 * the functions (including casts and macros) that can be called, and the
 * aggregator classes that tables can be bound to.
 */
public class SymbolTable {
    /** Logger shared by every instance; never reassigned, hence final. */
    private static final Logger LOG = Logger.getLogger(SymbolTable.class);
    /** When true, the extra java.lang.Math functions are not exposed. */
    private final boolean strictCompatibility;
    /** Class loader used to resolve classes by name. */
    private final ClassLoader loader;
    /** Registered functions, looked up by name and formal parameter types. */
    private FunctionTrie functions;
    /** Aggregator classes keyed by {@code name} or {@code name:type}. */
    private final HashMap<String, Class<?>> aggregators;
    /** Maps the Java types used in protocol buffers to Sizzle types. */
    private final Map<Class<?>, SizzleType> protomap;
    /** Maps type names to Sizzle types. */
    private final Map<String, SizzleType> idmap;
    /** Variables with a global scope. */
    private Map<String, SizzleType> globals;
    /** Variables with a local scope. */
    private Map<String, SizzleType> locals;
    /** Identifier last stored through {@code setId}. */
    private String id;
    /** Operand last stored through {@code setOperand}. */
    private Operand operand;
/**
 * Creates a symbol table with no additional libraries and a string-typed
 * input.
 *
 * @throws IOException declared by the constructor this one delegates to
 */
public SymbolTable() throws IOException {
    this(new ArrayList<URL>(), new SizzleString());
}
/**
 * Creates a symbol table for the given libraries with a string-typed input.
 *
 * @param libs library URLs handed to the full constructor
 * @throws IOException declared by the constructor this one delegates to
 */
public SymbolTable(final List<URL> libs) throws IOException {
    this(libs, new SizzleString());
}
/**
 * Creates a symbol table with no additional libraries and the given input
 * type.
 *
 * @param input type bound to the global {@code input}
 * @throws IOException declared by the constructor this one delegates to
 */
public SymbolTable(final SizzleType input) throws IOException {
    this(new ArrayList<URL>(), input);
}
/**
 * Builds a fully populated symbol table: the protocol buffer type map, the
 * named scalar types, the predefined globals, the macro-backed generic
 * functions, the cast functions, the java.lang.Math wrappers and the default
 * output tables.
 *
 * NOTE(review): several statements in this constructor look truncated (see
 * the inline notes) and `libs` is not referenced anywhere in the visible
 * body; confirm against the upstream file before relying on this block.
 *
 * @param libs library URLs; presumably meant to be scanned for annotated
 *            aggregators and functions - TODO confirm
 * @param input type bound to the global {@code input}
 * @throws IOException declared by the signature; no visible statement
 *             throws it
 */
public SymbolTable(final List<URL> libs, final SizzleType input) throws IOException {
// strict compatibility is hard-wired on, so the guarded extras below are skipped
this.strictCompatibility = true;
this.loader = Thread.currentThread().getContextClassLoader();
// this maps the Java types in protocol buffers into Sizzle types
this.protomap = new HashMap<Class<?>, SizzleType>();
this.protomap.put(int.class, new SizzleInt());
this.protomap.put(long.class, new SizzleInt());
this.protomap.put(float.class, new SizzleFloat());
this.protomap.put(double.class, new SizzleFloat());
this.protomap.put(boolean.class, new SizzleBool());
this.protomap.put(byte[].class, new SizzleBytes());
this.protomap.put(String.class, new SizzleString());
// this maps scalar Sizzle scalar types names to their classes
// TODO: do this via reflection
this.idmap = new HashMap<String, SizzleType>();
this.idmap.put("any", new SizzleAny());
// "none" is deliberately mapped to null, so lookups must use containsKey
this.idmap.put("none", null);
this.idmap.put("bool", new SizzleBool());
this.idmap.put("int", new SizzleInt());
this.idmap.put("float", new SizzleFloat());
this.idmap.put("time", new SizzleTime());
this.idmap.put("fingerprint", new SizzleFingerprint());
this.idmap.put("string", new SizzleString());
this.idmap.put("bytes", new SizzleBytes());
// does the same for arrays
// for (final String key : new HashSet<String>(this.idmap.keySet())) {
// final SizzleType value = this.idmap.get(key);
// if (value instanceof SizzleScalar)
// this.idmap.put("array of " + key, new SizzleArray((SizzleScalar)
// value));
// }
// variables with a global scope
this.globals = new HashMap<String, SizzleType>();
// set the type of the input
this.globals.put("input", input);
this.globals.put("true", new SizzleBool());
this.globals.put("false", new SizzleBool());
this.globals.put("PI", new SizzleFloat());
this.globals.put("Inf", new SizzleFloat());
this.globals.put("inf", new SizzleFloat());
this.globals.put("NaN", new SizzleFloat());
this.globals.put("nan", new SizzleFloat());
// variables with a local scope
this.locals = new HashMap<String, SizzleType>();
this.aggregators = new HashMap<String, Class<?>>();
this.functions = new FunctionTrie();
// these generic functions require more finagling than can currently be
// (easily) done with a static method, so they are handled with macros
this.setFunction("def", new SizzleFunction(new SizzleBool(), new SizzleType[] { new SizzleAny() }, "${0} != null"));
this.setFunction("len", new SizzleFunction(new SizzleInt(), new SizzleType[] { new SizzleArray(new SizzleScalar()) }, "${0}.length"));
this.setFunction("len", new SizzleFunction(new SizzleInt(), new SizzleType[] { new SizzleString() }, "${0}.length()"));
this.setFunction("len", new SizzleFunction(new SizzleInt(), new SizzleType[] { new SizzleBytes() }, "${0}.length"));
this.setFunction("len", new SizzleFunction(new SizzleInt(), new SizzleType[] { new SizzleMap(new SizzleScalar(), new SizzleScalar()) },
// NOTE(review): the statement above is cut off - its macro template and
// closing parentheses are not present here
this.setFunction("haskey", new SizzleFunction(new SizzleBool(), new SizzleType[] { new SizzleMap(new SizzleScalar(), new SizzleScalar()),
new SizzleScalar() }, "${0}.containsKey(${1})"));
this.setFunction("keys", new SizzleFunction(new SizzleArray(new SizzleScalar()), new SizzleType[] { new SizzleMap(new SizzleScalar(),
new SizzleScalar()) }, "${0}.keySet().toArray()"));
this.setFunction("lookup", new SizzleFunction(new SizzleScalar(), new SizzleType[] { new SizzleMap(new SizzleScalar(), new SizzleScalar()),
new SizzleScalar(), new SizzleScalar() }, "(${0}.containsKey(${1}) ? ${0}.get(${1}) : ${2})"));
this.setFunction("regex", new SizzleFunction(new SizzleString(), new SizzleType[] { new SizzleName(new SizzleScalar()), new SizzleInt() },
"sizzle.functions.SizzleSpecialIntrinsics.regex(\"${0}\", ${1})"));
this.setFunction("regex", new SizzleFunction(new SizzleString(), new SizzleType[] { new SizzleName(new SizzleScalar()) },
// NOTE(review): the single-argument regex registration above is cut off -
// its macro template and closing parentheses are not present here
// these fingerprints are identity functions
this.setFunction("fingerprintof", new SizzleFunction(new SizzleFingerprint(), new SizzleScalar[] { new SizzleInt() }));
this.setFunction("fingerprintof", new SizzleFunction(new SizzleFingerprint(), new SizzleScalar[] { new SizzleTime() }));
/* expose all the casting constructors to Sawzall */
// casts are registered as functions named after the target type
// string to bool
// NOTE(review): the expression below is not wrapped in a setFunction call
// like its neighbours; the opening of the statement appears to be missing
new SizzleFunction("sizzle.functions.SizzleCasts.stringToBoolean", new SizzleBool(), new SizzleScalar[] { new SizzleString() }));
// bool to int
this.setFunction("int", new SizzleFunction("sizzle.functions.SizzleCasts.booleanToLong", new SizzleInt(), new SizzleScalar[] { new SizzleBool() }));
// float to int
this.setFunction("int", new SizzleFunction(new SizzleInt(), new SizzleScalar[] { new SizzleFloat() }, "(long)${0}"));
// time to int
this.setFunction("int", new SizzleFunction(new SizzleInt(), new SizzleScalar[] { new SizzleTime() }));
// fingerprint to int
this.setFunction("int", new SizzleFunction(new SizzleInt(), new SizzleScalar[] { new SizzleFingerprint() }));
// string to int
this.setFunction("int", new SizzleFunction("java.lang.Long.decode", new SizzleInt(), new SizzleScalar[] { new SizzleString() }));
// string to int with param base
this.setFunction("int", new SizzleFunction(new SizzleInt(), new SizzleScalar[] { new SizzleString(), new SizzleInt() },
"java.lang.Long.parseLong(${0}, (int)${1})"));
// bytes to int with param encoding format
this.setFunction("int", new SizzleFunction("sizzle.functions.SizzleCasts.bytesToLong", new SizzleInt(), new SizzleScalar[] { new SizzleBytes(),
new SizzleString() }));
// int to float
this.setFunction("float", new SizzleFunction(new SizzleFloat(), new SizzleScalar[] { new SizzleInt() }, "(double)${0}"));
// string to float
this.setFunction("float", new SizzleFunction("java.lang.Double.parseDouble", new SizzleFloat(), new SizzleScalar[] { new SizzleString() }));
// int to time
this.setFunction("time", new SizzleFunction(new SizzleTime(), new SizzleScalar[] { new SizzleInt() }));
// string to time
this.setFunction("time", new SizzleFunction("sizzle.functions.SizzleCasts.stringToTime", new SizzleTime(), new SizzleScalar[] { new SizzleString() }));
// string to time, with a second string parameter
this.setFunction("time", new SizzleFunction("sizzle.functions.SizzleCasts.stringToTime", new SizzleTime(), new SizzleScalar[] { new SizzleString(),
new SizzleString() }));
// int to fingerprint
this.setFunction("fingerprint", new SizzleFunction(new SizzleFingerprint(), new SizzleScalar[] { new SizzleInt() }));
// string to fingerprint
// NOTE(review): this cast and the next declare SizzleInt as their return
// type although they are registered as "fingerprint" - confirm intent
this.setFunction("fingerprint", new SizzleFunction("java.lang.Long.parseLong", new SizzleInt(), new SizzleScalar[] { new SizzleString() }));
// string to fingerprint with param base
this.setFunction("fingerprint", new SizzleFunction("java.lang.Long.parseLong", new SizzleInt(), new SizzleScalar[] { new SizzleString(),
new SizzleInt() }));
// bytes to fingerprint
this.setFunction("fingerprint", new SizzleFunction("sizzle.functions.SizzleCasts.bytesToFingerprint", new SizzleFingerprint(),
new SizzleScalar[] { new SizzleBytes() }));
// bool to string
this.setFunction("string", new SizzleFunction("java.lang.Boolean.toString", new SizzleString(), new SizzleScalar[] { new SizzleBool() }));
// int to string
this.setFunction("string", new SizzleFunction("java.lang.Long.toString", new SizzleString(), new SizzleScalar[] { new SizzleInt() }));
// int to string with parameter base
this.setFunction("string", new SizzleFunction("sizzle.functions.SizzleCasts.longToString", new SizzleString(), new SizzleScalar[] { new SizzleInt(),
new SizzleInt() }));
// float to string
this.setFunction("string", new SizzleFunction("java.lang.Double.toString", new SizzleString(), new SizzleScalar[] { new SizzleFloat() }));
// time to string
this.setFunction("string", new SizzleFunction("sizzle.functions.SizzleCasts.timeToString", new SizzleString(), new SizzleScalar[] { new SizzleTime() }));
// fingerprint to string
this.setFunction("string", new SizzleFunction("java.lang.Long.toHexString", new SizzleString(), new SizzleScalar[] { new SizzleFingerprint() }));
// bytes to string
this.setFunction("string", new SizzleFunction("new java.lang.String", new SizzleString(), new SizzleScalar[] { new SizzleBytes() }));
// bytes to string, with a second string parameter
this.setFunction("string", new SizzleFunction("new java.lang.String", new SizzleString(), new SizzleScalar[] { new SizzleBytes(), new SizzleString() }));
// int to bytes with param encoding format
// NOTE(review): declared return type is SizzleInt although this is
// registered as a "bytes" cast - confirm intent
this.setFunction("bytes", new SizzleFunction("sizzle.functions.SizzleCasts.longToBytes", new SizzleInt(), new SizzleScalar[] { new SizzleInt(),
new SizzleString() }));
// fingerprint to bytes
this.setFunction("bytes", new SizzleFunction("sizzle.functions.SizzleCasts.fingerprintToBytes", new SizzleBytes(),
new SizzleScalar[] { new SizzleFingerprint() }));
// string to bytes
// NOTE(review): as with "string to bool", the opening of this statement
// appears to be missing
new SizzleFunction("sizzle.functions.SizzleCasts.stringToBytes", new SizzleBytes(), new SizzleScalar[] { new SizzleString() }));
/* expose the java.lang.Math class to Sawzall */
this.setFunction("highbit", new SizzleFunction("java.lang.Long.highestOneBit", new SizzleInt(), new SizzleScalar[] { new SizzleInt() }));
// abs just needs to be overloaded
// NOTE(review): the int overload declares a SizzleFloat return type - confirm
this.setFunction("abs", new SizzleFunction("java.lang.Math.abs", new SizzleFloat(), new SizzleScalar[] { new SizzleInt() }));
this.setFunction("abs", new SizzleFunction("java.lang.Math.abs", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// abs is also named fabs in Sawzall
this.setFunction("fabs", new SizzleFunction("java.lang.Math.abs", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// log is named ln in Sawzall
this.setFunction("ln", new SizzleFunction("java.lang.Math.log", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// expose the rest of the unary functions
// NOTE(review): the name list on the next line is cut off before its
// closing parentheses
for (final String s : Arrays.asList("log10", "exp", "sqrt", "sin", "cos", "tan", "asin", "acos", "atan", "cosh", "sinh", "tanh", "ceil", "floor",
this.setFunction(s, new SizzleFunction("java.lang.Math." + s, new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// expose the binary functions
for (final String s : Arrays.asList("pow", "atan2"))
this.setFunction(s, new SizzleFunction("java.lang.Math." + s, new SizzleFloat(), new SizzleScalar[] { new SizzleFloat(), new SizzleFloat() }));
// max and min are registered once per numeric type
// NOTE(review): every max/min overload below, including the int, time and
// string ones, declares a SizzleFloat return type - confirm
for (final String s : Arrays.asList("max", "min"))
for (final SizzleScalar t : Arrays.asList(new SizzleInt(), new SizzleFloat()))
this.setFunction(s, new SizzleFunction("java.lang.Math." + s, new SizzleFloat(), new SizzleScalar[] { t, t }));
this.setFunction("max", new SizzleFunction(new SizzleFloat(), new SizzleScalar[] { new SizzleTime(), new SizzleTime() }, "(${0} > ${1} ? ${0} : ${1})"));
this.setFunction("min", new SizzleFunction(new SizzleFloat(), new SizzleScalar[] { new SizzleTime(), new SizzleTime() }, "(${0} < ${1} ? ${0} : ${1})"));
this.setFunction("max", new SizzleFunction(new SizzleFloat(), new SizzleScalar[] { new SizzleString(), new SizzleString() },
"(${0}.compareTo(${1}) > 0 ? ${0} : ${1})"));
this.setFunction("min", new SizzleFunction(new SizzleFloat(), new SizzleScalar[] { new SizzleString(), new SizzleString() },
"(${0}.compareTo(${1}) < 0 ? ${0} : ${1})"));
// expose whatever is left, assuming we are not aiming for strict
// compatibility
if (!this.strictCompatibility) {
// random takes no argument
// NOTE(review): no registration for random follows the comment above
// these three have capitals in the name
this.setFunction("ieeeremainder", new SizzleFunction("java.lang.Math.IEEEremainder", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat(),
new SizzleFloat() }));
this.setFunction("todegrees", new SizzleFunction("java.lang.Math.toDegrees", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
this.setFunction("toradians", new SizzleFunction("java.lang.Math.toRadians", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// the unaries
for (final String s : Arrays.asList("cbrt", "expm1", "log1p", "rint", "signum", "ulp"))
this.setFunction(s, new SizzleFunction("java.lang.Math." + s, new SizzleFloat(), new SizzleScalar[] { new SizzleFloat() }));
// and binaries
// NOTE(review): the opening of the hypot registration appears to be missing
new SizzleFunction("java.lang.Math.hypot", new SizzleFloat(), new SizzleScalar[] { new SizzleFloat(), new SizzleFloat() }));
// add in the default tables
// FIXME: support format strings and files
this.set("stdout", new SizzleTable(new SizzleString()));
this.set("stderr", new SizzleTable(new SizzleString()));
this.set("output", new SizzleTable(new SizzleBytes()));
public SymbolTable cloneNonLocals() throws IOException {
SymbolTable st;
try {
st = new SymbolTable();
} catch (final MalformedURLException e) {
throw new RuntimeException(e.getClass().getSimpleName() + " caught", e);
st.globals = this.globals;
st.functions = this.functions;
st.locals = new HashMap<String, SizzleType>(this.locals);
return st;
public void set(final String id, final SizzleType type) {
this.set(id, type, false);
public void set(final String id, final SizzleType type, final boolean global) {
if (this.idmap.containsKey(id))
throw new TypeException(id + " already declared as " + this.idmap.get(id));
if (global)
this.globals.put(id, type);
this.locals.put(id, type);
public boolean contains(final String id) {
return this.globals.containsKey(id) || this.locals.containsKey(id);
public SizzleType get(final String id) {
if (this.idmap.containsKey(id))
return new SizzleName(this.idmap.get(id));
if (this.globals.containsKey(id))
return this.globals.get(id);
if (this.locals.containsKey(id))
return this.locals.get(id);
throw new TypeException("no such identifier " + id);
public boolean hasType(final String id) {
return this.idmap.containsKey(id);
public SizzleType getType(final String id) {
if (this.idmap.containsKey(id))
return this.idmap.get(id);
if (id.startsWith("array of "))
return new SizzleArray(this.getType(id.substring("array of ".length())));
throw new TypeException("no such type " + id);
public void setType(final String id, final SizzleType sizzleType) {
this.idmap.put(id, sizzleType);
private void importAggregator(final Class<?> clazz) {
if (!clazz.isAnnotationPresent(AggregatorSpec.class))
final AggregatorSpec annotation = clazz.getAnnotation(AggregatorSpec.class);
final String type = annotation.type();
if (type.equals("any"))
this.aggregators.put(, clazz);
this.aggregators.put( + ":" + type, clazz);
private void importAggregator(final String c) {
try {
this.importAggregator(Class.forName(c, false, this.loader));
} catch (final ClassNotFoundException e) {
throw new TypeException("no such class " + c, e);
public Class<?> getAggregator(final String name, final SizzleScalar type) {
if (this.aggregators.containsKey(name + ":" + type))
return this.aggregators.get(name + ":" + type);
else if (this.aggregators.containsKey(name))
return this.aggregators.get(name);
throw new TypeException("no such aggregator " + name + " of " + type);
public List<Class<?>> getAggregators(final String name, final SizzleType type) {
final List<Class<?>> aggregators = new ArrayList<Class<?>>();
if (type instanceof SizzleTuple)
for (final SizzleType subType : ((SizzleTuple) type).getTypes())
aggregators.add(this.getAggregator(name, (SizzleScalar) subType));
aggregators.add(this.getAggregator(name, (SizzleScalar) type));
return aggregators;
private void importFunction(final Method m) {
final FunctionSpec annotation = m.getAnnotation(FunctionSpec.class);
final String[] formalParameters = annotation.formalParameters();
final SizzleType[] formalParameterTypes = new SizzleType[formalParameters.length];
for (int i = 0; i < formalParameters.length; i++) {
final String id = formalParameters[i];
// check for varargs
if (id.endsWith("..."))
formalParameterTypes[i] = new SizzleVarargs(this.getType(id.substring(0, id.indexOf('.'))));
formalParameterTypes[i] = this.getType(id);
for (final String dep : annotation.typeDependencies())
if (dep.endsWith(".proto"))
else if (dep.endsWith(".avro"))
throw new TypeException("unknown dependency in " + dep);
new SizzleFunction(m.getDeclaringClass().getCanonicalName() + '.' + m.getName(), this.getType(annotation.returnType()), formalParameterTypes));
private void importFunctions(final Class<?> c) {
for (final Method m : c.getMethods())
if (m.isAnnotationPresent(FunctionSpec.class))
private void importFunctions(final String c) {
try {
} catch (final ClassNotFoundException e) {
throw new TypeException("no such class " + c, e);
/**
 * Walks the classpath entries and the given library URLs with an
 * AnnotationDB, then iterates over the class names indexed under the
 * AggregatorSpec and FunctionSpec annotations.
 *
 * NOTE(review): the statements that feed URLs into the database and that
 * import each indexed class are not present in this view (see the inline
 * notes); presumably they scan each URL and call importAggregator /
 * importFunctions - confirm against the upstream file.
 *
 * @param urls additional library URLs
 * @throws IOException declared by the signature; no visible statement
 *             throws it
 */
private void importLibs(final List<URL> urls) throws IOException {
final AnnotationDB db = new AnnotationDB();
// let's assume the entire runtime is in the same classpath entry as the
// int sum aggregator
// NOTE(review): no statement referencing the int sum aggregator follows
// only classpath entries whose path ends in "/" are considered below
for (final URL s : ClasspathUrlFinder.findClassPaths())
if (s.getPath().endsWith("/"))
// NOTE(review): the body of the if above is missing
for (final URL url : urls)
// NOTE(review): the body of the loop above is missing
final Map<String, Set<String>> annotationIndex = db.getAnnotationIndex();
for (final String c : annotationIndex.get(AggregatorSpec.class.getCanonicalName()))
try {
// NOTE(review): the try body is missing
} catch (final NoClassDefFoundError e) {
// a class that fails to link is logged and skipped rather than aborting
SymbolTable.LOG.error("unable to import aggregator " + c + ": " + e.getClass().getSimpleName() + " for " + e.getMessage());
for (final String c : annotationIndex.get(FunctionSpec.class.getCanonicalName())) {
// NOTE(review): the body of this loop is missing
/**
 * Imports the message types of a protocol buffer definition: loads the
 * wrapper class named after the camel-cased file name from sizzle.types and
 * registers a tuple type under the simple name of each of its nested
 * classes.
 *
 * NOTE(review): the statements that skip non-matching fields and that add
 * member types to `members` are not present in this view, so as written
 * each tuple is registered with an empty member list - confirm against the
 * upstream file.
 *
 * @param name proto file name; the part before the first '.' is used
 * @throws TypeException if the wrapper class cannot be found
 */
void importProto(final String name) {
final String camelCased = SymbolTable.camelCase(name.substring(0, name.indexOf('.')));
Class<?> wrapper;
try {
wrapper = Class.forName("sizzle.types." + camelCased);
} catch (final ClassNotFoundException e) {
// NOTE(review): the cause `e` is not passed on to the TypeException
throw new TypeException("no such proto " + name);
for (final Class<?> c : wrapper.getClasses()) {
final List<SizzleType> members = new ArrayList<SizzleType>();
final Map<String, Integer> names = new HashMap<String, Integer>();
int i = 0;
for (final Field field : c.getDeclaredFields()) {
// fields whose name does not end in '_' are filtered on here
// NOTE(review): the statement guarded by this if is missing
if (!field.getName().endsWith("_"))
// the member name is the field name without its trailing character
final String member = SymbolTable.deCamelCase(field.getName().substring(0, field.getName().length() - 1));
final Class<?> type = field.getType();
// NOTE(review): `type` is never used in the visible lines
names.put(member, i++);
this.idmap.put(c.getSimpleName(), new SizzleTuple(members, names));
// TODO support protocol buffer casts
private void importAvro(final String dep) {
throw new RuntimeException("unimplemented");
public SizzleFunction getFunction(final String id) {
return this.getFunction(id, new SizzleType[0]);
public SizzleFunction getFunction(final String id, final List<SizzleType> formalParameters) {
return this.getFunction(id, formalParameters.toArray(new SizzleType[formalParameters.size()]));
public SizzleFunction getFunction(final String id, final SizzleType[] formalParameters) {
final SizzleFunction func = this.functions.getFunction(id, formalParameters);
return func;
public void setFunction(final String id, final SizzleFunction sizzleFunction) {
this.functions.addFunction(id, sizzleFunction);
public boolean hasCast(final SizzleType from, final SizzleType to) {
try {
this.getFunction(to.toString(), new SizzleType[] { from });
return true;
} catch (final TypeException e) {
return false;
public SizzleFunction getCast(final SizzleType from, final SizzleType to) {
return this.getFunction(to.toString(), new SizzleType[] { from });
public void setId(final String id) { = id;
public String getId() {
public void setOperand(final Operand operand) {
this.operand = operand;
public Operand getOperand() {
return this.operand;
public String toString() {
final List<String> r = new ArrayList<String>();
for (final Entry<String, SizzleType> entry : this.locals.entrySet())
r.add(entry.getKey() + ":" + entry.getValue());
return r.toString();
private static String camelCase(final String string) {
final StringBuilder camelized = new StringBuilder();
boolean lower = false;
for (final char c : string.toCharArray())
if (c == '_')
lower = false;
else if (Character.isDigit(c)) {
lower = false;
} else if (Character.isLetter(c)) {
if (lower)
lower = true;
return camelized.toString();
private static String deCamelCase(final String string) {
final StringBuilder decamelized = new StringBuilder();
for (final char c : string.toCharArray())
if (Character.isUpperCase(c))
decamelized.append(Character.toString('_') + Character.toLowerCase(c));
return decamelized.toString();