* compute the multimaps <group_column_number, alias> and <group_column_number, operator>
* and <alias, expression_operator>
* Also set the lookup table for each alias to false
*/
Schema groupBySchema = null;
List<Schema.FieldSchema> groupByFss = new ArrayList<Schema.FieldSchema>();
Map<String, Boolean> aliasLookup = new HashMap<String, Boolean>();
MultiMap<String, ExpressionOperator> aliasExop = new MultiMap<String, ExpressionOperator>();
MultiMap<Integer, String> positionAlias = new MultiMap<Integer, String>();
MultiMap<Integer, ExpressionOperator> positionOperators = new MultiMap<Integer, ExpressionOperator>();
for (LogicalOperator op : inputs) {
int position = 0;
for(LogicalPlan plan: mGroupByPlans.get(op)) {
for(LogicalOperator eOp: plan.getLeaves()) {
Schema.FieldSchema fs = ((ExpressionOperator)eOp).getFieldSchema();
if (null != fs) {
String alias = fs.alias;
if(null != alias) {
aliasLookup.put(alias, false);
aliasExop.put(alias, (ExpressionOperator)eOp);
positionAlias.put(position, alias);
}
//store the operators for each position in the group
} else {
log.warn("Field Schema of an expression operator cannot be null");
}
positionOperators.put(position, (ExpressionOperator)eOp);
}
++position;
}
}
/*
* Now that the multi maps and the look up table are computed, do the following:
* for each column in the group, in order, check whether the alias is already used or not.
* If the alias is already used, check for the next unused alias.
* If none of the aliases can be used, then the alias of that column is null.
* If an alias is found usable, then use that alias and the schema of the expression operator
* corresponding to that position. Note that the first operator for that position is
* picked. The type checker will ensure that the correct schema is merged
*/
int arity = mGroupByPlans.get(inputs.get(0)).size();
for (int i = 0; i < arity; ++i) {
Schema.FieldSchema groupByFs;
Collection<String> cAliases = positionAlias.get(i);
if(null != cAliases) {
Object[] aliases = cAliases.toArray();
for(int j = 0; j < aliases.length; ++j) {
String alias = (String) aliases[j];
if(null != alias) {
//Collection<ExpressionOperator> cEops = aliasExop.get(alias);
Collection<ExpressionOperator> cEops = positionOperators.get(i);
if(null != cEops) {
ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
if(null != eOp) {
if(!aliasLookup.get(alias)) {
Schema.FieldSchema fs = eOp.getFieldSchema();
if(null != fs) {
groupByFs = new Schema.FieldSchema(alias, fs.schema, fs.type);
groupByFss.add(groupByFs);
aliasLookup.put(alias, true);
} else {
groupByFs = new Schema.FieldSchema(alias, null, DataType.BYTEARRAY);
groupByFss.add(groupByFs);
}
setFieldSchemaParent(groupByFs, positionOperators, i);
break;
} else {
if((j + 1) < aliases.length) {
continue;
} else {
//we have seen this alias before
//just add the schema of the expression operator with the null alias
Schema.FieldSchema fs = eOp.getFieldSchema();
if(null != fs) {
groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
groupByFss.add(groupByFs);
for(ExpressionOperator op: cEops) {
Schema.FieldSchema opFs = op.getFieldSchema();
if(null != opFs) {
groupByFs.setParent(opFs.canonicalName, eOp);
} else {
groupByFs.setParent(null, eOp);
}
}
} else {
groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
groupByFss.add(groupByFs);
}
setFieldSchemaParent(groupByFs, positionOperators, i);
break;
}
}
} else {
//should not be here
log.debug("Cannot be here: we cannot have a collection of null expression operators");
}
} else {
//should not be here
log.debug("Cannot be here: we should have an expression operator at each position");
}
} else {
//should not be here
log.debug("Cannot be here: we cannot have a collection of null aliases ");
}
}
} else {
//We do not have any alias for this position in the group by columns
//We have positions $1, $2, etc.
Collection<ExpressionOperator> cEops = positionOperators.get(i);
if(null != cEops) {
ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
if(null != eOp) {
Schema.FieldSchema fs = eOp.getFieldSchema();
if(null != fs) {
groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
groupByFss.add(groupByFs);
} else {
groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
groupByFss.add(groupByFs);
}
} else {
groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
groupByFss.add(groupByFs);
}
} else {
groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
groupByFss.add(groupByFs);
}
setFieldSchemaParent(groupByFs, positionOperators, i);
}
}
groupBySchema = new Schema(groupByFss);
if(1 == arity) {
byte groupByType = getAtomicGroupByType();
Schema groupSchema = groupByFss.get(0).schema;
Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", groupSchema, groupByType);
setFieldSchemaParent(groupByFs, positionOperators, 0);
fss.add(groupByFs);
} else {
Schema mergedGroupSchema = getTupleGroupBySchema();
if(mergedGroupSchema.size() != groupBySchema.size()) {
mSchema = null;
mIsSchemaComputed = false;
int errCode = 2000;
String msg = "Internal error. Mismatch in group by arities. Expected: " + mergedGroupSchema + ". Found: " + groupBySchema;
throw new FrontendException(msg, errCode, PigException.BUG, false, null);
} else {
for(int i = 0; i < mergedGroupSchema.size(); ++i) {
Schema.FieldSchema mergedFs = mergedGroupSchema.getField(i);
Schema.FieldSchema groupFs = groupBySchema.getField(i);
mergedFs.alias = groupFs.alias;
mergedGroupSchema.addAlias(mergedFs.alias, mergedFs);
}
}
Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", mergedGroupSchema);
fss.add(groupByFs);
for(int i = 0; i < arity; ++i) {
setFieldSchemaParent(groupByFs, positionOperators, i);
}
}
for (LogicalOperator op : inputs) {
try {
Schema.FieldSchema bagFs = new Schema.FieldSchema(op.getAlias(),
op.getSchema(), DataType.BAG);
fss.add(bagFs);
setFieldSchemaParent(bagFs, op);
} catch (FrontendException ioe) {
mIsSchemaComputed = false;
mSchema = null;
throw ioe;
}
}
mIsSchemaComputed = true;
mSchema = new Schema(fss);
mType = DataType.BAG;//mType is from the super class
}
return mSchema;
}