// Take the union operator's own schema as the merged schema that the
// inputs are compared against and projected onto further down.
try {
    mergedSchema = loUnion.getSchema();
} catch (FrontendException fe) {
    // Re-throw as a union-onschema failure with error code 1116 and
    // source PigException.INPUT, keeping the original exception as cause.
    throw new UnionOnSchemaSetException(
        "Error creating merged schemas for union-onschema operator : "
            + fe.getMessage(),
        1116,
        PigException.INPUT,
        fe);
}
// Build the user-defined schema list that is handed to LOForEach below:
// one single-field schema per column of the merged schema.
ArrayList<Schema> mergedSchemaList = new ArrayList<Schema>();
for (Schema.FieldSchema mergedField : mergedSchema.getFields()) {
    // Only the alias is carried over. The type is deliberately NULL
    // because it will be set by the TypeChecking visitors.
    Schema.FieldSchema untypedField =
        new Schema.FieldSchema(mergedField.alias, DataType.NULL);
    mergedSchemaList.add(new Schema(untypedField));
}
// For every input whose schema differs from the merged schema, insert a
// foreach between that input and the union. The foreach generates one
// expression per merged column: a projection of the matching input column,
// or a null constant when the input has no such column.
for (LogicalOperator input : preds) {
    try {
        if (input.getSchema().equals(mergedSchema)) {
            // Schema already matches the merged schema; no foreach needed.
            continue;
        }

        final int columnCount = mergedSchema.size();
        ArrayList<LogicalPlan> columnPlans =
            new ArrayList<LogicalPlan>(columnCount);
        ArrayList<Boolean> flattenFlags =
            new ArrayList<Boolean>(columnCount);
        String unionScope = loUnion.getOperatorKey().getScope();

        for (Schema.FieldSchema mergedField : mergedSchema.getFields()) {
            LogicalPlan columnPlan = new LogicalPlan();
            Schema inputSchema = input.getSchema();
            // None of the generated columns is flattened.
            flattenFlags.add(Boolean.FALSE);
            int inputPos = inputSchema.getPositionSubName(mergedField.alias);

            LogicalOperator columnSource;
            boolean needsCast;
            if (inputPos != -1) {
                // The input has this column: project it by position.
                OperatorKey projectKey = new OperatorKey(
                    unionScope,
                    NodeIdGenerator.getGenerator().getNextNodeId(unionScope));
                columnSource =
                    new LOProject(columnPlan, projectKey, input, inputPos);
                // A cast is needed when the types differ. Compatibility of
                // the types was already checked while the merged schema
                // was being created.
                Schema.FieldSchema inputField =
                    inputSchema.getFieldSubNameMatch(mergedField.alias);
                needsCast = inputField.type != mergedField.type;
            } else {
                // The column is absent from this input: project null.
                // NOTE(review): this constant is constructed with mPlan,
                // whereas LOProject and LOCast are constructed with the
                // per-column plan -- confirm that this is intentional.
                columnSource =
                    new LOConst(mPlan, getNextId(unionScope), null);
                // The null needs a cast unless the merged type is BYTEARRAY.
                needsCast = mergedField.type != DataType.BYTEARRAY;
            }

            columnPlan.add(columnSource);
            if (needsCast) {
                // Chain a cast to the merged column type after the source.
                LOCast castOp = new LOCast(
                    columnPlan, getNextId(unionScope), mergedField.type);
                castOp.setFieldSchema(mergedField);
                columnPlan.add(castOp);
                columnPlan.connect(columnSource, castOp);
            }
            columnPlans.add(columnPlan);
        }

        // Splice the new foreach in between this input and the union.
        LogicalOperator projection = new LOForEach(
            mPlan,
            getNextId(unionScope),
            columnPlans,
            flattenFlags,
            mergedSchemaList);
        mPlan.add(projection);
        mPlan.insertBetween(input, projection, loUnion);
    } catch (FrontendException fe) {
        // Wrap plan/schema failures, attaching the original as the cause.
        UnionOnSchemaSetException wrapped = new UnionOnSchemaSetException(
            "Error adding union operator " + loUnion.getAlias()
                + ":" + fe.getMessage());
        wrapped.initCause(fe);
        throw wrapped;
    }
}