}
try {
planFs = ((ExpressionOperator)op).getFieldSchema();
log.debug("planFs: " + planFs);
Schema userDefinedSchema = null;
if(null != mUserDefinedSchema) {
userDefinedSchema = mUserDefinedSchema.get(planCtr);
}
if(null != planFs) {
String outerCanonicalAlias = op.getAlias();
if(null == outerCanonicalAlias) {
outerCanonicalAlias = planFs.alias;
}
log.debug("Outer canonical alias: " + outerCanonicalAlias);
if(mFlatten.get(planCtr)) {
//need to extract the children and create the aliases
//assumption here is that flatten is only for one column
//i.e., flatten(A), flatten(A.x) and NOT
//flatten(B.(x,y,z))
Schema s = planFs.schema;
if(null != s && s.isTwoLevelAccessRequired()) {
// this is the case where the schema is that of
// a bag which has just one tuple fieldschema which
// in turn has a list of fieldschemas. The schema
// after flattening would consist of the fieldSchemas
// present in the tuple
// check that indeed we only have one field schema
// which is that of a tuple
if(s.getFields().size() != 1) {
int errCode = 1008;
String msg = "Expected a bag schema with a single " +
"element of type "+ DataType.findTypeName(DataType.TUPLE) +
" but got a bag schema with multiple elements.";
throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
}
Schema.FieldSchema tupleFS = s.getField(0);
if(tupleFS.type != DataType.TUPLE) {
int errCode = 1009;
String msg = "Expected a bag schema with a single " +
"element of type "+ DataType.findTypeName(DataType.TUPLE) +
" but got an element of type " +
DataType.findTypeName(tupleFS.type);
throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
}
s = tupleFS.schema;
}
if(null != s && s.size()!=0) {
for(int i = 0; i < s.size(); ++i) {
Schema.FieldSchema fs;
fs = Schema.FieldSchema.copyAndLink(s.getField(i), op);
log.debug("fs: " + fs);
if(null != userDefinedSchema) {
Schema.FieldSchema userDefinedFieldSchema;
try {
if(i < userDefinedSchema.size()) {
userDefinedFieldSchema = userDefinedSchema.getField(i);
fs = fs.mergePrefixFieldSchema(userDefinedFieldSchema);
}
} catch (SchemaMergeException sme) {
int errCode = 1016;
String msg = "Problems in merging user defined schema";
throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
}
outerCanonicalAlias = null;
}
String innerCanonicalAlias = fs.alias;
Schema.FieldSchema newFs;
if((null != outerCanonicalAlias) && (null != innerCanonicalAlias)) {
String disambiguatorAlias = outerCanonicalAlias + "::" + innerCanonicalAlias;
newFs = new Schema.FieldSchema(disambiguatorAlias, fs.schema, fs.type);
newFs.setParent(s.getField(i).canonicalName, op);
fss.add(newFs);
mSchemaPlanMapping.add(plan);
updateAliasCount(aliases, disambiguatorAlias);
//it's fine if there are duplicates
//we just need to record if its due to
//flattening
} else {
newFs = new Schema.FieldSchema(fs);
newFs.setParent(s.getField(i).canonicalName, op);
fss.add(newFs);
mSchemaPlanMapping.add(plan);
}
updateAliasCount(aliases, innerCanonicalAlias);
flattenAlias.put(newFs, innerCanonicalAlias);
inverseFlattenAlias.put(innerCanonicalAlias, true);
}
} else {
Schema.FieldSchema newFs;
if(null != userDefinedSchema) {
if(!DataType.isSchemaType(planFs.type)) {
if(userDefinedSchema.size() > 1) {
int errCode = 1017;
String msg = "Schema mismatch. A basic type on flattening cannot have more than one column. User defined schema: " + userDefinedSchema;
throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
}
newFs = new Schema.FieldSchema(null, planFs.type);
try {
newFs = newFs.mergePrefixFieldSchema(userDefinedSchema.getField(0));
} catch (SchemaMergeException sme) {
int errCode = 1016;
String msg = "Problems in merging user defined schema";
throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
}
updateAliasCount(aliases, newFs.alias);
fss.add(newFs);
mSchemaPlanMapping.add(plan);
newFs.setParent(planFs.canonicalName, op);
} else {
for(Schema.FieldSchema ufs: userDefinedSchema.getFields()) {
Schema.FieldSchema.setFieldSchemaDefaultType(ufs, DataType.BYTEARRAY);
newFs = new Schema.FieldSchema(ufs);
fss.add(newFs);
mSchemaPlanMapping.add(plan);
newFs.setParent(null, op);
updateAliasCount(aliases, ufs.alias);
}
}
} else {
if(!DataType.isSchemaType(planFs.type)) {
newFs = new Schema.FieldSchema(planFs.alias, planFs.type);
} else {
newFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
}
fss.add(newFs);
mSchemaPlanMapping.add(plan);
newFs.setParent( planFs.canonicalName, op );
}
}
} else {
//just populate the schema with the field schema of the expression operator
//check if the user has defined a schema for the operator; compare the schema
//with that of the expression operator field schema and then add it to the list
Schema.FieldSchema newFs = Schema.FieldSchema.copyAndLink(planFs, op);
if(null != userDefinedSchema) {
try {
newFs = newFs.mergePrefixFieldSchema(userDefinedSchema.getField(0));
updateAliasCount(aliases, newFs.alias);
} catch (SchemaMergeException sme) {
int errCode = 1016;
String msg = "Problems in merging user defined schema";
throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
}
}
newFs.setParent(planFs.canonicalName, op);
fss.add(newFs);
mSchemaPlanMapping.add(plan);
}
} else {
//did not get a valid list of field schemas
String outerCanonicalAlias = null;
if(null != userDefinedSchema) {
Schema.FieldSchema userDefinedFieldSchema = new Schema.FieldSchema(userDefinedSchema.getField(0));
fss.add(userDefinedFieldSchema);
mSchemaPlanMapping.add(plan);
userDefinedFieldSchema.setParent(null, op);
updateAliasCount(aliases, userDefinedFieldSchema.alias);
} else {
mSchema = null;
mIsSchemaComputed = true;
return mSchema;
}
}
} catch (FrontendException fee) {
mSchema = null;
mIsSchemaComputed = false;
throw fee;
}
}
//check for duplicate column names and throw an error if there are duplicates
//ensure that flatten gets rid of duplicate column names when the checks are
//being done
log.debug(" flattenAlias: " + flattenAlias);
log.debug(" inverseFlattenAlias: " + inverseFlattenAlias);
log.debug(" aliases: " + aliases);
log.debug(" fss.size: " + fss.size());
// Scan the alias -> occurrence-count map and collect every alias that
// occurs more than once and is not marked as flatten-generated in
// inverseFlattenAlias. Only those are reported as duplicates.
boolean duplicates = false;
Map<String, Integer> duplicateAliases = new HashMap<String, Integer>();
for(Map.Entry<String, Integer> e: aliases.entrySet()) {
Integer count = e.getValue();
if(count > 1) {//not checking for null here as counts are initialized to 1
Boolean inFlatten = false;
log.debug("inFlatten: " + inFlatten + " inverseFlattenAlias: " + inverseFlattenAlias);
// get() returns null when the alias has no entry in inverseFlattenAlias,
// which is why inFlatten is a boxed Boolean and is null-checked below
inFlatten = inverseFlattenAlias.get(e.getKey());
log.debug("inFlatten: " + inFlatten + " inverseFlattenAlias: " + inverseFlattenAlias);
// a repeated alias with no flatten marker (null or false) is a real duplicate
if((null == inFlatten) || (!inFlatten)) {
duplicates = true;
duplicateAliases.put(e.getKey(), count);
}
}
}
if(duplicates) {
    // Build one message that names every offending alias together with the
    // number of columns carrying it, e.g.
    // "Found duplicates in schema. a: 2 columns, b: 3 columns. Please alias ..."
    // Every entry, including the first, is rendered as "<alias>: <count> columns";
    // the first entry previously lost its alias name.
    StringBuilder sb = new StringBuilder("Found duplicates in schema. ");
    boolean firstEntry = true;
    for(Map.Entry<String, Integer> e: duplicateAliases.entrySet()) {
        if(!firstEntry) {
            sb.append(", ");
        }
        firstEntry = false;
        sb.append(e.getKey());
        sb.append(": ");
        sb.append(e.getValue());
        sb.append(" columns");
    }
    sb.append(". Please alias the columns with unique names.");
    String errMessage = sb.toString();
    log.debug(errMessage);
    int errCode = 1007;
    throw new FrontendException(errMessage, errCode, PigException.INPUT, false, null);
}
mSchema = new Schema(fss);
//add the aliases that are unique after flattening
for(int i=0;i<mSchema.getFields().size();i++) {
Schema.FieldSchema fs = mSchema.getFields().get(i);
String alias = flattenAlias.get(fs);
Integer count = aliases.get(alias);