// Translate what was recorded for this load into the RequiredFieldList that
// is handed to the loader. Judging by how the pair is used below, `first`
// maps a column index to the map keys needed from that column and `second`
// holds the indexes of columns needed as a whole.
Pair<Map<Integer,Set<String>>,Set<Integer>> required = requiredItems.get(load);
RequiredFieldList requiredFields = new RequiredFieldList();
LogicalSchema s = load.getSchema();
for (int col = 0; col < s.size(); col++) {
    // Earlier processing is assumed to guarantee that a column never shows
    // up for both the ColumnPruner and the MapPruner, so the two checks are
    // deliberately independent rather than chained with else.
    if (required.first != null && required.first.containsKey(col)) {
        // Column of which only some map keys are needed: describe the column
        // and attach one sub-field per requested key.
        RequiredField mapField = new RequiredField();
        mapField.setIndex(col);
        mapField.setType(s.getField(col).type);
        List<RequiredField> keyFields = new ArrayList<RequiredField>();
        for (String mapKey : required.first.get(col)) {
            keyFields.add(new RequiredField(mapKey, -1, null, DataType.BYTEARRAY));
        }
        mapField.setSubFields(keyFields);
        requiredFields.add(mapField);
    }
    if (required.second != null && required.second.contains(col)) {
        // Column needed as a whole: index and type only, no sub-fields.
        RequiredField wholeField = new RequiredField();
        wholeField.setIndex(col);
        wholeField.setType(s.getField(col).type);
        requiredFields.add(wholeField);
    }
}
// Report the outcome of pruning for this loader.
log.info("Loader for " + load.getAlias() + " is pruned. Load fields " + requiredFields);
// Then report, field by field, the map keys requested from it; fields that
// carry no sub-field list are skipped.
for (RequiredField field : requiredFields.getFields()) {
    List<RequiredField> mapKeys = field.getSubFields();
    if (mapKeys == null) {
        continue;
    }
    log.info("Map key required for " + load.getAlias() + ": $" + field.getIndex() + "->" + mapKeys);
}
// Offer the projection to the loader. `response` stays null when the load
// function does not implement LoadPushDown or when pushProjection throws.
LoadPushDown.RequiredFieldResponse response = null;
try {
    LoadFunc loadFunc = load.getLoadFunc();
    if (loadFunc instanceof LoadPushDown) {
        response = ((LoadPushDown)loadFunc).pushProjection(requiredFields);
    }
} catch (FrontendException e) {
    // Best effort: a failing push-down is not fatal, so carry on with a null
    // response. Pass the exception to the logger so that its cause and stack
    // trace are not lost.
    log.warn("pushProjection on "+load+" throw an exception, skip it", e);
}
if (columnPrune) {
    if (response != null && response.getRequiredFieldResponse()) {
        // columns are pruned by the loader, reset schema for LOLoader so that
        // it lists only the required fields, in required-field order
        LogicalSchema prunedSchema = new LogicalSchema();
        for (LoadPushDown.RequiredField kept : requiredFields.getFields()) {
            prunedSchema.addField(s.getField(kept.getIndex()));
        }
        load.setScriptSchema(prunedSchema);
    } else {
        // Loader does not support column pruning (no response, or a negative
        // one), insert foreach
        LogicalPlan basePlan = (LogicalPlan)load.getPlan();
        Operator successor = basePlan.getSuccessors(load).get(0);
        // if there is already a LOForEach after load, we don't need to
        // add another LOForEach
        if (successor instanceof LOForEach) {
            return;
        }
        LOForEach foreach = new LOForEach(load.getPlan());
        // add foreach to the base plan, splicing it between the load and its
        // former successor at the positions the old edge occupied
        basePlan.add(foreach);
        Pair<Integer,Integer> edgePositions = basePlan.disconnect(load, successor);
        basePlan.connect(load, edgePositions.first.intValue(), foreach, 0);
        basePlan.connect(foreach, 0, successor, edgePositions.second.intValue());
        // add foreach to the subplan
        subPlan.add(foreach);
        LogicalPlan innerPlan = new LogicalPlan();
        foreach.setInnerPlan(innerPlan);
        // build foreach inner plan: a single generate fed by one inner load
        // per required field
        List<LoadPushDown.RequiredField> fields = requiredFields.getFields();
        int fieldCount = fields.size();
        List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();
        // NOTE(review): the all-false boolean array is presumably the
        // per-output flatten flags -- confirm against LOGenerate.
        LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[fieldCount]);
        innerPlan.add(gen);
        for (int pos = 0; pos < fieldCount; pos++) {
            LoadPushDown.RequiredField field = fields.get(pos);
            // the inner load reads the column at its index in the load's schema
            LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, field.getIndex());
            innerLoad.getProjection().setUid(foreach);
            innerPlan.add(innerLoad);
            innerPlan.connect(innerLoad, gen);
            // the generate expression is built from the position in the
            // required-field list, not from the original column index
            LogicalExpressionPlan exp = new LogicalExpressionPlan();
            ProjectExpression prj = new ProjectExpression(exp, field.getType(), pos, 0);
            prj.setUid(gen);
            exp.add(prj);
            exps.add(exp);
        }
    }
}