// Outer list corresponds to join predicates and inner list corresponds to type of keys for each predicate.
List<List<Byte>> keyTypes = new ArrayList<List<Byte>>();
for (int i=0; i<inputs.size(); i++) {
Operator op = inputs.get(i);
if( ! ( op instanceof LogicalRelationalOperator ) ) {
continue;
}
LogicalRelationalOperator lop = (LogicalRelationalOperator)op;
PhysicalOperator physOp = logToPhyMap.get(op);
inp.add(physOp);
List<LogicalExpressionPlan> plans = (List<LogicalExpressionPlan>) loj.getJoinPlan(i);
// Convert the expression plan into physical Plan
List<PhysicalPlan> exprPlans = translateExpressionPlans(loj, plans);
ppLists.add(exprPlans);
joinPlans.put(physOp, exprPlans);
// Key could potentially be a tuple. So, we visit all exprPlans to get types of members of tuples.
List<Byte> tupleKeyMemberTypes = new ArrayList<Byte>();
for(PhysicalPlan exprPlan : exprPlans)
tupleKeyMemberTypes.add(exprPlan.getLeaves().get(0).getResultType());
keyTypes.add(tupleKeyMemberTypes);
}
if (loj.getJoinType() == LOJoin.JOINTYPE.SKEWED) {
POSkewedJoin skj;
try {
skj = new POSkewedJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),loj.getRequestedParallelisam(),
inp, loj.getInnerFlags());
skj.setAlias(loj.getAlias());
skj.setJoinPlans(joinPlans);
}
catch (Exception e) {
int errCode = 2015;
String msg = "Skewed Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
skj.setResultType(DataType.TUPLE);
boolean[] innerFlags = loj.getInnerFlags();
for (int i=0; i < inputs.size(); i++) {
LogicalRelationalOperator op = (LogicalRelationalOperator) inputs.get(i);
if (!innerFlags[i]) {
try {
LogicalSchema s = op.getSchema();
// if the schema cannot be determined
if (s == null) {
throw new FrontendException();
}
skj.addSchema(translateSchema(s));
} catch (FrontendException e) {
int errCode = 2015;
String msg = "Couldn't set the schema for outer join" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
} else {
// This will never be retrieved. It just guarantees that the index will be valid when
// MRCompiler is trying to read the schema
skj.addSchema(null);
}
}
currentPlan.add(skj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), skj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, skj);
}
else if(loj.getJoinType() == LOJoin.JOINTYPE.REPLICATED) {
int fragment = 0;
POFRJoin pfrj;
try {
boolean []innerFlags = loj.getInnerFlags();
boolean isLeftOuter = false;
// We don't check for bounds issues as we assume that a join
// involves at least two inputs
isLeftOuter = !innerFlags[1];
Tuple nullTuple = null;
if( isLeftOuter ) {
try {
// We know that a left outer join is only ever a two-way
// join, so we assume an index of 1 for the right input
LogicalSchema inputSchema = ((LogicalRelationalOperator)inputs.get(1)).getSchema();
// We check if we have a schema before the join
if(inputSchema == null) {
int errCode = 1109;
String msg = "Input (" + ((LogicalRelationalOperator) inputs.get(1)).getAlias() + ") " +
"on which outer join is desired should have a valid schema";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.INPUT);
}
// Using the schema we decide the number of columns/fields
// in the nullTuple
nullTuple = TupleFactory.getInstance().newTuple(inputSchema.size());
for(int j = 0; j < inputSchema.size(); j++) {
nullTuple.set(j, null);
}
} catch( FrontendException e ) {
int errCode = 2104;
String msg = "Error while determining the schema of input";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
pfrj = new POFRJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),loj.getRequestedParallelisam(),
inp, ppLists, keyTypes, null, fragment, isLeftOuter, nullTuple);
pfrj.setAlias(loj.getAlias());
} catch (ExecException e1) {
int errCode = 2058;
String msg = "Unable to set index on newly create POLocalRearrange.";
throw new VisitorException(msg, errCode, PigException.BUG, e1);
}
pfrj.setResultType(DataType.TUPLE);
currentPlan.add(pfrj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), pfrj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, pfrj);
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.MERGE && validateMergeJoin(loj)) {
POMergeJoin smj;
try {
smj = new POMergeJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),loj.getRequestedParallelisam(),inp,joinPlans,keyTypes);
}
catch (Exception e) {
int errCode = 2042;
String msg = "Merge Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
smj.setResultType(DataType.TUPLE);
currentPlan.add(smj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), smj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, smj);
return;
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
POGlobalRearrange poGlobal = new POGlobalRearrange(new OperatorKey(
scope, nodeGen.getNextNodeId(scope)), loj
.getRequestedParallelisam());
poGlobal.setAlias(loj.getAlias());
POPackage poPackage = new POPackage(new OperatorKey(scope, nodeGen
.getNextNodeId(scope)), loj.getRequestedParallelisam());
poPackage.setAlias(loj.getAlias());
currentPlan.add(poGlobal);
currentPlan.add(poPackage);
int count = 0;
Byte type = null;
try {
currentPlan.connect(poGlobal, poPackage);
for (int i=0; i<inputs.size(); i++) {
Operator op = inputs.get(i);
List<LogicalExpressionPlan> plans =
(List<LogicalExpressionPlan>) loj.getJoinPlan(i);
POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
scope, nodeGen.getNextNodeId(scope)), loj
.getRequestedParallelisam());
List<PhysicalPlan> exprPlans = translateExpressionPlans(loj, plans);
// currentPlans.push(currentPlan);
// for (LogicalExpressionPlan lp : plans) {
// currentPlan = new PhysicalPlan();
// PlanWalker childWalker = currentWalker
// .spawnChildWalker(lp);
// pushWalker(childWalker);
// //currentWalker.walk(this);
// currentWalker.walk(
// new ExpToPhyTranslationVisitor(currentWalker.getPlan(),
// childWalker) );
// exprPlans.add(currentPlan);
// popWalker();
//
// }
// currentPlan = currentPlans.pop();
try {
physOp.setPlans(exprPlans);
} catch (PlanException pe) {
int errCode = 2071;
String msg = "Problem with setting up local rearrange's plans.";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, pe);
}
try {
physOp.setIndex(count++);
} catch (ExecException e1) {
int errCode = 2058;
String msg = "Unable to set index on newly create POLocalRearrange.";
throw new VisitorException(msg, errCode, PigException.BUG, e1);
}
if (plans.size() > 1) {
type = DataType.TUPLE;
physOp.setKeyType(type);
} else {
type = exprPlans.get(0).getLeaves().get(0).getResultType();
physOp.setKeyType(type);
}
physOp.setResultType(DataType.TUPLE);
currentPlan.add(physOp);
try {
currentPlan.connect(logToPhyMap.get(op), physOp);
currentPlan.connect(physOp, poGlobal);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
} catch (PlanException e1) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
}
poPackage.setKeyType(type);
poPackage.setResultType(DataType.TUPLE);
poPackage.setNumInps(count);
boolean[] innerFlags = loj.getInnerFlags();
poPackage.setInner(innerFlags);
List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
List<Boolean> flattenLst = new ArrayList<Boolean>();
try{
for(int i=0;i< count;i++){
PhysicalPlan fep1 = new PhysicalPlan();
POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
loj.getRequestedParallelisam(), i+1); //i+1 since the first column is the "group" field
feproj1.setAlias(loj.getAlias());
feproj1.setResultType(DataType.BAG);
feproj1.setOverloaded(false);
fep1.add(feproj1);
fePlans.add(fep1);
// the parser would have marked the side
// where we need to keep empty bags on
// non matched as outer (innerFlags[i] would be
// false)
if(!(innerFlags[i])) {
Operator joinInput = inputs.get(i);
// for outer join add a bincond
// which will project nulls when bag is
// empty
updateWithEmptyBagCheck(fep1, joinInput);
}