String alias = loj.getAlias();
int parallel = loj.getRequestedParallelisam();
for (int i=0;i<inputs.size();i++) {
Operator op = inputs.get(i);
PhysicalOperator physOp = logToPhyMap.get(op);
inp.add(physOp);
List<LogicalExpressionPlan> plans = (List<LogicalExpressionPlan>) loj.getJoinPlan(i);
List<PhysicalPlan> exprPlans = translateExpressionPlans(loj, plans);
ppLists.add(exprPlans);
joinPlans.put(physOp, exprPlans);
// Key could potentially be a tuple. So, we visit all exprPlans to get types of members of tuples.
List<Byte> tupleKeyMemberTypes = new ArrayList<Byte>();
for(PhysicalPlan exprPlan : exprPlans)
tupleKeyMemberTypes.add(exprPlan.getLeaves().get(0).getResultType());
keyTypes.add(tupleKeyMemberTypes);
}
if (loj.getJoinType() == LOJoin.JOINTYPE.SKEWED) {
POSkewedJoin skj;
try {
skj = new POSkewedJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),
parallel,inp, innerFlags);
skj.setAlias(alias);
skj.setJoinPlans(joinPlans);
}
catch (Exception e) {
int errCode = 2015;
String msg = "Skewed Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
skj.setResultType(DataType.TUPLE);
for (int i=0; i < inputs.size(); i++) {
Operator op = inputs.get(i);
if (!innerFlags[i]) {
try {
LogicalSchema s = ((LogicalRelationalOperator)op).getSchema();
// if the schema cannot be determined
if (s == null) {
throw new FrontendException("Cannot determine skewed join schema", 2247);
}
skj.addSchema(Util.translateSchema(s));
} catch (FrontendException e) {
int errCode = 2015;
String msg = "Couldn't set the schema for outer join" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
} else {
// This will never be retrieved. It just guarantees that the index will be valid when
// MRCompiler is trying to read the schema
skj.addSchema(null);
}
}
currentPlan.add(skj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), skj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, skj);
}
else if(loj.getJoinType() == LOJoin.JOINTYPE.REPLICATED) {
int fragment = 0;
POFRJoin pfrj;
try {
boolean isLeftOuter = false;
// We dont check for bounds issue as we assume that a join
// involves atleast two inputs
isLeftOuter = !innerFlags[1];
Tuple nullTuple = null;
if( isLeftOuter ) {
try {
// We know that in a Left outer join its only a two way
// join, so we assume index of 1 for the right input
LogicalSchema inputSchema = ((LogicalRelationalOperator)inputs.get(1)).getSchema();
// We check if we have a schema before the join
if(inputSchema == null) {
int errCode = 1109;
String msg = "Input (" + ((LogicalRelationalOperator)inputs.get(1)).getAlias() + ") " +
"on which outer join is desired should have a valid schema";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.INPUT);
}
// Using the schema we decide the number of columns/fields
// in the nullTuple
nullTuple = TupleFactory.getInstance().newTuple(inputSchema.size());
for(int j = 0; j < inputSchema.size(); j++) {
nullTuple.set(j, null);
}
} catch( FrontendException e ) {
int errCode = 2104;
String msg = "Error while determining the schema of input";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
pfrj = new POFRJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),parallel,
inp, ppLists, keyTypes, null, fragment, isLeftOuter, nullTuple);
pfrj.setAlias(alias);
} catch (ExecException e1) {
int errCode = 2058;
String msg = "Unable to set index on newly create POLocalRearrange.";
throw new VisitorException(msg, errCode, PigException.BUG, e1);
}
pfrj.setResultType(DataType.TUPLE);
currentPlan.add(pfrj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), pfrj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, pfrj);
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.MERGE && validateMapSideMerge(inputs,loj.getPlan())) {
PhysicalOperator smj;
boolean usePOMergeJoin = inputs.size() == 2 && innerFlags[0] && innerFlags[1] ;
if(usePOMergeJoin){
// inner join on two sorted inputs. We have less restrictive
// implementation here in a form of POMergeJoin which doesn't
// require loaders to implement collectable interface.
try {
smj = new POMergeJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),
parallel,inp,joinPlans,keyTypes);
}
catch (PlanException e) {
int errCode = 2042;
String msg = "Merge Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
logToPhyMap.put(loj, smj);
}
else{
// in all other cases we fall back to POMergeCogroup + Flattening FEs
smj = compileToMergeCogrp(loj, loj.getExpressionPlans());
}
smj.setResultType(DataType.TUPLE);
currentPlan.add(smj);
smj.setAlias(alias);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), smj);
} catch (PlanException e) {
int errCode = 2015;