skj.setJoinPlans(joinPlans);
}
catch (Exception e) {
int errCode = 2015;
String msg = "Skewed Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
skj.setResultType(DataType.TUPLE);
for (int i=0; i < inputs.size(); i++) {
Operator op = inputs.get(i);
if (!innerFlags[i]) {
try {
LogicalSchema s = ((LogicalRelationalOperator)op).getSchema();
// if the schema cannot be determined
if (s == null) {
throw new FrontendException("Cannot determine skewed join schema", 2247);
}
skj.addSchema(Util.translateSchema(s));
} catch (FrontendException e) {
int errCode = 2015;
String msg = "Couldn't set the schema for outer join" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
} else {
// This will never be retrieved. It just guarantees that the index will be valid when
// MRCompiler is trying to read the schema
skj.addSchema(null);
}
}
currentPlan.add(skj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), skj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, skj);
}
else if(loj.getJoinType() == LOJoin.JOINTYPE.REPLICATED) {
int fragment = 0;
POFRJoin pfrj;
try {
boolean isLeftOuter = false;
// We don't check for bounds issues, as we assume that a join
// involves at least two inputs
isLeftOuter = !innerFlags[1];
Tuple nullTuple = null;
if( isLeftOuter ) {
try {
// We know that a left outer join is only ever a two-way
// join, so we assume an index of 1 for the right input
LogicalSchema inputSchema = ((LogicalRelationalOperator)inputs.get(1)).getSchema();
// We check if we have a schema before the join
if(inputSchema == null) {
int errCode = 1109;
String msg = "Input (" + ((LogicalRelationalOperator)inputs.get(1)).getAlias() + ") " +
"on which outer join is desired should have a valid schema";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.INPUT);
}
// Using the schema we decide the number of columns/fields
// in the nullTuple
nullTuple = TupleFactory.getInstance().newTuple(inputSchema.size());
for(int j = 0; j < inputSchema.size(); j++) {
nullTuple.set(j, null);
}
} catch( FrontendException e ) {
int errCode = 2104;
String msg = "Error while determining the schema of input";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
pfrj = new POFRJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),parallel,
inp, ppLists, keyTypes, null, fragment, isLeftOuter, nullTuple);
pfrj.setAlias(alias);
} catch (ExecException e1) {
int errCode = 2058;
String msg = "Unable to set index on newly create POLocalRearrange.";
throw new VisitorException(msg, errCode, PigException.BUG, e1);
}
pfrj.setResultType(DataType.TUPLE);
currentPlan.add(pfrj);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), pfrj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, pfrj);
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.MERGE && validateMapSideMerge(inputs,loj.getPlan())) {
PhysicalOperator smj;
boolean usePOMergeJoin = inputs.size() == 2 && innerFlags[0] && innerFlags[1] ;
if(usePOMergeJoin){
// Inner join on two sorted inputs. We have a less restrictive
// implementation here in the form of POMergeJoin, which doesn't
// require loaders to implement the collectable interface.
try {
smj = new POMergeJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),
parallel,inp,joinPlans,keyTypes);
}
catch (PlanException e) {
int errCode = 2042;
String msg = "Merge Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
logToPhyMap.put(loj, smj);
}
else{
// in all other cases we fall back to POMergeCogroup + Flattening FEs
smj = compileToMergeCogrp(loj, loj.getExpressionPlans());
}
smj.setResultType(DataType.TUPLE);
currentPlan.add(smj);
smj.setAlias(alias);
for (Operator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), smj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
if(!usePOMergeJoin){
// Now create and configure foreach which will flatten the output
// of cogroup.
POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, inputs);
currentPlan.add(fe);
try {
currentPlan.connect(smj, fe);
} catch (PlanException e) {
throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
}
logToPhyMap.put(loj, fe);
}
return;
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans());
POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, inputs);
currentPlan.add(fe);
try {
currentPlan.connect(poPackage, fe);
} catch (PlanException e) {
throw new LogicalToPhysicalTranslatorException(e.getDetailedMessage(),
e.getErrorCode(),e.getErrorSource(),e);
}
logToPhyMap.put(loj, fe);
poPackage.setPackageType(POPackage.PackageType.JOIN);
}