*
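* @param subQuery the subquery for which the number of partitions is calculated
* @param channel a data channel; TODO(review): confirm how it is used by this method
* @return the determined number of partitions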
*/
public static int calculatePartitionNum(SubQuery subQuery, DataChannel channel) {
TajoConf conf = subQuery.context.getConf();
MasterPlan masterPlan = subQuery.getMasterPlan();
ExecutionBlock parent = masterPlan.getParent(subQuery.getBlock());
GroupbyNode grpNode = null;
if (parent != null) {
grpNode = PlannerUtil.findTopNode(parent.getPlan(), NodeType.GROUP_BY);
}
// Is this subquery the first step of join?
if (parent != null && parent.getScanNodes().length == 2) {
List<ExecutionBlock> childs = masterPlan.getChilds(parent);
// for outer
ExecutionBlock outer = childs.get(0);
long outerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, outer);
// for inner
ExecutionBlock inner = childs.get(1);
long innerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, inner);
LOG.info("Outer volume: " + Math.ceil((double)outerVolume / 1048576));
LOG.info("Inner volume: " + Math.ceil((double)innerVolume / 1048576));
long smaller = Math.min(outerVolume, innerVolume);
int mb = (int) Math.ceil((double)smaller / 1048576);
LOG.info("Smaller Table's volume is approximately " + mb + " MB");
// determine the number of tasks
int taskNum = (int) Math.ceil((double)mb /
conf.getIntVar(ConfVars.DIST_QUERY_JOIN_PARTITION_VOLUME));
LOG.info("The determined number of join partitions is " + taskNum);
return taskNum;
// Is this subquery the first step of group-by?
} else if (grpNode != null) {
if (grpNode.getGroupingColumns().length == 0) {
return 1;
} else {
long volume = getInputVolume(subQuery.masterPlan, subQuery.context, subQuery.block);
int mb = (int) Math.ceil((double)volume / 1048576);
LOG.info("Table's volume is approximately " + mb + " MB");
// determine the number of tasks
int taskNum = (int) Math.ceil((double)mb /
conf.getIntVar(ConfVars.DIST_QUERY_GROUPBY_PARTITION_VOLUME));
LOG.info("The determined number of aggregation partitions is " + taskNum);
return taskNum;
}
} else {
LOG.info("============>>>>> Unexpected Case! <<<<<================");