/**
 * Placeholder name used as the sole element of the {@code String[]} argument
 * (presumably the host list) when a {@code FileFragment} is constructed by hand
 * with zero offsets instead of being taken from the storage manager's splits.
 */
private final static String UNKNOWN_HOST = "unknown";
public static void scheduleFragmentsForJoinQuery(TaskSchedulerContext schedulerContext, SubQuery subQuery)
throws IOException {
MasterPlan masterPlan = subQuery.getMasterPlan();
ExecutionBlock execBlock = subQuery.getBlock();
QueryMasterTask.QueryMasterTaskContext masterContext = subQuery.getContext();
AbstractStorageManager storageManager = subQuery.getStorageManager();
ScanNode[] scans = execBlock.getScanNodes();
Path tablePath;
FileFragment[] fragments = new FileFragment[scans.length];
long[] stats = new long[scans.length];
// initialize variables from the child operators
for (int i = 0; i < scans.length; i++) {
TableDesc tableDesc = masterContext.getTableDescMap().get(scans[i].getCanonicalName());
if (tableDesc == null) { // no table descriptor: the scan reads a child execution block's output, not a stored table
// TODO - to be fixed (wrong directory)
ExecutionBlock [] childBlocks = new ExecutionBlock[2];
childBlocks[0] = masterPlan.getChild(execBlock.getId(), 0);
childBlocks[1] = masterPlan.getChild(execBlock.getId(), 1);
tablePath = storageManager.getTablePath(scans[i].getTableName());
stats[i] = masterContext.getSubQuery(childBlocks[i].getId()).getResultStats().getNumBytes();
fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
} else {
tablePath = tableDesc.getPath();
try {
stats[i] = GlobalPlanner.computeDescendentVolume(scans[i]);
} catch (PlanningException e) {
throw new IOException(e);
}
// if table has no data, storageManager will return empty FileFragment.
// So, we need to handle FileFragment by its size.
// If we don't check its size, it can cause IndexOutOfBoundsException.
List<FileFragment> fileFragments = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), tablePath);
if (fileFragments.size() > 0) {
fragments[i] = fileFragments.get(0);
} else {
fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
}
}
}
// If any input of an inner join has no data, the join produces zero rows,
// so return here without scheduling anything.
JoinNode joinNode = PlannerUtil.findMostBottomNode(execBlock.getPlan(), NodeType.JOIN);
if (joinNode != null) {
if ( (joinNode.getJoinType().equals(JoinType.INNER))) {
for (int i = 0; i < stats.length; i++) {
if (stats[i] == 0) {
return;
}
}
}
}
// Assigning either fragments or fetch urls to query units
boolean isAllBroadcastTable = true;
int baseScanIdx = -1;
for (int i = 0; i < scans.length; i++) {
if (!execBlock.isBroadcastTable(scans[i].getCanonicalName())) {
isAllBroadcastTable = false;
baseScanIdx = i;
}
}
if (isAllBroadcastTable) {
LOG.info("[Distributed Join Strategy] : Immediate " + fragments.length + " Way Join on Single Machine");
SubQuery.scheduleFragment(subQuery, fragments[0], Arrays.asList(Arrays.copyOfRange(fragments, 1, fragments.length)));
schedulerContext.setEstimatedTaskNum(1);
} else if (!execBlock.getBroadcastTables().isEmpty()) {
LOG.info(String.format("[Distributed Join Strategy] : Broadcast Join, base_table=%s, base_volume=%d",
scans[baseScanIdx].getCanonicalName(), stats[baseScanIdx]));
scheduleLeafTasksWithBroadcastTable(schedulerContext, subQuery, baseScanIdx, fragments);
} else {
LOG.info("[Distributed Join Strategy] : Symmetric Repartition Join");