Package org.apache.tajo.engine.planner.global

Examples of org.apache.tajo.engine.planner.global.ExecutionBlock


      SubQueryEvent, SubQueryState> {

    @Override
    public SubQueryState transition(SubQuery subQuery, SubQueryEvent subQueryEvent) {
      subQuery.setStartTime();
      ExecutionBlock execBlock = subQuery.getBlock();
      SubQueryState state;

      try {
        // Union operator does not require actual query processing. It is performed logically.
        if (execBlock.hasUnion()) {
          subQuery.finalizeStats();
          state = SubQueryState.SUCCEEDED;
        } else {
          ExecutionBlock parent = subQuery.getMasterPlan().getParent(subQuery.getBlock());
          DataChannel channel = subQuery.getMasterPlan().getChannel(subQuery.getId(), parent.getId());
          setShuffleIfNecessary(subQuery, channel);
          initTaskScheduler(subQuery);
          schedule(subQuery);
          subQuery.totalScheduledObjectsCount = subQuery.getTaskScheduler().remainingScheduledObjectNum();
          LOG.info(subQuery.totalScheduledObjectsCount + " objects are scheduled");
View Full Code Here


     * @return
     */
    public static int calculateShuffleOutputNum(SubQuery subQuery, DataChannel channel) {
      TajoConf conf = subQuery.context.getConf();
      MasterPlan masterPlan = subQuery.getMasterPlan();
      ExecutionBlock parent = masterPlan.getParent(subQuery.getBlock());

      GroupbyNode grpNode = null;
      if (parent != null) {
        grpNode = PlannerUtil.findMostBottomNode(parent.getPlan(), NodeType.GROUP_BY);
      }

      // Is this subquery the first step of join?
      if (parent != null && parent.getScanNodes().length == 2) {
        List<ExecutionBlock> childs = masterPlan.getChilds(parent);

        // for outer
        ExecutionBlock outer = childs.get(0);
        long outerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, outer);

        // for inner
        ExecutionBlock inner = childs.get(1);
        long innerVolume = getInputVolume(subQuery.masterPlan, subQuery.context, inner);
        LOG.info(subQuery.getId() + ", Outer volume: " + Math.ceil((double) outerVolume / 1048576) + "MB, "
            + "Inner volume: " + Math.ceil((double) innerVolume / 1048576) + "MB");

        long bigger = Math.max(outerVolume, innerVolume);

        int mb = (int) Math.ceil((double) bigger / 1048576);
        LOG.info(subQuery.getId() + ", Bigger Table's volume is approximately " + mb + " MB");

        int taskNum = (int) Math.ceil((double) mb /
            conf.getIntVar(ConfVars.DIST_QUERY_JOIN_PARTITION_VOLUME));

        int totalMem = getClusterTotalMemory(subQuery);
        LOG.info(subQuery.getId() + ", Total memory of cluster is " + totalMem + " MB");
        int slots = Math.max(totalMem / conf.getIntVar(ConfVars.TASK_DEFAULT_MEMORY), 1);

        // determine the number of task
        taskNum = Math.min(taskNum, slots);
        LOG.info(subQuery.getId() + ", The determined number of join partitions is " + taskNum);

        // The shuffle output numbers of join may be inconsistent by execution block order.
        // Thus, we need to compare the number with DataChannel output numbers.
        // If the number is right, the number and DataChannel output numbers will be consistent.
        int outerShuffleOutptNum = 0, innerShuffleOutputNum = 0;
        for (DataChannel eachChannel : masterPlan.getOutgoingChannels(outer.getId())) {
          outerShuffleOutptNum = Math.max(outerShuffleOutptNum, eachChannel.getShuffleOutputNum());
        }

        for (DataChannel eachChannel : masterPlan.getOutgoingChannels(inner.getId())) {
          innerShuffleOutputNum = Math.max(innerShuffleOutputNum, eachChannel.getShuffleOutputNum());
        }

        if (outerShuffleOutptNum != innerShuffleOutputNum
            && taskNum != outerShuffleOutptNum
View Full Code Here

      }
    }

    private static void schedule(SubQuery subQuery) throws IOException {
      MasterPlan masterPlan = subQuery.getMasterPlan();
      ExecutionBlock execBlock = subQuery.getBlock();
      if (subQuery.getMasterPlan().isLeaf(execBlock.getId()) && execBlock.getScanNodes().length == 1) { // Case 1: Just Scan
        scheduleFragmentsForLeafQuery(subQuery);
      } else if (execBlock.getScanNodes().length > 1) { // Case 2: Join
        Repartitioner.scheduleFragmentsForJoinQuery(subQuery.schedulerContext, subQuery);
      } else { // Case 3: Others (Sort or Aggregation)
        int numTasks = getNonLeafTaskNum(subQuery);
        Repartitioner.scheduleFragmentsForNonLeafTasks(subQuery.schedulerContext, masterPlan, subQuery, numTasks);
      }
View Full Code Here

        return aggregatedVolume;
      }
    }

    public static void allocateContainers(SubQuery subQuery) {
      ExecutionBlock execBlock = subQuery.getBlock();

      //TODO consider disk slot
      int requiredMemoryMBPerTask = 512;

      int numRequest = subQuery.getContext().getResourceAllocator().calculateNumRequestContainers(
View Full Code Here

              subQuery.masterPlan.isLeaf(execBlock), 0.0f);
      subQuery.eventHandler.handle(event);
    }

    private static void scheduleFragmentsForLeafQuery(SubQuery subQuery) throws IOException {
      ExecutionBlock execBlock = subQuery.getBlock();
      ScanNode[] scans = execBlock.getScanNodes();
      Preconditions.checkArgument(scans.length == 1, "Must be Scan Query");
      ScanNode scan = scans[0];
      TableDesc table = subQuery.context.getTableDescMap().get(scan.getCanonicalName());

      Collection<FileFragment> fragments;
View Full Code Here

  private boolean checkIfInterQuery(MasterPlan masterPlan, ExecutionBlock block) {
    if (masterPlan.isRoot(block)) {
      return false;
    }

    ExecutionBlock parent = masterPlan.getParent(block);
    if (masterPlan.isRoot(parent) && parent.hasUnion()) {
      return false;
    }

    return true;
  }
View Full Code Here

  private final static String UNKNOWN_HOST = "unknown";

  public static void scheduleFragmentsForJoinQuery(TaskSchedulerContext schedulerContext, SubQuery subQuery)
      throws IOException {
    MasterPlan masterPlan = subQuery.getMasterPlan();
    ExecutionBlock execBlock = subQuery.getBlock();
    QueryMasterTask.QueryMasterTaskContext masterContext = subQuery.getContext();
    AbstractStorageManager storageManager = subQuery.getStorageManager();

    ScanNode[] scans = execBlock.getScanNodes();

    Path tablePath;
    FileFragment[] fragments = new FileFragment[scans.length];
    long[] stats = new long[scans.length];

    // initialize variables from the child operators
    for (int i = 0; i < scans.length; i++) {
      TableDesc tableDesc = masterContext.getTableDescMap().get(scans[i].getCanonicalName());
      if (tableDesc == null) { // if it is a real table stored on storage
        // TODO - to be fixed (wrong directory)
        ExecutionBlock [] childBlocks = new ExecutionBlock[2];
        childBlocks[0] = masterPlan.getChild(execBlock.getId(), 0);
        childBlocks[1] = masterPlan.getChild(execBlock.getId(), 1);

        tablePath = storageManager.getTablePath(scans[i].getTableName());
        stats[i] = masterContext.getSubQuery(childBlocks[i].getId()).getResultStats().getNumBytes();
        fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
      } else {
        tablePath = tableDesc.getPath();
        try {
          stats[i] = GlobalPlanner.computeDescendentVolume(scans[i]);
        } catch (PlanningException e) {
          throw new IOException(e);
        }

        // if table has no data, storageManager will return empty FileFragment.
        // So, we need to handle FileFragment by its size.
        // If we don't check its size, it can cause IndexOutOfBoundsException.
        List<FileFragment> fileFragments = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), tablePath);
        if (fileFragments.size() > 0) {
          fragments[i] = fileFragments.get(0);
        } else {
          fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
        }
      }
    }

    // If one of inner join tables has no input data,
    // it should return zero rows.
    JoinNode joinNode = PlannerUtil.findMostBottomNode(execBlock.getPlan(), NodeType.JOIN);
    if (joinNode != null) {
      if ( (joinNode.getJoinType().equals(JoinType.INNER))) {
        for (int i = 0; i < stats.length; i++) {
          if (stats[i] == 0) {
            return;
          }
        }
      }
    }

    // Assigning either fragments or fetch urls to query units
    boolean isAllBroadcastTable = true;
    int baseScanIdx = -1;
    for (int i = 0; i < scans.length; i++) {
      if (!execBlock.isBroadcastTable(scans[i].getCanonicalName())) {
        isAllBroadcastTable = false;
        baseScanIdx = i;
      }
    }

    if (isAllBroadcastTable) {
      LOG.info("[Distributed Join Strategy] : Immediate " +  fragments.length + " Way Join on Single Machine");
      SubQuery.scheduleFragment(subQuery, fragments[0], Arrays.asList(Arrays.copyOfRange(fragments, 1, fragments.length)));
      schedulerContext.setEstimatedTaskNum(1);
    } else if (!execBlock.getBroadcastTables().isEmpty()) {
      LOG.info(String.format("[Distributed Join Strategy] : Broadcast Join, base_table=%s, base_volume=%d",
          scans[baseScanIdx].getCanonicalName(), stats[baseScanIdx]));
      scheduleLeafTasksWithBroadcastTable(schedulerContext, subQuery, baseScanIdx, fragments);
    } else {
      LOG.info("[Distributed Join Strategy] : Symmetric Repartition Join");
View Full Code Here

    return fragments;
  }

  private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext schedulerContext, SubQuery subQuery,
                                                          int baseScanId, FileFragment[] fragments) throws IOException {
    ExecutionBlock execBlock = subQuery.getBlock();
    ScanNode[] scans = execBlock.getScanNodes();
    //Preconditions.checkArgument(scans.length == 2, "Must be Join Query");

    for (int i = 0; i < scans.length; i++) {
      if (i != baseScanId) {
        scans[i].setBroadcastTable(true);
View Full Code Here

  }

  public static void scheduleRangeShuffledFetches(TaskSchedulerContext schedulerContext, MasterPlan masterPlan,
                                                  SubQuery subQuery, DataChannel channel, int maxNum)
      throws IOException {
    ExecutionBlock execBlock = subQuery.getBlock();
    ScanNode scan = execBlock.getScanNodes()[0];
    Path tablePath;
    tablePath = subQuery.getContext().getStorageManager().getTablePath(scan.getTableName());

    ExecutionBlock sampleChildBlock = masterPlan.getChild(subQuery.getId(), 0);
    SortNode sortNode = PlannerUtil.findTopNode(sampleChildBlock.getPlan(), NodeType.SORT);
    SortSpec [] sortSpecs = sortNode.getSortKeys();
    Schema sortSchema = new Schema(channel.getShuffleKeys());

    // calculate the number of maximum query ranges
    TableStats totalStat = computeChildBlocksStats(subQuery.getContext(), masterPlan, subQuery.getId());
View Full Code Here

  }

  public static void scheduleHashShuffledFetches(TaskSchedulerContext schedulerContext, MasterPlan masterPlan,
                                                 SubQuery subQuery, DataChannel channel,
                                                 int maxNum) {
    ExecutionBlock execBlock = subQuery.getBlock();
    TableStats totalStat = computeChildBlocksStats(subQuery.getContext(), masterPlan, subQuery.getId());

    if (totalStat.getNumRows() == 0) {
      return;
    }

    ScanNode scan = execBlock.getScanNodes()[0];
    Path tablePath;
    tablePath = subQuery.getContext().getStorageManager().getTablePath(scan.getTableName());

    FileFragment frag = new FileFragment(scan.getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
    List<FileFragment> fragments = new ArrayList<FileFragment>();
View Full Code Here

TOP

Related Classes of org.apache.tajo.engine.planner.global.ExecutionBlock

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.