Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapredLocalWork$BucketMapJoinContext


      MapJoinTableContainer[] mapJoinTables,
      MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {

    String currentInputPath = context.getCurrentInputPath().toString();
    LOG.info("******* Load from HashTable for input file: " + currentInputPath);
    MapredLocalWork localWork = context.getLocalWork();
    try {
      if (localWork.getDirectFetchOp() != null) {
        loadDirectly(mapJoinTables, currentInputPath);
      }
      Path baseDir = getBaseDir(localWork);
      if (baseDir == null) {
        return;
      }
      String fileName = localWork.getBucketFileName(currentInputPath);
      for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable() || mapJoinTables[pos] != null) {
          continue;
        }
        Path path = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte)pos, fileName);
View Full Code Here


    return null;
  }

  private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName)
      throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (directWorks == null || directWorks.isEmpty()) {
      return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
View Full Code Here

    if (StringUtils.isNotBlank(addedArchives)) {
      initializeFiles("tmparchives", addedArchives);
    }

    try{
      MapredLocalWork localwork = mWork.getMapLocalWork();
      if (localwork != null && localwork.hasStagedAlias()) {
        if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
          Path localPath = localwork.getTmpPath();
          Path hdfsPath = mWork.getTmpHDFSPath();

          FileSystem hdfs = hdfsPath.getFileSystem(job);
          FileSystem localFS = localPath.getFileSystem(job);
          FileStatus[] hashtableFiles = localFS.listStatus(localPath);
          int fileNumber = hashtableFiles.length;
          String[] fileNames = new String[fileNumber];

          for ( int i = 0; i < fileNumber; i++){
            fileNames[i] = hashtableFiles[i].getPath().getName();
          }

          //package and compress all the hashtable files to an archive file
          String stageId = this.getId();
          String archiveFileName = Utilities.generateTarFileName(stageId);
          localwork.setStageID(stageId);

          CompressionUtils.tar(localPath.toUri().getPath(), fileNames,archiveFileName);
          Path archivePath = Utilities.generateTarPath(localPath, stageId);
          LOG.info("Archive "+ hashtableFiles.length+" hash table files to " + archivePath);
View Full Code Here

      throw new HiveException(e.getMessage(), e);
    }
    int ret;
    if (localtask) {
      memoryMXBean = ManagementFactory.getMemoryMXBean();
      MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf);
      MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
      ret = ed.executeFromChildJVM(new DriverContext());

    } else {
      MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf);
View Full Code Here

          dummyOp.close(abort);
        }
      }

      if (fetchOperators != null) {
        MapredLocalWork localWork = mo.getConf().getMapLocalWork();
        for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
          Operator<? extends OperatorDesc> forwardOp = localWork
              .getAliasToWork().get(entry.getKey());
          forwardOp.close(abort);
        }
      }
View Full Code Here

      String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
      conf.setVar(ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
      // write out the plan to a local file
      Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
      OutputStream out = FileSystem.getLocal(conf).create(planPath);
      MapredLocalWork plan = getWork();
      LOG.info("Generating plan file " + planPath.toString());
      Utilities.serializePlan(plan, out, conf);

      String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
View Full Code Here

        // alias confict should not happen here.
        return;
      }
    }

    MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapLocalWork();
    MapredLocalWork childLocalWork = childMapWork.getMapLocalWork();

    if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) ||
        (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
      // Right now, we do not handle the case that either of them is bucketed.
      // We should relax this constraint with a follow-up jira.
      return;
    }

    // We need to check if the total size of local tables is under the limit.
    // At here, we are using a strong condition, which is the total size of
    // local tables used by all input paths. Actually, we can relax this condition
    // to check the total size of local tables for every input path.
    // Example:
    //               UNION_ALL
    //              /         \
    //             /           \
    //            /             \
    //           /               \
    //       MapJoin1          MapJoin2
    //      /   |   \         /   |   \
    //     /    |    \       /    |    \
    //   Big1   S1   S2    Big2   S3   S4
    // In this case, we have two MapJoins, MapJoin1 and MapJoin2. Big1 and Big2 are two
    // big tables, and S1, S2, S3, and S4 are four small tables. Hash tables of S1 and S2
    // will only be used by Map tasks processing Big1. Hash tables of S3 and S4 will only
    // be used by Map tasks processing Big2. If Big1!=Big2, we should only check if the size
    // of S1 + S2 is under the limit, and if the size of S3 + S4 is under the limit.
    // But, right now, we are checking the size of S1 + S2 + S3 + S4 is under the limit.
    // If Big1=Big2, we will only scan a path once. So, MapJoin1 and MapJoin2 will be executed
    // in the same Map task. In this case, we need to make sure the size of S1 + S2 + S3 + S4
    // is under the limit.
    if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)){
      // The total size of local tables may not be under
      // the limit after we merge mapJoinLocalWork and childLocalWork.
      // Do not merge.
      return;
    }

    TableScanOperator childMRTaskTableScanOperator =
        OperatorUtils.findSingleOperator(
            childMapWork.getAliasToWork().get(childMRAlias), TableScanOperator.class);
    if (childMRTaskTableScanOperator == null) {
      throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() +
          " operator as the work associated with alias " + childMRAlias +
          ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
    }

    List<Operator<? extends OperatorDesc>> parentsInMapJoinTask =
        mapJoinTaskFileSinkOperator.getParentOperators();
    List<Operator<? extends OperatorDesc>> childrenInChildMRTask =
        childMRTaskTableScanOperator.getChildOperators();
    if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
      // Do not merge if we do not know how to connect two operator trees.
      return;
    }

    // Step 2: Merge mapJoinTask into the Map-side of its child.
    // Step 2.1: Connect the operator trees of two MapRedTasks.
    Operator<? extends OperatorDesc> parentInMapJoinTask = parentsInMapJoinTask.get(0);
    Operator<? extends OperatorDesc> childInChildMRTask = childrenInChildMRTask.get(0);
    parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask);
    childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);

    // Step 2.2: Replace the corresponding part childMRWork's MapWork.
    GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias, mapJoinMapWork, childMapWork);

    // Step 2.3: Fill up stuff in local work
    if (mapJoinLocalWork != null) {
      if (childLocalWork == null) {
        childMapWork.setMapLocalWork(mapJoinLocalWork);
      } else {
        childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
        childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
      }
    }

    // Step 2.4: Remove this MapJoin task
    List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
View Full Code Here

    private void processCurrentTask(Task<? extends Serializable> currTask,
        ConditionalTask conditionalTask) throws SemanticException {
      // get current mapred work and its local work
      MapredWork mapredWork = (MapredWork) currTask.getWork();
      MapredLocalWork localwork = mapredWork.getMapWork().getMapLocalWork();
      if (localwork != null) {
        // get the context info and set up the shared tmp URI
        Context ctx = physicalContext.getContext();
        Path tmpPath = Utilities.generateTmpPath(ctx.getLocalTmpPath(), currTask.getId());
        localwork.setTmpPath(tmpPath);
        mapredWork.getMapWork().setTmpHDFSPath(Utilities.generateTmpPath(
          ctx.getMRTmpPath(), currTask.getId()));
        // create a task for this local work; right now, this local work is shared
        // by the original MapredTask and this new generated MapredLocalTask.
        MapredLocalTask localTask = (MapredLocalTask) TaskFactory.get(localwork, physicalContext
            .getParseContext().getConf());

        // set the backup task from curr task
        localTask.setBackupTask(currTask.getBackupTask());
        localTask.setBackupChildrenTasks(currTask.getBackupChildrenTasks());
        currTask.setBackupChildrenTasks(null);
        currTask.setBackupTask(null);

        if (currTask.getTaskTag() == Task.CONVERTED_MAPJOIN) {
          localTask.setTaskTag(Task.CONVERTED_MAPJOIN_LOCAL);
        } else {
          localTask.setTaskTag(Task.HINTED_MAPJOIN_LOCAL);
          currTask.setTaskTag(Task.HINTED_MAPJOIN);
        }
        // replace the map join operator to local_map_join operator in the operator tree
        // and return all the dummy parent
        LocalMapJoinProcCtx localMapJoinProcCtx = adjustLocalTask(localTask);
        List<Operator<? extends OperatorDesc>> dummyOps =
         localMapJoinProcCtx.getDummyParentOp();

        // create new local work and setup the dummy ops
        MapredLocalWork newLocalWork = localwork.extractDirectWorks(
            localMapJoinProcCtx.getDirectWorks());
        newLocalWork.setDummyParentOp(dummyOps);
        mapredWork.getMapWork().setMapLocalWork(newLocalWork);

        if (localwork.getAliasToFetchWork().isEmpty()) {
          // no alias to stage.. no local task
          newLocalWork.setHasStagedAlias(false);
          currTask.setBackupTask(localTask.getBackupTask());
          currTask.setBackupChildrenTasks(localTask.getBackupChildrenTasks());
          return;
        }
        newLocalWork.setHasStagedAlias(true);
        // get all parent tasks
        List<Task<? extends Serializable>> parentTasks = currTask.getParentTasks();
        currTask.setParentTasks(null);
        if (parentTasks != null) {
          for (Task<? extends Serializable> tsk : parentTasks) {
View Full Code Here

          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends OperatorDesc> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = smallTblDirs.get(small_alias);
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir, tableDescList.get(small_alias)));
      }

      newPlan.setMapLocalWork(localPlan);
View Full Code Here

      currWork.getAliasToPartnInfo().remove(alias);
      currWork.getAliasToWork().remove(alias);
    }

    // Get the MapredLocalWork
    MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork();

    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : localWork.getAliasToWork()
        .entrySet()) {
      String alias = entry.getKey();
      Operator<? extends OperatorDesc> op = entry.getValue();
      FetchWork fetchWork = localWork.getAliasToFetchWork().get(alias);

      // Add the entry in mapredwork
      currWork.getAliasToWork().put(alias, op);

      PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.MapredLocalWork$BucketMapJoinContext

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.