Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapredLocalWork$BucketMapJoinContext


          .getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, String> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends Serializable> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = new Path(smallTblDirs.get(small_alias));
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir.toString(), tableDescList.get(small_alias)));
      }

      newPlan.setMapLocalWork(localPlan);
View Full Code Here


    private void processCurrentTask(Task<? extends Serializable> currTask,
        ConditionalTask conditionalTask) throws SemanticException {
      // get current mapred work and its local work
      MapredWork mapredWork = (MapredWork) currTask.getWork();
      MapredLocalWork localwork = mapredWork.getMapLocalWork();
      if (localwork != null) {
        // get the context info and set up the shared tmp URI
        Context ctx = physicalContext.getContext();
        String tmpFileURI = Utilities.generateTmpURI(ctx.getLocalTmpFileURI(), currTask.getId());
        localwork.setTmpFileURI(tmpFileURI);
        String hdfsTmpURI = Utilities.generateTmpURI(ctx.getMRTmpFileURI(), currTask.getId());
        mapredWork.setTmpHDFSFileURI(hdfsTmpURI);
        // create a task for this local work; right now, this local work is shared
        // by the original MapredTask and this new generated MapredLocalTask.
        MapredLocalTask localTask = (MapredLocalTask) TaskFactory.get(localwork, physicalContext
            .getParseContext().getConf());

        // set the backup task from curr task
        localTask.setBackupTask(currTask.getBackupTask());
        localTask.setBackupChildrenTasks(currTask.getBackupChildrenTasks());
        currTask.setBackupChildrenTasks(null);
        currTask.setBackupTask(null);

        if (currTask.getTaskTag() == Task.CONVERTED_MAPJOIN) {
          localTask.setTaskTag(Task.CONVERTED_LOCAL_MAPJOIN);
        } else {
          localTask.setTaskTag(Task.LOCAL_MAPJOIN);
        }
        // replace the map join operator to local_map_join operator in the operator tree
        // and return all the dummy parent
        LocalMapJoinProcCtx  localMapJoinProcCtx= adjustLocalTask(localTask);
        List<Operator<? extends Serializable>> dummyOps = localMapJoinProcCtx.getDummyParentOp();

        // create new local work and setup the dummy ops
        MapredLocalWork newLocalWork = new MapredLocalWork();
        newLocalWork.setDummyParentOp(dummyOps);
        newLocalWork.setTmpFileURI(tmpFileURI);
        newLocalWork.setInputFileChangeSensitive(localwork.getInputFileChangeSensitive());
        mapredWork.setMapLocalWork(newLocalWork);
        // get all parent tasks
        List<Task<? extends Serializable>> parentTasks = currTask.getParentTasks();
        currTask.setParentTasks(null);
        if (parentTasks != null) {
View Full Code Here

      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp, boolean createLocalPlan) {
    if (currMapJoinOp != null) {
      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
        currMapJoinOp.getConf().getAliasBucketFileNameMapping();
      if(aliasBucketFileNameMapping!= null) {
        MapredLocalWork localPlan = plan.getMapLocalWork();
        if(localPlan == null) {
          if(currMapJoinOp instanceof SMBMapJoinOperator) {
            localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
          }
          if (localPlan == null && createLocalPlan) {
            localPlan = new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
          }
        } else {
          //local plan is not null, we want to merge it into SMBMapJoinOperator's local work
          if(currMapJoinOp instanceof SMBMapJoinOperator) {
            MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
            if(smbLocalWork != null) {
              localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
              localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
            }
          }
        }

        if(localPlan == null) {
View Full Code Here

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan = new MapredLocalWork(
            new LinkedHashMap<String, Operator<? extends Serializable>>(),
            new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        localPlan.getAliasToFetchWork().put(
            alias_id,
            new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id,
            new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }
View Full Code Here

      plan.getPathToAliases().get(path).add(alias);
      plan.getPathToPartitionInfo().put(path, new PartitionDesc(tt_desc, null));
      plan.getAliasToWork().put(alias, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan = new MapredLocalWork(
            new LinkedHashMap<String, Operator<? extends Serializable>>(),
            new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias) == null;
      assert localPlan.getAliasToFetchWork().get(alias) == null;
      localPlan.getAliasToWork().put(alias, topOp);
      localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc));
      plan.setMapLocalWork(localPlan);
    }
  }
View Full Code Here

          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends Serializable>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, String> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends Serializable> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = new Path(smallTblDirs.get(small_alias));
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir.toString(), tableDescList.get(small_alias)));
      }

      newPlan.setMapLocalWork(localPlan);
View Full Code Here

    // keep the small table alias to avoid concurrent modification exception
    ArrayList<String> smallTableAliasList = new ArrayList<String>();
    String bigTableAlias = null;

    // create a new  MapredLocalWork
    MapredLocalWork newLocalWork = new MapredLocalWork(
        new LinkedHashMap<String, Operator<? extends Serializable>>(),
        new LinkedHashMap<String, FetchWork>());

    for (Map.Entry<String, Operator<? extends Serializable>> entry : newWork.getAliasToWork()
        .entrySet()) {
      String alias = entry.getKey();
      Operator<? extends Serializable> op = entry.getValue();

      // if the table scan is for big table; then skip it
      // tracing down the operator tree from the table scan operator
      Operator<? extends Serializable> parentOp = op;
      Operator<? extends Serializable> childOp = op.getChildOperators().get(0);
      while ((childOp != null) && (!childOp.equals(mapJoinOp))) {
        parentOp = childOp;
        assert parentOp.getChildOperators().size() == 1;
        childOp = parentOp.getChildOperators().get(0);
      }
      if (childOp == null) {
        throw new SemanticException(
            "Cannot find join op by tracing down the table scan operator tree");
      }
      // skip the big table pos
      int i = childOp.getParentOperators().indexOf(parentOp);
      if (i == bigTablePos) {
        bigTableAlias = alias;
        continue;
      }
      // set alias to work and put into smallTableAliasList
      newLocalWork.getAliasToWork().put(alias, op);
      smallTableAliasList.add(alias);
      // get input path and remove this alias from pathToAlias
      // because this file will be fetched by fetch operator
      LinkedHashMap<String, ArrayList<String>> pathToAliases = newWork.getPathToAliases();

      // keep record all the input path for this alias
      HashSet<String> pathSet = new HashSet<String>();
      HashSet<String> emptyPath = new HashSet<String>();
      for (Map.Entry<String, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
        String path = entry2.getKey();
        ArrayList<String> list = entry2.getValue();
        if (list.contains(alias)) {
          // add to path set
          if (!pathSet.contains(path)) {
            pathSet.add(path);
          }
          //remove this alias from the alias list
          list.remove(alias);
          if(list.size() == 0) {
            emptyPath.add(path);
          }
        }
      }
      //remove the path, with which no alias associates
      for (String path : emptyPath) {
        pathToAliases.remove(path);
      }

      // create fetch work
      FetchWork fetchWork = null;
      List<String> partDir = new ArrayList<String>();
      List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();

      for (String tablePath : pathSet) {
        PartitionDesc partitionDesc = newWork.getPathToPartitionInfo().get(tablePath);
        // create fetchwork for non partitioned table
        if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
          fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
          break;
        }
        // if table is partitioned,add partDir and partitionDesc
        partDir.add(tablePath);
        partDesc.add(partitionDesc);
      }
      // create fetchwork for partitioned table
      if (fetchWork == null) {
        fetchWork = new FetchWork(partDir, partDesc);
      }
      // set alias to fetch work
      newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
    }
    // remove small table ailias from aliasToWork;Avoid concurrent modification
    for (String alias : smallTableAliasList) {
      newWork.getAliasToWork().remove(alias);
    }
View Full Code Here

  public static MapredLocalWork deserializeMapRedLocalWork(InputStream in, Configuration conf) {
    XMLDecoder d = null;
    try {
      d = new XMLDecoder(in, null, null);
      MapredLocalWork ret = (MapredLocalWork) d.readObject();
      return (ret);
    } finally {
      if (null != d) {
        d.close();
      }
View Full Code Here

      AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp, boolean createLocalPlan) {
    if (currMapJoinOp != null) {
      LinkedHashMap<String, LinkedHashMap<String, ArrayList<String>>> aliasBucketFileNameMapping =
        currMapJoinOp.getConf().getAliasBucketFileNameMapping();
      if(aliasBucketFileNameMapping!= null) {
        MapredLocalWork localPlan = plan.getMapLocalWork();
        if(localPlan == null) {
          if(currMapJoinOp instanceof SMBMapJoinOperator) {
            localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
          }
          if (localPlan == null && createLocalPlan) {
            localPlan = new MapredLocalWork(
                new LinkedHashMap<String, Operator<? extends Serializable>>(),
                new LinkedHashMap<String, FetchWork>());
          }
        } else {
          //local plan is not null, we want to merge it into SMBMapJoinOperator's local work
          if(currMapJoinOp instanceof SMBMapJoinOperator) {
            MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
            if(smbLocalWork != null) {
              localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
              localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
            }
          }
        }

        if(localPlan == null) {
View Full Code Here

      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan = new MapredLocalWork(
            new LinkedHashMap<String, Operator<? extends Serializable>>(),
            new LinkedHashMap<String, FetchWork>());
      }

      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        localPlan.getAliasToFetchWork().put(
            alias_id,
            new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id,
            new FetchWork(tblDir.toString(), tblDesc));
      }
      plan.setMapLocalWork(localPlan);
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.MapredLocalWork$BucketMapJoinContext

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.