Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapredWork


  public static void joinPlan(ReduceSinkOperator op,
                              Task<? extends Serializable> oldTask,
                              Task<? extends Serializable> task,
                              GenMRProcContext opProcCtx) throws SemanticException {
    Task<? extends Serializable> currTask = task;
    mapredWork plan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    // terminate the old task and make current task dependent on it
    if (oldTask != null) {
      splitTasks(op, oldTask, currTask, opProcCtx);
View Full Code Here


   * @param opProcCtx processing context
   */
  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
    // Generate a new task             
    mapredWork cplan = getMapRedWork();
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

    // Add the reducer
    cplan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();
   
    cplan.setNumReduceTasks(new Integer(desc.getNumReducers()));

    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, redTask);
    Task<? extends Serializable> currTask    = opProcCtx.getCurrTask();

View Full Code Here

  /**
   * create a new plan and return
   * @return the new plan
   */
  public static mapredWork getMapRedWork() {
    mapredWork work = new mapredWork();
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, partitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
    work.setTagToValueDesc(new ArrayList<tableDesc>());
    work.setReducer(null);
    return work;
  }
View Full Code Here

   **/
  private static void splitTasks(ReduceSinkOperator op,
                                 Task<? extends Serializable> parentTask,
                                 Task<? extends Serializable> childTask,
                                 GenMRProcContext opProcCtx) throws SemanticException {
    mapredWork plan = (mapredWork) childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
   
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);

    // Root Task cannot depend on any other task, therefore childTask cannot be a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask))
      rootTasks.remove(childTask);

    // generate the temporary file
    String scratchDir = opProcCtx.getScratchDir();
    int randomid = opProcCtx.getRandomId();
    int pathid   = opProcCtx.getPathId();
     
    String taskTmpDir = (new Path(scratchDir + File.separator + randomid + '.' + pathid)).toString();
    pathid++;
    opProcCtx.setPathId(pathid);
   
    Operator<? extends Serializable> parent = op.getParentOperators().get(0);
    tableDesc tt_desc =
      PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
   
    // Create a file sink operator for this file name
    Operator<? extends Serializable> fs_op =
      putOpInsertMap(OperatorFactory.get
                     (new fileSinkDesc(taskTmpDir, tt_desc,
                                       parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
                      parent.getSchema()), null, parseCtx);
   
    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
    for (int pos = 0; pos < childOpList.size(); pos++) {
      if (childOpList.get(pos) == op) {
        childOpList.set(pos, fs_op);
        break;
      }
    }
   
    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);
   
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
   
    String streamDesc;
    mapredWork cplan = (mapredWork) childTask.getWork();
   
    if (reducer.getClass() == JoinOperator.class) {
      String origStreamDesc;
      streamDesc = "$INTNAME";
      origStreamDesc = streamDesc;
      int pos = 0;
      while (cplan.getAliasToWork().get(streamDesc) != null)
        streamDesc = origStreamDesc.concat(String.valueOf(++pos));
    }
    else
      streamDesc = taskTmpDir;
   
    // Add the path to alias mapping
    if (cplan.getPathToAliases().get(taskTmpDir) == null) {
      cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
    }
   
    cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
    cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
    cplan.getAliasToWork().put(streamDesc, op);

    // TODO: Allocate work to remove the temporary files and make that
    // dependent on the redTask
    if (reducer.getClass() == JoinOperator.class)
      cplan.setNeedsTagging(true);

    currTopOp = null;
    String currAliasId = null;
   
    opProcCtx.setCurrTopOp(currTopOp);
View Full Code Here

    GenMRProcContext ctx = (GenMRProcContext)opProcCtx;

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork currPlan = (mapredWork) currTask.getWork();
    Operator<? extends Serializable> currTopOp   = mapredCtx.getCurrTopOp();
    String currAliasId = mapredCtx.getCurrAliasId();
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

    ctx.setCurrTopOp(currTopOp);
    ctx.setCurrAliasId(currAliasId);
    ctx.setCurrTask(currTask);

    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      if (currPlan.getReducer() == null)
        GenMapRedUtils.initPlan(op, ctx);
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // This will happen in case of joins. The current plan can be thrown away after being merged with the
View Full Code Here

    // union consisted on a bunch of map-reduce jobs, and it has been split at the union
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask    = mapredCtx.getCurrTask();
    mapredWork plan = (mapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
    Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
   
    ctx.setCurrTask(currTask);

    // If the plan for this reducer does not exist, initialize the plan
    if (opMapTask == null) {
      // When the reducer is encountered for the first time
      if (plan.getReducer() == null)
        GenMapRedUtils.initUnionPlan(op, ctx);
      // When union is followed by a multi-table insert
      else
        GenMapRedUtils.splitPlan(op, ctx);
    }
    // The union is already initialized. However, the union is walked from another input
    // initUnionPlan is idempotent
    else if (plan.getReducer() == reducer)
      GenMapRedUtils.initUnionPlan(op, ctx);
    // There is a join after union. One of the branches of union has already been initialized.
    // Initialize the current branch, and join with the original plan.
    else {
      GenMapRedUtils.initUnionPlan(ctx, currTask);
View Full Code Here

    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    Task<? extends Serializable> uTask = null;

    pos = UnionProcFactory.getPositionParent(union, stack);
    Operator<? extends Serializable> parent = union.getParentOperators().get(pos);  
    mapredWork uPlan = null;

    // union is encountered for the first time
    if (uCtxTask == null) {
      uCtxTask = new GenMRUnionCtx();
      uPlan = GenMapRedUtils.getMapRedWork();
View Full Code Here

   * @param opProcCtx processing context
   */
  public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
    // Generate a new task
    mapredWork cplan = getMapRedWork();
    ParseContext parseCtx = opProcCtx.getParseCtx();
    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

    // Add the reducer
    cplan.setReducer(reducer);
    reduceSinkDesc desc = (reduceSinkDesc)op.getConf();

    cplan.setNumReduceTasks(new Integer(desc.getNumReducers()));

    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, redTask);
    Task<? extends Serializable> currTask    = opProcCtx.getCurrTask();

View Full Code Here

  /**
   * create a new plan and return
   * @return the new plan
   */
  public static mapredWork getMapRedWork() {
    mapredWork work = new mapredWork();
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, partitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>());
    work.setTagToValueDesc(new ArrayList<tableDesc>());
    work.setReducer(null);
    return work;
  }
View Full Code Here

  public static void splitTasks(Operator<? extends Serializable> op,
                                 Task<? extends Serializable> parentTask,
                                 Task<? extends Serializable> childTask,
                                 GenMRProcContext opProcCtx, boolean setReducer,
                                 boolean local, int posn) throws SemanticException {
    mapredWork plan = (mapredWork) childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);

    // Root Task cannot depend on any other task, therefore childTask cannot be a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask))
      rootTasks.remove(childTask);

    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    Operator<? extends Serializable> parent = op.getParentOperators().get(posn);
    tableDesc tt_desc =
      PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    fileSinkDesc desc = new fileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends Serializable> fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx);

    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
    for (int pos = 0; pos < childOpList.size(); pos++) {
      if (childOpList.get(pos) == op) {
        childOpList.set(pos, fs_op);
        break;
      }
    }

    List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);

    // create a dummy tableScan operator on top of op
    Operator<? extends Serializable> ts_op =
      putOpInsertMap(OperatorFactory.get(tableScanDesc.class, parent.getSchema()), null, parseCtx);

    childOpList = new ArrayList<Operator<? extends Serializable>>();
    childOpList.add(op);
    ts_op.setChildOperators(childOpList);
    op.getParentOperators().set(posn, ts_op);

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null));

    String streamDesc = taskTmpDir;
    mapredWork cplan = (mapredWork) childTask.getWork();

    if (setReducer) {
      Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

      if (reducer.getClass() == JoinOperator.class) {
        String origStreamDesc;
        streamDesc = "$INTNAME";
        origStreamDesc = streamDesc;
        int pos = 0;
        while (cplan.getAliasToWork().get(streamDesc) != null)
          streamDesc = origStreamDesc.concat(String.valueOf(++pos));
      }

      // TODO: Allocate work to remove the temporary files and make that
      // dependent on the redTask
      if (reducer.getClass() == JoinOperator.class)
        cplan.setNeedsTagging(true);
    }

    // Add the path to alias mapping
    setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.MapredWork

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.