Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc
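
ReduceSinkDesc is the plan descriptor attached to a ReduceSinkOperator: it records the shuffle configuration - the key (sort/group) columns, the value columns carried to the reducer, the partition columns, the requested number of reducers, and the tag that identifies the operator's output among join inputs. The snippets below, drawn from the Hive source tree, show the descriptor being created, copied into a ReduceWork or MapredWork plan, and inspected by optimizer rules such as column pruning and join-key extraction. As a minimal sketch (a hypothetical helper, not itself from the Hive tree), the accessors the snippets rely on look like this:

    import java.util.List;
    import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;

    // Minimal sketch (hypothetical helper, not part of Hive): the shuffle
    // configuration that the snippets on this page read from a ReduceSinkDesc.
    static void describeShuffle(ReduceSinkOperator rs) {
      ReduceSinkDesc desc = rs.getConf();
      List<ExprNodeDesc> keyCols = desc.getKeyCols();     // sort/group key columns
      List<ExprNodeDesc> valueCols = desc.getValueCols(); // value payload columns
      int numReducers = desc.getNumReducers();            // -1 = let Hive decide
      int tag = desc.getTag();                            // identifies a join input
      System.out.printf("keys=%s values=%s reducers=%d tag=%d%n",
          keyCols, valueCols, numReducers, tag);
    }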


    opTaskMap.put(reducer, unionTask);

    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

    if (needsTagging(plan.getReduceWork())) {
      plan.getReduceWork().setNeedsTagging(true);
    }


    // Add the reducer
    ReduceWork rWork = new ReduceWork();
    childPlan.setReduceWork(rWork);
    rWork.setReducer(reducer);
    ReduceSinkDesc desc = cRS.getConf();
    childPlan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

    opProcCtx.getOpTaskMap().put(reducer, childTask);

    splitTasks(cRS, parentTask, childTask, opProcCtx);
  }
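
Both plan-construction snippets above follow the same pattern. Condensed into one hedged sketch (reduceSink and plan are hypothetical locals):

    // The ReduceSink's only child becomes the reducer of a new ReduceWork,
    // and the reducer parallelism is copied straight from the ReduceSinkDesc.
    ReduceWork rWork = new ReduceWork();
    rWork.setReducer(reduceSink.getChildOperators().get(0));
    rWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
    plan.setReduceWork(rWork);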

    });

    fs = new FileSinkOperator();
    fs.setConf(new FileSinkDesc());
    rs = new ReduceSinkOperator();
    rs.setConf(new ReduceSinkDesc());
    ts = new TableScanOperator();
    ts.setConf(new TableScanDesc());
    ts.getChildOperators().add(rs);
    rs.getParentOperators().add(ts);
    rs.getChildOperators().add(fs);
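
The test fixture above builds a TableScan -> ReduceSink -> FileSink pipeline by hand; the parent and child lists must stay symmetric. A hypothetical helper makes the invariant explicit:

    // Hypothetical helper (not part of Hive): link parent and child in both
    // directions, as the fixture above does manually for ts -> rs -> fs.
    static void connect(Operator<? extends OperatorDesc> parent,
        Operator<? extends OperatorDesc> child) {
      parent.getChildOperators().add(child);
      child.getParentOperators().add(parent);
    }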

      Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
      for (ColumnInfo ci : valColInfo) {
        ExprNodeColumnDesc newValueCol = new ExprNodeColumnDesc(ci);
        newValueCols.add(newValueCol);
        colExprMap.put(ci.getInternalName(), newValueCol);
      }
      ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder,
          newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType());

      if (!bucketColumns.isEmpty()) {
        String tableAlias = outRR.getColumnInfos().get(0).getTabAlias();
        ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo,

          outValColNames, 0, "");
      TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
      List<List<Integer>> distinctColumnIndices = Lists.newArrayList();

      // Number of reducers is set to default (-1)
      ReduceSinkDesc rsConf = new ReduceSinkDesc(newKeyCols, newKeyCols.size(), newValueCols,
          outputKeyCols, distinctColumnIndices, outValColNames, -1, newPartCols, -1, keyTable,
          valueTable, writeType);
      rsConf.setBucketCols(bucketColumns);
      rsConf.setNumBuckets(numBuckets);

      return rsConf;
    }
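
Taken together, the two snippets above show how getReduceSinkDesc assembles the descriptor for a sorted, bucketed write: in the twelve-argument constructor the first -1 is the operator tag and the second the reducer count (left at -1 so Hive can choose the parallelism later), after which the bucketing metadata is attached with setBucketCols and setNumBuckets.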

    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
      ReduceSinkDesc rsconf = ((ReduceSinkOperator)parentOp).getConf();
      Byte tag = (byte) rsconf.getTag();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(tag, keys);
    }

    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
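
Each parent ReduceSink of a join carries a distinct tag, so keyExprMap ends up with exactly one entry per join input; later phases, such as map-join conversion, can recover the key expressions for a particular input with keyExprMap.get(tag).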

    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();

    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)

    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();

    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    //    rootTasks.add(currTask);
    if (reducer.getClass() == JoinOperator.class)

    Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx.getConf());
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

    // Add the reducer
    cplan.setReducer(reducer);
    ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();

    cplan.setNumReduceTasks(desc.getNumReducers());

    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, redTask);
    Task<? extends Serializable> currTask    = opProcCtx.getCurrTask();
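
The three snippets above date from older Hive releases in which MapredWork itself carried the reducer and the reducer count (plan.setReducer, plan.setNumReduceTasks); in later releases that state moved into the dedicated ReduceWork object used by the first two snippets on this page.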

      ReduceSinkOperator op = (ReduceSinkOperator)nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
      HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap =
          cppCtx.getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRR();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends Serializable>> childOperators = op.getChildOperators();
      List<Operator<? extends Serializable>> parentOperators = op.getParentOperators();
      List<String> childColLists = new ArrayList<String>();

      for (Operator<? extends Serializable> child : childOperators) {
        childColLists = Utilities.mergeUniqElems(childColLists, cppCtx.getPrunedColLists().get(child));
      }

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1) && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends Serializable> par = parentOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRR();
        RowResolver childRR = opToParseCtxMap.get(childOperators.get(0)).getRR();

        for (String childCol : childColLists) {
          String[] nm = childRR.reverseLookup(childCol);
          ColumnInfo cInfo = redSinkRR.get(nm[0], nm[1]);
          if (cInfo != null) {
            cInfo = parRR.get(nm[0], nm[1]);
            if (!colLists.contains(cInfo.getInternalName())) {
              colLists.add(cInfo.getInternalName());
            }
          }
        }
      } else {
        // Reduce Sink contains the columns needed - no need to aggregate from children
        ArrayList<ExprNodeDesc> vals = conf.getValueCols();
        for (ExprNodeDesc val : vals) {
          colLists = Utilities.mergeUniqElems(colLists, val.getCols());
        }
      }

      cppCtx.getPrunedColLists().put(op, colLists);
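
In short, the pruner always keeps the ReduceSink's key columns; when the sole child is a JoinOperator it maps the child's required columns back through the row resolvers to this operator's parent, and in every other case the ReduceSink's own value columns already enumerate everything that must be kept.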
