Examples of ReduceSinkOperator


Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

            new ColumnInfo(field, type, null, false));
        colExprMap.put(field, exprDesc);
      }
    }

    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(
            PlanUtils.getReduceSinkDesc(reduceKeys,
                groupingSetsPresent ? grpByExprs.size() + 1 : grpByExprs.size(),
                reduceValues, distinctColIndices,
                outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields,
                numReducers),
            new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo),
        reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }
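
The snippet above is the tail of a group-by plan generator (it has the shape of SemanticAnalyzer.genGroupByPlanReduceSinkOperator, which is called in a later example on this page). A minimal sketch of the overall pattern, with the parameter roles as I read this PlanUtils overload; numKeys, outputRowResolver, and parentOp are stand-ins for the method's locals:

    // Sketch only: how the pieces above fit together. All inputs are
    // assumed to be prepared earlier by the plan-generation code.
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        reduceKeys,               // key expressions (the group-by keys)
        numKeys,                  // key count; +1 when a grouping-set ID key is added
        reduceValues,             // non-key value expressions
        distinctColIndices,       // key indices used by each DISTINCT aggregate
        outputKeyColumnNames, outputValueColumnNames,
        true,                     // include the key columns in the output
        -1,                       // tag: none
        numPartitionFields,       // prefix of the keys used for partitioning
        numReducers);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
        rsDesc, new RowSchema(outputRowResolver.getColumnInfos()), parentOp);
    rsOp.setColumnExprMap(colExprMap);  // maps output column names to source expressions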

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

      pos++;
    }

    // get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < newParentOps.size(); pos++) {
      ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = oldPar.getConf();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(pos, keys);
    }

    // removing RS, only ExprNodeDesc is changed (key/value/filter exprs and colExprMap)
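
A stripped-down sketch of the lookup above, assuming keyExprMap is a Map<Integer, List<ExprNodeDesc>> keyed by join position (tag): the shuffle keys of every parent ReduceSink are saved so the join keys survive once the ReduceSinks themselves are removed.

    // Sketch: collect each parent ReduceSink's key expressions by position.
    Map<Integer, List<ExprNodeDesc>> keyExprMap = new HashMap<Integer, List<ExprNodeDesc>>();
    for (int pos = 0; pos < oldReduceSinkParentOps.size(); pos++) {
      ReduceSinkOperator parentRS = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      keyExprMap.put(pos, parentRS.getConf().getKeyCols());
    }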

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

        // In test mode, don't change the query plan. However, set up a query property.
        pGraphContext.getQueryProperties().setHasMapGroupBy(true);
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
          return;
        }
        ReduceSinkOperator reduceSinkOp =
            (ReduceSinkOperator)groupByOp.getChildOperators().get(0);
        GroupByDesc childGroupByDesc =
            ((GroupByOperator)
            (reduceSinkOp.getChildOperators().get(0))).getConf();

        for (int pos = 0; pos < childGroupByDesc.getAggregators().size(); pos++) {
          AggregationDesc aggr = childGroupByDesc.getAggregators().get(pos);
          // Partial aggregation is normally not done for distincts on the mapper.
          // However, if the data is bucketed/sorted on the distinct key, partial
          // aggregation can be performed on the mapper.
          if (aggr.getDistinct()) {
            ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
            ExprNodeDesc param = aggr.getParameters().get(0);
            assert param instanceof ExprNodeColumnDesc;
            ExprNodeColumnDesc paramC = (ExprNodeColumnDesc) param;
            paramC.setIsPartitionColOrVirtualCol(false);
            paramC.setColumn("VALUE._col" + pos);
            parameters.add(paramC);
            aggr.setParameters(parameters);
            aggr.setDistinct(false);
            aggr.setMode(Mode.FINAL);
          }
        }
        // Partial aggregation is performed on the mapper; no distinct processing at the reducer.
        childGroupByDesc.setDistinct(false);
        groupByOpDesc.setDontResetAggrsDistinct(true);
        groupByOpDesc.setBucketGroup(true);
        groupByOp.setUseBucketizedHiveInputFormat(true);
        // no distinct processing at the reducer
        // A query like 'select count(distinct key) from T' is transformed into
        // 'select count(key) from T' as far as the reducer is concerned.
        reduceSinkOp.getConf().setDistinctColumnIndices(new ArrayList<List<Integer>>());
      } else if (setBucketGroup) {
        groupByOpDesc.setBucketGroup(true);
      }
    }
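
The DISTINCT rewrite above can be condensed to a few lines per aggregation. A sketch, assuming aggr and pos come from the loop over childGroupByDesc.getAggregators():

    // Because the data is bucketed/sorted on the distinct key, the mapper
    // already produced a partial aggregate; the reducer only finalizes it.
    ExprNodeColumnDesc param = (ExprNodeColumnDesc) aggr.getParameters().get(0);
    param.setColumn("VALUE._col" + pos);   // read the mapper's partial result
    param.setIsPartitionColOrVirtualCol(false);
    aggr.setParameters(new ArrayList<ExprNodeDesc>(Arrays.asList((ExprNodeDesc) param)));
    aggr.setDistinct(false);               // no distinct processing at the reducer
    aggr.setMode(GenericUDAFEvaluator.Mode.FINAL);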

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

  /**
   * The Node Processor for Column Pruning on Reduce Sink Operators.
   */
  public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      HashMap<Operator<? extends OperatorDesc>, OpParseContext> opToParseCtxMap = cppCtx
          .getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRowResolver();
      ReduceSinkDesc conf = op.getConf();
      List<Operator<? extends OperatorDesc>> childOperators = op
          .getChildOperators();
      List<Operator<? extends OperatorDesc>> parentOperators = op
          .getParentOperators();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends OperatorDesc> par = parentOperators.get(0);
        JoinOperator childJoin = (JoinOperator) childOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRowResolver();
        List<String> childJoinCols = cppCtx.getJoinPrunedColLists().get(
            childJoin).get((byte) conf.getTag());
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childJoinCols != null && childJoinCols.size() > 0) {
          Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
          for (String childCol : childJoinCols) {
            ExprNodeDesc desc = exprMap.get(childCol);
            int index = conf.getValueCols().indexOf(desc);
            flags[index] = true;
            String[] nm = redSinkRR.reverseLookup(childCol);
            if (nm != null) {
              ColumnInfo cInfo = parRR.get(nm[0], nm[1]);
              if (!colLists.contains(cInfo.getInternalName())) {
                colLists.add(cInfo.getInternalName());
              }
            }
          }
        }
        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
      } else if ((childOperators.size() == 1)
          && (childOperators.get(0) instanceof ExtractOperator)
          && (childOperators.get(0).getChildOperators().size() == 1)
          && (childOperators.get(0).getChildOperators().get(0) instanceof PTFOperator)
          && ((PTFOperator) childOperators.get(0)
              .getChildOperators().get(0)).getConf().forWindowing()) {

        /*
         * For an RS followed by an Extract and a PTFOperator used for
         * windowing, do the same as above: reconstruct the value column
         * list based on what the PTFOperator requires.
         */

        assert parentOperators.size() == 1;

        PTFOperator ptfOp = (PTFOperator) childOperators.get(0).getChildOperators().get(0);
        List<String> childCols = cppCtx.getPrunedColList(ptfOp);
        boolean[] flags = new boolean[conf.getValueCols().size()];
        for (int i = 0; i < flags.length; i++) {
          flags[i] = false;
        }
        if (childCols != null && childCols.size() > 0) {
          ArrayList<String> outColNames = op.getConf().getOutputValueColumnNames();
          for (int i = 0; i < outColNames.size(); i++) {
            if (childCols.contains(outColNames.get(i))) {
              ExprNodeDesc exprNode = op.getConf().getValueCols().get(i);
              flags[i] = true;
              Utilities.mergeUniqElems(colLists, exprNode.getCols());
            }
          }
        }
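
The flag-building step in both branches reduces to the same idiom. A sketch, where required is a hypothetical stand-in for the pruned column list obtained from the child operator:

    // Mark which ReduceSink value columns the child still needs; columns
    // left false can be dropped from getValueCols().
    boolean[] keep = new boolean[conf.getValueCols().size()];
    List<String> outNames = conf.getOutputValueColumnNames();
    for (int i = 0; i < outNames.size(); i++) {
      keep[i] = required.contains(outNames.get(i));
    }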

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

                current, backtrackedKeyCols, backtrackedPartitionCols, childRSOrder,
                parent, correlation));
          }
        }
      } else if (current.getColumnExprMap() != null && current instanceof ReduceSinkOperator) {
        ReduceSinkOperator rsop = (ReduceSinkOperator) current;
        List<ExprNodeDesc> backtrackedKeyCols =
            ExprNodeDescUtils.backtrack(childKeyCols, child, current);
        List<ExprNodeDesc> backtrackedPartitionCols =
            ExprNodeDescUtils.backtrack(childPartitionCols, child, current);
        List<ExprNodeDesc> rsKeyCols = rsop.getConf().getKeyCols();
        List<ExprNodeDesc> rsPartitionCols = rsop.getConf().getPartitionCols();

        // Two ReduceSinkOperators are correlated if they have the same
        // sorting columns (key columns), the same partitioning columns,
        // the same sort orders, and no conflict in the number of reducers.
        // TODO: we should relax this condition.
        // TODO: we need to handle aggregation functions with the distinct
        // keyword; in that case, distinct columns are added to the key columns.
        boolean isCorrelated = sameKeys(rsKeyCols, backtrackedKeyCols) &&
            sameOrder(rsop.getConf().getOrder(), childRSOrder) &&
            sameKeys(backtrackedPartitionCols, rsPartitionCols) &&
            correlation.adjustNumReducers(rsop.getConf().getNumReducers());
        GroupByOperator cGBY =
            CorrelationUtilities.getSingleChild(rsop, GroupByOperator.class);
        if (cGBY != null) {
          if (CorrelationUtilities.hasGroupingSet(rsop) ||
              cGBY.getConf().isGroupingSetsPresent()) {
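
A concrete query that this correlation test is designed to catch, sketched in a comment (the standard example for this optimization):

    // SELECT t1.key, s.cnt
    // FROM t1 JOIN (SELECT key, count(1) AS cnt FROM t2 GROUP BY key) s
    //   ON (t1.key = s.key);
    //
    // The GROUP BY and the JOIN both shuffle on 'key' with the same sort
    // order and partitioning, so their ReduceSinkOperators pass the
    // sameKeys/sameOrder checks above and the two shuffles can be merged.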

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

    }

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      CorrelationNodeProcCtx corrCtx = (CorrelationNodeProcCtx) ctx;
      ReduceSinkOperator op = (ReduceSinkOperator) nd;

      // Check if we have visited this operator
      if (corrCtx.isWalked(op)) {
        return null;
      }

      LOG.info("Walk to operator " + op.getIdentifier() + " " + op.getName());

      Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(op, true);
      if (!(child instanceof JoinOperator) && !(child instanceof GroupByOperator)) {
        corrCtx.addWalked(op);
        return null;
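
The guard structure of this processor, condensed into a sketch: only ReduceSinks whose single child is a join or group-by are pursued, and operators are never revisited.

    if (corrCtx.isWalked(op)) {
      return null;                      // already analyzed on an earlier walk
    }
    Operator<? extends OperatorDesc> child = CorrelationUtilities.getSingleChild(op, true);
    if (!(child instanceof JoinOperator) && !(child instanceof GroupByOperator)) {
      corrCtx.addWalked(op);            // not a correlation candidate: mark and skip
      return null;
    }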

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

    if (topOp == null) {
      return;
    }

    if (topOp instanceof ReduceSinkOperator) {
      ReduceSinkOperator rs = (ReduceSinkOperator) topOp;
      plan.setKeyDesc(rs.getConf().getKeySerializeInfo());
      int tag = Math.max(0, rs.getConf().getTag());
      List<TableDesc> tagToSchema = plan.getTagToValueDesc();
      while (tag + 1 > tagToSchema.size()) {
        tagToSchema.add(null);
      }
      tagToSchema.set(tag, rs.getConf().getValueSerializeInfo());
    } else {
      List<Operator<? extends OperatorDesc>> children = topOp.getChildOperators();
      if (children != null) {
        for (Operator<? extends OperatorDesc> op : children) {
          setKeyAndValueDesc(plan, op);
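
The list-growing idiom above is worth noting: Math.max(0, tag) maps an untagged ReduceSink (tag -1) to slot 0, and the tag-indexed list is padded with nulls until the slot exists. An equivalent sketch:

    int tag = Math.max(0, rs.getConf().getTag());   // tag -1 ("untagged") uses slot 0
    List<TableDesc> tagToSchema = plan.getTagToValueDesc();
    while (tagToSchema.size() <= tag) {             // same test as "tag + 1 > size()"
      tagToSchema.add(null);
    }
    tagToSchema.set(tag, rs.getConf().getValueSerializeInfo());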

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

          colExprMap.put(field, expression);
        }
      }
    }

    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
            grpByExprs.size(), reduceValues, distinctColIndices,
            outputKeyColumnNames, outputValueColumnNames, true, -1, grpByExprs.size(),
            -1), new RowSchema(reduceSinkOutputRowResolver
            .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }
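
Compared with the first example on this page, this variant adds no grouping-set key and partitions on every group-by expression. The same call, annotated with my reading of the arguments:

    PlanUtils.getReduceSinkDesc(
        reduceKeys, grpByExprs.size(),   // keys: all group-by expressions
        reduceValues, distinctColIndices,
        outputKeyColumnNames, outputValueColumnNames,
        true,                            // include the key columns in the output
        -1,                              // tag: none
        grpByExprs.size(),               // partition on every group-by key
        -1);                             // numReducers: let the compiler decide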

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

          Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "",
          false));
      colExprMap.put(col, exprDesc);
    }

    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
            reduceValues, outputColumnNames, true, -1, numPartitionFields,
            numReducers), new RowSchema(reduceSinkOutputRowResolver2
            .getColumnInfos()), groupByOperatorInfo),
        reduceSinkOutputRowResolver2);

    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }
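
This excerpt wires up the second-stage shuffle of a two-stage group-by. On the reduce side, forwarded columns are addressed with a KEY. or VALUE. prefix; a sketch of the naming used above, where outputRR, typeInfo, and exprDesc stand in for the method's locals:

    // Register a value column under its reducer-side internal name.
    String internalName = Utilities.ReduceField.VALUE.toString() + "." + col;  // e.g. "VALUE._col0"
    outputRR.put("", col, new ColumnInfo(internalName, typeInfo, "", false));
    colExprMap.put(col, exprDesc);   // remember which expression produced the column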

Examples of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator

    if (!groupingSets.isEmpty()) {
      throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg());
    }

    // ////// 1. Generate ReduceSinkOperator
    ReduceSinkOperator reduceSinkOperatorInfo =
        genGroupByPlanReduceSinkOperator(qb,
            dest,
            input,
            grpByExprs,
            grpByExprs.size(),
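
This caller builds the single-stage group-by plan used when map-side aggregation is off; grouping sets are rejected up front because they require map-side aggregation on this path. My reading of the resulting operator tree, as a sketch:

    // TableScan
    //   -> ReduceSinkOperator  (shuffles rows on the group-by keys)
    //     -> GroupByOperator   (mode COMPLETE: all aggregation happens
    //                           on the reducer in this plan)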