Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.GroupByOperator


    QBParseInfo parseInfo = qb.getParseInfo();

    // ////// Generate GroupbyOperator for a map-side partial aggregation
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
      new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
        qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
        genericUDAFEvaluators);

    groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
        inputOperatorInfo).getRowResolver().getTableNames());
View Full Code Here


    QBParseInfo parseInfo = qb.getParseInfo();

    // ////// Generate GroupbyOperator for a map-side partial aggregation
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
      new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
        qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
        genericUDAFEvaluators);

    groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
        inputOperatorInfo).getRowResolver().getTableNames());
View Full Code Here

        return nd;
      }

      for (Node op : stack) {
        if (op instanceof GroupByOperator) {
          GroupByOperator gby = (GroupByOperator) op;
          if (!gby.getConf().isDistinctLike()) {
            // GroupBy not distinct like, disabling
            walkerCtx.convertNotMetadataOnly();
            return nd;
          }
        }
View Full Code Here

        -1 : Integer.MAX_VALUE), -1, false);

    // ////// 2. Generate GroupbyOperator
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
      new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator(
        parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
        genericUDAFEvaluators);

    int numReducers = -1;
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
View Full Code Here

    QBParseInfo parseInfo = qb.getParseInfo();

    // ////// Generate GroupbyOperator for a map-side partial aggregation
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
      new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
        qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
        genericUDAFEvaluators);

    groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
        inputOperatorInfo).getRowResolver().getTableNames());
View Full Code Here

    QBParseInfo parseInfo = qb.getParseInfo();

    // ////// Generate GroupbyOperator for a map-side partial aggregation
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
      new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(
        qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH,
        genericUDAFEvaluators);

    groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
        inputOperatorInfo).getRowResolver().getTableNames());
View Full Code Here

  public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator gop = (GroupByOperator) nd;
      Operator<? extends OperatorDesc> parent = gop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
      HiveConf conf = aspCtx.getConf();
      int mapSideParallelism =
          HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM);
      List<AggregationDesc> aggDesc = gop.getConf().getAggregators();
      Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap();
      RowSchema rs = gop.getSchema();
      Statistics stats = null;

      try {
        if (satisfyPrecondition(parentStats)) {
          stats = parentStats.clone();

          List<ColStatistics> colStats =
              StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
          stats.setColumnStats(colStats);
          long dvProd = 1;
          long newNumRows = 0;

          // compute product of distinct values of grouping columns
          for (ColStatistics cs : colStats) {
            if (cs != null) {
              long dv = cs.getCountDistint();
              if (cs.getNumNulls() > 0) {
                dv += 1;
              }
              dvProd *= dv;
            } else {

              // partial column statistics on grouping attributes case.
              // if column statistics on grouping attribute is missing, then
              // assume worst case.
              // GBY rule will emit half the number of rows if dvProd is 0
              dvProd = 0;
              break;
            }
          }

          // map side
          if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) {

            // since we do not know whether hash-aggregation will be enabled or
            // disabled at runtime, we assume that the map-side group by does not
            // do any reduction; hence no group-by rule will be applied

            // map-side grouping set present. if grouping set is present then
            // multiply the number of rows by number of elements in grouping set
            if (gop.getConf().isGroupingSetsPresent()) {
              int multiplier = gop.getConf().getListGroupingSets().size();

              // take into account the map-side parallelism as well, default is 1
              multiplier *= mapSideParallelism;
              newNumRows = multiplier * stats.getNumRows();
              long dataSize = multiplier * stats.getDataSize();
              stats.setNumRows(newNumRows);
              stats.setDataSize(dataSize);
              for (ColStatistics cs : colStats) {
                if (cs != null) {
                  long oldNumNulls = cs.getNumNulls();
                  long newNumNulls = multiplier * oldNumNulls;
                  cs.setNumNulls(newNumNulls);
                }
              }
            } else {

              // map side no grouping set
              newNumRows = stats.getNumRows() * mapSideParallelism;
              updateStats(stats, newNumRows, true);
            }
          } else {

            // reduce side
            newNumRows = applyGBYRule(stats.getNumRows(), dvProd);
            updateStats(stats, newNumRows, true);
          }
        } else {
          if (parentStats != null) {

            // worst case, in the absence of column statistics assume half the rows are emitted
            if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) {

              // map side
              stats = parentStats.clone();
            } else {

              // reduce side
              stats = parentStats.clone();
              long newNumRows = parentStats.getNumRows() / 2;
              updateStats(stats, newNumRows, false);
            }
          }
        }

        // if UDAFs are present, new columns need to be added
        if (!aggDesc.isEmpty() && stats != null) {
          List<ColStatistics> aggColStats = Lists.newArrayList();
          for (ColumnInfo ci : rs.getSignature()) {

            // if a column in the row schema is not contained in the column
            // expression map, then it is an aggregate column that was added
            // by the GBY operator. we will estimate the column statistics
            // for those newly added columns
            if (!colExprMap.containsKey(ci.getInternalName())) {
              String colName = ci.getInternalName();
              colName = StatsUtils.stripPrefixFromColumnName(colName);
              String tabAlias = ci.getTabAlias();
              String colType = ci.getTypeName();
              ColStatistics cs = new ColStatistics(tabAlias, colName, colType);
              cs.setCountDistint(stats.getNumRows());
              cs.setNumNulls(0);
              cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType));
              aggColStats.add(cs);
            }
          }
          stats.addToColumnStats(aggColStats);

          // if UDAF present and if column expression map is empty then it must
          // be full aggregation query like count(*) in which case number of
          // rows will be 1
          if (colExprMap.isEmpty()) {
            stats.setNumRows(1);
            updateStats(stats, 1, true);
          }
        }

        gop.setStatistics(stats);

        if (LOG.isDebugEnabled() && stats != null) {
          LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString());
        }
      } catch (CloneNotSupportedException e) {
        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
      }
      return null;
View Full Code Here

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // GBY,RS,GBY... (top to bottom)
      GroupByOperator groupByOp = (GroupByOperator) stack.get(stack.size() - 3);

      GroupByOptimizerContext ctx = (GroupByOptimizerContext) procCtx;

      if (!checkGroupByOperatorProcessed(ctx, groupByOp)) {
        processGroupBy(ctx, stack, groupByOp, 2);
View Full Code Here

    }
    return null;
  }

  protected static boolean hasGroupingSet(ReduceSinkOperator cRS) throws SemanticException {
    GroupByOperator cGBYm = getSingleParent(cRS, GroupByOperator.class);
    if (cGBYm != null && cGBYm.getConf().isGroupingSetsPresent()) {
      return true;
    }
    return false;
  }
View Full Code Here

    Operator<?> parent = getSingleParent(cRS);

    if (parent instanceof GroupByOperator) {
      // pRS-cGBYm-cRS-cGBYr (map aggregation) --> pRS-cGBYr(COMPLETE)
      // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
      GroupByOperator cGBYm = (GroupByOperator) parent;

      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
              .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
      cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
      for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
        aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
      }
      cGBYr.setColumnExprMap(cGBYm.getColumnExprMap());
      cGBYr.setSchema(cGBYm.getSchema());
      RowResolver resolver = context.getOpParseCtx().get(cGBYm).getRowResolver();
      context.getOpParseCtx().get(cGBYr).setRowResolver(resolver);
    } else {
      // pRS-cRS-cGBYr (no map aggregation) --> pRS-cGBYr(COMPLETE)
      // revert expressions of cGBYr to that of cRS
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.GroupByOperator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.