Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.GroupByDesc


      }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        reduceSinkOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
  }
View Full Code Here


          field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        distPartAgg,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver
        .getColumnInfos()), reduceSinkOperatorInfo),
        groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
View Full Code Here

      }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
  }
View Full Code Here

          field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver2.getColumnInfos()),
        reduceSinkOperatorInfo2), groupByOutputRowResolver2);
    op.setColumnExprMap(colExprMap);
    return op;
  }
View Full Code Here

    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        inputOperatorInfo), groupByOutputRowResolver);

    op.setColumnExprMap(colExprMap);
    return op;
View Full Code Here

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
View Full Code Here

        int depth) throws SemanticException {
      HiveConf hiveConf = ctx.getConf();
      GroupByOptimizerSortMatch match = checkSortGroupBy(stack, groupByOp);
      boolean useMapperSort =
          HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT);
      GroupByDesc groupByOpDesc = groupByOp.getConf();

      boolean removeReduceSink = false;
      boolean optimizeDistincts = false;
      boolean setBucketGroup = false;

      // Dont remove the operator for distincts
      if (useMapperSort &&
          (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
        if (!groupByOpDesc.isDistinct()) {
          removeReduceSink = true;
        }
        else if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
          // Optimize the query: select count(distinct keys) from T, where
          // T is bucketized and sorted by T
          // Partial aggregation can be done by the mappers in this scenario

          List<ExprNodeDesc> keys =
              ((GroupByOperator)
              (groupByOp.getChildOperators().get(0).getChildOperators().get(0)))
                  .getConf().getKeys();
          if ((keys == null) || (keys.isEmpty())) {
            optimizeDistincts = true;
          }
        }
      }

      if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
          (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
        setBucketGroup = true;
      }

      if (removeReduceSink) {
        convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
      }
      else if (optimizeDistincts) {
        // In test mode, dont change the query plan. However, setup a query property
        pGraphContext.getQueryProperties().setHasMapGroupBy(true);
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
          return;
        }
        ReduceSinkOperator reduceSinkOp =
            (ReduceSinkOperator)groupByOp.getChildOperators().get(0);
        GroupByDesc childGroupByDesc =
            ((GroupByOperator)
            (reduceSinkOp.getChildOperators().get(0))).getConf();

        for (int pos = 0; pos < childGroupByDesc.getAggregators().size(); pos++) {
          AggregationDesc aggr = childGroupByDesc.getAggregators().get(pos);
          // Partial aggregation is not done for distincts on the mapper
          // However, if the data is bucketed/sorted on the distinct key, partial aggregation
          // can be performed on the mapper.
          if (aggr.getDistinct()) {
            ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
            ExprNodeDesc param = aggr.getParameters().get(0);
            assert param instanceof ExprNodeColumnDesc;
            ExprNodeColumnDesc paramC = (ExprNodeColumnDesc) param;
            paramC.setIsPartitionColOrVirtualCol(false);
            paramC.setColumn("VALUE._col" + pos);
            parameters.add(paramC);
            aggr.setParameters(parameters);
            aggr.setDistinct(false);
            aggr.setMode(Mode.FINAL);
          }
        }
        // Partial aggregation is performed on the mapper, no distinct processing at the reducer
        childGroupByDesc.setDistinct(false);
        groupByOpDesc.setDontResetAggrsDistinct(true);
        groupByOpDesc.setBucketGroup(true);
        groupByOp.setUseBucketizedHiveInputFormat(true);
        // no distinct processing at the reducer
        // A query like 'select count(distinct key) from T' is transformed into
View Full Code Here

    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf
        .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);

    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        input), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
View Full Code Here

    // this is the final group by operator, and multiple rows corresponding to the
    // grouping sets have been generated upstream.
    // However, if an addition MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            distPartAgg, groupByMemoryUsage, memoryThreshold,
            groupingSets,
            groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
            groupingSetsPosition, containsDistinctAggr),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
View Full Code Here

    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf
        .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            false, groupByMemoryUsage, memoryThreshold,
            groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.GroupByDesc

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.