Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.GroupByDesc
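GroupByDesc is the plan descriptor behind Hive's GroupByOperator. It carries the group-by mode, the output column names, the key expressions, the aggregation descriptors, the hash-aggregation memory settings, and, in its newer constructors, grouping-set information. The excerpts below are drawn from the Hive code base and show the descriptor being constructed during plan generation and inspected or rewritten by optimizer rules.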


    // Nothing special needs to be done for grouping sets.
    // This is the final group by operator, so multiple rows corresponding to the
    // grouping sets have been generated upstream.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
      distPartAgg, groupByMemoryUsage, memoryThreshold, null, false, 0),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
      groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
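This and the following excerpts repeat one pattern: read the hash-aggregation memory settings from HiveConf (the reads are visible in the later snippets) and pass them to the GroupByDesc constructor. As a minimal sketch, the hypothetical helper below packages that pattern, assuming the ten-argument constructor used in these snippets; the argument types are inferred from the surrounding code, and the constructor's arity has changed across Hive releases, so verify against your version.

    import java.util.ArrayList;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.plan.AggregationDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.GroupByDesc;

    public class GroupByDescSketch {

      // Builds a GroupByDesc with no grouping sets, mirroring the trailing
      // "null, false, 0" arguments seen in the snippets on this page.
      static GroupByDesc simpleGroupByDesc(HiveConf conf,
          GroupByDesc.Mode mode,
          ArrayList<String> outputColumnNames,
          ArrayList<ExprNodeDesc> groupByKeys,
          ArrayList<AggregationDesc> aggregations) {
        // Hash-aggregation memory settings come from the session configuration.
        float groupByMemoryUsage =
            HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
        float memoryThreshold =
            HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
        return new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
            false /* distPartAgg */, groupByMemoryUsage, memoryThreshold,
            null /* grouping-set keys */, false /* groupingSetsPresent */,
            0 /* groupingSetsPosition */);
      }
    }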


      }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
      false, groupByMemoryUsage, memoryThreshold,
      groupingSetKeys, groupingSetsPresent, groupingSetsPosition),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()),
      inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);

    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);

    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
      false, groupByMemoryUsage, memoryThreshold, null, false, 0),
      new RowSchema(groupByOutputRowResolver2.getColumnInfos()),
      reduceSinkOperatorInfo2), groupByOutputRowResolver2);
    op.setColumnExprMap(colExprMap);
    return op;

    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
      false, groupByMemoryUsage, memoryThreshold, null, false, 0),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()),
      inputOperatorInfo), groupByOutputRowResolver);

    op.setColumnExprMap(colExprMap);

    // Also, we do not rewrite cases where the same query branch has multiple
    // group-by constructs.
    if (canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
        !canApplyCtx.isQueryHasGroupBy()) {

      canApplyCtx.setQueryHasGroupBy(true);
      GroupByDesc conf = (GroupByDesc) operator.getConf();
      List<AggregationDesc> aggrList = conf.getAggregators();
      if (aggrList != null && aggrList.size() > 0) {
        for (AggregationDesc aggregationDesc : aggrList) {
          canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
          // The current implementation does not support more than one aggregate
          // function in the group-by.
          if (canApplyCtx.getAggFuncCnt() > 1) {
            return false;
          }
          String aggFunc = aggregationDesc.getGenericUDAFName();
          if (!"count".equals(aggFunc)) {
            canApplyCtx.setAggFuncIsNotCount(true);
          } else {
            // A valid aggregation needs a non-null parameter list.
            List<ExprNodeDesc> para = aggregationDesc.getParameters();
            if (para == null) {
              canApplyCtx.setAggFuncColsFetchException(true);
            } else if (para.size() == 0) {
              // count(*) case
              canApplyCtx.setCountOnAllCols(true);
              canApplyCtx.setAggFunction("_count_of_all");
            } else {
              assert para.size() == 1;
              for (int i = 0; i < para.size(); i++) {
                ExprNodeDesc expr = para.get(i);
                if (expr instanceof ExprNodeColumnDesc) {
                  // Add the columns to RewriteCanApplyCtx's selectColumnsList so we
                  // can later check that the index keys contain all select-clause
                  // columns and vice versa. We get the 'actual' select column names
                  // only here, when an aggregate function accompanies the group-by;
                  // SelectOperator keeps internal names in its colList.
                  canApplyCtx.getSelectColumnsList().add(
                      ((ExprNodeColumnDesc) expr).getColumn());
                  // Add the columns to RewriteCanApplyCtx's aggFuncColList so we can
                  // later check whether the aggregate's columns are index key columns.
                  canApplyCtx.getAggFuncColList().add(
                      ((ExprNodeColumnDesc) expr).getColumn());
                  canApplyCtx.setAggFunction("_count_of_" +
                      ((ExprNodeColumnDesc) expr).getColumn());
                } else if (expr instanceof ExprNodeConstantDesc) {
                  // count(1) case
                  canApplyCtx.setCountOfOne(true);
                  canApplyCtx.setAggFunction("_count_of_1");
                }
              }
            }
          }
        }
      }

      // A valid group-by operator needs non-null group-by keys.
      List<ExprNodeDesc> keyList = conf.getKeys();
      if (keyList == null || keyList.size() == 0) {
        canApplyCtx.setGbyKeysFetchException(true);
      }
      for (ExprNodeDesc expr : keyList) {
        checkExpression(expr);
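The eligibility check above distinguishes the flavors of count() that the aggregate-index rewrite can handle. As a standalone sketch (a hypothetical helper, using only accessors that appear in the excerpt), the classification reduces to:

    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.AggregationDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

    public class CountKindSketch {

      enum CountKind { NOT_COUNT, INVALID, COUNT_ALL, COUNT_CONSTANT, COUNT_COLUMN }

      // Mirrors the rewrite's eligibility test: only a single count() over a
      // column, a constant, or * can be answered from an aggregate index.
      static CountKind classify(AggregationDesc aggr) {
        if (!"count".equals(aggr.getGenericUDAFName())) {
          return CountKind.NOT_COUNT;
        }
        List<ExprNodeDesc> params = aggr.getParameters();
        if (params == null) {
          return CountKind.INVALID;        // corresponds to setAggFuncColsFetchException
        }
        if (params.isEmpty()) {
          return CountKind.COUNT_ALL;      // count(*), recorded as "_count_of_all"
        }
        ExprNodeDesc expr = params.get(0); // the rewrite supports exactly one parameter
        if (expr instanceof ExprNodeColumnDesc) {
          return CountKind.COUNT_COLUMN;   // count(col): col must be an index key
        }
        if (expr instanceof ExprNodeConstantDesc) {
          return CountKind.COUNT_CONSTANT; // count(1), recorded as "_count_of_1"
        }
        return CountKind.INVALID;
      }
    }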

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator op = (GroupByOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      List<String> colLists = new ArrayList<String>();
      GroupByDesc conf = op.getConf();
      ArrayList<ExprNodeDesc> keys = conf.getKeys();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      ArrayList<AggregationDesc> aggrs = conf.getAggregators();
      for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
          colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
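The pruner above relies on the fact that a GroupByOperator reads exactly the columns referenced by its keys and its aggregation parameters. Packaged as a standalone helper (a sketch reusing only the calls from the excerpt):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.Utilities;
    import org.apache.hadoop.hive.ql.plan.AggregationDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.GroupByDesc;

    public class GroupByColumnsSketch {

      // Collects the distinct input columns a group-by touches: key columns
      // first, then every aggregation parameter's columns.
      static List<String> referencedColumns(GroupByDesc conf) {
        List<String> cols = new ArrayList<String>();
        for (ExprNodeDesc key : conf.getKeys()) {
          cols = Utilities.mergeUniqElems(cols, key.getCols());
        }
        for (AggregationDesc aggr : conf.getAggregators()) {
          for (ExprNodeDesc param : aggr.getParameters()) {
            cols = Utilities.mergeUniqElems(cols, param.getCols());
          }
        }
        return cols;
      }
    }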

      if (tblNames == null || tblNames.size() == 0) {
        return;
      }

      boolean bucketGroupBy = true;
      GroupByDesc desc = curr.getConf();
      List<ExprNodeDesc> groupByKeys = new LinkedList<ExprNodeDesc>();
      groupByKeys.addAll(desc.getKeys());
      // compute groupby columns from groupby keys
      List<String> groupByCols = new ArrayList<String>();
      while (groupByKeys.size() > 0) {
        ExprNodeDesc node = groupByKeys.remove(0);
        if (node instanceof ExprNodeColumnDesc) {
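The excerpt cuts off inside the key scan. The sketch below is one plausible completion, not the Hive source: plain column keys contribute their names to the group-by column list, and any other key shape is treated here as disqualifying the bucketed group-by optimization.

    import java.util.ArrayList;
    import java.util.LinkedList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.GroupByDesc;

    public class BucketGroupBySketch {

      // Drains the key worklist; plain column keys contribute their column
      // names, anything else is treated as disqualifying in this sketch.
      static List<String> groupByColumns(GroupByDesc desc) {
        List<ExprNodeDesc> groupByKeys = new LinkedList<ExprNodeDesc>(desc.getKeys());
        List<String> groupByCols = new ArrayList<String>();
        while (groupByKeys.size() > 0) {
          ExprNodeDesc node = groupByKeys.remove(0);
          if (node instanceof ExprNodeColumnDesc) {
            groupByCols.add(((ExprNodeColumnDesc) node).getColumn());
          } else {
            return null; // non-column key: bucketed group-by does not apply
          }
        }
        return groupByCols;
      }
    }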

            selReplacementCommand);

        // We get our new GroupByOperator here.
        Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables();
        GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next();
        GroupByDesc oldConf = operator.getConf();

        // We need this information to set the correct colList and
        // outputColumnNames in the SelectOperator.
        ExprNodeColumnDesc aggrExprNode = null;

        // Construct the new AggregationDesc to replace the current internal
        // names with the new internal names required by the operator tree.
        GroupByDesc newConf = newGbyOperator.getConf();
        List<AggregationDesc> newAggrList = newConf.getAggregators();
        if (newAggrList != null && newAggrList.size() > 0) {
          for (AggregationDesc aggregationDesc : newAggrList) {
            rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
            aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
            rewriteQueryCtx.setAggrExprNode(aggrExprNode);
          }
        }

        // Now the GroupByOperator has the new aggregation list:
        // sum(`_count_of_indexed_key`) instead of count(indexed_key).
        OpParseContext gbyOPC = rewriteQueryCtx.getOpc().get(operator);
        RowResolver gbyRR = newDAGContext.getOpParseCtx().get(newGbyOperator).getRowResolver();
        gbyOPC.setRowResolver(gbyRR);
        rewriteQueryCtx.getOpc().put(operator, gbyOPC);

        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
        operator.setConf(oldConf);

      } else {
        // We just need to reset the GenericUDAFEvaluator and its name for this
        // GroupByOperator, whose parent is the ReduceSinkOperator.
        GroupByDesc childConf = (GroupByDesc) operator.getConf();
        List<AggregationDesc> childAggrList = childConf.getAggregators();
        if (childAggrList != null && childAggrList.size() > 0) {
          for (AggregationDesc aggregationDesc : childAggrList) {
            List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
            List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
            for (ExprNodeDesc expr : paraList) {
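The excerpt ends inside the loop that collects the parameters' object inspectors. The sketch below shows a likely continuation, assuming FunctionRegistry.getGenericUDAFEvaluator with this four-argument signature from the Hive code base of this era (verify against your version): the inspectors resolve a replacement "sum" evaluator, which is then installed on the AggregationDesc.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.plan.AggregationDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    public class ResetEvaluatorSketch {

      // Re-resolves an AggregationDesc's evaluator after its UDAF name changes,
      // e.g. count -> sum during the aggregate-index rewrite shown above.
      static void resetToSum(AggregationDesc aggregationDesc) throws SemanticException {
        List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
        for (ExprNodeDesc expr : aggregationDesc.getParameters()) {
          parametersOIList.add(expr.getWritableObjectInspector());
        }
        GenericUDAFEvaluator sumEvaluator = FunctionRegistry.getGenericUDAFEvaluator(
            "sum", parametersOIList, false, false);
        aggregationDesc.setGenericUDAFName("sum");
        aggregationDesc.setGenericUDAFEvaluator(sumEvaluator);
      }
    }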

      }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false, groupByMemoryUsage, memoryThreshold),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        reduceSinkOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
  }

          field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        distPartAgg, groupByMemoryUsage, memoryThreshold),
        new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
        groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
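Note that the last two excerpts use an older seven-argument form of the constructor that stops at memoryThreshold, while the earlier snippets pass three additional arguments: the grouping-set keys, a flag for whether grouping sets are present, and the grouping-set position (or null, false, 0 when no grouping sets are involved).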
