Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.GroupByOperator


              CorrelationUtilities.findSiblingOperators(rsop);
          for (Operator<? extends OperatorDesc> op: siblingOPs) {
            if (op instanceof DemuxOperator) {
              parentsOfMux.add(op);
            } else if (op instanceof ReduceSinkOperator){
              GroupByOperator pGBYm =
                  CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
              if (pGBYm != null) {
                // We get a semi join at here.
                // This map-side GroupByOperator needs to be removed
                CorrelationUtilities.removeOperator(
View Full Code Here


    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // GBY,RS,GBY,RS,GBY... (top to bottom)
      GroupByOperator groupByOp = (GroupByOperator) stack.get(stack.size() - 5);
      GroupByOptimizerContext ctx = (GroupByOptimizerContext) procCtx;

      if (!checkGroupByOperatorProcessed(ctx, groupByOp)) {
        processGroupBy(ctx, stack, groupByOp, 4);
      }
View Full Code Here

      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

        return nd;
      }

      for (Node op : stack) {
        if (op instanceof GroupByOperator) {
          GroupByOperator gby = (GroupByOperator) op;
          if (!gby.getConf().isDistinctLike()) {
            // GroupBy not distinct like, disabling
            walkerCtx.convertNotMetadataOnly();
            return nd;
          }
        }
View Full Code Here

            false);

    // ////// 2. Generate GroupbyOperator
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
        new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator(
        parseInfo, dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
        genericUDAFEvaluators);

    int numReducers = -1;
    if (grpByExprs.isEmpty()) {
View Full Code Here

    // does it need an additional MR job
    boolean groupingSetsNeedAdditionalMRJob =
        groupingSetsPresent && groupingSets.size() > newMRJobGroupingSetsThreshold ?
            true : false;

    GroupByOperator groupByOperatorInfo =
        (GroupByOperator) genGroupByPlanMapGroupByOperator(
            qb,
            dest,
            grpByExprs,
            inputOperatorInfo,
View Full Code Here

    }

    // ////// Generate GroupbyOperator for a map-side partial aggregation
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
        new LinkedHashMap<String, GenericUDAFEvaluator>();
    GroupByOperator groupByOperatorInfo =
        (GroupByOperator) genGroupByPlanMapGroupByOperator(
            qb, dest, grpByExprs, inputOperatorInfo, GroupByDesc.Mode.HASH,
            genericUDAFEvaluators, groupingSets, groupingSetsPresent);

    groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(
View Full Code Here

    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) {
      GroupByOperator op = (GroupByOperator)nd;
      TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;

      // Get the key column names, and check if the keys are all constants
      // or columns (not expressions). If yes, proceed.
      List<String> keyColNames =
          TableAccessAnalyzer.getKeyColNames(op.getConf().getKeys());

      if (keyColNames == null) {
        // we are done, since there are no keys to check for
        return null;
      }

      // Walk the operator tree to the TableScan and build the mapping
      // along the way for the columns that the group by uses as keys
      TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
          op.getParentOperators().get(0), keyColNames);

      if (tso == null) {
        // Could not find an allowed path to a table scan operator,
        // hence we are done
        return null;
View Full Code Here

    }

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      FileSinkOperator FS = (FileSinkOperator) nd;
      GroupByOperator cGBY = (GroupByOperator) stack.get(stack.size() - 3);
      ReduceSinkOperator RS = (ReduceSinkOperator) stack.get(stack.size() - 4);
      if (RS.getConf().getNumReducers() != 1 || !RS.getConf().getKeyCols().isEmpty()) {
        return null;
      }
      GroupByOperator pGBY = (GroupByOperator) stack.get(stack.size() - 5);

      Path fileName = FS.getConf().getFinalDirName();
      TableDesc tsDesc = createIntermediateFS(pGBY, fileName);

      for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) {
View Full Code Here

            return null;
          }
        }
        // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
        // we need not to do any instanceof checks for following.
        GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
        ReduceSinkOperator rsOp = (ReduceSinkOperator)gbyOp.getChildren().get(0);
        if (rsOp.getConf().getDistinctColumnIndices().size() > 0) {
          // we can't handle distinct
          return null;
        }

        selOp = (SelectOperator)rsOp.getChildOperators().get(0).getChildOperators().get(0);
        List<AggregationDesc> aggrs = gbyOp.getConf().getAggregators();

        if (!(selOp.getConf().getColList().size() == aggrs.size())) {
          // all select columns must be aggregations
          return null;

        }
        FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0));
        if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
          // looks like a subq plan.
          return null;
        }

        Table tbl = pctx.getTopToTable().get(tsOp);
        List<Object> oneRow = new ArrayList<Object>();
        List<ObjectInspector> ois = new ArrayList<ObjectInspector>();

        Hive hive = Hive.get(pctx.getConf());

        for (AggregationDesc aggr : aggrs) {
          if (aggr.getDistinct()) {
            // our stats for NDV is approx, not accurate.
            return null;
          }
          if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation(
              Description.class).name())) {
            if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
              return null;
            }
            Long rowCnt = getRowCnt(pctx, tsOp, tbl);
            if(rowCnt == null) {
              return null;
            }
            oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
                .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
            ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                PrimitiveCategory.DECIMAL));
          }
          else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
              Description.class).name())) {
            Long rowCnt = 0L;
            if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
                ExprNodeConstantDesc)) {
              // Its either count (*) or count(1) case
              rowCnt = getRowCnt(pctx, tsOp, tbl);
              if(rowCnt == null) {
                return null;
              }
            } else {
              // Its count(col) case
              if (!(aggr.getParameters().get(0) instanceof ExprNodeColumnDesc)) {
                // this is weird, we got expr or something in there, bail out
                Log.debug("Unexpected expression : " + aggr.getParameters().get(0));
                return null;
              }
              ExprNodeColumnDesc desc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
              String colName = desc.getColumn();
              StatType type = getType(desc.getTypeString());
              if(!tbl.isPartitioned()) {
                if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                  Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                  return null;
                }
                rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
                if (rowCnt < 1) {
                  Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
                  return null;
                }
                List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                    tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
                if (stats.isEmpty()) {
                  Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                  return null;
                }
                Long nullCnt = getNullcountFor(type, stats.get(0).getStatsData());
                if (null == nullCnt) {
                  Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
                } else {
                  rowCnt -= nullCnt;
                }
              } else {
                Set<Partition> parts = pctx.getPrunedPartitions(
                    tsOp.getConf().getAlias(), tsOp).getPartitions();
                for (Partition part : parts) {
                  if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
                    Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
                    return null;
                  }
                  Long partRowCnt = Long.parseLong(part.getParameters()
                      .get(StatsSetupConst.ROW_COUNT));
                  if (partRowCnt < 1) {
                    Log.debug("Partition doesn't have upto date stats " + part.getSpec());
                    return null;
                  }
                  rowCnt += partRowCnt;
                }
                Collection<List<ColumnStatisticsObj>> result =
                    verifyAndGetPartStats(hive, tbl, colName, parts);
                if (result == null) {
                  return null; // logging inside
                }
                for (List<ColumnStatisticsObj> statObj : result) {
                  ColumnStatisticsData statData = validateSingleColStat(statObj);
                  if (statData == null) return null;
                  Long nullCnt = getNullcountFor(type, statData);
                  if (nullCnt == null) {
                    Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
                        "metadata optimizer for column : " + colName);
                    return null;
                  } else {
                    rowCnt -= nullCnt;
                  }
                }
              }
            }
            oneRow.add(rowCnt);
            ois.add(PrimitiveObjectInspectorFactory.
                getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
          } else if (aggr.getGenericUDAFName().equals(GenericUDAFMax.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if(!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
              if (stats.isEmpty()) {
                Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
                return null;
              }
              ColumnStatisticsData statData = stats.get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getHighValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default:
                  // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(
                  tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch (type) {
                case Integeral: {
                  long maxVal = Long.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    long curVal = statData.getLongStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  double maxVal = Double.MIN_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    double curVal = statData.getDoubleStats().getHighValue();
                    maxVal = Math.max(maxVal, curVal);
                  }
                  oneRow.add(maxVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                }
                default:
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            }
          }  else if (aggr.getGenericUDAFName().equals(GenericUDAFMin.class.getAnnotation(
              Description.class).name())) {
            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)aggr.getParameters().get(0);
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if (!tbl.isPartitioned()) {
              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
                Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
                return null;
              }
              ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
                  tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
                  .get(0).getStatsData();
              switch (type) {
                case Integeral:
                  oneRow.add(statData.getLongStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                case Double:
                  oneRow.add(statData.getDoubleStats().getLowValue());
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                default: // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;
              }
            } else {
              Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
              switch(type) {
                case Integeral: {
                  long minVal = Long.MAX_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    long curVal = statData.getLongStats().getLowValue();
                    minVal = Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
                  break;
                }
                case Double: {
                  double minVal = Double.MAX_VALUE;
                  Collection<List<ColumnStatisticsObj>> result =
                      verifyAndGetPartStats(hive, tbl, colName, parts);
                  if (result == null) {
                    return null; // logging inside
                  }
                  for (List<ColumnStatisticsObj> statObj : result) {
                    ColumnStatisticsData statData = validateSingleColStat(statObj);
                    if (statData == null) return null;
                    double curVal = statData.getDoubleStats().getLowValue();
                    minVal = Math.min(minVal, curVal);
                  }
                  oneRow.add(minVal);
                  ois.add(PrimitiveObjectInspectorFactory.
                      getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
                  break;
                }
                default: // unsupported type
                  Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                      "metadata optimizer for column : " + colName);
                  return null;

              }
            }
          } else { // Unsupported aggregation.
            Log.debug("Unsupported aggregation for metadata optimizer: "
                + aggr.getGenericUDAFName());
            return null;
          }
        }


        List<List<Object>> allRows = new ArrayList<List<Object>>();
        allRows.add(oneRow);

        List<String> colNames = new ArrayList<String>();
        for (ColumnInfo colInfo: gbyOp.getSchema().getSignature()) {
          colNames.add(colInfo.getInternalName());
        }
        StandardStructObjectInspector sOI = ObjectInspectorFactory.
            getStandardStructObjectInspector(colNames, ois);
        FetchWork fWork = new FetchWork(allRows, sOI);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.GroupByOperator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.