Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc
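
ReduceSinkDesc is the descriptor that configures a ReduceSinkOperator in a Hive query plan. It carries the reduce key columns, value columns, partition columns, sort order, join tag, number of distribution keys, and the number of reducers, and Hive's optimizers read and rewrite it freely. The excerpts below show typical uses.

Example 1: collecting join keys. Each parent of a join is a ReduceSink; its ReduceSinkDesc yields the key expressions for that input position.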



    // get the join keys from old parent ReduceSink operators
    for (byte pos = 0; pos < op.getParentOperators().size(); pos++) {
      ReduceSinkOperator parent = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = parent.getConf();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(pos, keys);
    }

    List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
    StringBuilder keyOrder = new StringBuilder();
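    // A plausible continuation, not shown in this excerpt: ReduceSinkDesc.getOrder()
    // encodes one '+' (ascending) or '-' (descending) character per key column, so
    // the builder would typically append one direction character per join key.
    for (int i = 0; i < keyCols.size(); i++) {
      keyOrder.append("+");
    }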
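Example 2: setting reducer parallelism (Tez). The processor skips sinks it has already visited, honors a user-fixed reducer count, and otherwise sums the data-size statistics of the sink's siblings and asks Utilities.estimateReducers for a count to store via setNumReducers().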


  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
      Object... nodeOutputs) throws SemanticException {

    OptimizeTezProcContext context = (OptimizeTezProcContext) procContext;

    ReduceSinkOperator sink = (ReduceSinkOperator) nd;
    ReduceSinkDesc desc = sink.getConf();

    long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
    int maxReducers = context.conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
    int constantReducers = context.conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);

    if (context.visitedReduceSinks.contains(sink)) {
      // skip walking the children
      LOG.debug("Already processed reduce sink: " + sink.getName());
      return true;
    }

    context.visitedReduceSinks.add(sink);

    if (desc.getNumReducers() <= 0) {
      if (constantReducers > 0) {
        LOG.info("Parallelism for reduce sink "+sink+" set by user to "+constantReducers);
        desc.setNumReducers(constantReducers);
      } else {
        long numberOfBytes = 0;

        // we need to add up all the estimates from the siblings of this reduce sink
        for (Operator<? extends OperatorDesc> sibling:
          sink.getChildOperators().get(0).getParentOperators()) {
          if (sibling.getStatistics() != null) {
            numberOfBytes += sibling.getStatistics().getDataSize();
          } else {
            LOG.warn("No stats available from: "+sibling);
          }
        }

        int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer,
            maxReducers, false);
        LOG.info("Set parallelism for reduce sink "+sink+" to: "+numReducers);
        desc.setNumReducers(numReducers);
      }
    } else {
      LOG.info("Number of reducers determined to be: "+desc.getNumReducers());
    }

    return false;
  }
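
For intuition, the estimate from Utilities.estimateReducers above boils down to ceil(totalBytes / bytesPerReducer) clamped into [1, maxReducers]. A minimal self-contained sketch of that arithmetic (illustrative only, not the Hive implementation; the class name is made up):

  // ReducerEstimateSketch: the clamping arithmetic behind the estimate above.
  public class ReducerEstimateSketch {
    // ceil(totalBytes / bytesPerReducer), clamped to [1, maxReducers].
    static int estimate(long totalBytes, long bytesPerReducer, int maxReducers) {
      int reducers = (int) Math.ceil((double) totalBytes / bytesPerReducer);
      return Math.min(maxReducers, Math.max(1, reducers));
    }

    public static void main(String[] args) {
      // 10 GB of estimated input, 256 MB per reducer, capped at 1009 reducers.
      System.out.println(estimate(10L << 30, 256L << 20, 1009)); // prints 40
    }
  }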
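Example 3: pruning a ReduceSink's value columns. The first helper computes one retain flag per value expression; the second drops the unretained columns from the row resolver, schema, and column expression map, then rebuilds the output value column names, value expressions, and value serialization TableDesc.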


  private static boolean[] getPruneReduceSinkOpRetainFlags(
      List<String> retainedParentOpOutputCols, ReduceSinkOperator reduce) {
    ReduceSinkDesc reduceConf = reduce.getConf();
    java.util.ArrayList<ExprNodeDesc> originalValueEval = reduceConf
        .getValueCols();
    boolean[] flags = new boolean[originalValueEval.size()];
    for (int i = 0; i < originalValueEval.size(); i++) {
      flags[i] = false;
      List<String> current = originalValueEval.get(i).getCols();
      // ... (rest of the loop elided in this excerpt: it sets flags[i] to true
      // when the i-th value expression's columns need to be retained) ...
    }

    return flags;
  }

  private static void pruneReduceSinkOperator(boolean[] retainFlags,
      ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> old_signature = oldRR.getRowSchema().getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

    for (int i = 0; i < retainFlags.length; i++) {
      String outputCol = valueColNames.get(i);
      ExprNodeDesc outputColExpr = valueExprs.get(i);
      if (!retainFlags[i]) {
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }

        // Only remove information of a column if it is not a key,
        // i.e. this column is not appearing in keyExprs of the RS
        if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
          ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
          oldRR.getInvRslvMap().remove(colInfo.getInternalName());
          oldMap.remove(outputCol);
          signature.remove(colInfo);
        }

      } else {
        newValueColNames.add(outputCol);
        newValueExprs.add(outputColExpr);
      }
    }

    oldRR.getRowSchema().setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils
        .getFieldSchemasFromColumnList(reduceConf.getValueCols(),
        newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
  }
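
The loop above filters the value column names and expressions in lockstep with the retain flags. The same pattern in isolation (a generic sketch, not part of Hive; the class and method names are made up):

  import java.util.ArrayList;
  import java.util.List;

  class RetainFlagSketch {
    // Keep items.get(i) exactly when flags[i] is true.
    static <T> List<T> keepFlagged(List<T> items, boolean[] flags) {
      List<T> kept = new ArrayList<T>(items.size());
      for (int i = 0; i < flags.length; i++) {
        if (flags[i]) {
          kept.add(items.get(i));
        }
      }
      return kept;
    }
  }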
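Example 4: the column-pruner node processor for ReduceSink. Key columns are always needed; the child's pruned columns are mapped back onto the sink's value columns through the column expression map, and if any reference cannot be resolved (e.g. count or sum distinct rewrites), all value columns are kept.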

  public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      ReduceSinkDesc conf = op.getConf();

      List<String> colLists = new ArrayList<String>();
      ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
      for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
      }

      assert op.getNumChild() == 1;

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

      List<String> childCols;
      if (child instanceof CommonJoinOperator) {
        childCols = cppCtx.getJoinPrunedColLists().get(child)
            .get((byte) conf.getTag());
      } else {
        childCols = cppCtx.getPrunedColList(child);

      }
      if (childCols != null) {
        /*
         * in the case of count(or sum) distinct if we are not able to map
         * a parameter column references back to the ReduceSink value columns
         * we give up and assume all columns are needed.
         */
        boolean hasUnresolvedReference = false;
        boolean[] flags = new boolean[conf.getValueCols().size()];
        Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();
        for (String childCol : childCols) {
          ExprNodeDesc desc = exprMap.get(childCol);
          int index = conf.getValueCols().indexOf(desc);
          if (index < 0) {
            hasUnresolvedReference = desc == null || ExprNodeDescUtils.indexOf(desc, conf.getKeyCols()) < 0;
            if ( hasUnresolvedReference ) {
              break;
            }
            continue;
          }
          flags[index] = true;
          colLists = Utilities.mergeUniqElems(colLists, desc.getCols());
        }
       
        if ( hasUnresolvedReference ) {
          for (ExprNodeDesc val : conf.getValueCols()) {
            colLists = Utilities.mergeUniqElems(colLists, val.getCols());
          }
          cppCtx.getPrunedColLists().put(op, colLists);
          return null;
        }
       
        Collections.sort(colLists);
        pruneReduceSinkOperator(flags, op, cppCtx);
        cppCtx.getPrunedColLists().put(op, colLists);
        return null;
      }
     
      // Reduce Sink contains the columns needed - no need to aggregate from
      // children
      ArrayList<ExprNodeDesc> vals = conf.getValueCols();
      for (ExprNodeDesc val : vals) {
        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
      }

      cppCtx.getPrunedColLists().put(op, colLists);
      return null;
    }
  }
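Example 5: using the join tag. The input to a join is always a ReduceSink, and the tag on its ReduceSinkDesc picks out the expression list of that input inside the JoinDesc.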

      JoinOperator op = (JoinOperator)nd;
      JoinDesc jd = op.getConf();

      // The input operator to the join is always a reduce sink operator
      ReduceSinkOperator inpOp = (ReduceSinkOperator)getParent(stack);
      ReduceSinkDesc rd = inpOp.getConf();
      int tag = rd.getTag();

      // Iterate over the outputs of the join operator and merge the
      // dependencies of the columns that correspond to the tag.
      int cnt = 0;
      List<ExprNodeDesc> exprs = jd.getExprs().get((byte)tag);
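Example 6: skew join optimization. The join keys live on the parent ReduceSinks' descriptors (getKeyCols()), where they can be intersected with the skewed columns recorded for each table.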

      Map <List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
          new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

      // The join keys are available in the reduceSinkOperators before join
      for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
        ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

        if (rsDesc.getKeyCols() != null) {
          Table table = null;
          // Find the skew information corresponding to the table
          List<String> skewedColumns = null;
          List<List<String>> skewedValueList = null;

          // The join columns which are also skewed
          List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
            new ArrayList<ExprNodeDescEqualityWrapper>();

          // skewed Keys which intersect with join keys
          List<Integer> positionSkewedKeys = new ArrayList<Integer>();

          // Update the joinKeys appropriately.
          for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
            ExprNodeColumnDesc keyCol = null;

            // If the key column is not a column, then don't apply this optimization.
            // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
            // for type conversion UDFs.
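Example 7: deduplicating a child ReduceSink against the ReduceSinks above a join. The merge is rejected when the child has more key or partition columns than the parents, or when reducer counts and sort orders are incompatible.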

    // A child ReduceSink cannot be merged when it has more key/partition columns than the parents.
    protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
        throws SemanticException {
      List<Operator<?>> parents = pJoin.getParentOperators();
      ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
      ReduceSinkDesc cRSc = cRS.getConf();
      ReduceSinkDesc pRS0c = pRSs[0].getConf();
      if (cRSc.getKeyCols().size() > pRS0c.getKeyCols().size()) {
        return false;
      }
      if (cRSc.getPartitionCols().size() > pRS0c.getPartitionCols().size()) {
        return false;
      }
      Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
      if (moveReducerNumTo == null ||
          moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
        return false;
      }

      Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
      if (moveRSOrderTo == null) {
        return false;
      }

      boolean[] sorted = CorrelationUtilities.getSortedTags(pJoin);
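Example 8: comparing a child and a parent ReduceSink category by category. Each check returns a merge direction, and the first conflicting category aborts the whole comparison with null.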

    /**
     * Returns the merge direction per category (key columns, partition columns,
     * sort order, number of reducers, number of distribution keys), or null as
     * a whole when the two ReduceSinks cannot be merged. Each entry is -1, 0, or 1:
     * 1. for 0, the configurations of child RS and parent RS are the same
     * 2. for -1, configuration of parent RS is more specific than child RS
     * 3. for 1, configuration of child RS is more specific than parent RS
     */
    private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      ReduceSinkDesc cConf = cRS.getConf();
      ReduceSinkDesc pConf = pRS.getConf();
      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
      if (moveRSOrderTo == null) {
        return null;
      }
      Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
      if (moveReducerNumTo == null ||
          moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
        return null;
      }
      List<ExprNodeDesc> ckeys = cConf.getKeyCols();
      List<ExprNodeDesc> pkeys = pConf.getKeyCols();
      Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
      if (moveKeyColTo == null) {
        return null;
      }
      List<ExprNodeDesc> cpars = cConf.getPartitionCols();
      List<ExprNodeDesc> ppars = pConf.getPartitionCols();
      Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
      if (movePartitionColTo == null) {
        return null;
      }
      Integer moveNumDistKeyTo = checkNumDistributionKey(cConf.getNumDistributionKeys(),
          pConf.getNumDistributionKeys());
      return new int[] {moveKeyColTo, movePartitionColTo, moveRSOrderTo,
          moveReducerNumTo, moveNumDistKeyTo};
    }
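
The -1/0/1 convention is easiest to see in a single category. Taking the reducer count as the example and treating values <= 0 as "not set" (as in Example 2), a plausible sketch of one category check (assumed shape, not the verbatim Hive method):

    // Merge direction for the reducer-count category:
    //  0    -> both sides agree, or neither pins a count
    //  -1   -> only the parent pins a count (parent more specific)
    //  1    -> only the child pins a count (child more specific)
    //  null -> both pin different counts; the sinks cannot be merged
    static Integer checkNumReducerSketch(int childNum, int parentNum) {
      if (childNum <= 0 && parentNum <= 0) return 0;
      if (childNum <= 0) return -1;
      if (parentNum <= 0) return 1;
      if (childNum == parentNum) return 0;
      return null; // conflicting fixed counts -> not mergeable
    }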
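Example 9: map-reduce plan generation. The reducer and its task count, taken from getNumReducers() on the descriptor, are installed into the plan's ReduceWork.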

    Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());

    if (needsTagging(plan.getReduceWork())) {
      plan.getReduceWork().setNeedsTagging(true);
    }
