Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.RowSchema
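The snippets below appear to be drawn from Hive's query compiler (SemanticAnalyzer and related planning code). They all follow the same idiom: a RowSchema is built from a RowResolver's list of ColumnInfo objects and handed to OperatorFactory.getAndMakeChild, so the newly created operator knows the shape of the rows it will emit. The putOpInsertMap(...) wrapper seen throughout is a SemanticAnalyzer helper that records the operator-to-RowResolver mapping in the parse context. As a minimal, self-contained sketch of the idiom (the RowSchemaExample class and makeIdentitySelect method are illustrative names, not part of Hive, and exact signatures may vary between Hive versions):

import java.io.Serializable;
import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

// Illustrative helper (not part of Hive): builds an identity SELECT child
// operator whose RowSchema mirrors the parent's RowResolver, following the
// pattern used throughout the snippets on this page.
public final class RowSchemaExample {

  public static Operator<? extends Serializable> makeIdentitySelect(
      RowResolver inputRR, Operator<? extends Serializable> parent) {
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> columnNames = new ArrayList<String>();

    // One ExprNodeColumnDesc per input column, so the SELECT simply forwards
    // every column it receives.
    for (ColumnInfo col : inputRR.getColumnInfos()) {
      colList.add(new ExprNodeColumnDesc(col.getType(), col.getInternalName(),
          col.getTabAlias(), col.getIsVirtualCol()));
      columnNames.add(col.getInternalName());
    }

    // The RowSchema is just the ordered list of ColumnInfos describing the
    // rows this operator will emit; here it is copied from the input.
    return OperatorFactory.getAndMakeChild(
        new SelectDesc(colList, columnNames, true),
        new RowSchema(inputRR.getColumnInfos()), parent);
  }
}

Each example that follows applies the same pattern to a different operator: reduce sinks, selects, group-bys, forwards, unions, table scans with sampling filters, and a lateral view join.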



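    // Join path: wrap the ReduceSinkDesc in a ReduceSinkOperator whose RowSchema
    // is taken from the join's output RowResolver (outputRS).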
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
        reduceValues, outputColumns, false, joinTree.getNextTag(),
        reduceKeys.size(), numReds), new RowSchema(outputRS
        .getColumnInfos()), child), outputRS);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }


      columnNames.add(exprNode.getColumn());
    }

    // create selection operator
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new SelectDesc(colList, columnNames, false), new RowSchema(inputRR
        .getColumnInfos()), input), inputRR);

    output.setColumnExprMap(input.getColumnExprMap());
    return output;
  }

    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
        false, groupByMemoryUsage, memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()),
        inputOperatorInfo), groupByOutputRowResolver);

    op.setColumnExprMap(colExprMap);
    return op;
  }

      colList.add(new ExprNodeColumnDesc(col.getType(), col.getInternalName(),
          col.getTabAlias(), col.getIsVirtualCol()));
      columnNames.add(col.getInternalName());
    }
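    // Identity SELECT: project every input column through unchanged; the new
    // operator's RowSchema simply reuses the input RowResolver's ColumnInfos.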
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new SelectDesc(colList, columnNames, true), new RowSchema(inputRR
        .getColumnInfos()), input), inputRR);
    output.setColumnExprMap(input.getColumnExprMap());
    return output;
  }


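    // Another ReduceSinkOperator: here the RowSchema is built from the reduce
    // sink's own output RowResolver (reduceSinkOutputRowResolver).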
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
        reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1),
        new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
        reduceSinkOutputRowResolver);

    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

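    // ReduceSink with explicit partition keys and sort order: the RowSchema is
    // again derived from the reduce sink's output RowResolver.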
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(
            reduceKeys, reduceValues,
            outputColumnNames, true, -1,
            reducePartKeys, order.toString(), -1),
            new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
            reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

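      // Multi-group-by case: a common reduce sink feeds a ForwardOperator, and each
      // GROUP BY destination then gets its own group-by / select / limit / file-sink plan.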
      curr = createCommonReduceSink1(qb, input);

      RowResolver currRR = opParseCtx.get(curr).getRowResolver();
      // create a forward operator
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
          new RowSchema(currRR.getColumnInfos()), curr), currRR);

      for (String dest : ks) {
        curr = input;
        curr = genGroupByPlan1MRMultiGroupBy(dest, qb, curr);
        curr = genSelectPlan(dest, qb, curr);
        Integer limit = qbp.getDestLimit(dest);
        if (limit != null) {
          curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true);
          qb.getParseInfo().setOuterQueryLimit(limit.intValue());
        }
        curr = genFileSinkPlan(dest, qb, curr);
      }
    }
    // And if there is a single distinct, optimize that: spray initially by the
    // distinct key, do no computation at the mapper, and have multiple group-by
    // operators at the reducer - and then proceed.
    else if (optimizeMultiGroupBy) {
      curr = createCommonReduceSink(qb, input);

      RowResolver currRR = opParseCtx.get(curr).getRowResolver();
      // create a forward operator
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
          new RowSchema(currRR.getColumnInfos()), curr), currRR);

      for (String dest : ks) {
        curr = input;
        curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
        curr = genSelectPlan(dest, qb, curr);


    // Create a new union operator
    Operator<? extends Serializable> unionforward = OperatorFactory
        .getAndMakeChild(new UnionDesc(), new RowSchema(unionoutRR
        .getColumnInfos()));

    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends Serializable>> child =
      new ArrayList<Operator<? extends Serializable>>();

      // Create the root of the operator tree
      TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
      setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);

      top = putOpInsertMap(OperatorFactory.get(tsDesc,
          new RowSchema(rwsch.getColumnInfos())), rwsch);

      // Add this to the list of top operators - we always start from a table
      // scan
      topOps.put(alias_id, top);

      // Add a mapping from the table scan operator to Table
      topToTable.put((TableScanOperator) top, tab);
    } else {
      rwsch = opParseCtx.get(top).getRowResolver();
      top.setChildOperators(null);
    }

    // check if this table is sampled and needs more than input pruning
    Operator<? extends Serializable> tableOp = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
      int num = ts.getNumerator();
      int den = ts.getDenominator();
      ArrayList<ASTNode> sampleExprs = ts.getExprs();

      // TODO: Do the type checking of the expressions
      List<String> tabBucketCols = tab.getBucketCols();
      int numBuckets = tab.getNumBuckets();

      // If there are no sample cols and no bucket cols then throw an error
      if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
        throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
            + tab.getTableName());
      }

      if (num > den) {
        throw new SemanticException(
            ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " "
            + tab.getTableName());
      }

      // check if a predicate is needed
      // predicate is needed if either input pruning is not enough
      // or if input pruning is not possible

      // check if the sample columns are the same as the table bucket columns
      boolean colsEqual = true;
      if ((sampleExprs.size() != tabBucketCols.size())
          && (sampleExprs.size() != 0)) {
        colsEqual = false;
      }

      for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
        boolean colFound = false;
        for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
          if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
            break;
          }

          if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
              .equalsIgnoreCase(tabBucketCols.get(j))) {
            colFound = true;
          }
        }
        colsEqual = (colsEqual && colFound);
      }

      // Check if input can be pruned
      ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));

      // check if input pruning is enough
      if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)
          && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {

        // input pruning is enough; add the filter for the optimizer to use it
        // later
        LOG.info("No need for sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
            .getDenominator(), tabBucketCols, true)),
            new RowSchema(rwsch.getColumnInfos()), top);
      } else {
        // need to add filter
        // create tableOp to be filterDesc and set as child to 'top'
        LOG.info("Need sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true),
            new RowSchema(rwsch.getColumnInfos()), top);
      }
    } else {
      boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
      if (testMode) {
        String tabName = tab.getTableName();

        // Has the user explicitly asked not to sample this table?
        String unSampleTblList = conf
            .getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
        String[] unSampleTbls = unSampleTblList.split(",");
        boolean unsample = false;
        for (String unSampleTbl : unSampleTbls) {
          if (tabName.equalsIgnoreCase(unSampleTbl)) {
            unsample = true;
          }
        }

        if (!unsample) {
          int numBuckets = tab.getNumBuckets();

          // If the input table is bucketed, choose the first bucket
          if (numBuckets > 0) {
            TableSample tsSample = new TableSample(1, numBuckets);
            tsSample.setInputPruning(true);
            qb.getParseInfo().setTabSample(alias, tsSample);
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
            tableOp = OperatorFactory
                .getAndMakeChild(new FilterDesc(samplePred, true,
                new sampleDesc(tsSample.getNumerator(), tsSample
                .getDenominator(), tab.getBucketCols(), true)),
                new RowSchema(rwsch.getColumnInfos()), top);
            LOG.info("No need for sample filter");
          } else {
            // The table is not bucketed, add a dummy filter :: rand()
            int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
            TableSample tsSample = new TableSample(1, freq);
            tsSample.setInputPruning(false);
            qb.getParseInfo().setTabSample(alias, tsSample);
            LOG.info("Need sample filter");
            ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor
                .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer
                .valueOf(460476415)));
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false,
                alias, rwsch, qb.getMetaData(), randFunc);
            tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
                samplePred, true),
                new RowSchema(rwsch.getColumnInfos()), top);
          }
        }
      }
    }

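          // Lateral view: first copy each source column into the lateral view
          // forward operator's RowResolver (loop shown partially).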
            String[] tabCol = source.reverseLookup(col.getInternalName());
            lvForwardRR.put(tabCol[0], tabCol[1], col);
          }

          Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
              new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()),
              op), lvForwardRR);

          // The order in which the two paths are added is important. The
          // lateral view join operator depends on having the select operator
          // give it the row first.

          // Get the all path by making a select(*).
          RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
          //Operator allPath = op;
          Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
                            new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
                            lvForward), allPathRR);
          // Get the UDTF Path
          QB blankQb = new QB(null, null, false);
          Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
              .getChild(0), blankQb, lvForward);
          // add udtf aliases to QB
          for (String udtfAlias : blankQb.getAliases()) {
            qb.addAlias(udtfAlias);
          }
          RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();

          // Merge the two into the lateral view join
          // The cols of the merged result will be the combination of both the
          // cols of the UDTF path and the cols of the all path. The internal
          // names have to be changed to avoid conflicts

          RowResolver lateralViewRR = new RowResolver();
          ArrayList<String> outputInternalColNames = new ArrayList<String>();

          LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames);
          LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames);

          // For PPD, we need a column to expression map so that during the walk,
          // the processor knows how to transform the internal col names.
          // Following steps are dependent on the fact that we called
          // LVmerge.. in the above order
          Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();

          int i=0;
          for (ColumnInfo c : allPathRR.getColumnInfos()) {
            String internalName = getColumnInternalName(i);
            i++;
            colExprMap.put(internalName,
                new ExprNodeColumnDesc(c.getType(), c.getInternalName(),
                    c.getTabAlias(), c.getIsVirtualCol()));
          }

          Operator lateralViewJoin = putOpInsertMap(OperatorFactory
              .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames),
                  new RowSchema(lateralViewRR.getColumnInfos()), allPath,
                  udtfPath), lateralViewRR);
          lateralViewJoin.setColumnExprMap(colExprMap);
          op = lateralViewJoin;
        }
        e.setValue(op);
