Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.RowSchema
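RowSchema describes the shape of the rows an operator produces: it is essentially an ordered list of ColumnInfo entries (internal column name, type, table alias, virtual-column flag). In the excerpts below it is almost always built as new RowSchema(rowResolver.getColumnInfos()) and handed to OperatorFactory.getAndMakeChild when a child operator is created. A minimal sketch, assuming the Hive-0.x-era APIs used in these excerpts; the class name, method name, parentOp, the "_col0"/"_col1" column names and the "t" alias are illustrative only:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaSketch {
  // Sketch only: names and columns are made up for illustration.
  static Operator<SelectDesc> attachSelectStar(Operator<? extends OperatorDesc> parentOp) {
    // Build a two-column schema by hand; each ColumnInfo carries the internal
    // name, the type, the table alias and whether the column is virtual.
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
    signature.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
    signature.add(new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, "t", false));
    RowSchema schema = new RowSchema(signature);

    // In the planner the schema is normally derived from a RowResolver instead:
    //   new RowSchema(rowResolver.getColumnInfos())
    // Passing it to getAndMakeChild gives the new child its output row shape
    // and wires it underneath parentOp.
    return OperatorFactory.getAndMakeChild(new SelectDesc(true), schema, parentOp);
  }
}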


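// Multi-group-by body plan: a ForwardOperator is added after the common reduce sink,
// taking its RowSchema from that operator's RowResolver; each destination then gets
// its own group-by and select plan.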
      curr = createCommonReduceSink(qb, input);

      RowResolver currRR = opParseCtx.get(curr).getRowResolver();
      // create a forward operator
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
          new RowSchema(currRR.getColumnInfos()), curr), currRR);

      for (String dest : ks) {
        curr = input;
        curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
        curr = genSelectPlan(dest, qb, curr);
View Full Code Here


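// Union planning: the UnionOperator is created with a RowSchema built from the union
// output RowResolver, then attached as a child of both input branches.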
      }
    }

    // Create a new union operator
    Operator<? extends OperatorDesc> unionforward = OperatorFactory
        .getAndMakeChild(new UnionDesc(), new RowSchema(unionoutRR
            .getColumnInfos()));

    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends OperatorDesc>> child =
        new ArrayList<Operator<? extends OperatorDesc>>();
View Full Code Here

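// A SelectOperator is placed on top of origInputOp; its RowSchema comes from the
// RowResolver and a column-to-expression map is attached.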
      colName.add(name);
      columnExprMap.put(name, columns.get(i));
    }

    Operator<SelectDesc> newInputOp = OperatorFactory.getAndMakeChild(
        new SelectDesc(columns, colName), new RowSchema(rowResolver.getColumnInfos()),
        columnExprMap, origInputOp);
    return putOpInsertMap(newInputOp, rowResolver);
  }
View Full Code Here

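// Table-scan planning: the TableScanOperator is created with the table's RowSchema;
// when TABLESAMPLE (or test-mode sampling) applies, a FilterOperator with the same
// RowSchema is added on top of it.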
        tsDesc.setRowLimit(sample.getRowCount());
        nameToSplitSample.remove(alias_id);
      }

      top = putOpInsertMap(OperatorFactory.get(tsDesc,
          new RowSchema(rwsch.getColumnInfos())), rwsch);

      // Add this to the list of top operators - we always start from a table
      // scan
      topOps.put(alias_id, top);

      // Add a mapping from the table scan operator to Table
      topToTable.put((TableScanOperator) top, tab);
      Map<String, String> props = qb.getTabPropsForAlias(alias);
      if (props != null) {
        topToTableProps.put((TableScanOperator) top, props);
      }
    } else {
      rwsch = opParseCtx.get(top).getRowResolver();
      top.setChildOperators(null);
    }

    // check if this table is sampled and needs more than input pruning
    Operator<? extends OperatorDesc> tableOp = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
      int num = ts.getNumerator();
      int den = ts.getDenominator();
      ArrayList<ASTNode> sampleExprs = ts.getExprs();

      // TODO: Do the type checking of the expressions
      List<String> tabBucketCols = tab.getBucketCols();
      int numBuckets = tab.getNumBuckets();

      // If there are no sample cols and no bucket cols then throw an error
      if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
        throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
            + tab.getTableName());
      }

      if (num > den) {
        throw new SemanticException(
            ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " "
                + tab.getTableName());
      }

      // check if a predicate is needed
      // predicate is needed if either input pruning is not enough
      // or if input pruning is not possible

      // check if the sample columns are the same as the table bucket columns
      boolean colsEqual = true;
      if ((sampleExprs.size() != tabBucketCols.size())
          && (sampleExprs.size() != 0)) {
        colsEqual = false;
      }

      for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
        boolean colFound = false;
        for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
          if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
            break;
          }

          if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
              .equalsIgnoreCase(tabBucketCols.get(j))) {
            colFound = true;
          }
        }
        colsEqual = (colsEqual && colFound);
      }

      // Check if input can be pruned
      ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));

      // check if input pruning is enough
      if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)
          && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {

        // input pruning is enough; add the filter for the optimizer to use it
        // later
        LOG.info("No need for sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
                .getDenominator(), tabBucketCols, true)),
            new RowSchema(rwsch.getColumnInfos()), top);
      } else {
        // need to add a filter
        // create tableOp as a FilterOperator and make it a child of 'top'
        LOG.info("Need sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true),
            new RowSchema(rwsch.getColumnInfos()), top);
      }
    } else {
      boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
      if (testMode) {
        String tabName = tab.getTableName();

        // Has the user explicitly asked not to sample this table?
        String unSampleTblList = conf
            .getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
        String[] unSampleTbls = unSampleTblList.split(",");
        boolean unsample = false;
        for (String unSampleTbl : unSampleTbls) {
          if (tabName.equalsIgnoreCase(unSampleTbl)) {
            unsample = true;
          }
        }

        if (!unsample) {
          int numBuckets = tab.getNumBuckets();

          // If the input table is bucketed, choose the first bucket
          if (numBuckets > 0) {
            TableSample tsSample = new TableSample(1, numBuckets);
            tsSample.setInputPruning(true);
            qb.getParseInfo().setTabSample(alias, tsSample);
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
            tableOp = OperatorFactory
                .getAndMakeChild(new FilterDesc(samplePred, true,
                    new sampleDesc(tsSample.getNumerator(), tsSample
                        .getDenominator(), tab.getBucketCols(), true)),
                    new RowSchema(rwsch.getColumnInfos()), top);
            LOG.info("No need for sample filter");
          } else {
            // The table is not bucketed; add a dummy filter based on rand()
            int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
            TableSample tsSample = new TableSample(1, freq);
            tsSample.setInputPruning(false);
            qb.getParseInfo().setTabSample(alias, tsSample);
            LOG.info("Need sample filter");
            ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor
                .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer
                    .valueOf(460476415)));
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false,
                alias, rwsch, qb.getMetaData(), randFunc);
            tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
                samplePred, true),
                new RowSchema(rwsch.getColumnInfos()), top);
          }
        }
      }
    }
View Full Code Here

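// Lateral view planning: a LateralViewForwardOperator feeds a select(*) path and a
// UDTF path, which are merged by a LateralViewJoinOperator; every operator is built
// with a RowSchema taken from its RowResolver.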
      String[] tabCol = source.reverseLookup(col.getInternalName());
      lvForwardRR.put(tabCol[0], tabCol[1], col);
    }

    Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()),
        op), lvForwardRR);

    // The order in which the two paths are added is important. The
    // lateral view join operator depends on having the select operator
    // give it the row first.

    // Get the all path by making a select(*).
    RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
    // Operator allPath = op;
    Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
        lvForward), allPathRR);
    // Get the UDTF Path
    QB blankQb = new QB(null, null, false);
    Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
        .getChild(0), blankQb, lvForward,
        lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
    // add udtf aliases to QB
    for (String udtfAlias : blankQb.getAliases()) {
      qb.addAlias(udtfAlias);
    }
    RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();

    // Merge the two into the lateral view join
    // The cols of the merged result will be the combination of both the
    // cols of the UDTF path and the cols of the all path. The internal
    // names have to be changed to avoid conflicts

    RowResolver lateralViewRR = new RowResolver();
    ArrayList<String> outputInternalColNames = new ArrayList<String>();

    LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames);
    LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames);

    // For PPD, we need a column to expression map so that during the walk,
    // the processor knows how to transform the internal col names.
    // The following steps depend on LVmergeRowResolvers having been called
    // in the order above.
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();

    int i = 0;
    for (ColumnInfo c : allPathRR.getColumnInfos()) {
      String internalName = getColumnInternalName(i);
      i++;
      colExprMap.put(internalName,
          new ExprNodeColumnDesc(c.getType(), c.getInternalName(),
              c.getTabAlias(), c.getIsVirtualCol()));
    }

    Operator lateralViewJoin = putOpInsertMap(OperatorFactory
        .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames),
            new RowSchema(lateralViewRR.getColumnInfos()), allPath,
            udtfPath), lateralViewRR);
    lateralViewJoin.setColumnExprMap(colExprMap);
    return lateralViewJoin;
  }
View Full Code Here

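// PTF plan: an optional map-side PTFOperator, then a ReduceSinkOperator, and on the
// reduce side an ExtractOperator followed by the PTFOperator; each is created with
// the RowSchema of the corresponding RowResolver.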
      {
        RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();

        ptfDesc.setMapSide(true);
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
            new RowSchema(ptfMapRR.getColumnInfos()),
            input), ptfMapRR);
        rr = opParseCtx.get(input).getRowResolver();
      }

      /*
       * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
       */

      ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
      ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
      ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
      List<String> outputColumnNames = new ArrayList<String>();
      StringBuilder orderString = new StringBuilder();

      /*
       * Use the input RR of TableScanOperator in case there is no map-side
       * reshape of input.
       * If the parent of the ReduceSinkOperator is a PTFOperator, use its
       * output RR.
       */
      buildPTFReduceSinkDetails(tabDef,
          rr,
          partCols,
          valueCols,
          orderCols,
          colExprMap,
          outputColumnNames,
          orderString,
          rsOpRR,
          extractOpRR);

      input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
          .getReduceSinkDesc(orderCols,
              valueCols, outputColumnNames, false,
              -1, partCols, orderString.toString(), -1),
          new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
      input.setColumnExprMap(colExprMap);
    }

    /*
     * 3. build Reduce-side Op Graph
     */
    {
      /*
       * b. Construct Extract Operator.
       */
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(
          new ExtractDesc(
              new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
                  Utilities.ReduceField.VALUE
                  .toString(), "", false)),
          new RowSchema(extractOpRR.getColumnInfos()),
          input), extractOpRR);

      /*
       * c. Rebuild the QueryDef.
       * Why?
       * - so that the ExprNodeDescriptors in the QueryDef are based on the
       *   Extract Operator's RowResolver
       */
      rr = opParseCtx.get(input).getRowResolver();
      ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);

      /*
       * d. Construct PTF Operator.
       */
      RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
          new RowSchema(ptfOpRR.getColumnInfos()),
          input), ptfOpRR);

    }

    return input;
View Full Code Here

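// Windowing: a PTFOperator for the windowing spec is added, with a RowSchema built
// from the output shape's RowResolver.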
      rr = opParseCtx.get(input).getRowResolver();
      PTFTranslator translator = new PTFTranslator();
      PTFDesc ptfDesc = translator.translate(wSpec, this, conf, rr, unparseTranslator);
      RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
          new RowSchema(ptfOpRR.getColumnInfos()),
          input), ptfOpRR);
      rr = ptfOpRR;
    }

    return input;
View Full Code Here

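// Windowing reduce side: a ReduceSinkOperator is added, then a RowResolver for the
// ExtractOperator is rebuilt column by column (HAVING-only columns appended last);
// the ExtractOperator itself is created with a RowSchema built from the input RowResolver.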
    input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
        .getReduceSinkDesc(orderCols,
            valueCols, outputColumnNames, false,
            -1, partCols, orderString.toString(), -1),
        new RowSchema(rsNewRR.getColumnInfos()), input), rsNewRR);
    input.setColumnExprMap(colExprMap);


    // Construct the RR for extract operator
    RowResolver extractRR = new RowResolver();
    LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
        new LinkedHashMap<String[], ColumnInfo>();
    pos = 0;

    for (ColumnInfo colInfo : colInfoList) {
      String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
      /*
       * Skip a colInfo internalName we have already encountered; it appears
       * again because it must also be put for the Having clause. Those
       * entries are added at the end, in the loop over colsAddedByHaving below.
       */
      if ( colsAddedByHaving.containsKey(alias)) {
        continue;
      }
      ASTNode astNode = PTFTranslator.getASTNode(colInfo, inputRR);
      ColumnInfo eColInfo = new ColumnInfo(
          SemanticAnalyzer.getColumnInternalName(pos++), colInfo.getType(), alias[0],
          colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());

      if ( astNode == null ) {
        extractRR.put(alias[0], alias[1], eColInfo);
      }
      else {
        /*
         * A column referred to by the having clause may have been added twice:
         * once with the ASTNode.toStringTree as the alias,
         * and then with the real alias.
         */
        extractRR.putExpression(astNode, eColInfo);
        if ( !astNode.toStringTree().toLowerCase().equals(alias[1]) ) {
          colsAddedByHaving.put(alias, eColInfo);
        }
      }
    }

    for(Map.Entry<String[], ColumnInfo> columnAddedByHaving : colsAddedByHaving.entrySet() ) {
      String[] alias = columnAddedByHaving.getKey();
      ColumnInfo eColInfo = columnAddedByHaving.getValue();
      extractRR.put(alias[0], alias[1], eColInfo);
    }

    /*
     * b. Construct Extract Operator.
     */
    input = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new ExtractDesc(
            new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
                Utilities.ReduceField.VALUE
                .toString(), "", false)),
        new RowSchema(inputRR.getColumnInfos()),
        input), extractRR);


    return input;
  }
View Full Code Here

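// Map-join conversion: the MapJoinOperator is created with the RowSchema of the join
// output RowResolver and registered in the operator-to-parse-context map.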
        valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
        filterMap, op.getConf().getNoOuterJoin());
    mapJoinDescriptor.setTagOrder(tagOrder);

    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar);

    OpParseContext ctx = new OpParseContext(outputRS);
    opParseCtxMap.put(mapJoinOp, ctx);

    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
View Full Code Here

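// A SelectOperator carrying the input RowSchema is spliced in between 'input' and
// its former children.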
    }

    SelectDesc select = new SelectDesc(exprs, outputs, false);

    SelectOperator sel = (SelectOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(select,
        new RowSchema(inputRR.getColumnInfos()), input), inputRR);

    sel.setColumnExprMap(colExprMap);

    // Insert the select operator in between.
    sel.setChildOperators(childOps);
View Full Code Here
