Examples of RowSchema

org.apache.hadoop.hive.ql.exec.RowSchema
RowSchema Implementation

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

    }


    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
        reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1),
        new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
        reduceSinkOutputRowResolver);


    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

      curr = createCommonReduceSink(qb, input);


      RowResolver currRR = opParseCtx.get(curr).getRowResolver();
      // create a forward operator
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
          new RowSchema(currRR.getColumnInfos()), curr), currRR);


      for (String dest : ks) {
        curr = input;
        curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
        curr = genSelectPlan(dest, qb, curr);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

      }
    }


    // Create a new union operator
    Operator<? extends Serializable> unionforward = OperatorFactory
        .getAndMakeChild(new UnionDesc(), new RowSchema(unionoutRR
        .getColumnInfos()));


    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends Serializable>> child =
      new ArrayList<Operator<? extends Serializable>>();

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

      // Create the root of the operator tree
      TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
      setupStats(tsDesc, qb.getParseInfo(), tab, alias);


      top = putOpInsertMap(OperatorFactory.get(tsDesc,
          new RowSchema(rwsch.getColumnInfos())), rwsch);


      // Add this to the list of top operators - we always start from a table
      // scan
      topOps.put(alias_id, top);


      // Add a mapping from the table scan operator to Table
      topToTable.put((TableScanOperator) top, tab);
    } else {
      rwsch = opParseCtx.get(top).getRowResolver();
      top.setChildOperators(null);
    }


    // check if this table is sampled and needs more than input pruning
    Operator<? extends Serializable> tableOp = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
      int num = ts.getNumerator();
      int den = ts.getDenominator();
      ArrayList<ASTNode> sampleExprs = ts.getExprs();


      // TODO: Do the type checking of the expressions
      List<String> tabBucketCols = tab.getBucketCols();
      int numBuckets = tab.getNumBuckets();


      // If there are no sample cols and no bucket cols then throw an error
      if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
        throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
            + tab.getTableName());
      }


      if (num > den) {
        throw new SemanticException(
            ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " "
            + tab.getTableName());
      }


      // check if a predicate is needed
      // predicate is needed if either input pruning is not enough
      // or if input pruning is not possible


      // check if the sample columns are the same as the table bucket columns
      boolean colsEqual = true;
      if ((sampleExprs.size() != tabBucketCols.size())
          && (sampleExprs.size() != 0)) {
        colsEqual = false;
      }


      for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
        boolean colFound = false;
        for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
          if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
            break;
          }


          if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
              .equalsIgnoreCase(tabBucketCols.get(j))) {
            colFound = true;
          }
        }
        colsEqual = (colsEqual && colFound);
      }


      // Check if input can be pruned
      ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));


      // check if input pruning is enough
      if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual)
          && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {


        // input pruning is enough; add the filter for the optimizer to use it
        // later
        LOG.info("No need for sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
            .getDenominator(), tabBucketCols, true)),
            new RowSchema(rwsch.getColumnInfos()), top);
      } else {
        // need to add filter
        // create tableOp to be filterDesc and set as child to 'top'
        LOG.info("Need sample filter");
        ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
            colsEqual, alias, rwsch, qb.getMetaData(), null);
        tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
            samplePredicate, true),
            new RowSchema(rwsch.getColumnInfos()), top);
      }
    } else {
      boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
      if (testMode) {
        String tabName = tab.getTableName();


        // has the user explicitly asked not to sample this table
        String unSampleTblList = conf
            .getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
        String[] unSampleTbls = unSampleTblList.split(",");
        boolean unsample = false;
        for (String unSampleTbl : unSampleTbls) {
          if (tabName.equalsIgnoreCase(unSampleTbl)) {
            unsample = true;
          }
        }


        if (!unsample) {
          int numBuckets = tab.getNumBuckets();


          // If the input table is bucketed, choose the first bucket
          if (numBuckets > 0) {
            TableSample tsSample = new TableSample(1, numBuckets);
            tsSample.setInputPruning(true);
            qb.getParseInfo().setTabSample(alias, tsSample);
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
                .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
            tableOp = OperatorFactory
                .getAndMakeChild(new FilterDesc(samplePred, true,
                new sampleDesc(tsSample.getNumerator(), tsSample
                .getDenominator(), tab.getBucketCols(), true)),
                new RowSchema(rwsch.getColumnInfos()), top);
            LOG.info("No need for sample filter");
          } else {
            // The table is not bucketed, add a dummy filter :: rand()
            int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
            TableSample tsSample = new TableSample(1, freq);
            tsSample.setInputPruning(false);
            qb.getParseInfo().setTabSample(alias, tsSample);
            LOG.info("Need sample filter");
            ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor
                .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer
                .valueOf(460476415)));
            ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false,
                alias, rwsch, qb.getMetaData(), randFunc);
            tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
                samplePred, true),
                new RowSchema(rwsch.getColumnInfos()), top);
          }
        }
      }
    }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

            String[] tabCol = source.reverseLookup(col.getInternalName());
            lvForwardRR.put(tabCol[0], tabCol[1], col);
          }


          Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
              new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()),
              op), lvForwardRR);


          // The order in which the two paths are added is important. The
          // lateral view join operator depends on having the select operator
          // give it the row first.


          // Get the all path by making a select(*).
          RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
          //Operator allPath = op;
          Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
                            new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
                            lvForward), allPathRR);
          // Get the UDTF Path
          QB blankQb = new QB(null, null, false);
          Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
              .getChild(0), blankQb, lvForward);
          // add udtf aliases to QB
          for (String udtfAlias : blankQb.getAliases()) {
            qb.addAlias(udtfAlias);
          }
          RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();


          // Merge the two into the lateral view join
          // The cols of the merged result will be the combination of both the
          // cols of the UDTF path and the cols of the all path. The internal
          // names have to be changed to avoid conflicts


          RowResolver lateralViewRR = new RowResolver();
          ArrayList<String> outputInternalColNames = new ArrayList<String>();


          LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames);
          LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames);


          // For PPD, we need a column to expression map so that during the walk,
          // the processor knows how to transform the internal col names.
          // Following steps are dependent on the fact that we called
          // LVmerge.. in the above order
          Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();


          int i=0;
          for (ColumnInfo c : allPathRR.getColumnInfos()) {
            String internalName = getColumnInternalName(i);
            i++;
            colExprMap.put(internalName,
                new ExprNodeColumnDesc(c.getType(), c.getInternalName(),
                    c.getTabAlias(), c.getIsVirtualCol()));
          }


          Operator lateralViewJoin = putOpInsertMap(OperatorFactory
              .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames),
                  new RowSchema(lateralViewRR.getColumnInfos()), allPath,
                  udtfPath), lateralViewRR);
          lateralViewJoin.setColumnExprMap(colExprMap);
          op = lateralViewJoin;
        }
        e.setValue(op);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

      }
    }
    ASTNode condn = (ASTNode) havingExpr.getChild(0);


    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
        inputRR.getColumnInfos()), input), inputRR);


    return output;
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

  }


  private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    RowSchema inputRS = fsOp.getSchema();


    // create a reduce Sink operator - key is the first column
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    keyCols.add(TypeCheckProcFactory.DefaultExprProcessor
        .getFuncExprNodeDesc("rand"));


    // value is all the columns in the FileSink operator input
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : inputRS.getSignature()) {
      valueCols.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(),
          ci.getTabAlias(), ci.getIsVirtualCol()));
    }


    // create a dummy tableScan operator
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(
        TableScanDesc.class, inputRS);


    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < valueCols.size(); i++) {
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    }


    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();


    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }


    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));


    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();


    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);


    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);


    // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema
    // needs to include the partition column, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column type should be string
            tblAlias, true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);


      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema


  @SuppressWarnings("unused")
  private static final Log LOG = LogFactory.getLog(RowResolver.class.getName());
  
  public RowResolver() {
    rowSchema = new RowSchema();
    rslvMap = new HashMap<String, LinkedHashMap<String, ColumnInfo>>();
    invRslvMap = new HashMap<String, String[]>();
    isExprResolver = false;
  }

View Full Code Here

Examples of org.apache.hadoop.hive.ql.exec.RowSchema

                   new ColumnInfo((Integer.valueOf(pos)).toString(), in.getType()));
      col_list.add(new exprNodeColumnDesc(in.getType(), internalName));
    }


    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new selectDesc(col_list), new RowSchema(outputRR.getColumnInfos()), input), outputRR);


    return output;
  }

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.