Examples of QBParseInfo


Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

    ParseContext parseContext = context.parseContext;
    Class<? extends InputFormat> inputFormat = parseContext.getTopToTable().get(tableScan)
        .getInputFormatClass();
    QB queryBlock = parseContext.getQB();
    QBParseInfo parseInfo = parseContext.getQB().getParseInfo();
   
    if (parseInfo.isAnalyzeCommand()) {

      assert tableScan.getChildOperators() == null
        || tableScan.getChildOperators().size() == 0;

      String alias = null;
      for (String a: parseContext.getTopOps().keySet()) {
        if (tableScan == parseContext.getTopOps().get(a)) {
          alias = a;
        }
      }

      assert alias != null;

      TezWork tezWork = context.currentTask.getWork();
      boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
      boolean noScan = parseInfo.isNoScanAnalyzeCommand();
      if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {

        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
        // There will not be any Tez job above this task
        StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec());
        snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
            HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
        snjTask.setParentTasks(null);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(snjTask);
        return true;
      } else {

        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
        // The plan consists of a simple TezTask followed by a StatsTask.
        // The Tez task is just a simple TableScanOperator

        StatsWork statsWork = new StatsWork(parseInfo.getTableSpec());
        statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
        statsWork.setSourceTask(context.currentTask);
        statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
            HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
        context.currentTask.addDependentTask(statsTask);

        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
        // The plan consists of a StatsTask only.
        if (parseInfo.isNoScanAnalyzeCommand()) {
          statsTask.setParentTasks(null);
          statsWork.setNoScanAnalyzeCommand(true);
          context.rootTasks.remove(context.currentTask);
          context.rootTasks.add(statsTask);
        }

        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
        if (parseInfo.isPartialScanAnalyzeCommand()) {
          handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask);
        }

        // NOTE: here we should use the new partition predicate pushdown API to get
        // the list of pruned partitions and pass it to setTaskPlan as the last parameter
View Full Code Here
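
The three ANALYZE variants in the snippet above are distinguished purely by QBParseInfo flags. A minimal sketch of that dispatch, assuming the same Hive version as the snippet (the helper class AnalyzeVariant is hypothetical, not part of Hive):

    import org.apache.hadoop.hive.ql.parse.QBParseInfo;

    // Hypothetical helper: classifies the ANALYZE statement variant from the
    // same QBParseInfo flags the snippet above branches on.
    final class AnalyzeVariant {
      static String of(QBParseInfo parseInfo) {
        if (!parseInfo.isAnalyzeCommand()) {
          return "not an ANALYZE command";
        }
        if (parseInfo.isNoScanAnalyzeCommand()) {
          return "ANALYZE ... COMPUTE STATISTICS noscan";
        }
        if (parseInfo.isPartialScanAnalyzeCommand()) {
          return "ANALYZE ... COMPUTE STATISTICS partialscan";
        }
        return "ANALYZE ... COMPUTE STATISTICS (full scan)";
      }
    }

Note that the snippet only takes the StatsNoJobWork shortcut when the noscan or partialscan flag combines with an ORC input format; for other formats those variants still go through the StatsWork path.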

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {

          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator

          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          ctx.getRootTasks().add(currTask);
          currWork.setGatheringStats(true);
          // NOTE: here we should use the new partition predicate pushdown API to get
          // the list of pruned partitions and pass it to setTaskPlan as the last parameter
          Set<Partition> confirmedPartns = new HashSet<Partition>();
          tableSpec tblSpec = parseInfo.getTableSpec();
          if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
            // static partition
            confirmedPartns.add(tblSpec.partHandle);
          } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
            // dynamic partition
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {
          boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
          boolean noScan = parseInfo.isNoScanAnalyzeCommand();
          if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {

            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
            // There will not be any MR or Tez job above this task
            StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec());
            snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(
                HiveConf.ConfVars.HIVE_STATS_RELIABLE));
            Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
            ctx.setCurrTask(snjTask);
            ctx.setCurrTopOp(null);
            ctx.getRootTasks().clear();
            ctx.getRootTasks().add(snjTask);
          } else {
            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
            // The plan consists of a simple MapRedTask followed by a StatsTask.
            // The MR task is just a simple TableScanOperator

            StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
            statsWork.setAggKey(op.getConf().getStatsAggPrefix());
            statsWork.setSourceTask(currTask);
            statsWork.setStatsReliable(parseCtx.getConf().getBoolVar(
                HiveConf.ConfVars.HIVE_STATS_RELIABLE));
            Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
            currTask.addDependentTask(statsTask);
            if (!ctx.getRootTasks().contains(currTask)) {
              ctx.getRootTasks().add(currTask);
            }

            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
            // The plan consists of a StatsTask only.
            if (parseInfo.isNoScanAnalyzeCommand()) {
              statsTask.setParentTasks(null);
              statsWork.setNoScanAnalyzeCommand(true);
              ctx.getRootTasks().remove(currTask);
              ctx.getRootTasks().add(statsTask);
            }

            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
            if (parseInfo.isPartialScanAnalyzeCommand()) {
              handlePartialScanCommand(op, ctx, parseCtx, currTask, parseInfo, statsWork, statsTask);
            }

            currWork.getMapWork().setGatheringStats(true);
            if (currWork.getReduceWork() != null) {
View Full Code Here
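
Both StatsWork and StatsNoJobWork in these snippets copy the same reliability flag out of the session configuration. A short sketch of that read (hive.stats.reliable; when set, a query fails if statistics cannot be computed accurately, rather than completing with unreliable stats):

    import org.apache.hadoop.hive.conf.HiveConf;

    // Sketch: reading the flag the snippets pass to setStatsReliable(...).
    HiveConf conf = new HiveConf();
    boolean reliable = conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE);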

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    Map<TableScanOperator, Table> topToTable = pctx.getTopToTable();

    QB qb = pctx.getQB();
    HiveConf conf = pctx.getConf();
    QBParseInfo qbParseInfo = qb.getParseInfo();

    // Determine whether the query qualifies for reduced input size for LIMIT.
    // The query only qualifies when there is exactly one top operator,
    // no transform or UDTF, and no block sampling is used.
    if (ctx.getTryCount() == 0 && topOps.size() == 1
        && !globalLimitCtx.ifHasTransformOrUDTF() &&
        nameToSplitSample.isEmpty()) {

      // Here we recursively check:
      // 1. whether there is exactly one LIMIT in the query
      // 2. whether there is no aggregation, group-by, distinct, sort by,
      //    distribute by, or table sampling in any of the sub-queries.
      // The query only qualifies if both conditions are satisfied.
      //
      // Example qualified queries:
      //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
      //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
      //                               FROM ... LIMIT ...
      //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // the query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0) {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
          }
        } else {
          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(tab,
View Full Code Here
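
For the unpartitioned branch above, the optimization is only enabled when no insert destination carries a WHERE clause, presumably because with a filter the amount of input needed to produce the limited output is unknown. The check, in the same style as the snippet (qbParseInfo in scope as above):

    // QBParseInfo keeps a destination-to-WHERE-expression map;
    // an empty map means no destination filters any rows.
    boolean noFilters = qbParseInfo.getDestToWhereExpr().isEmpty();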

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

   * @return the limit if there is exactly one LIMIT across the query and all subqueries;
   *         0 if there is no LIMIT;
   *         null otherwise
   */
  private Integer checkQbpForGlobalLimit(QB localQb) {
    QBParseInfo qbParseInfo = localQb.getParseInfo();
    if (localQb.getNumSelDi() == 0 && qbParseInfo.getDestToClusterBy().isEmpty()
        && qbParseInfo.getDestToDistributeBy().isEmpty()
        && qbParseInfo.getDestToOrderBy().isEmpty()
        && qbParseInfo.getDestToSortBy().isEmpty()
        && qbParseInfo.getDestToAggregationExprs().size() <= 1
        && qbParseInfo.getDestToDistinctFuncExprs().size() <= 1
        && qbParseInfo.getNameToSample().isEmpty()) {
      if ((qbParseInfo.getDestToAggregationExprs().size() < 1 ||
          qbParseInfo.getDestToAggregationExprs().values().iterator().next().isEmpty()) &&
          (qbParseInfo.getDestToDistinctFuncExprs().size() < 1 ||
              qbParseInfo.getDestToDistinctFuncExprs().values().iterator().next().isEmpty())
          && qbParseInfo.getDestToLimit().size() <= 1) {
        Integer retValue;
        if (qbParseInfo.getDestToLimit().size() == 0) {
          retValue = 0;
        } else {
          retValue = qbParseInfo.getDestToLimit().values().iterator().next();
        }

        for (String alias : localQb.getSubqAliases()) {
          Integer limit = checkQbpForGlobalLimit(localQb.getSubqForAlias(alias).getQB());
          if (limit == null) {
View Full Code Here
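
The snippet is cut off inside the subquery loop. A hedged, simplified sketch (not the Hive implementation) of how per-block results could compose under the contract in the javadoc above, where null disqualifies, 0 means no LIMIT, and any other value is the single LIMIT:

    // Hypothetical combiner for the contract above.
    static Integer combineLimits(Integer parent, Integer child) {
      if (parent == null || child == null) {
        return null;                       // any disqualified block disqualifies the query
      }
      if (parent == 0) {
        return child;                      // no LIMIT so far; take the child's result
      }
      return (child == 0) ? parent : null; // two separate LIMITs disqualify
    }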

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {

          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator

          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          statsWork.setStatsReliable(
            parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // The plan consists of a StatsTask only.
          if (parseInfo.isNoScanAnalyzeCommand()) {
            statsTask.setParentTasks(null);
            statsWork.setNoScanAnalyzeCommand(true);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(statsTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          if (parseInfo.isPartialScanAnalyzeCommand()) {
            handlePartialScanCommand(op, ctx, parseCtx, currTask, parseInfo, statsWork, statsTask);
          }

          currWork.getMapWork().setGatheringStats(true);
          if (currWork.getReduceWork() != null) {
            currWork.getReduceWork().setGatheringStats(true);
          }
          // NOTE: here we should use the new partition predicate pushdown API to get
          // the list of pruned partitions and pass it to setTaskPlan as the last parameter
          Set<Partition> confirmedPartns = new HashSet<Partition>();
          tableSpec tblSpec = parseInfo.getTableSpec();
          if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
            // static partition
            if (tblSpec.partHandle != null) {
              confirmedPartns.add(tblSpec.partHandle);
            } else {
              // partial partition spec has null partHandle
              assert parseInfo.isNoScanAnalyzeCommand();
              confirmedPartns.addAll(tblSpec.partitions);
            }
          } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
            // dynamic partition
            confirmedPartns.addAll(tblSpec.partitions);
View Full Code Here
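
The tail of this snippet shows the full confirmed-partition collection for ANALYZE. Factored into a standalone helper (the name confirmedPartitions is hypothetical; the types follow the snippet's usage of BaseSemanticAnalyzer.tableSpec):

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;

    // Sketch of the confirmed-partition logic above: a static spec with a
    // concrete partition handle confirms exactly that partition; a partial
    // static spec (null partHandle, only legal for noscan) and a dynamic
    // spec both confirm every partition listed in the table spec.
    static Set<Partition> confirmedPartitions(tableSpec tblSpec) {
      Set<Partition> confirmed = new HashSet<Partition>();
      if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
        if (tblSpec.partHandle != null) {
          confirmed.add(tblSpec.partHandle);
        } else {
          confirmed.addAll(tblSpec.partitions);
        }
      } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
        confirmed.addAll(tblSpec.partitions);
      }
      return confirmed;
    }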

Examples of org.apache.hadoop.hive.ql.parse.QBParseInfo

      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {

          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator

          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          statsWork.setStatsReliable(
            parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
          // The plan consists of a StatsTask only.
          if (parseInfo.isNoScanAnalyzeCommand()) {
            statsTask.setParentTasks(null);
            statsWork.setNoScanAnalyzeCommand(true);
            ctx.getRootTasks().remove(currTask);
            ctx.getRootTasks().add(statsTask);
          }

          // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
          if (parseInfo.isPartialScanAnalyzeCommand()) {
            handlePartialScanCommand(op, ctx, parseCtx, currTask, parseInfo, statsWork, statsTask);
          }

          currWork.setGatheringStats(true);
          // NOTE: here we should use the new partition predicate pushdown API to get
          // the list of pruned partitions and pass it to setTaskPlan as the last parameter
          Set<Partition> confirmedPartns = new HashSet<Partition>();
          tableSpec tblSpec = parseInfo.getTableSpec();
          if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
            // static partition
            if (tblSpec.partHandle != null) {
              confirmedPartns.add(tblSpec.partHandle);
            } else {
              // partial partition spec has null partHandle
              assert parseInfo.isNoScanAnalyzeCommand();
              confirmedPartns.addAll(tblSpec.partitions);
            }
          } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
            // dynamic partition
            confirmedPartns.addAll(tblSpec.partitions);
View Full Code Here