private static final Log LOG = LogFactory.getLog(GlobalLimitOptimizer.class.getName());
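
/**
 * Determines whether the query qualifies for the optimization that
 * reduces input size for LIMIT and, if so, records the global limit in
 * the {@link GlobalLimitCtx} so that later stages can restrict how much
 * input is read.
 */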
public ParseContext transform(ParseContext pctx) throws SemanticException {
Context ctx = pctx.getContext();
Map<String, Operator<? extends OperatorDesc>> topOps = pctx.getTopOps();
GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
Map<TableScanOperator, ExprNodeDesc> opToPartPruner = pctx.getOpToPartPruner();
Map<TableScanOperator, PrunedPartitionList> opToPartList = pctx.getOpToPartList();
Map<String, PrunedPartitionList> prunedPartitions = pctx.getPrunedPartitions();
Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
Map<TableScanOperator, Table> topToTable = pctx.getTopToTable();
QB qb = pctx.getQB();
HiveConf conf = pctx.getConf();
QBParseInfo qbParseInfo = qb.getParseInfo();
// Determine whether the query qualifies for the reduce-input-size
// optimization for LIMIT. The query only qualifies when there is
// exactly one top operator and no transform script, UDTF, or block
// sampling is used.
if (ctx.getTryCount() == 0 && topOps.size() == 1
    && !globalLimitCtx.ifHasTransformOrUDTF()
    && nameToSplitSample.isEmpty()) {
// Here we recursively check:
// 1. whether there is exactly one LIMIT in the query, and
// 2. whether there is no aggregation, group by, distinct, sort by,
//    distribute by, or table sampling in any of the sub-queries.
// The query only qualifies if both conditions are satisfied.
//
// Example qualified queries:
//   CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ...
//   INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
//                              FROM ... LIMIT ...
//   SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
//
Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);
// the query qualifies for the optimization
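// checkQbpForGlobalLimit returns null when the query does not qualify;
// LIMIT 0 needs no input at all, so the rewrite is skipped for it as well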
if (tempGlobalLimit != null && tempGlobalLimit != 0) {
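// exactly one top operator exists (checked above), and it must be a
// table scan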
TableScanOperator ts = (TableScanOperator) topOps.values().iterator().next();
Table tab = topToTable.get(ts);
if (!tab.isPartitioned()) {
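// for a non-partitioned table the rewrite is only safe when there is
// no WHERE clause: a filter could reject rows, so a reduced input
// might not contain enough qualifying rows for the limit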
if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
globalLimitCtx.enableOpt(tempGlobalLimit);
}
} else {
// check if the pruner only contains partition columns
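// (a predicate over partition columns alone is resolved entirely by
// the pruner at compile time, so it cannot reject rows at run time)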
if (PartitionPruner.onlyContainsPartnCols(tab,
opToPartPruner.get(ts))) {
PrunedPartitionList partsList = null;
try {
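// reuse the pruned partition list if it was already computed;
// otherwise run the pruner and cache the result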
partsList = opToPartList.get(ts);
if (partsList == null) {
partsList = PartitionPruner.prune(tab, opToPartPruner.get(ts),
    conf, topOps.keySet().iterator().next(), prunedPartitions);
opToPartList.put(ts, partsList);
}
} catch (HiveException e) {
// Use the fully qualified name so it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
// If there are any unknown partitions, a map-reduce job is needed for
// the filter to prune correctly, so the optimization is skipped.
if (partsList.getUnknownPartns().isEmpty()) {
globalLimitCtx.enableOpt(tempGlobalLimit);
}
}
}
if (globalLimitCtx.isEnable()) {
LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
+ globalLimitCtx.getGlobalLimit());
}
}
}
return pctx;
}