Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator
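The excerpts below show how TableScanOperator is used inside Hive's query optimizer and plan-generation code. Each excerpt is a fragment of a larger source file and is truncated where the original listing cut it off, so a snippet may begin or end mid-statement.

The first excerpt appears to come from the index-based rewrite optimizer: it walks the ParseContext's TableScanOperator-to-Table map and retrieves the indexes that are eligible to rewrite the query against those base tables.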


      Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
      Iterator<TableScanOperator> topOpItr = topToTable.keySet().iterator();
      while(topOpItr.hasNext()){

        TableScanOperator topOp = topOpItr.next();
        Table table = topToTable.get(topOp);
        baseTableName = table.getTableName();
        Map<Table, List<Index>> indexes = getIndexesForRewrite();
        if(indexes == null){
          LOG.debug("Error getting valid indexes for rewrite, " +
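The next excerpt, apparently from the same index-rewrite path, takes each base table that qualified for the rewrite, looks up its TableScanOperator, and applies the aggregate-index rewrite to it, replacing the ParseContext with the rewritten version: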


    Iterator<String> tsOpItr = tsOpToProcess.keySet().iterator();

    while(tsOpItr.hasNext()){
      baseTableName = tsOpItr.next();
      RewriteCanApplyCtx canApplyCtx = tsOpToProcess.get(baseTableName);
      TableScanOperator topOp = (TableScanOperator) topOpMap.get(baseTableName);
      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
        RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb,
            indexTableName, baseTableName, canApplyCtx.getAggFunction());
      rewriteQueryCtx.invokeRewriteQueryProc(topOp);
      parseContext = rewriteQueryCtx.getParseContext();
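As an aside, both excerpts above iterate a map with keySet() and then call get() for every key; iterating entrySet() avoids the second lookup. A minimal, self-contained sketch of that pattern (the map contents below are placeholders, not the types used in the excerpts):

import java.util.LinkedHashMap;
import java.util.Map;

public class EntrySetIterationSketch {
  public static void main(String[] args) {
    Map<String, String> baseTableToIndex = new LinkedHashMap<String, String>();
    baseTableToIndex.put("base_table", "agg_index");

    // One lookup per entry instead of keySet() iteration followed by get().
    for (Map.Entry<String, String> entry : baseTableToIndex.entrySet()) {
      String baseTableName = entry.getKey();
      String indexTableName = entry.getValue();
      System.out.println(baseTableName + " -> " + indexTableName);
    }
  }
}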
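This NodeProcessor, apparently part of the metadata-only query optimizer, marks a TableScanOperator as a candidate for a metadata-only scan when the scan needs no columns and its descriptor carries no virtual columns: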

    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator node = (TableScanOperator) nd;
      WalkerCtx walkerCtx = (WalkerCtx) procCtx;
      if (((node.getNeededColumnIDs() == null) || (node.getNeededColumnIDs().size() == 0))
          && ((node.getConf() == null) ||
              (node.getConf().getVirtualCols() == null) ||
              (node.getConf().getVirtualCols().isEmpty()))) {
        walkerCtx.setMayBeMetadataOnly(node);
      }
      return nd;
    }
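Processors like the one above are normally wired into Hive's rule-based graph walker. The sketch below shows that wiring under stated assumptions: it targets the 0.x-era org.apache.hadoop.hive.ql.lib API, the helper name walkTableScans and its parameters are illustrative rather than taken from this listing, and "TS%" is the usual rule pattern for matching operator paths that start at a TableScanOperator.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class TableScanWalkSketch {
  // Fires tsProcessor (for example, a processor like the one shown above) on every
  // operator path that begins with a TableScanOperator, sharing procCtx between calls.
  public static void walkTableScans(ParseContext parseContext, NodeProcessor tsProcessor,
      NodeProcessorCtx procCtx) throws SemanticException {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", "TS%"), tsProcessor);

    // The dispatcher hands procCtx to every processor it fires; no default processor.
    Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    // Start the walk from the plan's top operators, i.e. the TableScanOperators.
    ArrayList<Node> topNodes = new ArrayList<Node>(parseContext.getTopOps().values());
    walker.startWalking(topNodes, null);
  }
}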
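After the walk, the collected candidates are processed; this excerpt iterates the table scans recorded in the WalkerCtx and converts the enclosing MapredWork into a metadata-only query: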

          walkerCtx.getMetadataOnlyTableScans().size()));
      Iterator<TableScanOperator> iterator
        = walkerCtx.getMetadataOnlyTableScans().iterator();

      while (iterator.hasNext()) {
        TableScanOperator tso = iterator.next();
        LOG.info("Metadata only table scan for " + tso.getConf().getAlias());
        convertToMetadataOnlyQuery((MapredWork) task.getWork(), tso);
      }

      return null;
    }
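The long excerpt below appears to come from the semantic analyzer's task-generation phase. When the plan has a single TableScanOperator whose partition pruner references only partition columns and no unknown partitions remain, the confirmed partitions are packed into a FetchWork so the query can run as a fetch task without MapReduce; the code then checks whether the query qualifies for the global LIMIT optimization: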

          inputs.add(new ReadEntity(tab));
        }
      } else {

        if (topOps.size() == 1) {
          TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];

          // check if the pruner only contains partition columns
          if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
              opToPartPruner.get(ts))) {

            PrunedPartitionList partsList = null;
            try {
              partsList = opToPartList.get(ts);
              if (partsList == null) {
                partsList = PartitionPruner.prune(topToTable.get(ts),
                    opToPartPruner.get(ts), conf, (String) topOps.keySet()
                    .toArray()[0], prunedPartitions);
                opToPartList.put(ts, partsList);
              }
            } catch (HiveException e) {
              // Has to use full name to make sure it does not conflict with
              // org.apache.commons.lang.StringUtils
              LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
              throw new SemanticException(e.getMessage(), e);
            }

            // If there is any unknown partition, create a map-reduce job for
            // the filter to prune correctly
            if ((partsList.getUnknownPartns().size() == 0)) {
              List<String> listP = new ArrayList<String>();
              List<PartitionDesc> partP = new ArrayList<PartitionDesc>();

              Set<Partition> parts = partsList.getConfirmedPartns();
              Iterator<Partition> iterParts = parts.iterator();
              while (iterParts.hasNext()) {
                Partition part = iterParts.next();

                listP.add(part.getPartitionPath().toString());
                try {
                  partP.add(Utilities.getPartitionDesc(part));
                } catch (HiveException e) {
                  throw new SemanticException(e.getMessage(), e);
                }
                inputs.add(new ReadEntity(part));
              }

              fetch = new FetchWork(listP, partP, qb.getParseInfo()
                  .getOuterQueryLimit());
              noMapRed = true;
            }
          }
        }
      }

      if (noMapRed) {
        if (fetch.getTblDesc() != null) {
          PlanUtils.configureInputJobPropertiesForStorageHandler(
            fetch.getTblDesc());
        } else if ((fetch.getPartDesc() != null) && (!fetch.getPartDesc().isEmpty())) {
          PartitionDesc pd0 = fetch.getPartDesc().get(0);
          TableDesc td = pd0.getTableDesc();
          if ((td != null) && (td.getProperties() != null)
              && td.getProperties().containsKey(
                  org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)) {
            PlanUtils.configureInputJobPropertiesForStorageHandler(td);
          }
        }
        fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
        setFetchTask(fetchTask);

        // remove root tasks if any
        rootTasks.clear();
        return;
      }
    }

    // Determine whether the query qualifies for reducing the input size for LIMIT.
    // The query only qualifies when there is exactly one top operator,
    // no transformer or UDTF, and no block sampling is used.
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVELIMITOPTENABLE)
        && ctx.getTryCount() == 0 && topOps.size() == 1
        && !globalLimitCtx.ifHasTransformOrUDTF() &&
        nameToSplitSample.isEmpty()) {

      // Here we recursively check:
      // 1. whether there is exactly one LIMIT in the query
      // 2. whether there is no aggregation, group-by, distinct, sort by,
      //    distributed by, or table sampling in any of the sub-query.
      // The query only qualifies if both conditions are satisfied.
      //
      // Example qualified queries:
      //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
      //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
      //                               FROM ... LIMIT...
      //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // the query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0)  {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
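When that check succeeds, globalLimitCtx.enableOpt records the limit so that later plan generation can first try the query against a smaller subset of the input data.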
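This fragment, apparently from one of Hive's join optimizers, resolves the TableScanOperator behind a join alias through the ParseContext's top operators before examining the join keys at that position: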

        QBJoinTree joinTree, String alias, int pos) throws SemanticException {
      Map<String, Operator<? extends Serializable>> topOps = this.pGraphContext
          .getTopOps();
      Map<TableScanOperator, Table> topToTable = this.pGraphContext
          .getTopToTable();
      TableScanOperator tso = (TableScanOperator) topOps.get(alias);
      if (tso == null) {
        return false;
      }

      List<ExprNodeDesc> keys = op.getConf().getKeys().get((byte) pos);
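The final excerpt is a plan-generation processor for TableScanOperator: it creates the current MapredWork task, records the alias for the scan, and, for ANALYZE TABLE ... COMPUTE STATISTICS, chains a StatsTask behind the table-scan task: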

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    Task<? extends Serializable> currTask = TaskFactory.get(currWork, parseCtx.getConf());
    Operator<? extends Serializable> currTopOp = op;
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(currTopOp);

    for (String alias : parseCtx.getTopOps().keySet()) {
      Operator<? extends Serializable> currOp = parseCtx.getTopOps().get(alias);
      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {

          //   ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
          // The plan consists of a simple MapRedTask followed by a StatsTask.
          // The MR task is just a simple TableScanOperator

          StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
          statsWork.setAggKey(op.getConf().getStatsAggPrefix());
          Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
          currTask.addDependentTask(statsTask);
          if (!ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().add(currTask);
          }