Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.TableScanOperator
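Every snippet below follows the same basic shape: a NodeProcessor is registered against TableScanOperator nodes, casts the dispatched Node, and then inspects the scan's TableScanDesc. The minimal sketch below distills that shared pattern; the class name ExampleTableScanProcessor and the printed message are illustrative only and are not taken from any of the snippets on this page.

import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

// Minimal sketch only; the class name is hypothetical.
public class ExampleTableScanProcessor implements NodeProcessor {

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    // The graph walker dispatches only table scan nodes to this rule, so the
    // cast mirrors what the processors in the snippets below do.
    TableScanOperator tsOp = (TableScanOperator) nd;

    // The descriptor carries the scan's configuration: pushed-down filter,
    // needed column ids, virtual columns, alias, and so on.
    TableScanDesc desc = tsOp.getConf();
    if (desc == null) {
      return null;
    }

    System.out.println("Table scan over alias " + desc.getAlias()
        + ", needed column ids: " + tsOp.getNeededColumnIDs());
    return null;
  }
}

In the real optimizers such a processor is typically installed in a rule map keyed on TableScanOperator.getOperatorName() and driven by a graph walker over the operator tree, which is how each of the process methods below receives its TableScanOperator.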


  /**
   * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher.
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                        Object... nodeOutputs) throws SemanticException {

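    // The rule that dispatched us guarantees nd is a table scan; pull its
    // descriptor and any filter predicate that was pushed down to the scan.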
    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    if (operatorDesc == null) {
      return null;
    }
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();

    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");

    if (predicate == null) {
      LOG.info("null predicate pushed down");
      return null;
    }
    LOG.info(predicate.getExprString());

    // check if we have indexes on all partitions in this table scan
    Set<Partition> queryPartitions;
    try {
      queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
      if (queryPartitions == null) { // partitions not covered
        return null;
      }
    } catch (HiveException e) {
      LOG.error("Fatal Error: problem accessing metastore", e);
      throw new SemanticException(e);
    }

    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
      return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();

    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    // make sure we have an index on the table being scanned
    TableDesc tblDesc = operator.getTableDesc();
    Table srcTable = pctx.getTopToTable().get(operator);
    if (indexes == null || indexes.get(srcTable) == null) {
      return null;
    }

View Full Code Here


    }

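    // The map-join task is expected to contain exactly one alias; locate the
    // TableScanOperator at the root of that alias's operator tree.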
    Entry<String, Operator<? extends OperatorDesc>> mapJoinAliasToWorkEntry =
        mapJoinAliasToWork.entrySet().iterator().next();
    String mapJoinAlias = mapJoinAliasToWorkEntry.getKey();
    TableScanOperator mapJoinTaskTableScanOperator =
        OperatorUtils.findSingleOperator(
            mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class);
    if (mapJoinTaskTableScanOperator == null) {
      throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() +
          " operator as the work associated with alias " + mapJoinAlias +
          ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
    }
    FileSinkOperator mapJoinTaskFileSinkOperator =
        OperatorUtils.findSingleOperator(
            mapJoinTaskTableScanOperator, FileSinkOperator.class);
    if (mapJoinTaskFileSinkOperator == null) {
      throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() +
          " operator at the last operator of the MapJoin Task.");
    }

    // The mapJoinTaskFileSinkOperator writes to a different directory
    String childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName().toString();
    List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
    if (childMRAliases == null || childMRAliases.size() != 1) {
      return;
    }
    String childMRAlias = childMRAliases.get(0);

    // Sanity check to make sure there is no alias conflict after merge.
    for (Entry<String, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
      String path = entry.getKey();
      List<String> aliases = entry.getValue();

      if (path.equals(childMRPath)) {
        continue;
      }

      if (aliases.contains(mapJoinAlias)) {
        // An alias conflict should not happen here.
        return;
      }
    }

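    // Pull the local (small-table) work from both tasks so the bucketing and
    // size constraints below can be checked before merging.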
    MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapLocalWork();
    MapredLocalWork childLocalWork = childMapWork.getMapLocalWork();

    if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) ||
        (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
      // Right now, we do not handle the case where either of them is bucketed.
      // We should relax this constraint in a follow-up JIRA.
      return;
    }

    // We need to check if the total size of the local tables is under the limit.
    // Here we use a conservative condition: the total size of the local tables
    // used by all input paths. This condition could be relaxed to check the
    // total size of the local tables for each input path separately.
    // Example:
    //               UNION_ALL
    //              /         \
    //             /           \
    //            /             \
    //           /               \
    //       MapJoin1          MapJoin2
    //      /   |   \         /   |   \
    //     /    |    \       /    |    \
    //   Big1   S1   S2    Big2   S3   S4
    // In this case, we have two MapJoins, MapJoin1 and MapJoin2. Big1 and Big2 are two
    // big tables, and S1, S2, S3, and S4 are four small tables. Hash tables of S1 and S2
    // will only be used by Map tasks processing Big1. Hash tables of S3 and S4 will only
    // be used by Map tasks processing Big2. If Big1!=Big2, we should only check if the size
    // of S1 + S2 is under the limit, and if the size of S3 + S4 is under the limit.
    // But, right now, we are checking the size of S1 + S2 + S3 + S4 is under the limit.
    // If Big1=Big2, we will only scan a path once. So, MapJoin1 and MapJoin2 will be executed
    // in the same Map task. In this case, we need to make sure the size of S1 + S2 + S3 + S4
    // is under the limit.
    if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)){
      // The total size of local tables may not be under
      // the limit after we merge mapJoinLocalWork and childLocalWork.
      // Do not merge.
      return;
    }

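    // Locate the TableScanOperator in the child task that reads the output
    // written by the map-join task's FileSinkOperator.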
    TableScanOperator childMRTaskTableScanOperator =
        OperatorUtils.findSingleOperator(
            childMapWork.getAliasToWork().get(childMRAlias), TableScanOperator.class);
    if (childMRTaskTableScanOperator == null) {
      throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() +
          " operator as the work associated with alias " + childMRAlias +
          ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
    }

    List<Operator<? extends OperatorDesc>> parentsInMapJoinTask =
        mapJoinTaskFileSinkOperator.getParentOperators();
    List<Operator<? extends OperatorDesc>> childrenInChildMRTask =
        childMRTaskTableScanOperator.getChildOperators();
    if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
      // Do not merge if we do not know how to connect two operator trees.
      return;
    }
View Full Code Here

  public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator tsop = (TableScanOperator) nd;
      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
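      // Prune the partition list for this scan; statistics are collected only
      // over the partitions that survive pruning.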
      PrunedPartitionList partList = null;
      try {
        partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop);
      } catch (HiveException e1) {
        throw new SemanticException(e1);
      }
      Table table = aspCtx.getParseContext().getTopToTable().get(tsop);

      // gather statistics for the first time and attach them to the table scan operator
      Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
      try {
        tsop.setStatistics(stats.clone());

        if (LOG.isDebugEnabled()) {
          LOG.debug("[0] STATS-" + tsop.toString() + ": " + stats.extendedToString());
        }
      } catch (CloneNotSupportedException e) {
        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
      }
      return null;
View Full Code Here

    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      TableScanOperator tsOp = (TableScanOperator) nd;
      WalkerCtx walkerCtx = (WalkerCtx) procCtx;
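      // A scan that needs neither real columns nor virtual columns is a
      // candidate for being answered from metadata alone.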
      List<Integer> colIDs = tsOp.getNeededColumnIDs();
      TableScanDesc desc = tsOp.getConf();
      boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
      boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
                             || (desc.getVirtualCols().isEmpty());
      if (noColNeeded && noVCneeded) {
        walkerCtx.setMayBeMetadataOnly(tsOp);
View Full Code Here

          walkerCtx.getMetadataOnlyTableScans().size()));
      Iterator<TableScanOperator> iterator
        = walkerCtx.getMetadataOnlyTableScans().iterator();

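      // Flag each qualifying scan as metadata-only and adjust the corresponding
      // alias in the task's MapWork accordingly.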
      while (iterator.hasNext()) {
        TableScanOperator tso = iterator.next();
        ((TableScanDesc)tso.getConf()).setIsMetadataOnly(true);
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        String alias = getAliasForTableScanOperator(work, tso);
        LOG.info("Metadata only table scan for " + alias);
        processAlias(work, alias);
      }
View Full Code Here

      FilterOperator fop = (FilterOperator) nd;
      Operator<? extends OperatorDesc> parent = fop.getParentOperators().get(0);
      Statistics parentStats = parent.getStatistics();
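      // If the filter sits directly on a table scan, collect the scan's needed
      // columns for use when evaluating the predicate's statistics.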
      List<String> neededCols = null;
      if (parent instanceof TableScanOperator) {
        TableScanOperator tsop = (TableScanOperator) parent;
        neededCols = tsop.getNeededColumns();
      }

      try {
        if (parentStats != null) {
          ExprNodeDesc pred = fop.getConf().getPredicate();
View Full Code Here

    List<String> joinCols = toColumns(keys);
    if (joinCols == null || joinCols.isEmpty()) {
      return false;
    }

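    // Walk back from the top operator to its originating table scan, verifying
    // that the join columns trace back to plain table columns along the way.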
    TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, joinCols);
    if (tso == null) {
      return false;
    }

    // For nested sub-queries, the alias mapping is not maintained in QB currently.
View Full Code Here

      //
      Integer tempGlobalLimit = checkQbpForGlobalLimit(qb);

      // the query qualifies for the optimization
      if (tempGlobalLimit != null && tempGlobalLimit != 0) {
        TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
        Table tab = topToTable.get(ts);

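        // For an unpartitioned table the optimization is safe as long as there
        // is no WHERE clause on the destination.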
        if (!tab.isPartitioned()) {
          if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
            globalLimitCtx.enableOpt(tempGlobalLimit);
View Full Code Here

   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    Class<? extends InputFormat> inputFormat = parseCtx.getTopToTable().get(op)
        .getInputFormatClass();
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();

    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork, parseCtx.getConf());
    Operator<? extends OperatorDesc> currTopOp = op;
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(currTopOp);

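    // Find the alias that this table scan was registered under so the new task
    // can be tracked against it.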
    for (String alias : parseCtx.getTopOps().keySet()) {
      Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
      if (currOp == op) {
        String currAliasId = alias;
        ctx.setCurrAliasId(currAliasId);
        mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));

        QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
        if (parseInfo.isAnalyzeCommand()) {
          boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
          boolean noScan = parseInfo.isNoScanAnalyzeCommand();
          if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {

            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
            // There will not be any MR or Tez job above this task
            StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec());
            snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(
                HiveConf.ConfVars.HIVE_STATS_RELIABLE));
            Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
            ctx.setCurrTask(snjTask);
            ctx.setCurrTopOp(null);
            ctx.getRootTasks().clear();
            ctx.getRootTasks().add(snjTask);
          } else {
            // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
            // The plan consists of a simple MapRedTask followed by a StatsTask.
            // The MR task is just a simple TableScanOperator

            StatsWork statsWork = new StatsWork(parseCtx.getQB().getParseInfo().getTableSpec());
            statsWork.setAggKey(op.getConf().getStatsAggPrefix());
            statsWork.setSourceTask(currTask);
            statsWork.setStatsReliable(parseCtx.getConf().getBoolVar(
                HiveConf.ConfVars.HIVE_STATS_RELIABLE));
            Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
            currTask.addDependentTask(statsTask);
View Full Code Here

    for (Map.Entry<String, Operator<?>> entry : parseCtx.getTopOps().entrySet()) {
      if (!(entry.getValue() instanceof TableScanOperator)) {
        continue;
      }
      String alias = entry.getKey();
      TableScanOperator topOp = (TableScanOperator) entry.getValue();
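      // If this scan comes from a view, record the enclosing view as the parent
      // of the ReadEntity added to the query inputs.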
      ReadEntity parentViewInfo = getParentViewInfo(alias, parseCtx.getViewAliasToInput());

      // Adds tables only for create view (PPD filter can be appended by outer query)
      Table table = parseCtx.getTopToTable().get(topOp);
      PlanUtils.addInput(inputs, new ReadEntity(table, parentViewInfo));
View Full Code Here
