Examples of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

org.apache.hadoop.hive.ql.parse.PrunedPartitionList
The list of pruned partitions.

      }


      // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
      // and pass it to setTaskPlan as the last parameter
      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
      PrunedPartitionList partitions = null;
      if (confirmedPartns.size() > 0) {
        Table source = queryBlock.getMetaData().getTableForAlias(alias);
        partitions = new PrunedPartitionList(source, confirmedPartns, false);
      }


      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
      w.setGatheringStats(true);

View Full Code Here

    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();


    Path tblDir = null;
    TableDesc tblDesc = null;


    PrunedPartitionList partsList = pList;


    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());


    if (partsList == null) {
      try {
        TableScanOperator tsOp = (TableScanOperator) topOp;
        partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
      } catch (SemanticException e) {
        throw e;
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
    }


    // Generate the map work for this alias_id
    // pass both confirmed and unknown partitions through the map-reduce
    // framework
    Set<Partition> parts = partsList.getPartitions();
    PartitionDesc aliasPartnDesc = null;
    try {
      if (!parts.isEmpty()) {
        aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
      }
    } catch (HiveException e) {
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }


    // The table does not have any partitions
    if (aliasPartnDesc == null) {
      aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(parseCtx
          .getTopToTable().get(topOp)), null);


    }


    Map<String, String> props = parseCtx.getTopToProps().get(topOp);
    if (props != null) {
      Properties target = aliasPartnDesc.getProperties();
      if (target == null) {
        aliasPartnDesc.setProperties(target = new Properties());
      }
      target.putAll(props);
    }


    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);


    long sizeNeeded = Integer.MAX_VALUE;
    int fileLimit = -1;
    if (parseCtx.getGlobalLimitCtx().isEnable()) {
      long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
          HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
      sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
      // for the optimization that reduce number of input file, we limit number
      // of files allowed. If more than specific number of files have to be
      // selected, we skip this optimization. Since having too many files as
      // inputs can cause unpredictable latency. It's not necessarily to be
      // cheaper.
      fileLimit =
          HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);


      if (sizePerRow <= 0 || fileLimit <= 0) {
        LOG.info("Skip optimization to reduce input size of 'limit'");
        parseCtx.getGlobalLimitCtx().disableOpt();
      } else if (parts.isEmpty()) {
        LOG.info("Empty input: skip limit optimiztion");
      } else {
        LOG.info("Try to reduce input size for 'limit' " +
            "sizeNeeded: " + sizeNeeded +
            "  file limit : " + fileLimit);
      }
    }
    boolean isFirstPart = true;
    boolean emptyInput = true;
    boolean singlePartition = (parts.size() == 1);


    // Track the dependencies for the view. Consider a query like: select * from V;
    // where V is a view of the form: select * from T
    // The dependencies should include V at depth 0, and T at depth 1 (inferred).
    ReadEntity parentViewInfo = getParentViewInfo(alias_id, parseCtx.getViewAliasToInput());


    // The table should also be considered a part of inputs, even if the table is a
    // partitioned table and whether any partition is selected or not
    PlanUtils.addInput(inputs,
        new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo));


    for (Partition part : parts) {
      if (part.getTable().isPartitioned()) {
        PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo));
      } else {
        PlanUtils.addInput(inputs, new ReadEntity(part.getTable(), parentViewInfo));
      }


      // Later the properties have to come from the partition as opposed
      // to from the table in order to support versioning.
      Path[] paths = null;
      sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);


      // Lookup list bucketing pruner
      Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(topOp);
      ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName())
          : null;


      if (sampleDescr != null) {
        assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexit.";
        paths = SamplePruner.prune(part, sampleDescr);
        parseCtx.getGlobalLimitCtx().disableOpt();
      } else if (listBucketingPruner != null) {
        assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
        /* Use list bucketing prunner's path. */
        paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
      } else {
        // Now we only try the first partition, if the first partition doesn't
        // contain enough size, we change to normal mode.
        if (parseCtx.getGlobalLimitCtx().isEnable()) {
          if (isFirstPart) {
            long sizeLeft = sizeNeeded;
            ArrayList<Path> retPathList = new ArrayList<Path>();
            SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft,
                fileLimit, retPathList);
            if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
              continue;
            } else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
              LOG.info("Use full input -- first " + fileLimit + " files are more than "
                  + sizeNeeded
                  + " bytes");


              parseCtx.getGlobalLimitCtx().disableOpt();


            } else {
              emptyInput = false;
              paths = new Path[retPathList.size()];
              int index = 0;
              for (Path path : retPathList) {
                paths[index++] = path;
              }
              if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
                // if all files are needed to meet the size limit, we disable
                // optimization. It usually happens for empty table/partition or
                // table/partition with only one file. By disabling this
                // optimization, we can avoid retrying the query if there is
                // not sufficient rows.
                parseCtx.getGlobalLimitCtx().disableOpt();
              }
            }
            isFirstPart = false;
          } else {
            paths = new Path[0];
          }
        }
        if (!parseCtx.getGlobalLimitCtx().isEnable()) {
          paths = part.getPath();
        }
      }


      // is it a partitioned table ?
      if (!part.getTable().isPartitioned()) {
        assert ((tblDir == null) && (tblDesc == null));


        tblDir = paths[0];
        tblDesc = Utilities.getTableDesc(part.getTable());
      } else if (tblDesc == null) {
        tblDesc = Utilities.getTableDesc(part.getTable());
      }


      if (props != null) {
        Properties target = tblDesc.getProperties();
        if (target == null) {
          tblDesc.setProperties(target = new Properties());
        }
        target.putAll(props);
      }


      for (Path p : paths) {
        if (p == null) {
          continue;
        }
        String path = p.toString();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Adding " + path + " of table" + alias_id);
        }


        partDir.add(p);
        try {
          if (part.getTable().isPartitioned()) {
            partDesc.add(Utilities.getPartitionDesc(part));
          }
          else {
            partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part));
          }
        } catch (HiveException e) {
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
      }
    }
    if (emptyInput) {
      parseCtx.getGlobalLimitCtx().disableOpt();
    }


    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();


    if (!local) {
      while (iterPath.hasNext()) {
        assert iterPartnDesc.hasNext();
        String path = iterPath.next().toString();


        PartitionDesc prtDesc = iterPartnDesc.next();


        // Add the path to alias mapping
        if (plan.getPathToAliases().get(path) == null) {
          plan.getPathToAliases().put(path, new ArrayList<String>());
        }
        plan.getPathToAliases().get(path).add(alias_id);
        plan.getPathToPartitionInfo().put(path, prtDesc);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Information added for path " + path);
        }
      }


      assert plan.getAliasToWork().get(alias_id) == null;
      plan.getAliasToWork().put(alias_id, topOp);
    } else {
      // populate local work if needed
      MapredLocalWork localPlan = plan.getMapLocalWork();
      if (localPlan == null) {
        localPlan = new MapredLocalWork(
            new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
            new LinkedHashMap<String, FetchWork>());
      }


      assert localPlan.getAliasToWork().get(alias_id) == null;
      assert localPlan.getAliasToFetchWork().get(alias_id) == null;
      localPlan.getAliasToWork().put(alias_id, topOp);
      if (tblDir == null) {
        tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
        localPlan.getAliasToFetchWork().put(
            alias_id,
            new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc, tblDesc));
      } else {
        localPlan.getAliasToFetchWork().put(alias_id,

View Full Code Here

      if (!table.isPartitioned()) {
        List<String> sortCols = Utilities.getColumnNamesFromSortCols(table.getSortCols());
        List<String> bucketCols = table.getBucketCols();
        return matchBucketSortCols(groupByCols, bucketCols, sortCols);
      } else {
        PrunedPartitionList partsList;
        try {
          partsList = pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp);
        } catch (HiveException e) {
          LOG.error(StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }


        List<Partition> notDeniedPartns = partsList.getNotDeniedPartns();


        GroupByOptimizerSortMatch currentMatch =
            notDeniedPartns.isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH :
                notDeniedPartns.size() > 1 ? GroupByOptimizerSortMatch.PARTIAL_MATCH :
                    GroupByOptimizerSortMatch.COMPLETE_MATCH;

View Full Code Here

      ParseContext pctx,
      Map<Table, List<Index>> indexes)
    throws HiveException {
    Hive hive = Hive.get(pctx.getConf());
    // make sure each partition exists on the index table
    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
    Set<Partition> queryPartitions = queryPartitionList.getPartitions();
    if (queryPartitions == null || queryPartitions.isEmpty()) {
      return null;
    }


    for (Partition part : queryPartitions) {

View Full Code Here

    if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
      ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
      bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner);
    }
    if (aggressive || bypassFilter) {
      PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
      if (aggressive || !pruned.hasUnknownPartitions()) {
        bypassFilter &= !pruned.hasUnknownPartitions();
        return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter);
      }
    }
    return null;
  }

View Full Code Here

      return false;
    }


    Table tbl = topToTable.get(tso);
    if (tbl.isPartitioned()) {
      PrunedPartitionList prunedParts;
      try {
          prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
      } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
      }
      List<Partition> partitions = prunedParts.getNotDeniedPartns();
      // Populate the names and order of columns for the first partition of the
      // first table
      if ((pos == 0) && (partitions != null) && (!partitions.isEmpty())) {
        Partition firstPartition = partitions.get(0);
        sortColumnsFirstTable.addAll(firstPartition.getSortCols());
      }


      for (Partition partition : prunedParts.getNotDeniedPartns()) {
        if (!checkSortColsAndJoinCols(partition.getSortCols(),
          joinCols,
          sortColumnsFirstTable)) {
          return false;
        }

View Full Code Here

            // dynamic partition
            confirmedPartns.addAll(tblSpec.partitions);
          }
          if (confirmedPartns.size() > 0) {
            Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
            PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, false);
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx, partList);
          } else { // non-partitioned table
            GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx);
          }
        }

View Full Code Here

    String key = tab.getDbName() + "." + tab.getTableName() + ";";


    if (prunerExpr != null) {
      key = key + prunerExpr.getExprString();
    }
    PrunedPartitionList ret = prunedPartitionsMap.get(key);
    if (ret != null) {
      return ret;
    }


    ret = getPartitionsFromServer(tab, prunerExpr, conf, alias);

View Full Code Here

  private static PrunedPartitionList getPartitionsFromServer(Table tab,
      ExprNodeDesc prunerExpr, HiveConf conf, String alias) throws HiveException {
    try {
      if (!tab.isPartitioned()) {
        // If the table is not partitioned, return everything.
        return new PrunedPartitionList(tab, Hive.get().getAllPartitionsForPruner(tab), false);
      }
      LOG.debug("tabname = " + tab.getTableName() + " is partitioned");


      if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE))
          && !hasColumnExpr(prunerExpr)) {
        // If the "strict" mode is on, we have to provide partition pruner for each table.
        throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE
            .getMsg("for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\""));
      }


      if (prunerExpr == null) {
        // This can happen when hive.mapred.mode=nonstrict and there is no predicates at all.
        return new PrunedPartitionList(tab, Hive.get().getAllPartitionsForPruner(tab), false);
      }


      // Remove virtual columns. See javadoc for details.
      prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab));
      // Remove all unknown parts e.g. non-partition columns. See javadoc for details.
      ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
      String oldFilter = prunerExpr.getExprString();
      if (compactExpr == null) {
        // This could happen when hive.mapred.mode=nonstrict and all the predicates
        // are on non-partition columns.
        LOG.debug("Filter " + oldFilter + " was null after compacting");
        return new PrunedPartitionList(tab, Hive.get().getAllPartitionsForPruner(tab), true);
      }


      Set<Partition> partitions = new LinkedHashSet<Partition>();
      boolean hasUnknownPartitions = false;
      String message = Utilities.checkJDOPushDown(tab, compactExpr, null);
      if (message != null) {
        LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '"
            + message + "'"));
        hasUnknownPartitions = pruneBySequentialScan(tab, partitions, prunerExpr, conf);
      } else {
        String filter = compactExpr.getExprString();
        LOG.debug("Filter w/ compacting: " + filter +"; filter w/o compacting: " + oldFilter);
        hasUnknownPartitions = !filter.equals(oldFilter);
        partitions.addAll(Hive.get().getPartitionsByFilter(tab, filter));
      }
      return new PrunedPartitionList(tab, partitions, hasUnknownPartitions);
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    }

View Full Code Here

        joinKeyOrder = new Integer[keys.size()];
      }


      Table tbl = topToTable.get(tso);
      if (tbl.isPartitioned()) {
        PrunedPartitionList prunedParts;
        try {
          prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
        } catch (HiveException e) {
          // Has to use full name to make sure it does not conflict with
          // com.facebook.presto.hive.shaded.org.apache.commons.lang.StringUtils
          LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
          throw new SemanticException(e.getMessage(), e);
        }
        List<Partition> partitions = prunedParts.getNotDeniedPartns();
        // construct a mapping of (Partition->bucket file names) and (Partition -> bucket number)
        if (partitions.isEmpty()) {
          if (!alias.equals(baseBigAlias)) {
            tblAliasToNumberOfBucketsInEachPartition.put(alias, Arrays.<Integer> asList());
            tblAliasToBucketedFilePathsInEachPartition.put(alias, new ArrayList<List<String>>());

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.hive.ql.parse.PrunedPartitionList

org.apache.hadoop.hive.ql.Driver

org.apache.hadoop.hive.ql.optimizer.AbstractBucketJoinProc

org.apache.hadoop.hive.ql.optimizer.AbstractSMBJoinProc

org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ

org.apache.hadoop.hive.ql.optimizer.BucketingSortingReduceSinkOptimizer$BucketSortReduceSinkProcessor

org.apache.hadoop.hive.ql.optimizer.BucketMapJoinOptimizer$BucketMapjoinOptProc

org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization

org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils

org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1

org.apache.hadoop.hive.ql.optimizer.GlobalLimitOptimizer

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.