* pruned partition list. If it is null it will be computed on-the-fly.
public static void setTaskPlan(String alias_id,
Operator<? extends OperatorDesc> topOp, MapredWork plan, boolean local,
GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException {
ParseContext parseCtx = opProcCtx.getParseCtx();
Set<ReadEntity> inputs = opProcCtx.getInputs();
ArrayList<Path> partDir = new ArrayList<Path>();
ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
Path tblDir = null;
TableDesc tblDesc = null;
PrunedPartitionList partsList = pList;
if (partsList == null) {
try {
partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp);
if (partsList == null) {
partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
alias_id, parseCtx.getPrunedPartitions());
parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList);
} catch (SemanticException e) {
throw e;
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
// Generate the map work for this alias_id
Set<Partition> parts = null;
// pass both confirmed and unknown partitions through the map-reduce
// framework
parts = partsList.getConfirmedPartns();
PartitionDesc aliasPartnDesc = null;
try {
if (!parts.isEmpty()) {
aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next());
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
// The table does not have any partitions
if (aliasPartnDesc == null) {
aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(parseCtx
.getTopToTable().get(topOp)), null);
plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);
long sizeNeeded = Integer.MAX_VALUE;
int fileLimit = -1;
if (parseCtx.getGlobalLimitCtx().isEnable()) {
long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
// For the optimization that reduces the number of input files, we limit the
// number of files allowed. If more than the specified number of files have
// to be selected, we skip this optimization, since having too many files as
// inputs can cause unpredictable latency and is not necessarily cheaper.
fileLimit =
HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
if (sizePerRow <= 0 || fileLimit <= 0) {
LOG.info("Skip optimization to reduce input size of 'limit'");
} else if (parts.isEmpty()) {
LOG.info("Empty input: skip limit optimiztion");
} else {
LOG.info("Try to reduce input size for 'limit' " +
"sizeNeeded: " + sizeNeeded +
" file limit : " + fileLimit);
boolean isFirstPart = true;
boolean emptyInput = true;
boolean singlePartition = (parts.size() == 1);
for (Partition part : parts) {
if (part.getTable().isPartitioned()) {
inputs.add(new ReadEntity(part));
} else {
inputs.add(new ReadEntity(part.getTable()));
// Later the properties have to come from the partition as opposed
// to from the table in order to support versioning.
Path[] paths = null;
sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);
// Lookup list bucketing pruner
Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(topOp);
ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName())
: null;
if (sampleDescr != null) {
assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexit.";
paths = SamplePruner.prune(part, sampleDescr);
} else if (listBucketingPruner != null) {
assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
/* Use list bucketing pruner's path. */
paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
} else {
// For now we only try the first partition; if the first partition doesn't
// contain enough data, we fall back to normal mode.
if (parseCtx.getGlobalLimitCtx().isEnable()) {
if (isFirstPart) {
long sizeLeft = sizeNeeded;
ArrayList<Path> retPathList = new ArrayList<Path>();
SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft,
fileLimit, retPathList);
if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
} else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
LOG.info("Use full input -- first " + fileLimit + " files are more than "
+ sizeNeeded
+ " bytes");
} else {
emptyInput = false;
paths = new Path[retPathList.size()];
int index = 0;
for (Path path : retPathList) {
paths[index++] = path;
if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
// If all files are needed to meet the size limit, we disable the
// optimization. This usually happens for an empty table/partition or a
// table/partition with only one file. By disabling this optimization,
// we can avoid retrying the query if there are not enough rows.
isFirstPart = false;
} else {
paths = new Path[0];
if (!parseCtx.getGlobalLimitCtx().isEnable()) {
paths = part.getPath();
// is it a partitioned table ?
if (!part.getTable().isPartitioned()) {
assert ((tblDir == null) && (tblDesc == null));
tblDir = paths[0];
tblDesc = Utilities.getTableDesc(part.getTable());
} else if (tblDesc == null) {
tblDesc = Utilities.getTableDesc(part.getTable());
for (Path p : paths) {
if (p == null) {
String path = p.toString();
if (LOG.isDebugEnabled()) {
LOG.debug("Adding " + path + " of table" + alias_id);
try {
partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part));
} catch (HiveException e) {
throw new SemanticException(e.getMessage(), e);
if (emptyInput) {
Iterator<Path> iterPath = partDir.iterator();
Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();