@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
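// Qualify the alias with the query block id so the same alias used in
// different (sub)queries maps to distinct top operators.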
String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
Table tab = qb.getMetaData().getSrcForAlias(alias);
RowResolver rwsch;
// is a table scan operator for this alias already present?
Operator<? extends Serializable> top = topOps.get(alias_id);
Operator<? extends Serializable> dummySel = topSelOps.get(alias_id);
if (dummySel != null) {
top = dummySel;
}
if (top == null) {
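// First time this alias is seen: build a RowResolver for the table's row
// by walking the ObjectInspector exposed by its deserializer (SerDe).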
rwsch = new RowResolver();
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) tab
.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector
.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
rwsch.put(alias, fields.get(i).getFieldName(), new ColumnInfo(fields
.get(i).getFieldName(), TypeInfoUtils
.getTypeInfoFromObjectInspector(fields.get(i)
.getFieldObjectInspector()), alias, false));
}
} catch (SerDeException e) {
throw new RuntimeException(e);
}
// Hack! Refactor once the metadata APIs with types are ready.
// Finally, add the partitioning columns.
for (FieldSchema part_col : tab.getPartCols()) {
LOG.trace("Adding partition col: " + part_col);
// TODO: use the right type by calling part_col.getType() instead of
// assuming string
rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(),
TypeInfoFactory.stringTypeInfo, alias, true));
}
// Put all virtual columns into the RowResolver.
Iterator<VirtualColumn> vcs = VirtualColumn.registry.values().iterator();
// Use a list so the set of virtual columns is easy to customize.
List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
while (vcs.hasNext()) {
VirtualColumn vc = vcs.next();
rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(),
vc.getTypeInfo(), alias, true, vc.getIsHidden()));
vcList.add(vc);
}
// Create the root of the operator tree
TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
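// setupStats configures the scan for statistics gathering (ANALYZE commands).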
setupStats(tsDesc, qb.getParseInfo(), tab, alias);
top = putOpInsertMap(OperatorFactory.get(tsDesc,
new RowSchema(rwsch.getColumnInfos())), rwsch);
// Add this to the list of top operators - we always start from a table
// scan
topOps.put(alias_id, top);
// Add a mapping from the table scan operator to Table
topToTable.put((TableScanOperator) top, tab);
} else {
rwsch = opParseCtx.get(top).getRowResolver();
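// Detach children left over from a previous plan fragment; they will be
// re-attached as this query's operators are generated.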
top.setChildOperators(null);
}
// check if this table is sampled and needs more than input pruning
Operator<? extends Serializable> tableOp = top;
TableSample ts = qb.getParseInfo().getTabSample(alias);
if (ts != null) {
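// TABLESAMPLE (BUCKET num OUT OF den [ON cols])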
int num = ts.getNumerator();
int den = ts.getDenominator();
ArrayList<ASTNode> sampleExprs = ts.getExprs();
// TODO: Do the type checking of the expressions
List<String> tabBucketCols = tab.getBucketCols();
int numBuckets = tab.getNumBuckets();
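// Bucket sampling relies on the table's bucketing metadata
// (CLUSTERED BY ... INTO n BUCKETS).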
// If there are no sample cols and no bucket cols then throw an error
if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " "
+ tab.getTableName());
}
if (num > den) {
throw new SemanticException(
ErrorMsg.BUCKETED_NUMBERATOR_BIGGER_DENOMINATOR.getMsg() + " "
+ tab.getTableName());
}
// Check whether a filter predicate is needed: it is when input pruning
// alone is not enough, or when input pruning is not possible.
// First, check whether the sample columns match the table's bucket columns.
boolean colsEqual = true;
if ((sampleExprs.size() != tabBucketCols.size())
&& (sampleExprs.size() != 0)) {
colsEqual = false;
}
for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
boolean colFound = false;
for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
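// Only a plain column reference can match a bucket column; any other
// expression forces a full sample filter.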
if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
break;
}
if (((ASTNode) sampleExprs.get(i).getChild(0)).getText()
.equalsIgnoreCase(tabBucketCols.get(j))) {
colFound = true;
}
}
colsEqual = (colsEqual && colFound);
}
// Check whether input can be pruned: possible when there are no sample
// expressions, or the sample expressions match the table's bucket columns.
boolean inputPruningPossible = (sampleExprs == null || sampleExprs.size() == 0 || colsEqual);
ts.setInputPruning(inputPruningPossible);
// Input pruning alone suffices only when the sample maps onto whole
// buckets: the denominator must evenly divide the bucket count, or vice versa.
if (inputPruningPossible
&& (num == den || den % numBuckets == 0 || numBuckets % den == 0)) {
// Input pruning is enough, but still add a sample filter carrying a
// sampleDesc so the optimizer can later restrict the input to the
// matching buckets.
LOG.info("No need for sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
colsEqual, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
samplePredicate, true, new sampleDesc(ts.getNumerator(), ts
.getDenominator(), tabBucketCols, true)),
new RowSchema(rwsch.getColumnInfos()), top);
} else {
// Input pruning is not enough; add a sample filter as a child of 'top'
// to drop non-qualifying rows at run time.
LOG.info("Need sample filter");
ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols,
colsEqual, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory.getAndMakeChild(new FilterDesc(
samplePredicate, true),
new RowSchema(rwsch.getColumnInfos()), top);
}
} else {
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
if (testMode) {
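// In test mode, table scans are sampled automatically so tests run on
// small inputs, unless the table is listed in HIVETESTMODENOSAMPLE.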
String tabName = tab.getTableName();
// has the user explicitly asked not to sample this table?
String unSampleTblList = conf
.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
String[] unSampleTbls = unSampleTblList.split(",");
boolean unsample = false;
for (String unSampleTbl : unSampleTbls) {
if (tabName.equalsIgnoreCase(unSampleTbl)) {
unsample = true;
break;
}
}
if (!unsample) {
int numBuckets = tab.getNumBuckets();
// If the input table is bucketed, choose the first bucket
if (numBuckets > 0) {
TableSample tsSample = new TableSample(1, numBuckets);
tsSample.setInputPruning(true);
qb.getParseInfo().setTabSample(alias, tsSample);
ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab
.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
tableOp = OperatorFactory
.getAndMakeChild(new FilterDesc(samplePred, true,
new sampleDesc(tsSample.getNumerator(), tsSample
.getDenominator(), tab.getBucketCols(), true)),
new RowSchema(rwsch.getColumnInfos()), top);
LOG.info("No need for sample filter");
} else {
// The table is not bucketed; add a dummy filter on rand()
int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);