// we can only process MapReduce tasks to check input size
if (!context.getCurrentTask().isMapRedTask()) {
return null;
}
MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
// get potential reentrant index queries from each index
Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
// make sure we have an index on the table being scanned
TableDesc tblDesc = operator.getTableDesc();
Table srcTable = pctx.getTopToTable().get(operator);
if (indexes == null || indexes.get(srcTable) == null) {
return null;
}
List<Index> tableIndexes = indexes.get(srcTable);
Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
for (Index indexOnTable : tableIndexes) {
if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
List<Index> newType = new ArrayList<Index>();
newType.add(indexOnTable);
indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
} else {
indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
}
}
// choose index type with most indexes of the same type on the table
// TODO HIVE-2130 This would be a good place for some sort of cost based choice?
List<Index> bestIndexes = indexesByType.values().iterator().next();
for (List<Index> indexTypes : indexesByType.values()) {
if (bestIndexes.size() < indexTypes.size()) {
bestIndexes = indexTypes;
}
}
// rewrite index queries for the chosen index type
HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
tmpQueryContext.setQueryPartitions(queryPartitions);
rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
if (indexTasks != null && indexTasks.size() > 0) {
queryContexts.put(bestIndexes.get(0), tmpQueryContext);
}
// choose an index rewrite to use
if (queryContexts.size() > 0) {
// TODO HIVE-2130 This would be a good place for some sort of cost based choice?
Index chosenIndex = queryContexts.keySet().iterator().next();
// modify the parse context to use indexing
// we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
// prepare the map reduce job to use indexing
MapredWork work = currentTask.getWork();
work.setInputformat(queryContext.getIndexInputFormat());
work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
// modify inputs based on index query
Set<ReadEntity> inputs = pctx.getSemanticInputs();
inputs.addAll(queryContext.getAdditionalSemanticInputs());