return "STATS";
}
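
  /**
   * Aggregate the statistics published by earlier tasks (via the configured
   * StatsAggregator) together with what is already recorded in the metastore,
   * update the table and partition metadata, and write the result back.
   *
   * @return 0 on success, 1 if the stats could not be updated and stats are
   *         configured to be reliable
   */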
  private int aggregateStats() {
    StatsAggregator statsAggregator = null;
    int ret = 0;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);
      if (!this.getWork().getNoStatsAggregator()) {
        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        if (work.isNoScanAnalyzeCommand()) {
          // For a noscan ANALYZE command the plan contains only this stats task, so the
          // stats publishing table has to be initialized here; in other plans it is
          // initialized by the MR tasks in ExecDriver.java.
          StatsPublisher statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(conf)) { // create the stats table if it does not exist
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw new HiveException(
                  ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
        // manufacture a StatsAggregator
        statsAggregator = StatsFactory.getStatsAggregator();
        if (!statsAggregator.connect(conf)) {
          throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
        }
      }
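
      // Start from the table-level stats currently recorded in the metastore table
      // parameters; these are updated below and written back at the end.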
      TableStatistics tblStats = new TableStatistics();

      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();

      boolean tableStatsExist = this.existStats(parameters);

      for (String statType : supportedStats) {
        if (parameters.containsKey(statType)) {
          tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
        }
      }

      if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
        tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
      int maxPrefixLength = HiveConf.getIntVar(conf,
          HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH);
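
      // getPartitionsList() returns null for a non-partitioned table; otherwise it
      // returns the partitions whose stats this task should update.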
      if (partitions == null) {
        // non-partitioned tables:
        if (!tableStatsExist && atomic) {
          return 0;
        }
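
        // File count and total size are always recomputed here rather than read
        // from the aggregator.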
        long[] summary = summary(conf, table);
        tblStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);

        // In the case of a non-partitioned table, the key for the stats temporary store is "rootDir".
        if (statsAggregator != null) {
          String aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength);
          updateStats(collectableStats, tblStats, statsAggregator, parameters,
              aggKey, atomic);
          statsAggregator.cleanUp(aggKey);
        } else if (work.isClearAggregatorStats()) {
          // The collectable stats for the aggregator need to be cleared.
          // For example, if a file is being loaded, the old number of rows is no longer valid.
          for (String statType : collectableStats) {
            if (parameters.containsKey(statType)) {
              tblStats.setStat(statType, 0L);
            }
          }
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of each partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          parameters = tPart.getParameters();

          boolean hasStats = this.existStats(parameters);
          if (!hasStats && atomic) {
            continue;
          }
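
          // Snapshot the partition's existing stat values so the table-level totals
          // can be adjusted against them further below.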
          Map<String, Long> currentValues = new HashMap<String, Long>();
          for (String statType : supportedStats) {
            Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
                : 0L;
            currentValues.put(statType, val);
          }

          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();
          // In the case of a partition, the key for the stats temporary store is
          // "rootDir/[dynamic_partition_specs/]%".
          String partitionID = Utilities.getHashedStatsPrefix(
              work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
          LOG.info("Stats aggregator : " + partitionID);

          if (statsAggregator != null) {
            updateStats(collectableStats, newPartStats, statsAggregator,
                parameters, partitionID, atomic);
            statsAggregator.cleanUp(partitionID);
          } else {
            // No aggregator: either clear the collectable stats (for example, if a file
            // is being loaded, the old number of rows is no longer valid) or carry over
            // the values already stored for the partition.
            for (String statType : collectableStats) {
              if (work.isClearAggregatorStats()) {
                if (parameters.containsKey(statType)) {
                  newPartStats.setStat(statType, 0L);
                }
              } else {
                newPartStats.setStat(statType, currentValues.get(statType));
              }
            }
          }
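
          // As in the non-partitioned case, file count and total size are recomputed
          // for the partition rather than read from the aggregator.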
          long[] summary = summary(conf, partn);
          newPartStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);
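
          // Fold the partition's stats into the table totals: replace the old
          // contribution if the partition already had stats, otherwise add it
          // as a new partition.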
          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          for (String statType : supportedStats) {
            long statValue = newPartStats.getStat(statType);
            if (statValue >= 0) {
              parameters.put(statType, Long.toString(statValue));
            }
          }

          tPart.setParameters(parameters);
          String tableFullName = table.getDbName() + "." + table.getTableName();
          db.alterPartition(tableFullName, new Partition(table, tPart));

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }
      }

      //
      // write table stats to metastore
      //
      parameters = tTable.getParameters();
      for (String statType : supportedStats) {
        parameters.put(statType, Long.toString(tblStats.getStat(statType)));
      }
      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      tTable.setParameters(parameters);

      String tableFullName = table.getDbName() + "." + table.getTableName();
      db.alterTable(tableFullName, new Table(tTable));

      console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');
    } catch (Exception e) {
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));

      // Fail the query if the stats are supposed to be reliable
      if (work.isStatsReliable()) {
        ret = 1;
      }
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // The return value of 0 indicates success,
    // anything else indicates failure.
    return ret;