// Nothing to do for StatsTask here.
}
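/**
 * Aggregates statistics for the target table: values collected in the
 * temporary stats store (via the configured StatsAggregator) are merged with
 * file counts and total sizes computed from the file system, and the result is
 * written back to the metastore for the table and, if partitioned, each
 * affected partition. Always returns 0 so that a stats failure never fails the
 * enclosing job.
 */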
private int aggregateStats() {
StatsAggregator statsAggregator = null;
try {
// Stats setup:
Warehouse wh = new Warehouse(conf);
FileSystem fileSys;
FileStatus[] fileStatus;
if (!this.getWork().getNoStatsAggregator()) {
String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
StatsFactory.setImplementation(statsImplementationClass, conf);
// manufacture a StatsAggregator
statsAggregator = StatsFactory.getStatsAggregator();
if (!statsAggregator.connect(conf)) {
throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
}
}
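// Seed the in-memory table statistics from any values already recorded in the
// table's metastore parameters.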
TableStatistics tblStats = new TableStatistics();
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
boolean tableStatsExist = this.existStats(parameters);
for (String statType : supportedStats) {
if (parameters.containsKey(statType)) {
tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
}
}
if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
}
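// getPartitionsList() yields null for a non-partitioned table; with atomic
// stats enabled, tables or partitions that have no pre-existing stats are
// skipped entirely rather than updated.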
List<Partition> partitions = getPartitionsList();
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
if (partitions == null) {
// non-partitioned tables:
if (!tableStatsExist && atomic) {
return 0;
}
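// Compute file count and total size directly from the table directory in the
// warehouse.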
Path tablePath = wh.getTablePath(db.getDatabase(table.getDbName()), table.getTableName());
fileSys = tablePath.getFileSystem(conf);
fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);
tblStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
long tableSize = 0L;
for (int i = 0; i < fileStatus.length; i++) {
tableSize += fileStatus[i].getLen();
}
tblStats.setStat(StatsSetupConst.TOTAL_SIZE, tableSize);
// In the case of a non-partitioned table, the key for the stats temporary store is "rootDir"
if (statsAggregator != null) {
updateStats(collectableStats, tblStats, statsAggregator, parameters,
work.getAggKey(), atomic);
statsAggregator.cleanUp(work.getAggKey());
}
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
parameters = tPart.getParameters();
boolean hasStats = this.existStats(parameters);
if (!hasStats && atomic) {
continue;
}
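// Snapshot the partition's existing stat values; they are used later as the
// "old" stats when adjusting the table-level totals.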
Map<String, Long> currentValues = new HashMap<String, Long>();
for (String statType : supportedStats) {
Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
: 0L;
currentValues.put(statType, val);
}
//
// get the new partition stats
//
PartitionStatistics newPartStats = new PartitionStatistics();
// In the case of a partition, the key for the stats temporary store is
// "rootDir/[dynamic_partition_specs/]%"
String partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());
LOG.info("Stats aggregator : " + partitionID);
if (statsAggregator != null) {
updateStats(collectableStats, newPartStats, statsAggregator,
parameters, partitionID, atomic);
} else {
for (String statType : collectableStats) {
newPartStats.setStat(statType, currentValues.get(statType));
}
}
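// File count and total size come from the file system rather than from the
// stats aggregator.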
fileSys = partn.getPartitionPath().getFileSystem(conf);
fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(), 1, fileSys);
newPartStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
long partitionSize = 0L;
for (int i = 0; i < fileStatus.length; i++) {
partitionSize += fileStatus[i].getLen();
}
newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, partitionSize);
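// Fold the new partition stats into the table-level totals: update against the
// old values if the partition already had stats, otherwise add it as new.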
if (hasStats) {
PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
tblStats.updateStats(oldPartStats, newPartStats);
} else {
tblStats.addPartitionStats(newPartStats);
}
//
// update the metastore
//
for (String statType : supportedStats) {
long statValue = newPartStats.getStat(statType);
if (statValue >= 0) {
parameters.put(statType, Long.toString(statValue));
}
}
tPart.setParameters(parameters);
String tableFullName = table.getDbName() + "." + table.getTableName();
db.alterPartition(tableFullName, new Partition(table, tPart));
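// Once the partition has been altered in the metastore, its entries in the
// temporary stats store can be removed.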
if (statsAggregator != null) {
statsAggregator.cleanUp(partitionID);
}
console.printInfo("Partition " + tableFullName + partn.getSpec() +
" stats: [" + newPartStats.toString() + ']');
}
}
//
// write table stats to metastore
//
parameters = tTable.getParameters();
for (String statType : supportedStats) {
parameters.put(statType, Long.toString(tblStats.getStat(statType)));
}
parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
tTable.setParameters(parameters);
String tableFullName = table.getDbName() + "." + table.getTableName();
db.alterTable(tableFullName, new Table(tTable));
console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');
} catch (Exception e) {
// return 0 since StatsTask should not fail the whole job
console.printInfo("[Warning] could not update stats.",
"Failed with exception " + e.getMessage() + "\n"
+ StringUtils.stringifyException(e));
} finally {
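// Always close the connection to the temporary stats store, even on failure.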
if (statsAggregator != null) {
statsAggregator.closeConnection();
}
}
// StatsTask always returns 0 so that the whole job won't fail
return 0;
}