Package org.apache.hadoop.hive.ql.stats

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator


      Warehouse wh = new Warehouse(conf);
      FileSystem fileSys;
      FileStatus[] fileStatus;

      // manufacture a StatsAggregator
      StatsAggregator statsAggregator;
      String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
      StatsFactory.setImplementation(statsImplementationClass, conf);
      statsAggregator = StatsFactory.getStatsAggregator();
      if (!statsAggregator.connect(conf)) {
        // this should not fail the whole job, return 0 so that the job won't fail.
        console.printInfo("[WARNING] Could not update table/partition level stats.",
            "StatsAggregator.connect() failed: stats class = " +
            statsImplementationClass);
        return 0;
      }


      TableStatistics tblStats = new TableStatistics();

      //
      // For partitioned table get the old table statistics for incremental update
      //
      if (table.isPartitioned()) {
        org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
        Map<String, String> parameters = tTable.getParameters();
        if (parameters.containsKey(StatsSetupConst.ROW_COUNT)) {
          tblStats.setNumRows(Long.parseLong(parameters.get(StatsSetupConst.ROW_COUNT)));
        }
        if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
          tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
        }
        if (parameters.containsKey(StatsSetupConst.NUM_FILES)) {
          tblStats.setNumFiles(Integer.parseInt(parameters.get(StatsSetupConst.NUM_FILES)));
        }
        if (parameters.containsKey(StatsSetupConst.TOTAL_SIZE)) {
          tblStats.setSize(Long.parseLong(parameters.get(StatsSetupConst.TOTAL_SIZE)));
        }
      }

      List<Partition> partitions = getPartitionsList();

      if (partitions == null) {
        // non-partitioned tables:

        Path tablePath = wh.getDefaultTablePath(table.getDbName(), table.getTableName());
        fileSys = tablePath.getFileSystem(conf);
        fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);
        tblStats.setNumFiles(fileStatus.length);
        long tableSize = 0L;
        for (int i = 0; i < fileStatus.length; i++) {
          tableSize += fileStatus[i].getLen();
        }
        tblStats.setSize(tableSize);

        // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
        String rows = statsAggregator.aggregateStats(work.getAggKey(), StatsSetupConst.ROW_COUNT);
        if (rows != null) {
          tblStats.setNumRows(Long.parseLong(rows));
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();

          // In that case of a partition, the key for stats temporary store is "rootDir/[dynamic_partition_specs/]%"
          String partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());

          String rows = statsAggregator.aggregateStats(partitionID, StatsSetupConst.ROW_COUNT);
          if (rows != null) {
            newPartStats.setNumRows(Long.parseLong(rows));
          }

          fileSys = partn.getPartitionPath().getFileSystem(conf);
          fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(), 1, fileSys);
          newPartStats.setNumFiles(fileStatus.length);

          long partitionSize = 0L;
          for (int i = 0; i < fileStatus.length; i++) {
            partitionSize += fileStatus[i].getLen();
          }
          newPartStats.setSize(partitionSize);

          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          Map<String, String> parameters = tPart.getParameters();

          boolean hasStats =
            parameters.containsKey(StatsSetupConst.NUM_FILES) ||
            parameters.containsKey(StatsSetupConst.ROW_COUNT) ||
            parameters.containsKey(StatsSetupConst.TOTAL_SIZE);

          int  nf = parameters.containsKey(StatsSetupConst.NUM_FILES) ?
                    Integer.parseInt(parameters.get(StatsSetupConst.NUM_FILES)) :
                    0;
          long nr = parameters.containsKey(StatsSetupConst.ROW_COUNT) ?
                    Long.parseLong(parameters.get(StatsSetupConst.ROW_COUNT)) :
                    0L;
          long sz = parameters.containsKey(StatsSetupConst.TOTAL_SIZE) ?
                    Long.parseLong(parameters.get(StatsSetupConst.TOTAL_SIZE)) :
                    0L;
          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(nf, nr, sz);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          parameters.put(StatsSetupConst.ROW_COUNT, Long.toString(newPartStats.getNumRows()));
          parameters.put(StatsSetupConst.NUM_FILES, Integer.toString(newPartStats.getNumFiles()));
          parameters.put(StatsSetupConst.TOTAL_SIZE, Long.toString(newPartStats.getSize()));

          tPart.setParameters(parameters);
          db.alterPartition(table.getTableName(), new Partition(table, tPart));

          console.printInfo("Partition " + table.getTableName() + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }
      }

      statsAggregator.closeConnection();

      //
      // write table stats to metastore
      //
      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
View Full Code Here


  /**
   * This method is static as it is called from the shutdown hook at the ExecDriver.
   */
  public static void cleanUp(String jobID, Configuration config) {
    StatsAggregator statsAggregator;
    String statsImplementationClass = HiveConf.getVar(config, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, config);
    statsAggregator = StatsFactory.getStatsAggregator();
    if (statsAggregator.connect(config)) {
      statsAggregator.cleanUp(jobID + Path.SEPARATOR); // Adding the path separator to avoid an Id being a prefix of another ID
    }
  }
View Full Code Here

    return "STATS";
  }

  private int aggregateStats() {

    StatsAggregator statsAggregator = null;
    int ret = 0;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);

      if (!this.getWork().getNoStatsAggregator()) {
        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        if (work.isNoScanAnalyzeCommand()){
          // initialize stats publishing table for noscan which has only stats task
          // the rest of MR task following stats task initializes it in ExecDriver.java
          StatsPublisher statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(conf)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
        statsAggregator = StatsFactory.getStatsAggregator();
        // manufacture a StatsAggregator
        if (!statsAggregator.connect(conf)) {
          throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
        }
      }

      TableStatistics tblStats = new TableStatistics();

      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();

      boolean tableStatsExist = this.existStats(parameters);

      for (String statType : supportedStats) {
        if (parameters.containsKey(statType)) {
          tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
        }
      }

      if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
        tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
      int maxPrefixLength = HiveConf.getIntVar(conf,
          HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH);

      if (partitions == null) {
        // non-partitioned tables:
        if (!tableStatsExist && atomic) {
          return 0;
        }
        long[] summary = summary(conf, table);
        tblStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);

        // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
        if (statsAggregator != null) {
          String aggKey = Utilities.getHashedStatsPrefix(work.getAggKey(), maxPrefixLength);
          updateStats(collectableStats, tblStats, statsAggregator, parameters,
              aggKey, atomic);
          statsAggregator.cleanUp(aggKey);
        }
        // The collectable stats for the aggregator needs to be cleared.
        // For eg. if a file is being loaded, the old number of rows are not valid
        else if (work.isClearAggregatorStats()) {
          for (String statType : collectableStats) {
            if (parameters.containsKey(statType)) {
              tblStats.setStat(statType, 0L);
            }
          }
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          parameters = tPart.getParameters();

          boolean hasStats = this.existStats(parameters);
          if (!hasStats && atomic) {
            continue;
          }

          Map<String, Long> currentValues = new HashMap<String, Long>();
          for (String statType : supportedStats) {
            Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
                : 0L;
            currentValues.put(statType, val);
          }

          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();

          // In that case of a partition, the key for stats temporary store is
          // "rootDir/[dynamic_partition_specs/]%"
          String partitionID = Utilities.getHashedStatsPrefix(
              work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);

          LOG.info("Stats aggregator : " + partitionID);

          if (statsAggregator != null) {
            updateStats(collectableStats, newPartStats, statsAggregator,
                parameters, partitionID, atomic);
            statsAggregator.cleanUp(partitionID);
          } else {
            for (String statType : collectableStats) {
              // The collectable stats for the aggregator needs to be cleared.
              // For eg. if a file is being loaded, the old number of rows are not valid
              if (work.isClearAggregatorStats()) {
                if (parameters.containsKey(statType)) {
                  newPartStats.setStat(statType, 0L);
                }
              }
              else {
                newPartStats.setStat(statType, currentValues.get(statType));
              }
            }
          }

          long[] summary = summary(conf, partn);
          newPartStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);

          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          for (String statType : supportedStats) {
            long statValue = newPartStats.getStat(statType);
            if (statValue >= 0) {
              parameters.put(statType, Long.toString(newPartStats.getStat(statType)));
            }
          }

          tPart.setParameters(parameters);
          String tableFullName = table.getDbName() + "." + table.getTableName();
          db.alterPartition(tableFullName, new Partition(table, tPart));

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }

      }

      //
      // write table stats to metastore
      //
      parameters = tTable.getParameters();
      for (String statType : supportedStats) {
        parameters.put(statType, Long.toString(tblStats.getStat(statType)));
      }
      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      tTable.setParameters(parameters);

      String tableFullName = table.getDbName() + "." + table.getTableName();

      db.alterTable(tableFullName, new Table(tTable));

      console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');

    } catch (Exception e) {
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));

      // Fail the query if the stats are supposed to be reliable
      if (work.isStatsReliable()) {
        ret = 1;
      }
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // The return value of 0 indicates success,
    // anything else indicates failure
    return ret;
View Full Code Here

  /**
   * This method is static as it is called from the shutdown hook at the ExecDriver.
   */
  public static void cleanUp(String jobID, Configuration config) {
    StatsAggregator statsAggregator;
    String statsImplementationClass = HiveConf.getVar(config, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, config);
    statsAggregator = StatsFactory.getStatsAggregator();
    if (statsAggregator.connect(config)) {
      statsAggregator.cleanUp(jobID + Path.SEPARATOR); // Adding the path separator to avoid an Id
                                                       // being a prefix of another ID
      statsAggregator.closeConnection();
    }
  }
View Full Code Here

    // Nothing to do for StatsTask here.
  }

  private int aggregateStats() {

    StatsAggregator statsAggregator = null;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);
      FileSystem fileSys;
      FileStatus[] fileStatus;

      if (!this.getWork().getNoStatsAggregator()) {
        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        statsAggregator = StatsFactory.getStatsAggregator();
        // manufacture a StatsAggregator
        if (!statsAggregator.connect(conf)) {
          throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
        }
      }

      TableStatistics tblStats = new TableStatistics();

      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();

      boolean tableStatsExist = this.existStats(parameters);

      for (String statType : supportedStats) {
        if (parameters.containsKey(statType)) {
          tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
        }
      }

      if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
        tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);

      if (partitions == null) {
        // non-partitioned tables:
        if (!tableStatsExist && atomic) {
          return 0;
        }
        Path tablePath = wh.getTablePath(db.getDatabase(table.getDbName()), table.getTableName());
        fileSys = tablePath.getFileSystem(conf);
        fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);

        tblStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
        long tableSize = 0L;
        for (int i = 0; i < fileStatus.length; i++) {
          tableSize += fileStatus[i].getLen();
        }
        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, tableSize);

        // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
        if (statsAggregator != null) {
          updateStats(collectableStats, tblStats, statsAggregator, parameters,
              work.getAggKey(), atomic);
          statsAggregator.cleanUp(work.getAggKey());
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          parameters = tPart.getParameters();

          boolean hasStats = this.existStats(parameters);
          if (!hasStats && atomic) {
            continue;
          }

          Map<String, Long> currentValues = new HashMap<String, Long>();
          for (String statType : supportedStats) {
            Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
                : 0L;
            currentValues.put(statType, val);
          }

          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();

          // In that case of a partition, the key for stats temporary store is
          // "rootDir/[dynamic_partition_specs/]%"
          String partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());

          LOG.info("Stats aggregator : " + partitionID);

          if (statsAggregator != null) {
            updateStats(collectableStats, newPartStats, statsAggregator,
                parameters, partitionID, atomic);
          } else {
            for (String statType : collectableStats) {
              newPartStats.setStat(statType, currentValues.get(statType));
            }
          }

          fileSys = partn.getPartitionPath().getFileSystem(conf);
          fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(), 1, fileSys);
          newPartStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);

          long partitionSize = 0L;
          for (int i = 0; i < fileStatus.length; i++) {
            partitionSize += fileStatus[i].getLen();
          }
          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, partitionSize);

          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          for (String statType : supportedStats) {
            long statValue = newPartStats.getStat(statType);
            if (statValue >= 0) {
              parameters.put(statType, Long.toString(newPartStats.getStat(statType)));
            }
          }

          tPart.setParameters(parameters);
          String tableFullName = table.getDbName() + "." + table.getTableName();
          db.alterPartition(tableFullName, new Partition(table, tPart));

          if (statsAggregator != null) {
            statsAggregator.cleanUp(partitionID);
          }

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }

      }

      //
      // write table stats to metastore
      //
      parameters = tTable.getParameters();
      for (String statType : supportedStats) {
        parameters.put(statType, Long.toString(tblStats.getStat(statType)));
      }
      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      tTable.setParameters(parameters);

      String tableFullName = table.getDbName() + "." + table.getTableName();

      db.alterTable(tableFullName, new Table(tTable));

      console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');

    } catch (Exception e) {
      // return 0 since StatsTask should not fail the whole job
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // StatsTask always return 0 so that the whole job won't fail
    return 0;
  }
View Full Code Here

  /**
   * This method is static as it is called from the shutdown hook at the ExecDriver.
   */
  public static void cleanUp(String jobID, Configuration config) {
    StatsAggregator statsAggregator;
    String statsImplementationClass = HiveConf.getVar(config, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, config);
    statsAggregator = StatsFactory.getStatsAggregator();
    if (statsAggregator.connect(config)) {
      statsAggregator.cleanUp(jobID + Path.SEPARATOR); // Adding the path separator to avoid an Id
                                                       // being a prefix of another ID
      statsAggregator.closeConnection();
    }
  }
View Full Code Here

    // Nothing to do for StatsTask here.
  }

  private int aggregateStats() {

    StatsAggregator statsAggregator = null;
    int ret = 0;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);
      FileSystem fileSys;
      FileStatus[] fileStatus;

      if (!this.getWork().getNoStatsAggregator()) {
        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        statsAggregator = StatsFactory.getStatsAggregator();
        // manufacture a StatsAggregator
        if (!statsAggregator.connect(conf)) {
          throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
        }
      }

      TableStatistics tblStats = new TableStatistics();

      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();

      boolean tableStatsExist = this.existStats(parameters);

      for (String statType : supportedStats) {
        if (parameters.containsKey(statType)) {
          tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
        }
      }

      if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
        tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);

      if (partitions == null) {
        // non-partitioned tables:
        if (!tableStatsExist && atomic) {
          return 0;
        }
        Path tablePath = wh.getTablePath(db.getDatabase(table.getDbName()), table.getTableName());
        fileSys = tablePath.getFileSystem(conf);
        fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);

        tblStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
        long tableSize = 0L;
        for (int i = 0; i < fileStatus.length; i++) {
          tableSize += fileStatus[i].getLen();
        }
        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, tableSize);

        // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
        if (statsAggregator != null) {
          updateStats(collectableStats, tblStats, statsAggregator, parameters,
              work.getAggKey(), atomic);
          statsAggregator.cleanUp(work.getAggKey());
        }
        // The collectable stats for the aggregator needs to be cleared.
        // For eg. if a file is being loaded, the old number of rows are not valid
        else if (work.isClearAggregatorStats()) {
          for (String statType : collectableStats) {
            if (parameters.containsKey(statType)) {
              tblStats.setStat(statType, 0L);
            }
          }
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          parameters = tPart.getParameters();

          boolean hasStats = this.existStats(parameters);
          if (!hasStats && atomic) {
            continue;
          }

          Map<String, Long> currentValues = new HashMap<String, Long>();
          for (String statType : supportedStats) {
            Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
                : 0L;
            currentValues.put(statType, val);
          }

          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();

          // In that case of a partition, the key for stats temporary store is
          // "rootDir/[dynamic_partition_specs/]%"
          String partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());

          LOG.info("Stats aggregator : " + partitionID);

          if (statsAggregator != null) {
            updateStats(collectableStats, newPartStats, statsAggregator,
                parameters, partitionID, atomic);
          } else {
            for (String statType : collectableStats) {
              // The collectable stats for the aggregator needs to be cleared.
              // For eg. if a file is being loaded, the old number of rows are not valid
              if (work.isClearAggregatorStats()) {
                if (parameters.containsKey(statType)) {
                  newPartStats.setStat(statType, 0L);
                }
              }
              else {
                newPartStats.setStat(statType, currentValues.get(statType));
              }
            }
          }

          fileSys = partn.getPartitionPath().getFileSystem(conf);
          fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(), 1, fileSys);
          newPartStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);

          long partitionSize = 0L;
          for (int i = 0; i < fileStatus.length; i++) {
            partitionSize += fileStatus[i].getLen();
          }
          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, partitionSize);

          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          for (String statType : supportedStats) {
            long statValue = newPartStats.getStat(statType);
            if (statValue >= 0) {
              parameters.put(statType, Long.toString(newPartStats.getStat(statType)));
            }
          }

          tPart.setParameters(parameters);
          String tableFullName = table.getDbName() + "." + table.getTableName();
          db.alterPartition(tableFullName, new Partition(table, tPart));

          if (statsAggregator != null) {
            statsAggregator.cleanUp(partitionID);
          }

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }

      }

      //
      // write table stats to metastore
      //
      parameters = tTable.getParameters();
      for (String statType : supportedStats) {
        parameters.put(statType, Long.toString(tblStats.getStat(statType)));
      }
      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      tTable.setParameters(parameters);

      String tableFullName = table.getDbName() + "." + table.getTableName();

      db.alterTable(tableFullName, new Table(tTable));

      console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');

    } catch (Exception e) {
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));

      // Fail the query if the stats are supposed to be reliable
      if (work.isStatsReliable()) {
        ret = 1;
      }
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // The return value of 0 indicates success,
    // anything else indicates failure
    return ret;
View Full Code Here

  /**
   * This method is static as it is called from the shutdown hook at the ExecDriver.
   */
  public static void cleanUp(String jobID, Configuration config) {
    StatsAggregator statsAggregator;
    String statsImplementationClass = HiveConf.getVar(config, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, config);
    statsAggregator = StatsFactory.getStatsAggregator();
    if (statsAggregator.connect(config)) {
      statsAggregator.cleanUp(jobID + Path.SEPARATOR); // Adding the path separator to avoid an Id
                                                       // being a prefix of another ID
      statsAggregator.closeConnection();
    }
  }
View Full Code Here

    stats = new HashMap<String, String>();
  }

  @Override
  protected void tearDown() {
    StatsAggregator sa = StatsFactory.getStatsAggregator();
    assertNotNull(sa);
    assertTrue(sa.connect(conf));
    assertTrue(sa.cleanUp("file_0"));
    assertTrue(sa.closeConnection());
  }
View Full Code Here

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000", stats));
      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("200", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("1000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("400", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize1);

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - one stat published per key - aggregating matching key - OK");
    } catch (Throwable e) {
      e.printStackTrace();
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.stats.StatsAggregator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.