Examples of org.apache.hadoop.hive.ql.stats.StatsPublisher

org.apache.hadoop.hive.ql.stats.StatsPublisher
An interface for any possible implementation for publishing statics.

      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }


    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }

View Full Code Here

      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }


    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }

View Full Code Here


  private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();


    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      return;
    }


    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
      }
      return;
    }


    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec();


    int maxKeyLength = conf.getMaxStatsKeyPrefixLength();
    boolean taskIndependent = statsPublisher instanceof StatsCollectionTaskIndependent;


    for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
      String fspKey = entry.getKey();     // DP/LB
      FSPaths fspValue = entry.getValue();


      // for bucketed tables, hive.optimize.sort.dynamic.partition optimization
      // adds the taskId to the fspKey.
      if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
        taskID = Utilities.getTaskIdFromFilename(fspKey);
        // if length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) is greater than max key prefix
        // and if (prefix/ds=10/000000_0) is less than max key prefix, then former will get hashed
        // to a smaller prefix (MD5hash/000000_0) and later will stored as such in staging stats table.
        // When stats gets aggregated in StatsTask only the keys that starts with "prefix" will be fetched.
        // Now that (prefix/ds=__HIVE_DEFAULT_PARTITION__) is hashed to a smaller prefix it will
        // not be retrieved from staging table and hence not aggregated. To avoid this issue
        // we will remove the taskId from the key which is redundant anyway.
        fspKey = fspKey.split(taskID)[0];
      }


      // split[0] = DP, split[1] = LB
      String[] split = splitKey(fspKey);
      String dpSpec = split[0];
      String lbSpec = split[1];


      String prefix;
      String postfix=null;
      if (taskIndependent) {
        // key = "database.table/SP/DP/"LB/
        prefix = conf.getTableInfo().getTableName();
      } else {
        // key = "prefix/SP/DP/"LB/taskID/
        prefix = conf.getStatsAggPrefix();
        postfix = Utilities.join(lbSpec, taskID);
      }
      prefix = Utilities.join(prefix, spSpec, dpSpec);
      prefix = Utilities.getHashedStatsPrefix(prefix, maxKeyLength);


      String key = Utilities.join(prefix, postfix);


      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      if (!statsPublisher.publishStat(key, statsToPublish)) {
        // The original exception is lost.
        // Not changing the interface to maintain backward compatibility
        if (isStatsReliable) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
      }
    }
    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
      }

View Full Code Here

      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

   *
   * @throws HiveException
   */
  private void publishStats() throws HiveException {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }


    if (!statsPublisher.connect(jc)) {
      // should fail since stats gathering is main purpose of the job
      LOG.error("StatsPublishing error: cannot connect to database");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }


    int maxPrefixLength = StatsFactory.getMaxPrefixLength(jc);
    // construct key used to store stats in intermediate db
    String key = Utilities.getHashedStatsPrefix(statsAggKeyPrefix, maxPrefixLength);
    if (!(statsPublisher instanceof CounterStatsPublisher)) {
      String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
      key = Utilities.join(key, taskID);
    }


    // construct statistics to be stored
    Map<String, String> statsToPublish = new HashMap<String, String>();
    statsToPublish.put(StatsSetupConst.RAW_DATA_SIZE, Long.toString(uncompressedFileSize));
    statsToPublish.put(StatsSetupConst.ROW_COUNT, Long.toString(rowNo));


    if (!statsPublisher.publishStat(key, statsToPublish)) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
    }


    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
  }

View Full Code Here

      // make this client wait if job trcker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

    if (factory == null) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }
    // initialize stats publishing table for noscan which has only stats task
    // the rest of MR task following stats task initializes it in ExecDriver.java
    StatsPublisher statsPublisher = factory.getStatsPublisher();
    if (!statsPublisher.init(conf)) { // creating stats table if not exists
      throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
    }
    Task sourceTask = getWork().getSourceTask();
    if (sourceTask == null) {
      throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());

View Full Code Here


  private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();


    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      return;
    }


    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
      }
      return;
    }


    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec();


    int maxKeyLength = conf.getMaxStatsKeyPrefixLength();
    boolean taskIndependent = statsPublisher instanceof StatsCollectionTaskIndependent;


    for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
      String fspKey = entry.getKey();     // DP/LB
      FSPaths fspValue = entry.getValue();


      // for bucketed tables, hive.optimize.sort.dynamic.partition optimization
      // adds the taskId to the fspKey.
      if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
        taskID = Utilities.getTaskIdFromFilename(fspKey);
        // if length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) is greater than max key prefix
        // and if (prefix/ds=10/000000_0) is less than max key prefix, then former will get hashed
        // to a smaller prefix (MD5hash/000000_0) and later will stored as such in staging stats table.
        // When stats gets aggregated in StatsTask only the keys that starts with "prefix" will be fetched.
        // Now that (prefix/ds=__HIVE_DEFAULT_PARTITION__) is hashed to a smaller prefix it will
        // not be retrieved from staging table and hence not aggregated. To avoid this issue
        // we will remove the taskId from the key which is redundant anyway.
        fspKey = fspKey.split(taskID)[0];
      }


      // split[0] = DP, split[1] = LB
      String[] split = splitKey(fspKey);
      String dpSpec = split[0];
      String lbSpec = split[1];


      String prefix;
      String postfix=null;
      if (taskIndependent) {
        // key = "database.table/SP/DP/"LB/
        prefix = conf.getTableInfo().getTableName();
      } else {
        // key = "prefix/SP/DP/"LB/taskID/
        prefix = conf.getStatsAggPrefix();
        postfix = Utilities.join(lbSpec, taskID);
      }
      prefix = Utilities.join(prefix, spSpec, dpSpec);
      prefix = Utilities.getHashedStatsPrefix(prefix, maxKeyLength);


      String key = Utilities.join(prefix, postfix);


      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      if (!statsPublisher.publishStat(key, statsToPublish)) {
        // The original exception is lost.
        // Not changing the interface to maintain backward compatibility
        if (isStatsReliable) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
      }
    }
    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
      }

View Full Code Here

      // make this client wait if job trcker is not behaving well.
      Throttle.checkJobTracker(job, LOG);


      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }

View Full Code Here

   *
   * @throws HiveException
   */
  private void publishStats() throws HiveException {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);


    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
    }


    if (!statsPublisher.connect(jc)) {
      // should fail since stats gathering is main purpose of the job
      LOG.error("StatsPublishing error: cannot connect to database");
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }


    // construct key used to store stats in intermediate db
    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
    String keyPrefix = Utilities.getHashedStatsPrefix(
        statsAggKeyPrefix, HiveConf.getIntVar(jc, ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    String key = keyPrefix + taskID;


    // construct statistics to be stored
    Map<String, String> statsToPublish = new HashMap<String, String>();
    statsToPublish.put(StatsSetupConst.RAW_DATA_SIZE, Long.toString(uncompressedFileSize));
    statsToPublish.put(StatsSetupConst.ROW_COUNT, Long.toString(rowNo));


    if (!statsPublisher.publishStat(key, statsToPublish)) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
    }


    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
    }
  }

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of org.apache.hadoop.hive.ql.stats.StatsPublisher

org.apache.hadoop.hive.ql.exec.ExecDriver

org.apache.hadoop.hive.ql.exec.FileSinkOperator

org.apache.hadoop.hive.ql.exec.mr.ExecDriver

org.apache.hadoop.hive.ql.exec.StatsTask

org.apache.hadoop.hive.ql.exec.TableScanOperator

org.apache.hadoop.hive.ql.exec.TestStatsPublisherEnhanced

org.apache.hadoop.hive.ql.exec.tez.DagUtils

org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper

org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.