Package org.apache.hadoop.hive.ql.stats

Examples of org.apache.hadoop.hive.ql.stats.StatsPublisher
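All of these examples drive the same small contract: init(conf) prepares the intermediate stats store, connect(conf) opens it, publishStat(fileID, stats) writes one map of named counters under a key, and closeConnection() releases it, each returning a boolean success flag. As a minimal sketch of that contract, here is a hypothetical in-memory publisher (the signatures match what these snippets call in older Hive releases; the shipped implementations are JDBC- or HBase-backed and persist to a real table):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;

public class InMemoryStatsPublisher implements StatsPublisher {

  // fileID -> (stat type -> value); static so an aggregator sketch can read it back
  static final Map<String, Map<String, String>> STORE =
      new ConcurrentHashMap<String, Map<String, String>>();

  public boolean init(Configuration hconf) {
    // the JDBC implementation creates the intermediate stats table here
    return true;
  }

  public boolean connect(Configuration hconf) {
    return true; // nothing to open for an in-memory store
  }

  public boolean publishStat(String fileID, Map<String, String> stats) {
    STORE.put(fileID, new HashMap<String, String>(stats));
    return true;
  }

  public boolean closeConnection() {
    return true;
  }
}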


      // make this client wait if the job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          statsPublisher.init(job); // create the stats table if it does not exist
        }
      }

      this.createTmpDirs();
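
For StatsFactory.setImplementation above to succeed, the job configuration has to name a stats implementation under hive.stats.dbclass. A minimal setup sketch; "jdbc:derby" was the default in the Hive releases these snippets come from, so treat the exact value as an assumption for your version:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.JobConf;

public class StatsConfExample {
  public static JobConf configure() {
    JobConf job = new JobConf();
    // read back via HiveConf.getVar(job, ConfVars.HIVESTATSDBCLASS) in the snippet above
    HiveConf.setVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS, "jdbc:derby");
    return job;
  }
}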

        getConf().getTableInfo());
  }

  private void publishStats() {
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);

    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      return;
    }
    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      return;
    }

    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec() != null ? conf.getStaticSpec() : "";

    for (String fspKey : valToPaths.keySet()) {
      FSPaths fspValue = valToPaths.get(fspKey);
      String key;

      // construct the key(fileID) to insert into the intermediate stats table
      if (fspKey.isEmpty()) {
        // for non-partitioned/static partitioned table, the key for temp storage is
        // common key prefix + static partition spec + taskID
        key = conf.getStatsAggPrefix() + spSpec + taskID;
      } else {
        // for partitioned table, the key is
        // common key prefix + static partition spec + DynamicPartSpec + taskID
        key = conf.getStatsAggPrefix() + spSpec + fspKey + Path.SEPARATOR + taskID;
      }
      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      statsPublisher.publishStat(key, statsToPublish);
    }
    statsPublisher.closeConnection();
  }
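The key layout matters because the aggregator later fetches every row sharing the common prefix. A worked example of the two branches above, with made-up prefix, partition specs, and task ID:

public class StatsKeyExample {
  public static void main(String[] args) {
    String statsAggPrefix = "default.t1/"; // conf.getStatsAggPrefix()
    String spSpec = "ds=2011-01-01/";      // conf.getStaticSpec()
    String taskID = "000000";              // Utilities.getTaskIdFromFilename(...)

    // non-partitioned or static-partitioned table (empty fspKey):
    System.out.println(statsAggPrefix + spSpec + taskID);
    // -> default.t1/ds=2011-01-01/000000

    // dynamically partitioned table (fspKey carries the dynamic spec):
    String fspKey = "hr=12";
    System.out.println(statsAggPrefix + spSpec + fspKey + "/" + taskID);
    // -> default.t1/ds=2011-01-01/hr=12/000000
  }
}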

  public void testStatsPublisherOneStat() throws Throwable {
    try {
      System.out.println("StatsPublisher - one stat published per key - aggregating matching key");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000", stats));
      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("200", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("1000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("400", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize1);

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - one stat published per key - aggregating matching key - OK");
    } catch (Throwable e) {
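The tests rely on a stats field and a fillStatMap(numRows, rawDataSize) helper that the excerpts do not show. Judging by how they are used (a later test passes "" to leave raw data size unpublished), a plausible reconstruction, labeled as such:

import java.util.HashMap;
import java.util.Map;

// StatsSetupConst's package varies across Hive releases; adjust the import for yours.
import org.apache.hadoop.hive.common.StatsSetupConst;

public class FillStatMapSketch {
  // the tests publish this map after each fillStatMap call
  private final Map<String, String> stats = new HashMap<String, String>();

  private void fillStatMap(String numRows, String rawDataSize) {
    stats.clear();
    stats.put(StatsSetupConst.ROW_COUNT, numRows);
    if (!rawDataSize.isEmpty()) {
      // an empty string means "leave this stat unpublished"
      stats.put(StatsSetupConst.RAW_DATA_SIZE, rawDataSize);
    }
  }
}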

  public void testStatsPublisher() throws Throwable {
    try {
      System.out.println("StatsPublisher - basic functionality");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher(
          (JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));
      // statsAggregator.cleanUp("file_0000");
      // assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      // aggregate non-existent stats
      String rowsX = statsAggregator.aggregateStats("file_00002", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rowsX);
      String usizeX = statsAggregator.aggregateStats("file_00002",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usizeX);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - basic functionality - OK");
    } catch (Throwable e) {
      e.printStackTrace();
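The assertions pin down the aggregation semantics: aggregateStats(keyPrefix, statType) sums the stat over every published key starting with the prefix, returns "0" when nothing matches, and cleanUp(keyPrefix) drops the matching rows. A hypothetical in-memory counterpart to the publisher sketch above, encoding what the tests assert rather than any shipped implementation:

import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;

public class InMemoryStatsAggregator implements StatsAggregator {

  public boolean connect(Configuration hconf) {
    return true;
  }

  public String aggregateStats(String keyPrefix, String statType) {
    long total = 0;
    for (Map.Entry<String, Map<String, String>> e : InMemoryStatsPublisher.STORE.entrySet()) {
      if (e.getKey().startsWith(keyPrefix)) {
        String v = e.getValue().get(statType);
        total += (v == null) ? 0 : Long.parseLong(v);
      }
    }
    return Long.toString(total); // "0" when no key matches
  }

  public boolean cleanUp(String keyPrefix) {
    Iterator<String> it = InMemoryStatsPublisher.STORE.keySet().iterator();
    while (it.hasNext()) {
      if (it.next().startsWith(keyPrefix)) {
        it.remove();
      }
    }
    return true;
  }

  public boolean closeConnection() {
    return true;
  }
}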

  public void testStatsPublisherMultipleUpdates() throws Throwable {
    try {
      System.out.println("StatsPublisher - multiple updates");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // updates that should not take effect (row counts are smaller than the stored ones)
      fillStatMap("190", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // updates that should take effect (row counts are larger)
      fillStatMap("500", "5000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("600", "6000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("1100", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("11000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - multiple updates - OK");
    } catch (Throwable e) {
      e.printStackTrace();

    try {
      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics)");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      // updates that should not take effect; the published map is also a subset of the
      // supported stats
      fillStatMap("190", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // nothing changed
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // changed; the rawDataSize entry was overwritten as well
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("1000", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics) - OK");
    } catch (Throwable e) {
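Taken together, the two update tests imply the overwrite rule: a re-publish is accepted only when the new row count exceeds the stored one, and an accepted update replaces the whole row, which is why rawDataSize collapsed to "0" above once the row was re-published without it. A sketch of that rule as inferred from the assertions (hypothetical helper, not Hive API):

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.common.StatsSetupConst;

public class UpdateRuleSketch {
  // keep the new row iff its row count is larger; the new row replaces the old wholesale
  static Map<String, String> merge(Map<String, String> oldRow, Map<String, String> newRow) {
    long oldRows = Long.parseLong(oldRow.getOrDefault(StatsSetupConst.ROW_COUNT, "0"));
    long newRows = Long.parseLong(newRow.getOrDefault(StatsSetupConst.ROW_COUNT, "0"));
    return (newRows > oldRows) ? new HashMap<String, String>(newRow) : oldRow;
  }
}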

  public void testStatsAggregatorCleanUp() throws Throwable {
    try {
      System.out.println("StatsAggregator - clean-up");

      // instantiate stats publisher
      StatsPublisher statsPublisher = Utilities.getStatsPublisher((JobConf) conf);
      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // clean up just one key prefix
      assertTrue(statsAggregator.cleanUp("file_00000"));

      // stats under the cleaned-up prefix should now be gone
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      // this should still be in the table
      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsAggregator - clean-up - OK");
    } catch (Throwable e) {
      e.printStackTrace();

  private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();

    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);

    if (statsPublisher == null) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      return;
    }

    if (!statsPublisher.connect(hconf)) {
      // just return, stats gathering should not block the main query
      LOG.error("StatsPublishing error: cannot connect to database");
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
      }
      return;
    }

    String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
    String spSpec = conf.getStaticSpec();

    int maxKeyLength = conf.getMaxStatsKeyPrefixLength();
    boolean taskIndependent = statsPublisher instanceof StatsCollectionTaskIndependent;

    for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
      String fspKey = entry.getKey();     // DP/LB
      FSPaths fspValue = entry.getValue();

      // for bucketed tables, hive.optimize.sort.dynamic.partition optimization
      // adds the taskId to the fspKey.
      if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
        taskID = Utilities.getTaskIdFromFilename(fspKey);
        // If the length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) exceeds the max key
        // prefix length while (prefix/ds=10/000000_0) does not, the former gets hashed to a
        // shorter prefix (MD5hash/000000_0) while the latter is stored as-is in the staging
        // stats table. When stats are aggregated in StatsTask, only keys that start with
        // "prefix" are fetched, so the hashed (prefix/ds=__HIVE_DEFAULT_PARTITION__) entry
        // would never be retrieved and hence never aggregated. To avoid this, strip the
        // taskId from the key; it is redundant here anyway.
        fspKey = fspKey.split(taskID)[0];
      }

      // split[0] = DP, split[1] = LB
      String[] split = splitKey(fspKey);
      String dpSpec = split[0];
      String lbSpec = split[1];

      String prefix;
      String postfix = null;
      if (taskIndependent) {
        // key = database.table/SP/DP/LB/
        prefix = conf.getTableInfo().getTableName();
      } else {
        // key = prefix/SP/DP/LB/taskID/
        prefix = conf.getStatsAggPrefix();
        postfix = Utilities.join(lbSpec, taskID);
      }
      }
      prefix = Utilities.join(prefix, spSpec, dpSpec);
      prefix = Utilities.getHashedStatsPrefix(prefix, maxKeyLength);

      String key = Utilities.join(prefix, postfix);

      Map<String, String> statsToPublish = new HashMap<String, String>();
      for (String statType : fspValue.stat.getStoredStats()) {
        statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
      }
      if (!statsPublisher.publishStat(key, statsToPublish)) {
        // The original exception is lost.
        // Not changing the interface to maintain backward compatibility
        if (isStatsReliable) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
        }
      }
    }
    if (!statsPublisher.closeConnection()) {
      // The original exception is lost.
      // Not changing the interface to maintain backward compatibility
      if (isStatsReliable) {
        throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
      }
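Whether these failures abort the query is governed by hive.stats.reliable: off (the default), stats errors are only logged; on, the HiveExceptions above propagate and fail the query. A configuration sketch:

import org.apache.hadoop.hive.conf.HiveConf;

public class ReliableStatsExample {
  public static HiveConf configure() {
    HiveConf conf = new HiveConf();
    // surfaces through conf.isStatsReliable() / HIVE_STATS_RELIABLE checks above
    conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE, true);
    return conf;
  }
}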

      // make this client wait if job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        StatsFactory factory = StatsFactory.newFactory(job);
        if (factory != null) {
          statsPublisher = factory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // create the stats table if it does not exist
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
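This last excerpt uses the newer factory API, StatsFactory.newFactory(conf), instead of the static setImplementation/getStatsPublisher pair from the first example. A minimal sketch of obtaining both ends of the exchange through it (error handling elided; the publisher normally runs inside tasks and the aggregator later in StatsTask):

import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.mapred.JobConf;

public class FactoryExample {
  public static void run(JobConf job) {
    StatsFactory factory = StatsFactory.newFactory(job);
    if (factory == null) {
      return; // no stats implementation configured
    }
    StatsPublisher publisher = factory.getStatsPublisher();
    publisher.init(job); // create the intermediate store if it does not exist

    StatsAggregator aggregator = factory.getStatsAggregator();
    // ... publish from tasks, then aggregate and clean up from StatsTask
  }
}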
