Package org.apache.hadoop.hive.ql.stats

Examples of org.apache.hadoop.hive.ql.stats.StatsAggregator
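The snippets below are excerpts from Hive's StatsPublisher/StatsAggregator tests and from StatsTask. The tests rely on a small fixture that this page does not show: a shared stats map and a fillStatMap(numRows, rawDataSize) helper that loads one row-count/raw-data-size pair into it. A minimal sketch of that fixture, assuming the two standard stat keys (the real test class defines its own equivalent):

    import java.util.HashMap;
    import java.util.Map;

    // Hypothetical reconstruction of the fixture assumed by the tests below.
    // Note: StatsSetupConst's package has moved between Hive versions
    // (org.apache.hadoop.hive.common in recent releases).
    import org.apache.hadoop.hive.common.StatsSetupConst;

    public class StatsTestFixture {
      // the map handed to statsPublisher.publishStat(key, stats)
      static Map<String, String> stats = new HashMap<String, String>();

      // Loads one row-count / raw-data-size pair into the shared map.
      // An empty rawDataSize means "do not publish that statistic".
      static void fillStatMap(String numRows, String rawDataSize) {
        stats.clear();
        stats.put(StatsSetupConst.ROW_COUNT, numRows);
        if (!rawDataSize.isEmpty()) {
          stats.put(StatsSetupConst.RAW_DATA_SIZE, rawDataSize);
        }
      }
    }

With that fixture in mind, the first example exercises the basic publish/aggregate round trip.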


      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));
      // statsAggregator.cleanUp("file_0000");
      // assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      // aggregate non-existent stats
      String rowsX = statsAggregator.aggregateStats("file_00002", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rowsX);
      String usizeX = statsAggregator.aggregateStats("file_00002",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usizeX);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - basic functionality - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
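Note the key scheme in this example: stats are published under task-level keys (file_00000_a, file_00000_b) but aggregated under their common prefix (file_00000), so aggregateStats sums every published value whose key starts with that prefix (500 = 200 + 300). A prefix with no published stats aggregates to "0", and cleanUp("file_0000") sweeps both key families, since both begin with "file_0000". The surface exercised here amounts to the following, sketched from these calls rather than taken from the authoritative Hive interface:

    import org.apache.hadoop.conf.Configuration;

    // Sketch of the StatsAggregator surface as exercised by these tests;
    // see the real org.apache.hadoop.hive.ql.stats.StatsAggregator for the
    // authoritative declaration.
    interface StatsAggregatorSketch {
      boolean connect(Configuration hconf);
      // sums every value published under a key that begins with keyPrefix
      String aggregateStats(String keyPrefix, String statType);
      // deletes every stored entry whose key begins with keyPrefix
      boolean cleanUp(String keyPrefix);
      boolean closeConnection();
    }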


      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // updates that should not take effect (the new row counts are smaller)
      fillStatMap("190", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // updates that should take effect (the new row counts are larger)
      fillStatMap("500", "5000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("600", "6000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("1100", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("11000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsPublisher - multiple updates - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
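The assertions above pin down the publisher's update rule: republishing a smaller row count (200 down to 190) leaves the stored row untouched, while a larger one (400 up to 500) replaces it, which is why file_00001 aggregates to 1100 rows and 11000 bytes. A toy model of that rule, consistent with the assertions but not taken from the actual implementation:

    import java.util.Map;

    // Hypothetical upsert rule implied by the assertions above: an existing
    // entry is overwritten only when the incoming row count is larger.
    class PublisherUpdateSketch {
      static void publish(Map<String, Long> store, String key, long newRowCount) {
        Long existing = store.get(key);
        if (existing == null || newRowCount > existing.longValue()) {
          store.put(key, Long.valueOf(newRowCount)); // first publish, or a larger update
        }
        // smaller updates are silently dropped, exactly as the test expects
      }
    }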

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      // updates that should not take effect; note that the published map is a
      // subset of the supported stats
      fillStatMap("190", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("290", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // nothing changed
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("500", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("2000", usize0);

      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("500", "");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      // changed; note that the rawDataSize was overwritten with its default (0)!
      rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("1000", rows0);
      usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - (multiple updates + publishing subset of supported statistics) - OK");
    } catch (Throwable e) {
      e.printStackTrace();
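This example documents a sharp edge: publishing only a subset of the supported statistics is accepted, but when such an update wins (both row counts jump to 500), it replaces the whole stored row, so the previously published rawDataSize falls back to its default and the aggregate drops to "0". A toy model of that whole-row overwrite, under the same assumptions as the sketch above:

    import java.util.Map;

    // Hypothetical whole-row overwrite implied by the test: a winning update
    // replaces every column, defaulting any unpublished stat to 0.
    class SubsetPublishSketch {
      static void publish(Map<String, long[]> store, String key,
          long rowCount, Long rawDataSize /* null = not published */) {
        long[] existing = store.get(key);
        if (existing == null || rowCount > existing[0]) {
          long size = (rawDataSize == null) ? 0L : rawDataSize.longValue();
          store.put(key, new long[] { rowCount, size }); // subset resets size to 0
        }
      }
    }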

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000_a", stats));
      fillStatMap("300", "2000");
      assertTrue(statsPublisher.publishStat("file_00000_b", stats));

      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001_a", stats));
      fillStatMap("500", "4000");
      assertTrue(statsPublisher.publishStat("file_00001_b", stats));

      // clean up just one key prefix
      assertTrue(statsAggregator.cleanUp("file_00000"));

      // the cleaned-up prefix should now aggregate to zero
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("0", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("0", usize0);

      // this should still be in the table
      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("900", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("7000", usize1);

      assertTrue(statsAggregator.cleanUp("file_0000"));

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out.println("StatsAggregator - clean-up - OK");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
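cleanUp is likewise prefix-based: removing "file_00000" deletes both "file_00000_a" and "file_00000_b" while the "file_00001" rows survive. A toy in-memory version of that behavior (an illustration, not the backing implementation):

    import java.util.Iterator;
    import java.util.Map;

    // Toy prefix-based cleanUp consistent with the assertions above:
    // every key starting with the prefix is removed; other keys survive.
    class CleanUpSketch {
      static boolean cleanUp(Map<String, String> store, String keyPrefix) {
        Iterator<String> it = store.keySet().iterator();
        while (it.hasNext()) {
          if (it.next().startsWith(keyPrefix)) {
            it.remove();
          }
        }
        return true;
      }
    }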

    // Nothing to do for StatsTask here.
  }

  private int aggregateStats() {

    StatsAggregator statsAggregator = null;

    try {
      // Stats setup:
      Warehouse wh = new Warehouse(conf);
      FileSystem fileSys;
      FileStatus[] fileStatus;

      if (!this.getWork().getNoStatsAggregator()) {
        String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
        StatsFactory.setImplementation(statsImplementationClass, conf);
        // manufacture a StatsAggregator
        statsAggregator = StatsFactory.getStatsAggregator();
        if (!statsAggregator.connect(conf)) {
          throw new HiveException("StatsAggregator connect failed " + statsImplementationClass);
        }
      }

      TableStatistics tblStats = new TableStatistics();

      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();

      boolean tableStatsExist = this.existStats(parameters);

      for (String statType : supportedStats) {
        if (parameters.containsKey(statType)) {
          tblStats.setStat(statType, Long.parseLong(parameters.get(statType)));
        }
      }

      if (parameters.containsKey(StatsSetupConst.NUM_PARTITIONS)) {
        tblStats.setNumPartitions(Integer.parseInt(parameters.get(StatsSetupConst.NUM_PARTITIONS)));
      }

      List<Partition> partitions = getPartitionsList();
      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);

      if (partitions == null) {
        // non-partitioned tables:
        if (!tableStatsExist && atomic) {
          return 0;
        }
        Path tablePath = wh.getTablePath(db.getDatabase(table.getDbName()), table.getTableName());
        fileSys = tablePath.getFileSystem(conf);
        fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);

        tblStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
        long tableSize = 0L;
        for (int i = 0; i < fileStatus.length; i++) {
          tableSize += fileStatus[i].getLen();
        }
        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, tableSize);

        // In the case of a non-partitioned table, the key for the temporary stats store is "rootDir"
        if (statsAggregator != null) {
          updateStats(collectableStats, tblStats, statsAggregator, parameters,
              work.getAggKey(), atomic);
          statsAggregator.cleanUp(work.getAggKey());
        }
      } else {
        // Partitioned table:
        // Need to get the old stats of the partition
        // and update the table stats based on the old and new stats.
        for (Partition partn : partitions) {
          //
          // get the old partition stats
          //
          org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          parameters = tPart.getParameters();

          boolean hasStats = this.existStats(parameters);
          if (!hasStats && atomic) {
            continue;
          }

          Map<String, Long> currentValues = new HashMap<String, Long>();
          for (String statType : supportedStats) {
            Long val = parameters.containsKey(statType) ? Long.parseLong(parameters.get(statType))
                : 0L;
            currentValues.put(statType, val);
          }

          //
          // get the new partition stats
          //
          PartitionStatistics newPartStats = new PartitionStatistics();

          // In the case of a partition, the key for the temporary stats store is
          // "rootDir/[dynamic_partition_specs/]%"
          String partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());

          LOG.info("Stats aggregator : " + partitionID);

          if (statsAggregator != null) {
            updateStats(collectableStats, newPartStats, statsAggregator,
                parameters, partitionID, atomic);
          } else {
            for (String statType : collectableStats) {
              newPartStats.setStat(statType, currentValues.get(statType));
            }
          }

          fileSys = partn.getPartitionPath().getFileSystem(conf);
          fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(), 1, fileSys);
          newPartStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);

          long partitionSize = 0L;
          for (int i = 0; i < fileStatus.length; i++) {
            partitionSize += fileStatus[i].getLen();
          }
          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, partitionSize);

          if (hasStats) {
            PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
            tblStats.updateStats(oldPartStats, newPartStats);
          } else {
            tblStats.addPartitionStats(newPartStats);
          }

          //
          // update the metastore
          //
          for (String statType : supportedStats) {
            long statValue = newPartStats.getStat(statType);
            if (statValue >= 0) {
              parameters.put(statType, Long.toString(newPartStats.getStat(statType)));
            }
          }

          tPart.setParameters(parameters);
          String tableFullName = table.getDbName() + "." + table.getTableName();
          db.alterPartition(tableFullName, new Partition(table, tPart));

          if (statsAggregator != null) {
            statsAggregator.cleanUp(partitionID);
          }

          console.printInfo("Partition " + tableFullName + partn.getSpec() +
              " stats: [" + newPartStats.toString() + ']');
        }

      }

      //
      // write table stats to metastore
      //
      parameters = tTable.getParameters();
      for (String statType : supportedStats) {
        parameters.put(statType, Long.toString(tblStats.getStat(statType)));
      }
      parameters.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(tblStats.getNumPartitions()));
      tTable.setParameters(parameters);

      String tableFullName = table.getDbName() + "." + table.getTableName();

      db.alterTable(tableFullName, new Table(tTable));

      console.printInfo("Table " + tableFullName + " stats: [" + tblStats.toString() + ']');

    } catch (Exception e) {
      // return 0 since StatsTask should not fail the whole job
      console.printInfo("[Warning] could not update stats.",
          "Failed with exception " + e.getMessage() + "\n"
              + StringUtils.stringifyException(e));
    } finally {
      if (statsAggregator != null) {
        statsAggregator.closeConnection();
      }
    }
    // StatsTask always returns 0 so that the whole job won't fail
    return 0;
  }
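The updateStats helper called in both branches above is not part of this excerpt. Judging from its call sites, it pulls each collectable statistic out of the StatsAggregator under the given key prefix and stores it on the statistics object. A hypothetical sketch follows; the parameter is typed as PartitionStatistics on the assumption that TableStatistics extends it, which the two call sites suggest:

    // Hypothetical sketch of the updateStats helper used above; the real
    // implementation lives in StatsTask and may differ in detail.
    private void updateStats(List<String> statsList, PartitionStatistics stats,
        StatsAggregator statsAggregator, Map<String, String> parameters,
        String aggKey, boolean atomic) throws HiveException {
      // 'parameters' carries the pre-existing metastore values; this sketch
      // does not consult them.
      for (String statType : statsList) {
        String value = statsAggregator.aggregateStats(aggKey, statType);
        if (value == null) {
          if (atomic) {
            throw new HiveException("StatsAggregator failed to get statistics.");
          }
          continue; // non-atomic mode: keep the previous value
        }
        stats.setStat(statType, Long.parseLong(value));
      }
    }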

  /**
   * This method is static as it is called from the shutdown hook at the ExecDriver.
   */
  public static void cleanUp(String jobID, Configuration config) {
    StatsAggregator statsAggregator;
    String statsImplementationClass = HiveConf.getVar(config, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, config);
    statsAggregator = StatsFactory.getStatsAggregator();
    if (statsAggregator.connect(config)) {
      statsAggregator.cleanUp(jobID + Path.SEPARATOR); // append the path separator so that
                                                       // one ID is never a prefix of another
      statsAggregator.closeConnection();
    }
  }
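The appended separator matters because cleanUp matches by prefix: without it, cleaning up one job could also sweep entries of another job whose ID merely extends the first. A usage sketch, with a made-up job ID:

    // Hypothetical call from a shutdown hook; "job_1" is a made-up ID.
    // Internally this cleans keys under "job_1/" but not under "job_10/".
    cleanUp("job_1", new Configuration());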

    statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
    StatsFactory.setImplementation(statsImplementationClass, conf);

    stats = new HashMap<String, String>();
    StatsAggregator sa = StatsFactory.getStatsAggregator();
    sa.connect(conf);
    sa.cleanUp("file_0");
    sa.closeConnection();
  }

      assertNotNull(statsPublisher);
      assertTrue(statsPublisher.init(conf));
      assertTrue(statsPublisher.connect(conf));

      // instantiate stats aggregator
      StatsAggregator statsAggregator = StatsFactory.getStatsAggregator();
      assertNotNull(statsAggregator);
      assertTrue(statsAggregator.connect(conf));

      // publish stats
      fillStatMap("200", "1000");
      assertTrue(statsPublisher.publishStat("file_00000", stats));
      fillStatMap("400", "3000");
      assertTrue(statsPublisher.publishStat("file_00001", stats));


      // aggregate existing stats
      String rows0 = statsAggregator.aggregateStats("file_00000", StatsSetupConst.ROW_COUNT);
      assertEquals("200", rows0);
      String usize0 = statsAggregator.aggregateStats("file_00000",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("1000", usize0);

      String rows1 = statsAggregator.aggregateStats("file_00001", StatsSetupConst.ROW_COUNT);
      assertEquals("400", rows1);
      String usize1 = statsAggregator.aggregateStats("file_00001",
          StatsSetupConst.RAW_DATA_SIZE);
      assertEquals("3000", usize1);

      // close connections
      assertTrue(statsPublisher.closeConnection());
      assertTrue(statsAggregator.closeConnection());

      System.out
          .println("StatsPublisher - one stat published per key - aggregating matching key - OK");
    } catch (Throwable e) {
      e.printStackTrace();
