Package org.apache.commons.math.stat.descriptive

Examples of org.apache.commons.math.stat.descriptive.DescriptiveStatistics


    }
  }

  @Test
  public void testCostFromStats() {
    DescriptiveStatistics statOne = new DescriptiveStatistics();
    for (int i =0; i < 100; i++) {
      statOne.addValue(10);
    }
    assertEquals(0, loadBalancer.costFromStats(statOne), 0.01);

    DescriptiveStatistics statTwo = new DescriptiveStatistics();
    for (int i =0; i < 100; i++) {
      statTwo.addValue(0);
    }
    statTwo.addValue(100);
    assertEquals(1, loadBalancer.costFromStats(statTwo), 0.01);

    DescriptiveStatistics statThree = new DescriptiveStatistics();
    for (int i =0; i < 100; i++) {
      statThree.addValue(0);
      statThree.addValue(100);
    }
    assertEquals(0.5, loadBalancer.costFromStats(statThree), 0.01);
  }
View Full Code Here


   *
   * @param clusterState The proposed cluster state
   * @return The cost of region load imbalance.
   */
  double computeSkewLoadCost(Map<ServerName, List<HRegionInfo>> clusterState) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (List<HRegionInfo> regions : clusterState.values()) {
      int size = regions.size();
      stats.addValue(size);
    }
    return costFromStats(stats);
  }
View Full Code Here

  private double computeRegionLoadCost(Map<ServerName, List<HRegionInfo>> clusterState,
                                       RegionLoadCostType costType) {

    if (this.clusterStatus == null || this.loads == null || this.loads.size() == 0) return 0;

    DescriptiveStatistics stats = new DescriptiveStatistics();

    // For every server look at the cost of each region
    for (List<HRegionInfo> regions : clusterState.values()) {
      long cost = 0; //Cost this server has from RegionLoad

      // For each region
      for (HRegionInfo region : regions) {
        // Try and get the region using the regionNameAsString
        List<RegionLoad> rl = loads.get(region.getRegionNameAsString());

        // That could have failed if the RegionLoad is using the other regionName
        if (rl == null) {
          // Try getting the region load using encoded name.
          rl = loads.get(region.getEncodedName());
        }
        // Now if we found a region load get the type of cost that was requested.
        if (rl != null) {
          cost += getRegionLoadCost(rl, costType);
        }
      }

      // Add the total cost to the stats.
      stats.addValue(cost);
    }

    // No return the scaled cost from data held in the stats object.
    return costFromStats(stats);
  }
View Full Code Here

    @Test
    public void testTableSampleBernoulli()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", TupleInfo.SINGLE_LONG).getMaterializedTuples().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedTuple> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)").getMaterializedTuples();

            assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, String.format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

        MetricsRegionSource.NUM_FILES_COMPACTED_DESC),
        this.regionWrapper.getNumFilesCompacted());
    for (Map.Entry<String, DescriptiveStatistics> entry : this.regionWrapper
        .getCoprocessorExecutionStatistics()
        .entrySet()) {
      DescriptiveStatistics ds = entry.getValue();
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "Min: "), ds.getMin() / 1000);
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "Mean: "), ds.getMean() / 1000);
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "Max: "), ds.getMax() / 1000);
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "90th percentile: "), ds
          .getPercentile(90d) / 1000);
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "95th percentile: "), ds
          .getPercentile(95d) / 1000);
      mrb.addGauge(Interns.info(regionNamePrefix + " " + entry.getKey() + " "
          + MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS,
        MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "99th percentile: "), ds
          .getPercentile(99d) / 1000);
    }

  }
View Full Code Here

            sample[i] = Math.random();
        }
        // normalize this sample
        double standardizedSample[] = StatUtils.normalize(sample);

        DescriptiveStatistics stats = new DescriptiveStatistics();
        // Add the data from the array
        for (int i = 0; i < length; i++) {
            stats.addValue(standardizedSample[i]);
        }
        // the calculations do have a limited precision   
        double distance = 1E-10;
        // check the mean and standard deviation
        assertEquals(0.0, stats.getMean(), distance);
        assertEquals(1.0, stats.getStandardDeviation(), distance);

    }
View Full Code Here

        GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
        gls.newSampleData(longley, nObs, 6);
        gls.newCovarianceData(cov.getData());
       
        // Create aggregators for stats measuring model performance
        DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
        DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
       
        // Generate Y vectors for 10000 models, estimate GLS and OLS and
        // verify that GLS estimates are better
        final int nModels = 10000;
        for (int i = 0; i < nModels; i++) {
           
            // Generate y = xb + u with u cov
            RealVector u = MatrixUtils.createRealVector(gen.nextVector());
            double[] y = u.add(x.operate(b)).getData();
           
            // Estimate OLS parameters
            ols.newYSampleData(y);
            RealVector olsBeta = ols.calculateBeta();
           
            // Estimate GLS parameters
            gls.newYSampleData(y);
            RealVector glsBeta = gls.calculateBeta();
           
            // Record deviations from "true" beta
            double dist = olsBeta.getDistance(b);
            olsBetaStats.addValue(dist * dist);
            dist = glsBeta.getDistance(b);
            glsBetaStats.addValue(dist * dist);
           
        }
       
        // Verify that GLS is on average more efficient, lower variance
        assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
        assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
    }
View Full Code Here

     * Test DescriptiveStatistics - implementations that store full array of
     * values and execute multi-pass algorithms
     */
    public void testDescriptiveStatistics() throws Exception {

        DescriptiveStatistics u = new DescriptiveStatistics();

        loadStats("data/PiDigits.txt", u);
        assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-14);
        assertEquals("PiDigits: mean", mean, u.getMean(), 1E-14);

        loadStats("data/Mavro.txt", u);
        assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
        assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);

        loadStats("data/Michelso.txt", u);
        assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-14);
        assertEquals("Michelso: mean", mean, u.getMean(), 1E-14);

        loadStats("data/NumAcc1.txt", u);
        assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
        assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);

        loadStats("data/NumAcc2.txt", u);
        assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
        assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
    }
View Full Code Here

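     * @param resource name of the classpath resource holding the data set
     * @param u statistical summary (DescriptiveStatistics or SummaryStatistics) to load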
     */
    private void loadStats(String resource, Object u) throws Exception {

        DescriptiveStatistics d = null;
        SummaryStatistics s = null;
        if (u instanceof DescriptiveStatistics) {
            d = (DescriptiveStatistics) u;
        } else {
            s = (SummaryStatistics) u;
        }
        u.getClass().getDeclaredMethod(
                "clear", new Class[]{}).invoke(u, new Object[]{});
        mean = Double.NaN;
        std = Double.NaN;

        BufferedReader in =
            new BufferedReader(
                    new InputStreamReader(
                            CertifiedDataTest.class.getResourceAsStream(resource)));

        String line = null;

        for (int j = 0; j < 60; j++) {
            line = in.readLine();
            if (j == 40) {
                mean =
                    Double.parseDouble(
                            line.substring(line.lastIndexOf(":") + 1).trim());
            }
            if (j == 41) {
                std =
                    Double.parseDouble(
                            line.substring(line.lastIndexOf(":") + 1).trim());
            }
        }

        line = in.readLine();

        while (line != null) {
            if (d != null) {
                d.addValue(Double.parseDouble(line.trim()));
            } else {
                s.addValue(Double.parseDouble(line.trim()));
            }
            line = in.readLine();
        }
View Full Code Here

    private Map<String, Double> certifiedValues;

    @Override
    protected void setUp() throws Exception {
        descriptives = new DescriptiveStatistics();
        summaries = new SummaryStatistics();
        certifiedValues = new HashMap<String, Double>();

        loadData();
    }
View Full Code Here

TOP

Related Classes of org.apache.commons.math.stat.descriptive.DescriptiveStatistics

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.