Package org.apache.commons.math3.stat.descriptive

Examples of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics


    @Test
    public void testTableSampleBernoulli()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(SINGLE_LONG)).getMaterializedTuples().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedTuple> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)").getMaterializedTuples();

            assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here


    @Test
    public void testTableSamplePoissonized()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(SINGLE_LONG)).getMaterializedTuples().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedTuple> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE POISSONIZED (50)").getMaterializedTuples();
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

    @Test
    public void testTableSamplePoissonizedRescaled()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        long total = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(TupleInfo.SINGLE_LONG)).getMaterializedTuples().get(0).getField(0);

        for (int i = 0; i < 100; i++) {
            long value = (long) computeActual("SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED").getMaterializedTuples().get(0).getField(0);
            stats.addValue(value * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.90 && mean < 1.1, format("Expected sample to be rescaled to ~1.0, but was %s", mean));
        assertTrue(stats.getVariance() > 0, "Samples all had the exact same size");
    }
View Full Code Here

    @Test
    public void testTableSampleBernoulli()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(SINGLE_LONG)).getMaterializedTuples().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedTuple> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)").getMaterializedTuples();

            assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

    @Test
    public void testTableSamplePoissonized()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(SINGLE_LONG)).getMaterializedTuples().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedTuple> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE POISSONIZED (50)").getMaterializedTuples();
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

    @Test
    public void testTableSamplePoissonizedRescaled()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        long total = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(TupleInfo.SINGLE_LONG)).getMaterializedTuples().get(0).getField(0);

        for (int i = 0; i < 100; i++) {
            long value = (long) computeActual("SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED").getMaterializedTuples().get(0).getField(0);
            stats.addValue(value * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.90 && mean < 1.1, format("Expected sample to be rescaled to ~1.0, but was %s", mean));
        assertTrue(stats.getVariance() > 0, "Samples all had the exact same size");
    }
View Full Code Here

    @Test
    public void testMultiplePositions()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        for (int i = 0; i < 500; ++i) {
            int uniques = ThreadLocalRandom.current().nextInt(20000) + 1;

            List<Object> values = createRandomSample(uniques, (int) (uniques * 1.5));

            long actual = estimateGroupByCount(values);
            double error = (actual - uniques) * 1.0 / uniques;

            stats.addValue(error);
        }

        assertLessThan(stats.getMean(), 1.0e-2);
        assertLessThan(Math.abs(stats.getStandardDeviation() - ApproximateCountDistinctAggregation.getStandardError()), 1.0e-2);
    }
View Full Code Here

    } else {
      start = 0;
      end = (int) dataSeries.getMaxX();
    }
   
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (int i = start; i<=end; i++) {
      stats.addValue(dataSeries.getY(i).doubleValue());
    }
    return stats.getMean();
   
  }
View Full Code Here

        for (int i = 0; i < k; ++i) {
            sumImpl[i]     = new Sum();
            sumSqImpl[i]   = new SumOfSquares();
            minImpl[i]     = new Min();
            maxImpl[i]     = new Max();
            sumLogImpl[i= new SumOfLogs();
            geoMeanImpl[i] = new GeometricMean();
            meanImpl[i]    = new Mean();
        }

        covarianceImpl =
View Full Code Here

     * @param checker Convergence checker.
     */
    protected BaseOptimizer(ConvergenceChecker<PAIR> checker) {
        this.checker = checker;

        evaluations = new Incrementor(0, new MaxEvalCallback());
        iterations = new Incrementor(0, new MaxIterCallback());
    }
View Full Code Here

TOP

Related Classes of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.