Package org.apache.commons.math3.stat.descriptive

Examples of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics


    @Test
    public void testMultiplePositions()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        for (int i = 0; i < 500; ++i) {
            int uniques = ThreadLocalRandom.current().nextInt(20000) + 1;

            List<Object> values = createRandomSample(uniques, (int) (uniques * 1.5));

            long actual = estimateGroupByCount(values);
            double error = (actual - uniques) * 1.0 / uniques;

            stats.addValue(error);
        }

        assertLessThan(stats.getMean(), 1.0e-2);
        assertLessThan(Math.abs(stats.getStandardDeviation() - ApproximateCountDistinctAggregation.getStandardError()), 1.0e-2);
    }
View Full Code Here


            sample[i] = Math.random();
        }
        // normalize this sample
        double standardizedSample[] = StatUtils.normalize(sample);

        DescriptiveStatistics stats = new DescriptiveStatistics();
        // Add the data from the array
        for (int i = 0; i < length; i++) {
            stats.addValue(standardizedSample[i]);
        }
        // the calculations do have a limited precision   
        double distance = 1E-10;
        // check the mean an standard deviation
        Assert.assertEquals(0.0, stats.getMean(), distance);
        Assert.assertEquals(1.0, stats.getStandardDeviation(), distance);

    }
View Full Code Here

    private Map<String, Double> certifiedValues;

    @Before
    public void setUp() throws IOException {
        descriptives = new DescriptiveStatistics();
        summaries = new SummaryStatistics();
        certifiedValues = new HashMap<String, Double>();

        loadData();
    }
View Full Code Here

        GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
        gls.newSampleData(longley, nObs, 6);
        gls.newCovarianceData(cov.getData());
       
        // Create aggregators for stats measuring model performance
        DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
        DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
       
        // Generate Y vectors for 10000 models, estimate GLS and OLS and
        // Verify that OLS estimates are better
        final int nModels = 10000;
        for (int i = 0; i < nModels; i++) {
           
            // Generate y = xb + u with u cov
            RealVector u = MatrixUtils.createRealVector(gen.nextVector());
            double[] y = u.add(x.operate(b)).toArray();
           
            // Estimate OLS parameters
            ols.newYSampleData(y);
            RealVector olsBeta = ols.calculateBeta();
           
            // Estimate GLS parameters
            gls.newYSampleData(y);
            RealVector glsBeta = gls.calculateBeta();
           
            // Record deviations from "true" beta
            double dist = olsBeta.getDistance(b);
            olsBetaStats.addValue(dist * dist);
            dist = glsBeta.getDistance(b);
            glsBetaStats.addValue(dist * dist);
           
        }
       
        // Verify that GLS is on average more efficient, lower variance
        assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
        assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation())
    }
View Full Code Here

        UnivariateFunction f = new QuinticFunction();
        UnivariateOptimizer optimizer = new BrentOptimizer(1e-11, 1e-14);

        final DescriptiveStatistics[] stat = new DescriptiveStatistics[2];
        for (int i = 0; i < stat.length; i++) {
            stat[i] = new DescriptiveStatistics();
        }

        final double min = -0.75;
        final double max = 0.25;
        final int nSamples = 200;
View Full Code Here

    @Test
    public void testTableSampleBernoulli()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().size();

        for (int i = 0; i < 100; i++) {
            List<MaterializedRow> values = computeActual("SELECT orderkey FROM ORDERS TABLESAMPLE BERNOULLI (50)").getMaterializedRows();

            assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
            stats.addValue(values.size() * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

    @Test
    public void testTableSamplePoissonized()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        long total = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().get(0).getField(0);

        for (int i = 0; i < 100; i++) {
            String value = (String) computeActual("SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) APPROXIMATE AT 95 CONFIDENCE").getMaterializedRows().get(0).getField(0);
            stats.addValue(Long.parseLong(value.split(" ")[0]) * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
    }
View Full Code Here

    @Test
    public void testTableSamplePoissonizedRescaled()
            throws Exception
    {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        long total = (long) computeExpected("SELECT COUNT(*) FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().get(0).getField(0);

        for (int i = 0; i < 100; i++) {
            String value = (String) computeActual("SELECT COUNT(*) FROM orders TABLESAMPLE POISSONIZED (50) RESCALED APPROXIMATE AT 95 CONFIDENCE").getMaterializedRows().get(0).getField(0);
            stats.addValue(Long.parseLong(value.split(" ")[0]) * 1.0 / total);
        }

        double mean = stats.getGeometricMean();
        assertTrue(mean > 0.90 && mean < 1.1, format("Expected sample to be rescaled to ~1.0, but was %s", mean));
        assertTrue(stats.getVariance() > 0, "Samples all had the exact same size");
    }
View Full Code Here

        }

        @Override
        public Block evaluateFinal()
        {
            DescriptiveStatistics statistics = new DescriptiveStatistics();
            for (int i = 0; i < accumulators.size(); i++) {
                BlockCursor cursor = accumulators.get(i).evaluateFinal().cursor();
                checkArgument(cursor.advanceNextPosition(), "accumulator returned no results");
                statistics.addValue(getNumeric(cursor));
            }

            BlockBuilder builder = new BlockBuilder(SINGLE_VARBINARY);
            builder.append(formatApproximateOutput(statistics, confidence));
            return builder.build();
View Full Code Here

        }

        @Override
        public void evaluateFinal(int groupId, BlockBuilder output)
        {
            DescriptiveStatistics statistics = new DescriptiveStatistics();
            for (int i = 0; i < accumulators.size(); i++) {
                BlockBuilder builder = new BlockBuilder(accumulators.get(i).getFinalTupleInfo());
                accumulators.get(i).evaluateFinal(groupId, builder);
                BlockCursor cursor = builder.build().cursor();
                checkArgument(cursor.advanceNextPosition(), "accumulator returned no results");
                statistics.addValue(getNumeric(cursor));
            }

            output.append(formatApproximateOutput(statistics, confidence));
        }
View Full Code Here

TOP

Related Classes of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.