Package org.apache.mahout.math.stats

Source Code of org.apache.mahout.math.stats.OnlineSummarizerTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.math.stats;

import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.MahoutTestCase;
import org.apache.mahout.math.jet.random.AbstractContinousDistribution;
import org.apache.mahout.math.jet.random.Gamma;
import org.junit.Test;

import java.util.Random;

public final class OnlineSummarizerTest extends MahoutTestCase {

  @Test
  public void testCount() {
    OnlineSummarizer x = new OnlineSummarizer();
    assertEquals(0, x.getCount());
    x.add(1);
    assertEquals(1, x.getCount());

    for (int i = 2; i < 110; i++) {
      x.add(i);
      assertEquals(i, x.getCount());
    }
  }

  @Test
  public void testStats() {
    // the reference limits here were derived using a numerical simulation where I took
    // 10,000 samples from the distribution in question and computed the stats from that
    // sample to get min, 25%-ile, median and so on.  I did this 1000 times to get 5% and
    // 95% confidence limits for those values.

    // symmetrical, well behaved
    System.out.printf("normal\n");
    check(normal(10000),
      -4.417246, -3.419809,
      -0.6972919, -0.6519899,
      -0.02056658, 0.02176474,
      0.6503866, 0.6983311,
      4.419809, 5.417246,
      -0.01515753, 0.01592942,
      0.988395, 1.011883);

    // asymmetrical, well behaved.  The range for the maximum was fudged slightly to all this to pass.
    System.out.printf("exp\n");
    check(exp(10000),
            -3e-4, 3.278763e-04,
            0.2783866, 0.298,
            0.6765024, 0.7109463,
            1.356929, 1.414761,
            8, 20,
            0.983805, 1.015920,
            0.977162, 1.022093
    );

    // asymmetrical, wacko distribution where mean/median is about 200
    System.out.printf("gamma\n");
    check(gamma(10000, 0.1),
      -5e-30, 5e-30,                                    // minimum
      3.8e-6, 8.6e-6,                                   // 25th %-ile
      0.004847959, 0.007234259,                         // median
      0.3074556, 0.4049404,                             // 75th %-ile
      45, 100,                                          // maximum
      0.9, 1.1,                                         // mean
      2.8, 3.5);                                        // standard dev

  }

  private static void check(OnlineSummarizer x, double... values) {
    for (int i = 0; i < 5; i++) {
      checkRange("quartile " + i, x.getQuartile(i), values[2 * i], values[2 * i + 1]);
    }
    assertEquals(x.getQuartile(2), x.getMedian(), 0);

    checkRange("mean", x.getMean(), values[10], values[11]);
    checkRange("sd", x.getSD(), values[12], values[13]);
  }

  private static void checkRange(String msg, double v, double low, double high) {
    if (v < low || v > high) {
      fail("Wanted " + msg + " to be in range [" + low + ',' + high + "] but got " + v);
    }
  }

  private static OnlineSummarizer normal(int n) {
    OnlineSummarizer x = new OnlineSummarizer();
    Random gen = RandomUtils.getRandom(1L);
    for (int i = 0; i < n; i++) {
      x.add(gen.nextGaussian());
    }
    return x;
  }

  private static OnlineSummarizer exp(int n) {
    OnlineSummarizer x = new OnlineSummarizer();
    Random gen = RandomUtils.getRandom(1L);
    for (int i = 0; i < n; i++) {
      x.add(-Math.log1p(-gen.nextDouble()));
    }
    return x;
  }

  private static OnlineSummarizer gamma(int n, double shape) {
    OnlineSummarizer x = new OnlineSummarizer();
    Random gen = RandomUtils.getRandom();
    AbstractContinousDistribution gamma = new Gamma(shape, shape, gen);
    for (int i = 0; i < n; i++) {
      x.add(gamma.nextDouble());
    }
    return x;
  }

}

TOP

Related Classes of org.apache.mahout.math.stats.OnlineSummarizerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.