Package miscellaneous

Source Code of miscellaneous.StatisticsUtility

package miscellaneous;

import java.util.Collection;
import java.util.List;

import logs.Logs;

import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.NormalDistributionImpl;
import org.apache.commons.math.distribution.TDistributionImpl;
import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math.stat.descriptive.rank.Median;

/*************************************************************************
*  Compilation:  javac StdRandom.java
*  Execution:    java StdRandom
*
*  A library of static methods to generate random numbers from
*  different distributions (bernoulli, uniform, gaussian,
*  discrete, and exponential). Also includes a method for
*  shuffling an array.
*
*  % java StdRandom 5
*  90 26.36076 false 8.79269 0
*  13 18.02210 false 9.03992 1
*  58 56.41176 true  8.80501 0
*  29 16.68454 false 8.90827 0
*  85 86.24712 true  8.95228 0
*
*
*  Remark
*  ------
*    - Uses Math.random() which generates a pseudorandom real number
*      in [0, 1)
*
*    - This library does not allow you to set the pseudorandom number
*      seed. See java.util.Random.
*
*    - See http://www.honeylocust.com/RngPack/ for an industrial
*      strength random number generator in Java.
*
*************************************************************************/

/**
* <i>Standard random</i>. This class provides methods for generating random number from various distributions.
* <p>
* For additional documentation, see <a href="http://www.cs.princeton.edu/introcs/22library">Section 2.2</a> of <i>Introduction to Programming in Java: An Interdisciplinary Approach</i> by Robert Sedgewick and Kevin Wayne.
*/
public class StatisticsUtility {
 
  /**
   * Return real number uniformly in [0, 1).
   */
  public static double uniform()
  {
    return Math.random();
  }
 
  /**
   * Return real number uniformly in [a, b).
   */
  public static double uniform(double a, double b)
  {
    return a + Math.random() * (b - a);
  }
 
  /**
   * Return an integer uniformly between 0 and N-1.
   */
  public static int uniform(int N)
  {
    return (int) (Math.random() * N);
  }
 
  /**
   * Return a boolean, which is true with probability p, and false otherwise.
   */
  public static boolean bernoulli(double p)
  {
    return Math.random() < p;
  }
 
  /**
   * Return a boolean, which is true with probability .5, and false otherwise.
   */
  public static boolean bernoulli()
  {
    return bernoulli(0.5);
  }
 
  /**
   * Return a real number with a standard Gaussian distribution.
   */
  public static double gaussian()
  {
    // use the polar form of the Box-Muller transform
    double r, x, y;
    do
    {
      x = uniform(-1.0, 1.0);
      y = uniform(-1.0, 1.0);
      r = x * x + y * y;
    }
    while (r >= 1 || r == 0);
    return x * Math.sqrt(-2 * Math.log(r) / r);
   
    // Remark: y * Math.sqrt(-2 * Math.log(r) / r)
    // is an independent random gaussian
  }
 
  /**
   * Return a real number from a gaussian distribution with given mean and stddev
   */
  public static double gaussian(double mean, double stddev)
  {
    return mean + stddev * gaussian();
  }
 
  /**
   * Return an integer with a geometric distribution with mean 1/p.
   */
  public static int geometric(double p)
  {
    // using algorithm given by Knuth
    return (int) Math.ceil(Math.log(uniform()) / Math.log(1.0 - p));
  }
 
  /**
   * Return an integer with a Poisson distribution with mean lambda.
   */
  public static int poisson(double lambda)
  {
    // using algorithm given by Knuth
    // see http://en.wikipedia.org/wiki/Poisson_distribution
    int k = 0;
    double p = 1.0;
    double L = Math.exp(-lambda);
    do
    {
      k++;
      p *= uniform();
    }
    while (p >= L);
    return k - 1;
  }
 
  /**
   * Return a real number with a Pareto distribution with parameter alpha.
   */
  public static double pareto(double alpha)
  {
    return Math.pow(1 - uniform(), -1.0 / alpha) - 1.0;
  }
 
  /**
   * Return a real number with a Cauchy distribution.
   */
  public static double cauchy()
  {
    return Math.tan(Math.PI * (uniform() - 0.5));
  }
 
  /**
   * Return a number from a discrete distribution: i with probability a[i].
   */
  public static int discrete(double[] a)
  {
    // precondition: sum of array entries equals 1
    double r = Math.random();
    double sum = 0.0;
    for (int i = 0; i < a.length; i++)
    {
      sum = sum + a[i];
      if(sum >= r)
      {
        return i;
      }
    }
    assert (false);
    return -1;
  }
 
  /**
   * Return a real number from an exponential distribution with rate lambda.
   */
  public static double exp(double lambda)
  {
    return -Math.log(1 - Math.random()) / lambda;
  }
 
  /**
   * Return the mean value of the list
   *
   * @param dList
   * @return
   */
  public static double mean(Double[] dList)
  {
    double result = 0;
    for (double d : dList)
    {
      result = result + d / dList.length;
    }
    return result;
  }
 
  /**
   * Return the mean value of the list
   *
   * @param dList
   * @return
   */
  public static double mean(Collection<Double> dList)
  {
    double result = 0;
    for (double d : dList)
    {
      result = result + d / dList.size();
    }
    return result;
  }
 
  /**
   * Return the min of the list
   *
   * @param dList
   * @return
   */
  public static double min(Collection<Double> dList)
  {
    double result = Double.MAX_VALUE;
    for (double d : dList)
    {
      if(result > d)
      {
        result = d;
      }
    }
    return result;
  }
 
  /**
   * Return the min of the list
   *
   * @param dList
   * @return
   */
  public static double max(Collection<Double> dList)
  {
    double result = Double.MIN_VALUE;
    for (double d : dList)
    {
      if(result < d)
      {
        result = d;
      }
    }
    return result;
  }
 
  /**
   * Return the index of the first value closest to the specified value in the list
   *
   * @param dList
   * @return -1 if list is empty
   */
  public static int nearestIndex(List<Double> dList, double target)
  {
    int result = -1;
    double currentDifference = Double.MAX_VALUE;
    double thisDifference;
    for (int i = 0; i < dList.size(); i++)
    {
      thisDifference = Math.abs(target - dList.get(i));
      if(currentDifference > thisDifference)
      {
        currentDifference = thisDifference;
        result = i;
      }
    }
    return result;
  }
 
  /**
   * Return the index of the first value closest to the specified value in the list
   *
   * @param dList
   * @return -1 if list is empty
   */
  public static int farthestIndex(List<Double> dList, double target)
  {
    int result = -1;
    double currentDifference = -1;
    double thisDifference;
    for (int i = 0; i < dList.size(); i++)
    {
      thisDifference = Math.abs(target - dList.get(i));
      if(currentDifference < thisDifference)
      {
        currentDifference = thisDifference;
        result = i;
      }
    }
    return result;
  }
 
  /**
   * Return the index of the first min value of the list
   *
   * @param dList
   * @return -1 if list is empty
   */
  public static int minIndex(List<Double> dList)
  {
    int result = -1;
    double currentMin = Double.MAX_VALUE;
    for (int i = 0; i < dList.size(); i++)
    {
      if(currentMin > dList.get(i))
      {
        currentMin = dList.get(i);
        result = i;
      }
    }
    return result;
  }
 
  /**
   * Return the index of the first max value of the list
   *
   * @param dList
   * @return -1 if list is empty
   */
  public static int maxIndex(List<Double> dList)
  {
    int result = -1;
    double currentMax = Double.MIN_VALUE;
    for (int i = 0; i < dList.size(); i++)
    {
      if(currentMax < dList.get(i))
      {
        currentMax = dList.get(i);
        result = i;
      }
    }
    return result;
  }
 
  /**
   * Rearrange the elements of an array in random order.
   */
  public static void shuffle(Object[] a)
  {
    int N = a.length;
    for (int i = 0; i < N; i++)
    {
      int r = i + uniform(N - i); // between i and N-1
      Object temp = a[i];
      a[i] = a[r];
      a[r] = temp;
    }
  }
 
  /**
   * Rearrange the elements of a double array in random order.
   */
  public static void shuffle(double[] a)
  {
    int N = a.length;
    for (int i = 0; i < N; i++)
    {
      int r = i + uniform(N - i); // between i and N-1
      double temp = a[i];
      a[i] = a[r];
      a[r] = temp;
    }
  }
 
  /**
   * Rearrange the elements of an int array in random order.
   */
  public static void shuffle(int[] a)
  {
    int N = a.length;
    for (int i = 0; i < N; i++)
    {
      int r = i + uniform(N - i); // between i and N-1
      int temp = a[i];
      a[i] = a[r];
      a[r] = temp;
    }
  }
 
  /**
   * For this distribution, X, this method returns P(X < x).
   *
   * @return
   */
  public static Double tDistCDF(double x, int dof)
  {
    TDistributionImpl tdist = new TDistributionImpl(dof);
    try
    {
      return tdist.cumulativeProbability(x);
    }
    catch (MathException e)
    {
      e.printStackTrace();
    }
    return null;
  }
 
  /**
   * For this distribution, X, this method returns the critical point x, such that P(X < x) = p. Returns Double.NEGATIVE_INFINITY for p=0 and Double.POSITIVE_INFINITY for p=1.
   */
  public static Double tDistInverseCDF(double p_value, int dof)
  {
    TDistributionImpl tdist = new TDistributionImpl(dof);
    try
    {
      return tdist.inverseCumulativeProbability(p_value);
    }
    catch (MathException e)
    {
      e.printStackTrace();
    }
    return null;
  }
 
  /**
   * Get the critical value for determining outliers using Grubb's method http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h1.htm
   *
   * @param mean
   * @param stdev
   * @param n
   * @param alpha
   *            (0.05 for 95% confidence)
   * @return g_critical, critical outlier threshold for furthest point.
   */
  public static Double grubbsOutlierLimit(int n, double alpha)
  {
    double t = -1 * tDistInverseCDF(alpha / ((2 * n)), n - 2);
    double g_critical = ((n - 1) / (Math.sqrt(n))) * Math.sqrt((t * t) / (n - 2 + t * t));
    return g_critical;
  }
 
  /**
   * Use Grubb's statistic for determining if there is an outlier and throw the point with the largest deviation from the mean if true.
   *
   * @param values
   * @return remaining values after culling if necessary
   */
  public static int getOutlier(List<Double> values, double alpha)
  {
    if(values.size() < 3)
    {
      return -1;
    }
    double[] arrayValues = toArray(values);
    double stdev = stdDev(arrayValues);
    double mean = mean(arrayValues);
    double g_critical = grubbsOutlierLimit(values.size(), alpha);
    Double maxOutlier = null;
    int outlierIndex = -1;
    for (Double value : values)
    {
      if(maxOutlier == null)
      {
        maxOutlier = value;
      }
      if(Math.abs(mean - value) > Math.abs(mean - maxOutlier))
      {
        maxOutlier = value;
      }
    }
    if(Math.abs(mean - maxOutlier) / stdev > g_critical)
    {
      outlierIndex = values.indexOf(maxOutlier);
      Logs.log("Found outlier at index: " + outlierIndex, 1, "StatisticsUtility");
    }
    return outlierIndex;
  }
 
  public static double[] toArray(List<Double> values)
  {
    int count = 0;
    double[] arrayValues = new double[values.size()];
    for (Double value : values)
    {
      arrayValues[count] = value;
      count = count + 1;
    }
    return arrayValues;
  }
 
  public static Double stdDev(double[] values)
  {
    StandardDeviation stdevCalculator = new StandardDeviation();
    return stdevCalculator.evaluate(values);
  }
 
  public static Double mean(double[] values)
  {
    Mean meanCalculator = new Mean();
    return meanCalculator.evaluate(values);
  }
 
  public static Double median(double[] values)
  {
    Median medianCalculator = new Median();
    return medianCalculator.evaluate(values);
  }
 
  public static Double median(Collection<Double> values)
  {
    double[] v = new double[values.size()];
    int count = 0;
    for (Double d : values)
    {
      v[count] = d;
      count++;
    }
    return median(v);
  }
 
  public static double normalCDF(double x, double mean, double sigma) throws MathException
  {
    NormalDistributionImpl normDist = new NormalDistributionImpl(mean, sigma);
    double ret = normDist.cumulativeProbability(x);
    return ret;
  }
 
  public static double normalInverseCDF(double pValue, double mean, double sigma) throws MathException
  {
    NormalDistributionImpl normDist = new NormalDistributionImpl(mean, sigma);
    double ret = normDist.inverseCumulativeProbability(pValue);
    return ret;
  }
 
  public static double normalPDF(double x, double mean, double sigma) throws MathException
  {
    double A = 1 / (sigma * Math.sqrt(2 * Math.PI));
    double B = -1 * Math.pow((x - mean), 2) / (2 * Math.pow(sigma, 2));
    double ret = A * Math.exp(B);
    return ret;
  }
 
}
TOP

Related Classes of miscellaneous.StatisticsUtility

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.