Package org.apache.mahout.clustering.dirichlet

Source Code of org.apache.mahout.clustering.dirichlet.UncommonDistributions

package org.apache.mahout.clustering.dirichlet;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.util.Random;

import org.apache.mahout.matrix.DenseVector;
import org.apache.mahout.matrix.Vector;
import org.uncommons.maths.random.GaussianGenerator;
import org.uncommons.maths.random.MersenneTwisterRNG;

public final class UncommonDistributions {

  private static final double sqrt2pi = Math.sqrt(2.0 * Math.PI);

  static Random random = new MersenneTwisterRNG();

  private UncommonDistributions() {
  }

  public static void init(byte[] seed) {
    random = new MersenneTwisterRNG(seed);
  }

  //=============== start of BSD licensed code. See LICENSE.txt
  /**
   * Returns a double sampled according to this distribution.  Uniformly
   * fast for all k > 0.  (Reference: Non-Uniform Random Variate Generation,
   * Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)  Uses Cheng's
   * rejection algorithm (GB) for k>=1, rejection from Weibull distribution
   * for 0 < k < 1.
   */
  public static double rGamma(double k, double lambda) {
    boolean accept = false;
    if (k >= 1) {
      //Cheng's algorithm
      double b = (k - Math.log(4));
      double c = (k + Math.sqrt(2 * k - 1));
      double lam = Math.sqrt(2 * k - 1);
      double cheng = (1 + Math.log(4.5));
      double x;
      do {
        double u = random.nextDouble();
        double v = random.nextDouble();
        double y = ((1 / lam) * Math.log(v / (1 - v)));
        x = (k * Math.exp(y));
        double z = (u * v * v);
        double r = (b + (c * y) - x);
        if ((r >= ((4.5 * z) - cheng)) || (r >= Math.log(z))) {
          accept = true;
        }
      } while (!accept);
      return x / lambda;
    } else {
      //Weibull algorithm
      double c = (1 / k);
      double d = ((1 - k) * Math.pow(k, (k / (1 - k))));
      double x;
      do {
        double u = random.nextDouble();
        double v = random.nextDouble();
        double z = -Math.log(u);
        double e = -Math.log(v);
        x = Math.pow(z, c);
        if ((z + e) >= (d + x)) {
          accept = true;
        }
      } while (!accept);
      return x / lambda;
    }
  }

  //============= end of BSD licensed code

  /**
   * Returns a random sample from a beta distribution with
   * the given shapes
   *
   * @param shape1 a double representing shape1
   * @param shape2 a double representing shape2
   * @return a Vector of samples
   */
  public static double rBeta(double shape1, double shape2) {
    double gam1 = rGamma(shape1, 1);
    double gam2 = rGamma(shape2, 1);
    return gam1 / (gam1 + gam2);

  }

  /**
   * Returns a vector of random samples from a beta distribution with
   * the given shapes
   *
   * @param K the number of samples to return
   * @param shape1 a double representing shape1
   * @param shape2 a double representing shape2
   * @return a Vector of samples
   */
  public static Vector rBeta(int K, double shape1, double shape2) {
    //List<Double> params = new ArrayList<Double>(2);
    //params.add(shape1);
    //params.add(Math.max(0, shape2));
    Vector result = new DenseVector(K);
    for (int i = 0; i < K; i++)
      result.set(i, rBeta(shape1, shape2));
    return result;
  }

  /**
   * Return a random sample from the chi-squared (chi^2) distribution with df
   * degrees of freedom.
   * @param df
   * @return a double sample
   */
  public static double rChisq(double df) {
    double result = 0.0;
    for (int i = 0; i < df; i++) {
      double sample = rNorm(0, 1);
      result += sample * sample;
    }
    return result;
  }

  /**
   * Return a random value from a normal distribution with the given mean and
   * standard deviation
   *
   * @param mean a double mean value
   * @param sd a double standard deviation
   * @return a double sample
   */
  public static double rNorm(double mean, double sd) {
    GaussianGenerator dist = new GaussianGenerator(mean, sd, random);
    return dist.nextValue();
  }

  /**
   * Return the normal density function value for the sample x
   *
   * pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
   * 
   *
   * @param x a double sample value
   * @param m a double mean value
   * @param s a double standard deviation
   * @return a double probability value
   */
  public static double dNorm(double x, double m, double s) {
    double xms = (x - m) / s;
    double ex = (xms * xms) / 2;
    double exp = Math.exp(-ex);
    return exp / (sqrt2pi * s);
  }

  /**
  * Returns one sample from a multinomial.
  */
  public static int rMultinom(Vector probabilities) {
    // our probability argument are not normalized.
    double total = probabilities.zSum();
    double nextDouble = random.nextDouble();
    double p = nextDouble * total;
    for (int i = 0; i < probabilities.cardinality(); i++) {
      double p_i = probabilities.get(i);
      if (p < p_i) {
        return i;
      } else {
        p -= p_i;
      }
    }
    // can't happen except for round-off error so we don't care what we return here
    return 0;
  }

  /**
   * Returns a multinomial vector sampled from the given probabilities
   *
   * rmultinom should be implemented as successive binomial sampling.
   *
   *Keep a normalizing amount that starts with 1 (I call it total).
   *
   * For each i
   *  k[i] = rbinom(p[i] / total, size);
   *  total -= p[i];
   *  size -= k[i];
   *
   * @param size the size parameter of the binomial distribution
   * @param probabilities a Vector of probabilities
   *
   * @return a multinomial distribution Vector
   */
  public static Vector rMultinom(int size, Vector probabilities) {
    // our probability argument may not be normalized.
    double total = probabilities.zSum();
    int cardinality = probabilities.cardinality();
    Vector result = new DenseVector(cardinality);
    for (int i = 0; total > 0 && i < cardinality; i++) {
      double p = probabilities.get(i);
      int ki = rBinomial(size, p / total);
      total -= p;
      size -= ki;
      result.set(i, ki);
    }
    return result;
  }

  /**
   * Returns an integer sampled according to this distribution.  Takes time
   * proprotional to np + 1.  (Reference: Non-Uniform Random Variate
   * Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html)
   * Second time-waiting algorithm.
   */
  public static int rBinomial(int n, double p) {
    if (p >= 1)
      return n; // needed to avoid infinite loops and negative results
    double q = -Math.log(1 - p);
    double sum = 0;
    int x = 0;
    while (sum <= q) {
      double u = random.nextDouble();
      double e = -Math.log(u);
      sum += (e / (n - x));
      x += 1;
    }
    if (x == 0)
      return 0;
    return x - 1;
  }

  /**
   * Sample from a Dirichlet distribution over the given alpha,
   * returning a vector of probabilities using a stick-breaking algorithm
   *
   * @param alpha an unnormalized count Vector
   * @return a Vector of probabilities
   */
  public static Vector rDirichlet(Vector alpha) {
    Vector r = alpha.like();
    double total = alpha.zSum();
    double remainder = 1;
    for (int i = 0; i < r.size(); i++) {
      double a = alpha.get(i);
      total -= a;
      double beta = rBeta(a, Math.max(0, total));
      double p = beta * remainder;
      r.set(i, p);
      remainder -= p;
    }
    return r;
  }

}
TOP

Related Classes of org.apache.mahout.clustering.dirichlet.UncommonDistributions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.