Source Code of cc.mallet.util.MVNormal

package cc.mallet.util;


import java.util.Arrays;
import java.text.NumberFormat;
import cc.mallet.util.Randoms;


import cc.mallet.types.*;


/** Tools for working with multivariate normal distributions */


public class MVNormal {


  /** Simple Cholesky decomposition, with no checks on squareness, 
   *  symmetricality, or positive definiteness. This follows the
   *  implementation from JAMA fairly closely.
   *  <p>
   *  Returns L such that LL' = A and L is lower-triangular.
   */ 
  
  public static double[] cholesky(double[] input, int numRows) {


    // Initialize the result. Note that java sets all elements to 0.
    double[] result = new double[ input.length ];
    double sumRowSquared = 0.0;
    double dotProduct = 0.0;


    // For each off-diagonal cell l_{jk} in the result,
    //  we need an index into the jth row and the kth row.
    // These are therefore both really row offsets, but one
    //  corresponds to the beginning of the (row)th row 
    //  and the other to the beginning of the (column)th row.
    int rowOffset = 0;
    int colOffset = 0;


    for (int row = 0; row < numRows; row++) {


      sumRowSquared = 0.0;
      rowOffset = row * numRows;
      
      for (int col = 0; col < row; col++) {


        dotProduct = 0.0;
        colOffset = col * numRows;


        for (int i = 0; i < col; i++) {
          dotProduct += 
            result[ rowOffset + i ] *
            result[ colOffset + i ];
        }


        result[ rowOffset + col ] = 
          (input[ rowOffset + col ] - dotProduct) /
          result[ colOffset + col ];
        sumRowSquared +=
          result[rowOffset + col] * 
          result[rowOffset + col];
      }


      //  Now the diagonal element
      result[ rowOffset + row ] =
        Math.sqrt(input[ rowOffset + row ] - sumRowSquared);
    }


    return result;
  }


  public static double[] bandCholesky(double[] input, int numRows) {


    // Initialize the result. Note that java sets all elements to 0.
    double[] result = new double[ input.length ];
    double sumRowSquared = 0.0;
    double dotProduct = 0.0;


    // For each off-diagonal cell l_{jk} in the result,
    //  we need an index into the jth row and the kth row.
    // These are therefore both really row offsets, but one
    //  corresponds to the beginning of the (row)th row 
    //  and the other to the beginning of the (column)th row.
    int rowOffset = 0;
    int colOffset = 0;


    int firstNonZero;


    for (int row = 0; row < numRows; row++) {


      sumRowSquared = 0.0;
      rowOffset = row * numRows;
      
      firstNonZero = row;


      for (int col = 0; col < row; col++) {


        if (firstNonZero == row) {
          if (input[ rowOffset + col ] == 0) {
            continue;
          }
          else {
            firstNonZero = col;
          }
        }


        dotProduct = 0.0;
        colOffset = col * numRows;


        for (int i = firstNonZero; i < col; i++) {
          dotProduct += 
            result[ rowOffset + i ] *
            result[ colOffset + i ];
        }


        result[ rowOffset + col ] = 
          (input[ rowOffset + col ] - dotProduct) /
          result[ colOffset + col ];
        sumRowSquared +=
          result[rowOffset + col] * 
          result[rowOffset + col];
      }


      //  Now the diagonal element
      result[ rowOffset + row ] =
        Math.sqrt(input[ rowOffset + row ] - sumRowSquared);
    }


    return result;
  }


  /** For testing band cholesky factorization */
  public static double[] bandMatrixRoot (int dim, int bandwidth) {
    double[] result = new double[dim * dim];
    
    for (int row = 0; row < dim; row++) {
      int rowOffset = row * dim;


      for (int col = Math.max(0, (row - bandwidth + 1));
         col <= row;
         col++) {
        result[rowOffset + col] = 1.0;
      }
    }


    return result;
  }


  /** Sample a multivariate normal from a precision matrix 
   *    (ie inverse covariance matrix)
   */
  public static double[] nextMVNormal(double[] mean, double[] precision, 
                    Randoms random) {
    return nextMVNormalWithCholesky(mean, cholesky(precision, mean.length), random);
  }


  public static double[] nextMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular,
                          Randoms random) {


    int n = mean.length;


    // Initialize vector z to standard normals
    //  [NB: using the same array for z and x]
    double[] result = new double[ n ];
    for (int i = 0; i < n; i++) {
      result[i] = random.nextGaussian();
    }
    
    // Now solve trans(L) x = z using back substitution
    double innerProduct;
    
    for (int i = n-1; i >= 0; i--) {
      innerProduct = 0.0;
      for (int j = i+1; j < n; j++) {
        // the cholesky decomp got us the precisionLowerTriangular triangular
        //  matrix, but we really want the transpose.
        innerProduct += result[j] * precisionLowerTriangular[ (n * j) + i ];
      }


      result[i] = (result[i] - innerProduct) / precisionLowerTriangular[ (n * i) + i ];
    }


    for (int i = 0; i < n; i++) {
      result[i] += mean[i];
    }


    return result;
  }
  
  /** Sample a vector x from N(m, (LL')<sup>-1</sup>, such that 
   *   sum_i x_i = 0.
   */
  public static double[] nextZeroSumMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular,
                               Randoms random) {


    int n = mean.length;


    double[] result = nextMVNormalWithCholesky(mean, precisionLowerTriangular, random);
    double sum = 0.0;
    for (int i = 0; i < n; i++) {
      sum += result[i];
    }
    
    // get the sum of each row of the inverse precision matrix
    //  by solving the two triangular systems L(L'x) = Ly = 1, L'x = y.


    double[] ones = new double[n];
    Arrays.fill(ones, 1.0);


    double[] firstSolution = solveWithForwardSubstitution(ones, precisionLowerTriangular);
    double[] rowSums = solveWithBackSubstitution(firstSolution, precisionLowerTriangular);


    double sumOfRowSums = 0.0;
    for (int i = 0; i < n; i++) {
      sumOfRowSums += rowSums[i];
    }


    double inverseSumOfRowSums = 1.0 / sumOfRowSums;


    for (int i = 0; i < n; i++) {
      result[i] -= inverseSumOfRowSums * rowSums[i] * sum;
    }


    return result;
  }
  
  public static double[][] nextMVNormal(int n, double[] mean, double[] precision,
                      Randoms random) {
    double[][] result = new double[n][];


    for (int i=0; i < n; i++) {
      result[i] = nextMVNormal(mean, precision, random);
    }


    return result;
  }


  public static FeatureVector nextFeatureVector(Alphabet alphabet, double[] mean,
                          double[] precision, Randoms random) {


    return new FeatureVector(alphabet, nextMVNormal(mean, precision, random));
  }


  /**
   *  @param priorMean A vector of mean values
   *  @param priorPrecisionDiagonal A vector representing a diagonal prior precision matrix
   *  @param precision A precision matrix
   */
  public static double[] nextMVNormalPosterior(double[] priorMean, double[] priorPrecisionDiagonal,
                         double[] precision,
                         double[] observedMean, int observations, 
                         Randoms random) {
    int dimension = priorMean.length;


    // Q_0 mu_0 + n Q y_bar
    double[] linearCombination = new double[dimension];


    for (int i=0; i<dimension; i++) {
      linearCombination[i] = priorMean[i] * priorPrecisionDiagonal[i];


      double innerProduct = 0.0;
      for (int j = 0; j < dimension; j++) {
        innerProduct += precision[ (dimension * i) + j ] * observedMean[j];
      }
      
      linearCombination[i] += observations * innerProduct;
    }
    
    // Q_0 + n Q
    double[] posteriorPrecision = new double[precision.length];
    
    for (int row = 0; row < dimension; row++) {
      for (int col = 0; col < dimension; col++) {
        posteriorPrecision[ (dimension * row) + col ] = 
          observations * precision[ (dimension * row) + col ];
        if (row == col) {
          posteriorPrecision[ (dimension * row) + col ] += 
            priorPrecisionDiagonal[row];
        }
      }
    }


    double[] inversePosteriorPrecision = invertSPD(posteriorPrecision, dimension);
    
    double[] posteriorMean = new double[dimension];


    for (int row = 0; row < dimension; row++) {
      double innerProduct = 0.0;
      for (int col = 0; col < dimension; col++) {
        innerProduct +=
          inversePosteriorPrecision[ (dimension * row) + col ] *
          linearCombination[ col ];
      }


      posteriorMean[row] = innerProduct;
    }


    return nextMVNormal(posteriorMean, posteriorPrecision, random);
  }


  /** 
   * This method returns x such that L'x = b.
   *  Note the transpose: this method assumes that 
   *  the input matrix is LOWER triangular, even though
   *  back substitution operates on UPPER triangular matrices.
   */
  public static double[] solveWithBackSubstitution(double[] b, double[] lowerTriangular) {
        double innerProduct;


    int n = b.length;
    double[] result = new double[n];


        for (int i = n-1; i >= 0; i--) {
      innerProduct = 0.0;
            for (int j = i+1; j < n; j++) {
        // Assume we're dealing with a single lower triangular
        //  matrix from a cholesky decomposition, so index into
        //  it as if it is the transpose.
        innerProduct += result[j] * lowerTriangular[ (n * j) + i ];
            }


            result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ];
        }


    return result;
  }


  /** 
   * This method returns x such that Lx = b
   *  where L is lower triangular
   */
  public static double[] solveWithForwardSubstitution(double[] b, double[] lowerTriangular) {
        double innerProduct;


    int n = b.length;
    double[] result = new double[n];


        for (int i = 0; i < n; i++) {
      innerProduct = 0.0;
            for (int j = 0; j < i; j++) {
        innerProduct += result[j] * lowerTriangular[ (n * i) + j ];
            }


            result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ];
        }


    return result;
  }


  /**
   *  This method returns the (lower-triangular) inverse of a lower triangular
   *   matrix.
   */
  public static double[] invertLowerTriangular(double[] inputMatrix, int dimension) {
    double[] outputMatrix = new double[inputMatrix.length];


    double x;


    for (int row = 0; row < dimension; row++) {
      for (int col = 0; col <= row; col++) {
        // Off-diagonal elements are the negative inner product
        //  (up to the row index) of the row from input and the col
        //  from the output, divided by the diagonal from the input.
        
        if (col == row) {
          // Diagonal elements are the same, but add 1 to the numerator
          x = 1.0;
        }
        else {
          x = 0.0;
        }
        
        for (int i = col; i < row; i++) {
          x -= inputMatrix[ (dimension * row) + i ] * outputMatrix[ (dimension * i) + col ];
        }


        outputMatrix[ (dimension * row) + col ] = x / inputMatrix[ (dimension * row) + row ];
      }


    }


    return outputMatrix;
  }


  /**
   *  Returns L'L for lower triangular matrix L.
   */
  public static double[] lowerTriangularCrossproduct(double[] inputMatrix, int dimension) {
    
    double[] outputMatrix = new double[inputMatrix.length];


    double innerProduct;


    for (int row = 0; row < dimension; row++) {
      for (int col = row; col < dimension; col++) {


        innerProduct = 0.0;


        for (int i = col; i < dimension; i++) {
          innerProduct += inputMatrix[ row + (dimension * i) ] * inputMatrix[ col + (dimension * i) ];
        }


        outputMatrix[ (dimension * row) + col ] = innerProduct;
        outputMatrix[ row + (dimension * col) ] = innerProduct;        
      }
    }


    return outputMatrix;
  }


  /** 
   *  Returns (lower-triangular) X = AB for square lower-triangular matrices A and B
   */
  public static double[] lowerTriangularProduct(double[] leftMatrix, double[] rightMatrix, int dimension) {
    
    double[] outputMatrix = new double[leftMatrix.length];


        double innerProduct;


        for (int row = 0; row < dimension; row++) {
            for (int col = 0; col <= row; col++) {


                innerProduct = 0.0;
        for (int i = col; i <= row; i++) {
          innerProduct += leftMatrix[ (dimension * row) + i ] * rightMatrix[ (dimension * i) + col ];
        }


        outputMatrix[ (dimension * row) + col ] = innerProduct;
      }
    }


    return outputMatrix;


  }


  public static double[] invertSPD(double[] inputMatrix, int dimension) {
    
    return lowerTriangularCrossproduct(invertLowerTriangular(bandCholesky(inputMatrix, dimension),
                                 dimension),
                       dimension);
  }


  /**
   *  A Wishart random variate, based on R code by Bill Venables.
   *
   *  @param sqrtScaleMatrix The lower-triangular matrix square root of the scale matrix.
   *     To draw from the posterior of a precision (ie inverse covariance) matrix,
   *     this should be chol( S^{-1} ), where S is the scatter matrix X'X of 
   *     columns of MV normal observations X.
   *  @param dimension The size of the matrix
   *  @param degreesOfFreedom  The degree of freedom for the Wishart. Should be greater than dimension. For 
   *     a posterior distribution, this is the number of observations + the df of the prior.
   */
  public static double[] nextWishart(double[] sqrtScaleMatrix, int dimension,
                     int degreesOfFreedom, Randoms random) {


    double[] sample = new double[sqrtScaleMatrix.length];
    
    for (int row = 0; row < dimension; row++) {


      for (int col = 0; col < row; col++) {
        sample[ (row * dimension) + col ] = random.nextGaussian(0, 1);
      }
      
      sample[ (row * dimension) + row ] = Math.sqrt(random.nextChiSq(degreesOfFreedom));
    }


    //System.out.println(doubleArrayToString(sample, dimension));
    //System.out.println(doubleArrayToString(sqrtScaleMatrix, dimension));    
    //System.out.println(doubleArrayToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension));


    System.out.println(diagonalToString(sample, dimension));
    System.out.println(diagonalToString(sqrtScaleMatrix, dimension));    
    System.out.println(diagonalToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension));


    return lowerTriangularCrossproduct(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension);


  }


  public static double[] nextWishartPosterior(double[] scatterMatrix, int observations,
                        double[] priorPrecisionDiagonal, int priorDF,
                        int dimension, Randoms random) {


    double[] scatterPlusPrior = new double[scatterMatrix.length];
    System.arraycopy(scatterMatrix, 0, scatterPlusPrior, 0, scatterMatrix.length);


    for (int i=0; i < dimension; i++) {
      scatterPlusPrior[ (dimension * i) + i ] += 1.0 / priorPrecisionDiagonal[i];
    }


    System.out.println(" inverted scatter plus prior");
    System.out.println(diagonalToString(invertSPD(scatterPlusPrior, dimension), dimension));


    System.out.println(" chol inverted scatter plus prior");
        System.out.println(diagonalToString(cholesky(invertSPD(scatterPlusPrior, dimension), dimension), dimension));


    double[] sqrtScaleMatrix = cholesky(invertSPD(scatterPlusPrior, dimension), dimension);
    
    return nextWishart(sqrtScaleMatrix, dimension, observations + priorDF, random);
  }
  
  /** Create a string representation of a square matrix in one-dimensional array format
   */
  public static String doubleArrayToString(double[] matrix, int dimension) {
    NumberFormat formatter = NumberFormat.getInstance();
        formatter.setMaximumFractionDigits(10);




    StringBuffer output = new StringBuffer();


    for (int row = 0; row < dimension; row++) {
      for (int col = 0; col < dimension; col++) {
        output.append(formatter.format(matrix[ (dimension * row) + col ]));
        output.append("\t");
      }
      output.append("\n");
    }
    
    return output.toString();
  }


  public static String diagonalToString(double[] matrix, int dimension) {
        NumberFormat formatter = NumberFormat.getInstance();
        formatter.setMaximumFractionDigits(4);




        StringBuffer output = new StringBuffer();


        for (int row = 0; row < dimension; row++) {
      output.append(formatter.format(matrix[ (dimension * row) + row ]));
      output.append(" ");
        }


        return output.toString();
    }




  public static double[] getScatterMatrix(double[][] observationMatrix) {
    int observations = observationMatrix.length;
    int dimension = observationMatrix[0].length;


    double[] outputMatrix = new double[dimension * dimension];
    double[] means = new double[dimension];


    // collect the sample means


    for (int i = 0; i < observations; i++) {
            for (int d = 0; d < dimension; d++) {
                means[d] += observationMatrix[i][d];
            }
    }


    for (int d = 0; d < dimension; d++) {
      means[d] /= observations;
    }


    // now the sample covariance (times n)


    for (int i = 0; i < observations; i++) {
            for (int d1 = 0; d1 < dimension; d1++) {
                for (int d2 = 0; d2 < dimension; d2++) {
                    outputMatrix[ (dimension * d1) + d2 ] +=
                        (observationMatrix[i][d1] - means[d1]) *
            (observationMatrix[i][d2] - means[d2]);
                }
      }
        }


    return outputMatrix;
  }


  public static void testCholesky() {


    int observations = 1000;


    double[] mean = new double[20];
    double[] precisionMatrix = new double[ 20 * 20 ];
    for (int i=0; i<20; i++) {
      precisionMatrix[ (20 * i) + i ] = 1.0;
    }


    Randoms random = new Randoms();
    double[] scatterMatrix = getScatterMatrix(nextMVNormal(observations, mean, precisionMatrix, random));
    
    double[] priorPrecision = new double[20];
    Arrays.fill(priorPrecision, 1.0);
    
    nextWishartPosterior(scatterMatrix, observations, priorPrecision, 21, 20, random);
  }




  public static void main (String[] args) {


    //double[] spd = { 19.133825, -1.180869, 6.403880, 
    //         -1.180869,  8.895968, 1.280748,
    //         6.403880,  1.280748, 9.155951 };
    
    double[] spd = {3.0, 0.0, -1.0, 
            0.0, 3.0, 0.0,
            -1.0, 0.0, 3.0};
    
    Randoms random = new Randoms();
    double[] mean = { 1.0, 1.0, 1.0 };
    double[] lower = cholesky(spd, 3);


    for (int iter = 0; iter < 10; iter++) {
      double[] sample = nextMVNormalWithCholesky(mean, lower,
                             random);
      for (int i=0; i<sample.length; i++) {
        System.out.print(sample[i] + "\t");
      }
      System.out.println();
    }


    for (int iter = 0; iter < 10; iter++) {
      double[] sample = nextZeroSumMVNormalWithCholesky(mean, lower,
                                random);
      for (int i=0; i<sample.length; i++) {
        System.out.print(sample[i] + "\t");
      }
      System.out.println();
    }




    /*
    int dim = 100;


    double[] bandLower = bandMatrixRoot(dim, 3);
    System.out.println(doubleArrayToString(bandLower, dim));


    double[] bandMatrix = lowerTriangularCrossproduct(bandLower, dim);
    System.out.println(doubleArrayToString(bandMatrix, dim));


    long startTime;


    startTime = System.currentTimeMillis();
    for (int i=0; i<100000; i++) {
      bandCholesky(bandMatrix, dim);
    }
    System.out.println(System.currentTimeMillis() - startTime);


    startTime = System.currentTimeMillis();
    for (int i=0; i<100000; i++) {
      cholesky(bandMatrix, dim);
    }
    System.out.println(System.currentTimeMillis() - startTime);
    */
    
    /*


    double[] l = {2.87527, 0.0, 0.0, -2.4168, 1.28, 0.0, -0.585168, -2.792234, 2.769609};
    double[] spd = { 19.133825, -1.180869, 6.403880, 
             -1.180869,  8.895968, 1.280748,
             6.403880,  1.280748, 9.155951 };


    double[] scatter = { 103.59761, -16.370939, 12.694755,
               -16.37094, 106.117048,  4.079818,
               12.69476,   4.079818, 94.065152 };


    double[] priorDiagonal = { 1.0, 1.0, 1.0 };


    testCholesky();
    */


    //System.out.println(doubleArrayToString(nextWishartPosterior(scatter, 100, priorDiagonal, 10, 3, new Randoms()), 3));


    /*
    long startTime = System.currentTimeMillis();
    for (int i=0; i<10000; i++) {
      invertSPD(spd, 3);
    }
    System.out.println(System.currentTimeMillis() - startTime);
    */    


    //System.out.println(doubleArrayToString(invertSPD(spd, 3), 3));
    //System.out.println(doubleArrayToString(nextWishart(l, 3, 25, new Randoms()), 3));


    /*
    double[] precisionMatrix = {0.98, -1.0, 0.0, -1.0, 2.13, -1.0, 0.0, -1.0, 1.01};
    double[] mean = new double[3];


    Randoms random = new Randoms();


    for (int i=0; i<10; i++) {
      double[] variate = nextMVNormal(mean, precisionMatrix, random);
      
      for (int j=0; j<variate.length; j++) {
        System.out.print(variate[j] + "\t");
      }
      System.out.println();
    }
    */
    
  }
}
Source Code of cc.mallet.util.MVNormal

Related Classes of cc.mallet.util.MVNormal