Package org.apache.mahout.math

Examples of org.apache.mahout.math.DenseMatrix


  public void testHebbianSolver() throws Exception {
    int numColumns = 800;
    Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
    int rank = 50;
    Matrix eigens = new DenseMatrix(rank, numColumns);
    TrainingState state = new TrainingState(eigens, null);
    long optimizedTime = timeSolver(corpus,
                                    0.00001,
                                    5,
                                    rank,
View Full Code Here


  public void testLanczosSolver() throws Exception {
    int numColumns = 800;
    Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
    int rank = 50;
    Matrix eigens = new DenseMatrix(rank, numColumns);
    long time = timeLanczos(corpus, eigens, rank, false);
    assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
    assertOrthonormal(eigens);
    assertEigen(eigens, corpus, 0.1, false);
  }
View Full Code Here

  public void testLanczosSolverSymmetric() throws Exception {
    int numColumns = 400;
    Matrix corpus = randomSequentialAccessSparseMatrix(500, 450, numColumns, 10, 1.0);
    Matrix gramMatrix = corpus.times(corpus.transpose());
    int rank = 30;
    Matrix eigens = new DenseMatrix(rank, gramMatrix.numCols());
    long time = timeLanczos(gramMatrix, eigens, rank, true);
    assertTrue("Lanczos taking too long!  Are you in the debugger? :)", time < 10000);
    assertOrthonormal(eigens);
    assertEigen(eigens, gramMatrix, 0.1, true);
  }
View Full Code Here

    values[1][0] = 0.0;
    values[2][0] = 0.0;
    values[1][1] = 0.0;
    values[2][2] = 0.0;

    Matrix dataset = new DenseMatrix(values);
    trainer.train(labelset, dataset);
    assertTrue(trainer.getModel().classify(dataset.getColumn(3)));
    assertFalse(trainer.getModel().classify(dataset.getColumn(0)));
  }
View Full Code Here

    String outputTmpPathString = parsedArgs.get("--tempDir");
    int numRows = Integer.parseInt(parsedArgs.get("--numRows"));
    int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
    boolean isSymmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
    int desiredRank = Integer.parseInt(parsedArgs.get("--rank"));
    Matrix eigenVectors = new DenseMatrix(desiredRank, numCols);
    List<Double> eigenValues = new ArrayList<Double>();
    String outputEigenVectorPath =  parsedArgs.get("--output");
   
    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPathString,
                                                           outputTmpPathString,
View Full Code Here

    values[1][0] = 0.0;
    values[2][0] = 0.0;
    values[1][1] = 0.0;
    values[2][2] = 0.0;

    Matrix dataset = new DenseMatrix(values);
    this.trainer.train(labelset, dataset);
    assertFalse(this.trainer.getModel().classify(dataset.getColumn(3)));
    assertTrue(this.trainer.getModel().classify(dataset.getColumn(0)));
  }
View Full Code Here

    return v;
  }
 
  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
   
    for (int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k, w, Math.log(pseudocount));
      }
     
      logTotals[k] = Math.log(total);
    }
   
View Full Code Here

   *         singular values) have been found.
   */
  public TrainingState solve(Matrix corpus,
                             int desiredRank) {
    int cols = corpus.numCols();
    Matrix eigens = new DenseMatrix(desiredRank, cols);
    List<Double> eigenValues = new ArrayList<Double>();
    log.info("Finding " + desiredRank + " singular vectors of matrix with " + corpus.numRows() + " rows, via Hebbian");
    /**
     * The corpusProjections matrix is a running cache of the residual projection of each corpus vector against all
     * of the previously found singular vectors.  Without this, if multiple passes over the data is made (per
     * singular vector), recalculating these projections eventually dominates the computational complexity of the
     * solver.
     */
    Matrix corpusProjections = new DenseMatrix(corpus.numRows(), desiredRank);
    TrainingState state = new TrainingState(eigens, corpusProjections);
    for (int i = 0; i < desiredRank; i++) {
      Vector currentEigen = new DenseVector(cols);
      Vector previousEigen = null;
      while (hasNotConverged(currentEigen, corpus, state)) {
View Full Code Here

    DistributedRowMatrix corpus = TestDistributedRowMatrix.randomDistributedMatrix(500,
        450, 400, 10, 10.0, symmetric, "testdata");
    corpus.configure(new JobConf());
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    int desiredRank = 30;
    Matrix eigenVectors = new DenseMatrix(desiredRank, corpus.numCols());
    List<Double> eigenValues = new ArrayList<Double>();
    solver.solve(corpus, desiredRank, eigenVectors, eigenValues, symmetric);
    assertOrthonormal(eigenVectors);
    assertEigen(eigenVectors, corpus, eigenVectors.numRows() / 2, 0.01, symmetric);
  }
View Full Code Here

    return v;
  }

  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics,numWords);
    double[] logTotals = new double[numTopics];
    for(int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for(int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k,w,Math.log(pseudocount));
      }

      logTotals[k] = Math.log(total);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.DenseMatrix

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.