Package org.apache.mahout.math.decomposer.hebbian

Source Code of org.apache.mahout.math.decomposer.hebbian.TestHebbianSolver

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.math.decomposer.hebbian;

import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;

import org.apache.mahout.math.decomposer.AsyncEigenVerifier;
import org.apache.mahout.math.decomposer.SingularVectorVerifier;
import org.apache.mahout.math.decomposer.SolverTest;
import org.junit.Test;

/**
* This test is woefully inadequate, and also requires tons of memory, because it's part
* unit test, part performance test, and part comparison test (between the Hebbian and Lanczos
* approaches).
* TODO: make better.
*/
public final class TestHebbianSolver extends SolverTest {

  public static long timeSolver(Matrix corpus,
                                double convergence,
                                int maxNumPasses,
                                TrainingState state) {
    return timeSolver(corpus,
                      convergence,
                      maxNumPasses,
                      10,
                      state);
  }

  public static long timeSolver(Matrix corpus,
                                double convergence,
                                int maxNumPasses,
                                int desiredRank,
                                TrainingState state) {
    HebbianUpdater updater = new HebbianUpdater();
    SingularVectorVerifier verifier = new AsyncEigenVerifier();
    HebbianSolver solver = new HebbianSolver(updater,
                                             verifier,
                                             convergence,
                                             maxNumPasses);
    long start = System.nanoTime();
    TrainingState finalState = solver.solve(corpus, desiredRank);
    assertNotNull(finalState);
    state.setCurrentEigens(finalState.getCurrentEigens());
    state.setCurrentEigenValues(finalState.getCurrentEigenValues());
    long time = 0L;
    time += System.nanoTime() - start;
    assertEquals(state.getCurrentEigens().numRows(), desiredRank);
    return time / 1000000L;
  }



  public static long timeSolver(Matrix corpus, TrainingState state) {
    return timeSolver(corpus, state, 10);
  }

  public static long timeSolver(Matrix corpus, TrainingState state, int rank) {
    return timeSolver(corpus, 0.01, 20, rank, state);
  }

  @Test
  public void testHebbianSolver() {
    int numColumns = 800;
    Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
    int rank = 50;
    Matrix eigens = new DenseMatrix(rank, numColumns);
    TrainingState state = new TrainingState(eigens, null);
    long optimizedTime = timeSolver(corpus,
                                    0.00001,
                                    5,
                                    rank,
                                    state);
    eigens = state.getCurrentEigens();
    assertEigen(eigens, corpus, 0.05, false);
    assertOrthonormal(eigens, 1.0e-6);
    System.out.println("Avg solving (Hebbian) time in ms: " + optimizedTime);
  }

  /*
  public void testSolverWithSerialization() throws Exception
  {
    _corpusProjectionsVectorFactory = new DenseMapVectorFactory();
    _eigensVectorFactory = new DenseMapVectorFactory();
   
    timeSolver(TMP_EIGEN_DIR,
               0.001,
               5,
               new TrainingState(null, null));
   
    File eigenDir = new File(TMP_EIGEN_DIR + File.separator + HebbianSolver.EIGEN_VECT_DIR);
    DiskBufferedDoubleMatrix eigens = new DiskBufferedDoubleMatrix(eigenDir, 10);
   
    DoubleMatrix inMemoryMatrix = new HashMapDoubleMatrix(_corpusProjectionsVectorFactory, eigens);
   
    for (Entry<Integer, MapVector> diskEntry : eigens)
    {
      for (Entry<Integer, MapVector> inMemoryEntry : inMemoryMatrix)
      {
        if (diskEntry.getKey() - inMemoryEntry.getKey() == 0)
        {
          assertTrue("vector with index : " + diskEntry.getKey() + " is not the same on disk as in memory",
                     Math.abs(1 - diskEntry.getValue().dot(inMemoryEntry.getValue())) < 1e-6);
        }
        else
        {
          assertTrue("vector with index : " + diskEntry.getKey()
                     + " is not orthogonal to memory vect with index : " + inMemoryEntry.getKey(),
                     Math.abs(diskEntry.getValue().dot(inMemoryEntry.getValue())) < 1e-6);
        }
      }
    }
    File corpusDir = new File(TMP_EIGEN_DIR + File.separator + "corpus");
    corpusDir.mkdir();
    // TODO: persist to disk?
   // DiskBufferedDoubleMatrix.persistChunk(corpusDir, corpus, true);
   // eigens.delete();
   
   // DiskBufferedDoubleMatrix.delete(new File(TMP_EIGEN_DIR));
  }
  */
/*
  public void testHebbianVersusLanczos() throws Exception
  {
    _corpusProjectionsVectorFactory = new DenseMapVectorFactory();
    _eigensVectorFactory = new DenseMapVectorFactory();
    int desiredRank = 200;
    long time = timeSolver(TMP_EIGEN_DIR,
                           0.00001,
                           5,
                           desiredRank,
                           new TrainingState());

    System.out.println("Hebbian time: " + time + "ms");
    File eigenDir = new File(TMP_EIGEN_DIR + File.separator + HebbianSolver.EIGEN_VECT_DIR);
    DiskBufferedDoubleMatrix eigens = new DiskBufferedDoubleMatrix(eigenDir, 10);
   
    DoubleMatrix2D srm = asSparseDoubleMatrix2D(corpus);
    long timeA = System.nanoTime();
    EigenvalueDecomposition asSparseRealDecomp = new EigenvalueDecomposition(srm);
    for (int i=0; i<desiredRank; i++)
      asSparseRealDecomp.getEigenvector(i);
    System.out.println("CommonsMath time: " + (System.nanoTime() - timeA)/TimingConstants.NANOS_IN_MILLI + "ms");
   
   // System.out.println("Hebbian results:");
   // printEigenVerify(eigens, corpus);
   
    DoubleMatrix lanczosEigenVectors = new HashMapDoubleMatrix(new HashMapVectorFactory());
    List<Double> lanczosEigenValues = new ArrayList<Double>();
    LanczosSolver solver = new LanczosSolver();
    solver.solve(corpus, desiredRank*5, lanczosEigenVectors, lanczosEigenValues);
   
    for (TimingSection section : LanczosSolver.TimingSection.values())
    {
      System.out.println("Lanczos " + section.toString() + " = " + (int)(solver.getTimeMillis(section)/1000) + " seconds");
    }
   
   // System.out.println("\nLanczos results:");
   // printEigenVerify(lanczosEigenVectors, corpus);
  }
 
  private DoubleMatrix2D asSparseDoubleMatrix2D(Matrix corpus)
  {
    DoubleMatrix2D result = new DenseDoubleMatrix2D(corpus.numRows(), corpus.numRows());
    for (int i=0; i<corpus.numRows(); i++) {
      for (int j=i; j<corpus.numRows(); j++) {
        double v = corpus.getRow(i).dot(corpus.getRow(j));
        result.set(i, j, v);
        result.set(j, i, v);
      }
    }
    return result;
  }


  public static void printEigenVerify(DoubleMatrix eigens, DoubleMatrix corpus)
  {
    for (Map.Entry<Integer, MapVector> entry : eigens)
    {
      MapVector eigen = entry.getValue();
      MapVector afterMultiply = corpus.timesSquared(eigen);
      double norm = afterMultiply.norm();
      double error = 1 - eigen.dot(afterMultiply) / (eigen.norm() * afterMultiply.norm());
      System.out.println(entry.getKey() + ": error = " + error + ", eVal = " + (norm / eigen.norm()));
    }
  }
    */

TOP

Related Classes of org.apache.mahout.math.decomposer.hebbian.TestHebbianSolver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.