Package org.apache.mahout.math.hadoop.decomposer

Examples of org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver


    // upon verification, we have to aim to overshoot and then discard
    // unnecessary vectors later
    int overshoot = (int) ((double) clusters * OVERSHOOT_MULTIPLIER);
    List<Double> eigenValues = new ArrayList<Double>(overshoot);
    Matrix eigenVectors = new DenseMatrix(overshoot, numDims);
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  L.getRowPath(),
                  new Path(outputTmp, "lanczos-" + (System.nanoTime() & 0xFF)),
                  L.numRows(),
                  L.numCols(),
                  true,
View Full Code Here


                                                               DistributedRowMatrix input,
                                                               int numEigenVectors,
                                                               int overshoot,
                                                               List<Double> eigenValues,
                                                               Matrix eigenVectors, Path tmp) throws IOException {
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Path seqFiles = new Path(tmp, "eigendecomp-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  input.getRowPath(),
                  new Path(tmp, "lanczos-" + (System.nanoTime() & 0xFF)),
                  input.numRows(),
                  input.numCols(),
                  true,
View Full Code Here

  public void testKmeansSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 15;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration conf = new Configuration();
    solver.setConf(conf);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    solver.run(testData, output, tmp, sampleData.size(), sampleDimension, false, desiredRank, 0.5, 0.0, true);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);

    // build in-memory data matrix A
    Matrix a = new DenseMatrix(sampleData.size(), sampleDimension);
    int i = 0;
View Full Code Here

  public void testKmeansDSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 13;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // Run EigenVerificationJob from within DistributedLanczosSolver.run(...)
    solver.run(testData, output, tmp, sampleData.size(), sampleDimension, false, desiredRank, 0.5, 0.0, false);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);

    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    JobConf conf = new JobConf(config);
View Full Code Here

  public void testKmeansDSVD2() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 13;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // call EigenVerificationJob separately
    solver.run(testData, output, tmp, sampleData.size(), sampleDimension, false, desiredRank);
    Path rawEigenvectors = new Path(output, DistributedLanczosSolver.RAW_EIGENVECTORS);
    JobConf conf = new JobConf(config);
    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5, 0.0, true, conf);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
View Full Code Here

  // @Test
  public void testKmeansSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration conf = new Configuration();
    solver.setConf(conf);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    int desiredRank = 15;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank, 0.5, 0.0, true);
    Path cleanEigenvectors = new Path(output,
        EigenVerificationJob.CLEAN_EIGENVECTORS);
   
    // build in-memory data matrix A
View Full Code Here

  // @Test
  public void testKmeansDSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // Run EigenVerificationJob from within DistributedLanczosSolver.run(...)
    int desiredRank = 13;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank, 0.5, 0.0, false);
   
    Path cleanEigenvectors = new Path(output,
        EigenVerificationJob.CLEAN_EIGENVECTORS);
   
View Full Code Here

  // @Test
  public void testKmeansDSVD2() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // call EigenVerificationJob separately
    int desiredRank = 13;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank);
    Path rawEigenvectors = new Path(output,
        DistributedLanczosSolver.RAW_EIGENVECTORS);
    Configuration conf = new Configuration(config);
    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5,
View Full Code Here

      L.setConf(new Configuration(conf));

      // eigendecomposition (step 3)
      int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
      LanczosState state = new LanczosState(L, eigenrank,
          new DistributedLanczosSolver().getInitialVector(L));

      DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
      U.setConf(new Configuration(conf));
      List<Double> eigenValues = Lists.newArrayList();
      for(int i=0; i<eigenrank; i++) {
View Full Code Here

                                                               DistributedRowMatrix input,
                                                               LanczosState state,
                                                               int numEigenVectors,
                                                               int overshoot,
                                                               Path tmp) throws IOException {
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Path seqFiles = new Path(tmp, "eigendecomp-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  state,
                  overshoot,
                  true,
                  seqFiles.toString());
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.