Examples of org.apache.mahout.math.hadoop.DistributedRowMatrix

org.apache.mahout.math.hadoop.DistributedRowMatrix
path must already contain an already created SequenceFile! DistributedRowMatrix m = new DistributedRowMatrix("path/to/vector/sequenceFile", "tmp/path", 10000000, 250000); m.configure(new JobConf()); // now if we want to multiply a vector by this matrix, it's dimension must equal the row dimension of this // matrix. If we want to timesSquared() a vector by this matrix, its dimension must equal the column dimension // of the matrix. Vector v = new DenseVector(250000); // now the following operation will be done via a M/R pass via Hadoop. Vector w = m.timesSquared(v);

    JobConf conf = new JobConf(config);
    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5, 0.0, true, conf);
    Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);


    // now multiply the testdata matrix and the eigenvector matrix
    DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
    svdT.configure(conf);
    DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
    a.configure(conf);
    DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
    sData.configure(conf);


    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
    // now run the KMeans job
    KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);
    // run ClusterDumper
    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
    clusterDumper.printClusters(termDictionary);
  }

View Full Code Here

                       int numCols, 
                       Vector b, 
                       Preconditioner preconditioner, 
                       int maxIterations, 
                       double maxError) {
    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, tempPath, numRows, numCols);
    matrix.setConf(conf);
        
    return solve(matrix, b, preconditioner, maxIterations, maxError);
  }

View Full Code Here

   */


  private LanczosState doTestDistributedLanczosSolver(boolean symmetric,
      int desiredRank, boolean hdfsBackedState)
      throws IOException {
    DistributedRowMatrix corpus = getCorpus(symmetric);
    Configuration conf = new Configuration();
    corpus.setConf(conf);
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Vector intitialVector = solver.getInitialVector(corpus);
    LanczosState state;
    if(hdfsBackedState) {
      HdfsBackedLanczosState hState = new HdfsBackedLanczosState(corpus,

View Full Code Here

    counter++;
    return state;
  }


  public void doTestResumeIteration(boolean symmetric) throws IOException {
    DistributedRowMatrix corpus = getCorpus(symmetric);
    Configuration conf = new Configuration();
    corpus.setConf(conf);
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    int rank = 10;
    Vector intitialVector = solver.getInitialVector(corpus);
    HdfsBackedLanczosState state = new HdfsBackedLanczosState(corpus, rank,
        intitialVector, new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter));

View Full Code Here

    createTransitionMatrix.setCombinerClass(MergeVectorsCombiner.class);
    createTransitionMatrix.getConfiguration().set(NUM_VERTICES_PARAM, String.valueOf(numVertices));
    createTransitionMatrix.getConfiguration().set(STAYING_PROBABILITY_PARAM, String.valueOf(stayingProbability));
    createTransitionMatrix.waitForCompletion(true);


    DistributedRowMatrix transitionMatrix = new DistributedRowMatrix(transitionMatrixPath, getTempPath(),
        numVertices, numVertices);
    transitionMatrix.setConf(getConf());


    Vector ranking = new DenseVector(numVertices).assign(1.0 / numVertices);
    Vector dampingVector = createDampingVector(numVertices, stayingProbability);


    /* power method: iterative transition-matrix times ranking-vector multiplication */
    while (numIterations-- > 0) {
      ranking = transitionMatrix.times(ranking).plus(dampingVector);
    }


    persistVector(getConf(), getTempPath(RANK_VECTOR), ranking);


    Job vertexWithPageRank = prepareJob(vertexIndexPath, getOutputPath(), SequenceFileInputFormat.class,

View Full Code Here

                             int numRows,
                             int numCols,
                             boolean isSymmetric,
                             int desiredRank,
                             String outputEigenVectorPathString) throws IOException {
    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
    matrix.setConf(new Configuration(originalConfig));
    LanczosState state = new LanczosState(matrix, desiredRank, getInitialVector(matrix));
    return runJob(originalConfig, state, desiredRank, isSymmetric, outputEigenVectorPathString);
  }

View Full Code Here

                 Path workingDirPath,
                 int numRows,
                 int numCols,
                 boolean isSymmetric,
                 int desiredRank) throws Exception {
    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
    matrix.setConf(new Configuration(getConf() != null ? getConf() : new Configuration()));


    LanczosState state;
    if(workingDirPath == null) {
      state = new LanczosState(matrix, desiredRank, getInitialVector(matrix));
    } else {
      HdfsBackedLanczosState hState =
          new HdfsBackedLanczosState(matrix, desiredRank, getInitialVector(matrix), workingDirPath);
      hState.setConf(matrix.getConf());
      state = hState;
    }
    solve(state, desiredRank, isSymmetric);


    Path outputEigenVectorPath = new Path(outputPath, RAW_EIGENVECTORS);

View Full Code Here

    this.minEigenValue = minEigenValue;


    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(conf, eigenInput, inMemory);
    }
    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tempOut, 1, 1);
    c.setConf(conf);
    corpus = c;


    // set up eigenverifier and orthoverifier TODO: allow multithreaded execution


    eigenVerifier = new SimpleEigenVerifier();

View Full Code Here

    }
    return eigenMetaData;
  }


  private void prepareEigens(Configuration conf, Path eigenInput, boolean inMemory) {
    DistributedRowMatrix eigens = new DistributedRowMatrix(eigenInput, tmpOut, 1, 1);
    eigens.setConf(conf);
    if (inMemory) {
      List<Vector> eigenVectors = Lists.newArrayList();
      for (MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }

View Full Code Here

    this.maxError = maxError;
    if (eigenInput != null && eigensToVerify == null) {
      prepareEigens(new Configuration(conf), eigenInput, inMemory);
    }


    DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1);
    c.setConf(new Configuration(conf));
    corpus = c;


    eigenVerifier = new SimpleEigenVerifier();


    Map<MatrixSlice, EigenStatus> eigenMetaData = verifyEigens();

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.mahout.math.hadoop.DistributedRowMatrix

org.apache.hadoop.conf.Configuration

org.apache.hadoop.fs.FileSystem

org.apache.hadoop.fs.Path

org.apache.hadoop.mapred.JobConf

org.apache.hadoop.mapreduce.Job

org.apache.mahout.clustering.spectral.AffinityMatrixInputJob

org.apache.mahout.clustering.spectral.common.AffinityMatrixInputJob

org.apache.mahout.clustering.spectral.common.VectorMatrixMultiplicationJob

org.apache.mahout.clustering.spectral.eigencuts.EigencutsDriver

org.apache.mahout.clustering.spectral.kmeans.SpectralKMeansDriver

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.