"--cleansvd", "true"
};
// Run the Lanczos solver job with the argument array assembled just above.
new DistributedLanczosSolver().new DistributedLanczosSolverJob().run(args);
// Location under the current output directory where the clean eigenvectors are read from later.
Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
// Holder for the first run's eigenvectors, one per row.
// NOTE(review): the row count 6 presumably mirrors the rank requested for this run - confirm.
Matrix eigenVectors = new DenseMatrix(6, corpus.numCols());
// Eigenvalues collected from the first run's clean eigenvectors.
Collection<Double> eigenvalues = Lists.newArrayList();
// Second solver run: point output and temp at fresh test directories so this run's
// results are written separately from whatever `output` referred to before.
output = getTestTempDirPath("output2");
tmp = getTestTempDirPath("tmp2");
// Same "distMatrix" input under testData, declared as 10 x 9, non-symmetric,
// with clean-SVD enabled and a requested rank of 7.
args = new String[] {
"-i", new Path(testData, "distMatrix").toString(),
"-o", output.toString(),
"--tempDir", tmp.toString(),
"--numRows", "10",
"--numCols", "9",
"--rank", "7",
"--symmetric", "false",
"--cleansvd", "true"
};
new DistributedLanczosSolver().new DistributedLanczosSolverJob().run(args);
// Clean-eigenvector location under the second run's output directory.
Path cleanEigenvectors2 = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
// Holder for the second run's eigenvectors; 7 rows, matching the --rank value above.
Matrix eigenVectors2 = new DenseMatrix(7, corpus.numCols());
// Default Hadoop configuration used to read the sequence files below.
Configuration conf = new Configuration();
// Filled later with eigenvalues taken from cleanEigenvectors2.
Collection<Double> newEigenValues = Lists.newArrayList();
// Row cursor into eigenVectors; also serves as the count of vectors read.
int i = 0;
for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors, conf)) {
// Each stored value is cast to NamedVector; its name is passed to the EigenVector helpers below.
NamedVector v = (NamedVector) value.get();
eigenVectors.assignRow(i, v);
log.info(v.getName());
// Only keep the eigenvalue when the cos-angle error derived from the name is below 1e-3.
if(EigenVector.getCosAngleError(v.getName()) < 1.0e-3) {
eigenvalues.add(EigenVector.getEigenValue(v.getName()));
}
i++;
}
// i counts every vector read, not only those that passed the error threshold above.
assertEquals("number of clean eigenvectors", 3, i);
// Reset the row cursor before filling eigenVectors2 from cleanEigenvectors2.
i = 0;
for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors2, conf)) {
NamedVector v = (NamedVector) value.get();
log.info(v.getName());
eigenVectors2.assignRow(i, v);
// Every eigenvalue is recorded here; no cos-angle error filter is applied in this loop.
newEigenValues.add(EigenVector.getEigenValue(v.getName()));
i++;
}
Collection<Integer> oldEigensFound = Lists.newArrayList();
for(int row = 0; row < eigenVectors.numRows(); row++) {
Vector oldEigen = eigenVectors.viewRow(row);
if(oldEigen == null) {
break;
}
for(int newRow = 0; newRow < eigenVectors2.numRows(); newRow++) {
Vector newEigen = eigenVectors2.viewRow(newRow);
if(newEigen != null) {
if(oldEigen.dot(newEigen) > 0.9) {
oldEigensFound.add(row);
break;
}