// Dimensionality of the samples, read from the first sample vector (requires sampleData to be non-empty).
int sampleDimension = sampleData.get(0).get().size();
// Run the solver over testData, handing it the sample count, the sample dimension and desiredRank.
// NOTE(review): the remaining positional arguments (false, 0.5, 0.0, true) are solver options whose
// meaning is not visible here -- confirm against the solver's run(...) signature before changing them.
solver.run(testData, output, tmp, sampleData.size(), sampleDimension, false, desiredRank, 0.5, 0.0, true);
// Path under the output directory from which the cleaned eigenvectors are read further down
// (presumably produced by the solver run above -- verify).
Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
// Materialise the samples as a dense in-memory matrix A:
// one row per sample, sampleDimension columns.
Matrix a = new DenseMatrix(sampleData.size(), sampleDimension);
int i;
for (i = 0; i < sampleData.size(); i++) {
  // row i of A is the vector wrapped by the i-th sample
  a.assignRow(i, sampleData.get(i).get());
}
// Extract the cleaned eigenvectors into P, one eigenvector per column.
// P is multiplied as A * P below, so its row count has to equal A's column count; it is therefore
// sized from sampleDimension rather than a hard-coded literal that only fits one particular data set.
Matrix p = new DenseMatrix(sampleDimension, desiredRank - 1);
FileSystem fs = FileSystem.get(cleanEigenvectors.toUri(), conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, cleanEigenvectors, conf);
try {
  // Instantiate key/value holders of whatever Writable types the sequence file declares.
  Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
  Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
  // i is reused as the column cursor into P
  i = 0;
  while (reader.next(key, value)) {
    // the values are treated as VectorWritable; each wrapped vector becomes column i of P
    Vector v = ((VectorWritable) value).get();
    p.assignColumn(i, v);
    System.out.println("k=" + key.toString() + " V=" + AbstractCluster.formatVector(v, termDictionary));
    // allocate a fresh value holder for the next record instead of reusing the one just consumed
    value = reader.getValueClass().asSubclass(Writable.class).newInstance();
    i++;
  }
} finally {
  // always release the reader, even if reading or instantiation failed
  reader.close();
}
// Project the samples onto the eigenvectors: sData = A * P
Matrix sData = a.times(p);
// Persist the projection as (row index, row vector) records so clustering can run against it
Path svdData = new Path(output, "svddata");
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, svdData, IntWritable.class, VectorWritable.class);
try {
  // a single key/value pair is reused for every appended record
  VectorWritable projectedRow = new VectorWritable();
  IntWritable rowKey = new IntWritable();
  int row = 0;
  while (row < sData.numRows()) {
    rowKey.set(row);
    projectedRow.set(sData.getRow(row));
    writer.append(rowKey, projectedRow);
    row++;
  }
} finally {
  // close the writer whether or not every row was appended
  writer.close();
}