JobConf conf = new JobConf(config);
new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5, 0.0, true, conf);
Path cleanEigenvectors = new Path(output, EigenVerificationJob.CLEAN_EIGENVECTORS);
// now multiply the test data matrix and the eigenvector matrix
DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors, tmp, desiredRank - 1, sampleDimension);
svdT.configure(conf);
DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp, sampleData.size(), sampleDimension);
a.configure(conf);
DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
sData.configure(conf);
// now run the Canopy job to prime kMeans canopies
CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false, false);
// now run the KMeans job
KMeansDriver.run(sData.getRowPath(), new Path(output, "clusters-0"), output, measure, 0.001, 10, true, false);
// run ClusterDumper
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
clusterDumper.printClusters(termDictionary);
}