// be the case. Note, though, that the final clusters with document
// vectors will be in a different file.
// Location of the initial clusters handed to k-means; it is passed as the
// clusters-in argument to KMeansDriver.run below.
// NOTE(review): presumably this is where the canopy step writes its
// clusters -- confirm that canopyOutputPath resolves to output + "/canopy".
Path kmeansClusters = new Path(output + "/canopy/clusters-0");
// Step 1: canopy clustering over canopyInputPath using cosine distance and
// the t1/t2 thresholds.
// NOTE(review): the trailing true/false look like Mahout's runClustering and
// runSequential flags -- verify against the Mahout version in use.
try {
CanopyDriver.run(conf, canopyInputPath, canopyOutputPath, new CosineDistanceMeasure(), t1, t2, true, false);
} catch (Exception e) {
// Log with the full stack trace and return 1 so the caller can tell a
// canopy failure apart from a k-means failure (which returns 2).
LOG.error("Failure running mahout canopy.", e);
return 1;
}
// Step 2: k-means, seeded with the clusters at kmeansClusters.
// The convergence delta and the maximum number of iterations affect how
// long k-means takes to run and how many iterations we attempt before
// giving up. The values used here (.5 and 20 in the call below) seem to
// give reasonably good results.
// NOTE(review): as with canopy, the trailing true/false are presumably the
// runClustering and runSequential flags -- confirm.
try {
KMeansDriver.run(conf, kmeansInputPath, kmeansClusters, kmeansOutputPath, new CosineDistanceMeasure(), .5, 20, true, false);
} catch (Exception e) {
// Log with the full stack trace and return 2, distinct from the canopy
// failure code (1) above.
LOG.error("Failure running mahout kmeans.", e);
return 2;
}