// String locations beneath outputDir used by this clustering stage.
String vectorsFolder = outputDir + "/tfidf-vectors";
String canopyCentroids = outputDir + "/canopy-centroids";
String clusterOutput = outputDir + "/clusters/";

// Stage 1: canopy pass over the vectors, written under canopyCentroids.
// NOTE(review): the positional arguments after the distance measure look like
// the two canopy thresholds followed by two boolean switches; confirm their
// exact meaning against the CanopyDriver.run signature of the Mahout version in use.
Path tfidfVectorsPath = new Path(vectorsFolder);
Path canopyOutputPath = new Path(canopyCentroids);
CanopyDriver.run(
    conf,
    tfidfVectorsPath,
    canopyOutputPath,
    new ManhattanDistanceMeasure(),
    3000.0,
    2000.0,
    false,
    false);

// Stage 2: fuzzy k-means over the same vectors, taking the "clusters-0"
// directory under the canopy output as its initial clusters and writing to
// clusterOutput. A different distance measure (Tanimoto) is used here than in
// the canopy stage (Manhattan).
// NOTE(review): the numeric and boolean arguments are presumably convergence
// delta, max iterations, fuzziness, then clustering/emit flags, a threshold
// and a sequential-mode flag; verify against FuzzyKMeansDriver.run.
Path seedClustersPath = new Path(canopyCentroids, "clusters-0");
Path fuzzyOutputPath = new Path(clusterOutput);
FuzzyKMeansDriver.run(
    conf,
    tfidfVectorsPath,
    seedClustersPath,
    fuzzyOutputPath,
    new TanimotoDistanceMeasure(),
    0.01,
    20,
    2.0f,
    true,
    true,
    0.0,
    false);
SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(