// Collects the centroids emitted by the mapper run under MRUnit's MapDriver.
for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> pair : mapDriver.run()) {
  mapperCentroids.add(pair.getSecond().getCentroid());
}
// Clusters the data using local batch StreamingKMeans.
StreamingKMeans batchClusterer =
    new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
        mapDriver.getConfiguration().getInt("estimatedNumMapClusters", -1), DISTANCE_CUTOFF);
batchClusterer.cluster(syntheticData.getFirst());
List<Centroid> batchCentroids = Lists.newArrayList();
for (Vector v : batchClusterer) {
  batchCentroids.add((Centroid) v);
}
// Clusters the data using point-by-point StreamingKMeans.
// The cluster estimate is (1 << NUM_DIMENSIONS) * log(NUM_DATA_POINTS), roughly the
// k * log(n) sketch size used by streaming k-means.
StreamingKMeans perPointClusterer =
    new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
        (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS), DISTANCE_CUTOFF);
for (Centroid datapoint : syntheticData.getFirst()) {
  perPointClusterer.cluster(datapoint);
}
List<Centroid> perPointCentroids = Lists.newArrayList();
for (Vector v : perPointClusterer) {
  perPointCentroids.add((Centroid) v);
}