// Runs one map -> combine -> reduce pass with k + 1 clusters and prepares the
// reference clusters/points used to check the result.
// NOTE(review): k, points, mapper and conf are defined outside this excerpt.
System.out.println("K = " + k);
// Seed k + 1 clusters from the first k + 1 points (deterministic, not random);
// cluster i is centered on points.get(i) and is given id i.
// NOTE(review): assumes points holds at least k + 1 entries - confirm against the caller.
DummyOutputCollector<Text,KMeansInfo> collector = new DummyOutputCollector<Text,KMeansInfo>();
List<Cluster> clusters = new ArrayList<Cluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = points.get(i).get();
Cluster cluster = new Cluster(vec, i);
// (disabled) add the center so the centroid will be correct upon output:
// cluster.addPoint(cluster.getCenter());
clusters.add(cluster);
}
// hand the seeded clusters to the mapper before any point is mapped
mapper.config(clusters);
// map the data: every point goes through the mapper with an empty Text key;
// output is gathered in collector, and the last argument (presumably the reporter) is null
for (VectorWritable point : points) {
mapper.map(new Text(), point, collector, null);
}
// now combine the data: run the combiner once per key gathered from the map
// step, writing the combined values to collector2
KMeansCombiner combiner = new KMeansCombiner();
DummyOutputCollector<Text,KMeansInfo> collector2 = new DummyOutputCollector<Text,KMeansInfo>();
for (String key : collector.getKeys()) {
combiner.reduce(new Text(key), collector.getValue(key).iterator(), collector2, null);
}
// now reduce the data: the reducer is configured with conf and the same
// seeded clusters, then run once per key gathered from the combine step
KMeansReducer reducer = new KMeansReducer();
reducer.configure(conf);
reducer.config(clusters);
DummyOutputCollector<Text,Cluster> collector3 = new DummyOutputCollector<Text,Cluster>();
for (String key : collector2.getKeys()) {
reducer.reduce(new Text(key), collector2.getValue(key).iterator(), collector3, new DummyReporter());
}
// expect one reducer output entry per seeded cluster (k + 1 in total)
// NOTE(review): the message says "map results" but the size checked is the reducer output (collector3)
assertEquals("Number of map results", k + 1, collector3.getData().size());
// compute the reference result after one iteration and compare:
// rebuild fresh clusters from the same first k + 1 points, independent of
// the instances handed to the mapper and reducer above
List<Cluster> reference = new ArrayList<Cluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = points.get(i).get();
reference.add(new Cluster(vec, i));
}
// unwrap each VectorWritable into its underlying Vector
List<Vector> pointsVectors = new ArrayList<Vector>();
for(VectorWritable point : points)
pointsVectors.add(point.get());