for (int k = 0; k < points.size(); k++) {
  // Run the map -> combine pipeline once for every cluster count from 1 to
  // points.size(); iteration k works with k + 1 clusters.
  DummyOutputCollector<Text, Text> mapOutput = new DummyOutputCollector<Text, Text>();

  // Seed the clusters with the first k + 1 points (taken in list order).
  List<Cluster> seeds = new ArrayList<Cluster>();
  for (int i = 0; i <= k; i++) {
    Cluster seed = new Cluster(points.get(i));
    // add the center so the centroid will be correct upon output
    seed.addPoint(seed.getCenter());
    seeds.add(seed);
  }
  mapper.config(seeds);

  // Map phase: feed every point through the mapper.
  for (Vector point : points) {
    Text emptyKey = new Text();
    Text encodedPoint = new Text(point.asFormatString());
    mapper.map(emptyKey, encodedPoint, mapOutput, null);
  }

  // Combine phase: reduce the mapper output once per emitted key.
  KMeansCombiner combiner = new KMeansCombiner();
  DummyOutputCollector<Text, Text> combined = new DummyOutputCollector<Text, Text>();
  for (String mapKey : mapOutput.getKeys()) {
    Text combinerKey = new Text(mapKey);
    List<Text> mappedValues = mapOutput.getValue(mapKey);
    combiner.reduce(combinerKey, mappedValues.iterator(), combined, null);
  }
  assertEquals("Number of map results", k + 1, combined.getData().size());

  // Verification: every key must hold exactly one combined value of the form
  // "<count>\t<encoded vector>"; accumulate the counts and the vectors.
  int pointCount = 0;
  Vector sum = new DenseVector(2);
  for (String combinedKey : combined.getKeys()) {
    List<Text> entries = combined.getValue(combinedKey);
    assertEquals("too many values", 1, entries.size());
    String[] fields = entries.get(0).toString().split("\t");
    pointCount += Integer.parseInt(fields[0]);
    sum = sum.plus(AbstractVector.decodeVector(fields[1]));
  }

  // All points must be accounted for, both by count and by coordinate sums.
  assertEquals("total points", 9, pointCount);
  assertEquals("point total[0]", 27, (int) sum.get(0));
  assertEquals("point total[1]", 27, (int) sum.get(1));
}
}