// Log which value of k this pass of the test is exercising.
System.out.println("testKFuzzyKMeansMRJob k= " + k);
// Seed k + 1 initial clusters (ids 0..k) from the first k + 1 points, each
// passed through tweakValue. The points are taken in list order; nothing in
// this loop samples at random.
// NOTE(review): tweakValue is defined elsewhere; presumably it perturbs the
// point so a center does not coincide exactly with its seed -- confirm.
List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = tweakValue(points.get(i));
SoftCluster cluster = new SoftCluster(vec, i);
// Register the cluster's own center as a point. The second argument is
// presumably a weight/membership value -- TODO confirm against SoftCluster.
cluster.addPoint(cluster.getCenter(), 1);
clusterList.add(cluster);
}
// Map phase: configure the mapper with the seeded clusters and feed it every
// point, gathering everything it emits in mapCollector.
FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
mapper.config(clusterList);
DummyOutputCollector<Text, FuzzyKMeansInfo> mapCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
for (Vector point : points) {
// The input key is an empty Text. The last argument is passed as null
// (presumably the Hadoop Reporter, unused by this test -- confirm).
mapper.map(new Text(), point, mapCollector,
null);
}
// After mapping, recompute the center of every seeded cluster in clusterList.
for (SoftCluster softCluster : clusterList) {
softCluster.recomputeCenter();
}
// Combine phase: for each key the mapper emitted, hand all of that key's
// values to the combiner and gather its output in combinerCollector.
DummyOutputCollector<Text, FuzzyKMeansInfo> combinerCollector = new DummyOutputCollector<Text, FuzzyKMeansInfo>();
FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
// NOTE(review): the configure() call below is disabled in the original; the
// combiner is used here without any explicit configuration step.
//combiner.configure();
for (String key : mapCollector.getKeys()) {
List<FuzzyKMeansInfo> values = mapCollector.getValue(key);
// Final argument is null (presumably the Reporter -- confirm).
combiner.reduce(new Text(key), values.iterator(), combinerCollector,
null);
}
// Reduce phase: configure the reducer with the same clusterList used by the
// mapper, then reduce each key of the combiner output into reducerCollector.
DummyOutputCollector<Text, SoftCluster> reducerCollector = new DummyOutputCollector<Text, SoftCluster>();
FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
reducer.config(clusterList);
for (String key : combinerCollector.getKeys()) {
List<FuzzyKMeansInfo> values = combinerCollector.getValue(key);
// Final argument is null (presumably the Reporter -- confirm).
reducer
.reduce(new Text(key), values.iterator(), reducerCollector, null);
}
// Gather the clusters produced by the reducer. Only the first value stored
// under each key is kept.
List<SoftCluster> reducerCluster = new ArrayList<SoftCluster>();
for (String key : reducerCollector.getKeys()) {
List<SoftCluster> values = reducerCollector.getValue(key);
reducerCluster.add(values.get(0));
}
// Recompute the center of each reducer-produced cluster before it is used
// to configure the cluster mapper.
for (SoftCluster softCluster : reducerCluster) {
softCluster.recomputeCenter();
}
// Cluster-mapper phase: configure the cluster mapper with the reducer's
// clusters and run every point through it, collecting FuzzyKMeansOutput
// values in clusterMapperCollector.
DummyOutputCollector<Text, FuzzyKMeansOutput> clusterMapperCollector = new DummyOutputCollector<Text, FuzzyKMeansOutput>();
FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
clusterMapper.config(reducerCluster);
for (Vector point : points) {
// Empty Text key; final argument is null (presumably the Reporter -- confirm).
clusterMapper.map(new Text(), point,
clusterMapperCollector, null);
}
// Build the reference result to compare the MapReduce output against: run
// one iteration of referenceFuzzyKMeans over the same points.
// The reference clusters are seeded the same way as clusterList (first k + 1
// points through tweakValue, ids 0..k), but without the addPoint call.
List<SoftCluster> reference = new ArrayList<SoftCluster>();
for (int i = 0; i < k + 1; i++) {
Vector vec = tweakValue(points.get(i));
reference.add(new SoftCluster(vec, i));
}
// pointClusterInfo starts empty and is handed to referenceFuzzyKMeans,
// presumably to be filled with per-point cluster assignments -- confirm.
Map<String, String> pointClusterInfo = new HashMap<String, String>();
// The distance measure is passed by class name (Euclidean). The trailing
// 0.001 and 1 are presumably the convergence threshold and the iteration
// count -- TODO confirm against referenceFuzzyKMeans' signature.
referenceFuzzyKMeans(points, reference, pointClusterInfo,
EuclideanDistanceMeasure.class.getName(), 0.001, 1);