// Optional positional arguments, each falling back to a default when absent:
//   args[2] -> clusterInput   (input location of the initial cluster centers, default "")
//   args[3] -> output         (default ""; its use lies outside this excerpt)
//   args[4] -> numIterations  (default 1)
// NOTE(review): Integer.parseInt throws NumberFormatException if args[4] is not a
// valid integer; nothing here guards against that.
final String clusterInput = (args.length > 2 ? args[2] : "");
final String output = (args.length > 3 ? args[3] : "");
final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);
// File data source named "Centers": reads the initial cluster centers from
// clusterInput using PointInFormat.
FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
// Pin this source to a degree of parallelism of 1.
// NOTE(review): presumably because the set of centers is small - confirm.
initialClusterPoints.setDegreeOfParallelism(1);
// Bulk iteration named "K-Means Loop": its input is the initial centers and it is
// limited to at most numIterations iterations.
BulkIteration iteration = new BulkIteration("K-Means Loop");
iteration.setInput(initialClusterPoints);
iteration.setMaximumNumberOfIterations(numIterations);
// File data source named "Data Points": reads the data points from dataPointInput
// using PointInFormat. This source feeds the step function of the iteration.
FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");
// Cross operator "Compute Distances": pairs the data points (input 1) with the
// iteration's partial solution (input 2) and applies ComputeDistance to them.
CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
.input1(dataPoints)
.input2(iteration.getPartialSolution())
.name("Compute Distances")
.build();
// Reduce operator "Find Nearest Centers": applies FindNearestCenter to the output
// of the distance computation.
// NOTE(review): the builder arguments (IntValue.class, 0) are presumably the key
// type and the key field index - confirm against ReduceOperator.builder.
ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
.input(computeDistance)
.name("Find Nearest Centers")
.build();
// Reduce operator "Recompute Center Positions": applies RecomputeClusterCenter to
// the output of the nearest-center step, built with the same (IntValue.class, 0)
// arguments as the reduce above.
ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
.input(findNearestClusterCenters)
.name("Recompute Center Positions")
.build();
// Close the loop: the recomputed centers become the next partial solution of the
// iteration.
iteration.setNextPartialSolution(recomputeClusterCenter);
// Second file data source over the same dataPointInput, named "Data Points 2".
// It is a separate source object from dataPoints (which feeds the loop body) and is
// used as input 1 of the final distance computation that follows.
FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
// compute distance of points to final clusters
CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
.input1(dataPoints2)
.input2(iteration)