HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();
// choose initial center uniformly at random
int index = randomO.nextInt(data.numInstances());
m_ClusterCentroids.add(data.instance(index));
DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index),
data.numAttributes(), true);
initC.put(hk, null);
int iteration = 0;
int remainingInstances = data.numInstances() - 1;
if (m_NumClusters > 1) {
// proceed with selecting the rest
// distances to the initial randomly chose center
double[] distances = new double[data.numInstances()];
double[] cumProbs = new double[data.numInstances()];
for (int i = 0; i < data.numInstances(); i++) {
distances[i] =
m_DistanceFunction.distance(data.instance(i),
m_ClusterCentroids.instance(iteration));
}
// now choose the remaining cluster centers
for (int i = 1; i < m_NumClusters; i++) {
// distances converted to probabilities
double[] weights = new double[data.numInstances()];
System.arraycopy(distances, 0, weights, 0, distances.length);
Utils.normalize(weights);
double sumOfProbs = 0;
for (int k = 0; k < data.numInstances(); k++) {
sumOfProbs += weights[k];
cumProbs[k] = sumOfProbs;
}
cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no rounding issues
// choose a random instance
double prob = randomO.nextDouble();
for (int k = 0; k < cumProbs.length; k++) {
if (prob < cumProbs[k]) {
Instance candidateCenter = data.instance(k);
hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
if (!initC.containsKey(hk)) {
initC.put(hk, null);
m_ClusterCentroids.add(candidateCenter);
} else {
// we shouldn't get here because any instance that is a duplicate of