private static IntPair pickFirstTwo(Matrix dataPoints,
SimilarityFunction simFunc,
int[] weights, double[] inverseSimilarities) {
double OPT_1 = 0; // optimal 1-means cost.
DoubleVector centerOfMass = new DenseVector(dataPoints.columns());
double sum = 0d;
int rows = dataPoints.rows();
int cols = dataPoints.columns();
double[] probs = new double[rows];
int totalWeight = 0;
for (int i = 0; i < rows; i++) {
DoubleVector v = dataPoints.getRowVector(i);
int weight = weights[i];
// Update the center of mass for the entire solution based on this
// data point, scaled by its weight
VectorMath.add(centerOfMass, new ScaledDoubleVector(v, weight));
totalWeight += weight;
}
// Then rescale the center of mass based on the total weight
for (int j = 0; j < cols; j++)
centerOfMass.set(j, centerOfMass.get(j) / totalWeight);
for (int i = 0; i < rows; i++) {
double sim = simFunc.sim(centerOfMass, dataPoints.getRowVector(i));
sim = invertSim(sim);
inverseSimilarities[i] = sim;
OPT_1 += sim * weights[i];
}
// Compute the probability mass of picking the first mean
for (int i = 0; i < rows; i++) {
probs[i] = (OPT_1 + totalWeight * inverseSimilarities[i])
/ (2 * totalWeight * OPT_1);
sum += probs[i];
}
// Normalize the relative mass assigned to each point to create a true
// probability distribution that sums to 1.
for (int i = 0; i < rows; i++)
probs[i] = probs[i] / sum;
// Select the first center with probability proportional to its
// dissimilarity from the center of mass
int c1 = selectWithProb(probs);
DoubleVector y = dataPoints.getRowVector(c1);
// Keep the inverse similarity from the first center to the center of
// mass
double invSimFromCtrToC1 = invertSim(simFunc.sim(y, centerOfMass));
// Recalculate inverseSimilarities and probs for selecting the second point. Also
// reset the probability of picking the first center again to 0
sum = 0.0;
probs[c1] = 0;
for (int i = 0; i < rows; i++) {
// Skip assigning any probability mass to the first center's index
// since it has already been selected
if (i == c1)
continue;
double sim = invertSim(simFunc.sim(dataPoints.getRowVector(i), y))
* weights[i];
inverseSimilarities[i] = sim;
probs[i] = sim / ( OPT_1 + totalWeight * invSimFromCtrToC1);
sum += probs[i];
}
// Normalize the probability masses to be probabilities
for (int i = 0; i < rows; i++)
probs[i] = probs[i] / sum;
// Select a second center
int c2 = selectWithProb(probs);
DoubleVector z = dataPoints.getRowVector(c2);
inverseSimilarities[c1] = 0;
inverseSimilarities[c2] = 0;
// For each of the non-center points, assign its initial inverse
// similarity (i.e., distance) to be the minimum to either of the two
// centers
for (int i = 0; i < rows; i++) {
DoubleVector v = dataPoints.getRowVector(i);
double sim1 = simFunc.sim(v, y); // center 1
sim1 = invertSim(sim1);
double sim2 = simFunc.sim(v, z); // center 2
sim2 = invertSim(sim2);
inverseSimilarities[i] = Math.min(sim1, sim2);