IntArrayList columns, int iterations)
{
// Prepare selected matrix
final DoubleMatrix2D selected = input.viewSelection(null, columns.toArray())
.copy();
final IntIntMap selectedToInput = new IntIntOpenHashMap(selected.columns());
for (int i = 0; i < columns.size(); i++)
{
selectedToInput.put(i, columns.get(i));
}
// Prepare result holders
List<IntArrayList> result = Lists.newArrayList();
List<IntArrayList> previousResult = null;
for (int i = 0; i < partitions; i++)
{
result.add(new IntArrayList(selected.columns()));
}
for (int i = 0; i < selected.columns(); i++)
{
result.get(i % partitions).add(i);
}
// Matrices for centroids and document-centroid similarities
final DoubleMatrix2D centroids = new DenseDoubleMatrix2D(selected.rows(),
partitions).assign(selected.viewPart(0, 0, selected.rows(), partitions));
final DoubleMatrix2D similarities = new DenseDoubleMatrix2D(partitions,
selected.columns());
// Run at most the requested number of K-means iterations (stops early once assignments no longer change)
for (int it = 0; it < iterations; it++)
{
// Update centroids
for (int i = 0; i < result.size(); i++)
{
final IntArrayList cluster = result.get(i);
for (int k = 0; k < selected.rows(); k++)
{
double sum = 0;
for (int j = 0; j < cluster.size(); j++)
{
sum += selected.get(k, cluster.get(j));
}
centroids.setQuick(k, i, sum / cluster.size());
}
}
if (it < iterations - 1)
{
previousResult = result;
result = Lists.newArrayList();
for (int i = 0; i < partitions; i++)
{
result.add(new IntArrayList(selected.columns()));
}
}
// Calculate similarity to centroids
centroids.zMult(selected, similarities, 1, 0, true, false);
// Assign documents to the nearest centroid
for (int c = 0; c < similarities.columns(); c++)
{
int maxRow = 0;
double max = similarities.get(0, c);
for (int r = 1; r < similarities.rows(); r++)
{
if (max < similarities.get(r, c))
{
max = similarities.get(r, c);
maxRow = r;
}
}
result.get(maxRow).add(c);
}
if (ObjectUtils.equals(previousResult, result))
{
// Unchanged result
break;
}
}
// Map the results back to the global indices
for (Iterator<IntArrayList> it = result.iterator(); it.hasNext();)
{
final IntArrayList cluster = it.next();
if (cluster.isEmpty())
{
it.remove();
}
else
{
for (int j = 0; j < cluster.size(); j++)
{
cluster.set(j, selectedToInput.get(cluster.get(j)));
}
}
}
return result;