// Convert the data points to a sparse matrix.
SparseMatrix contexts = Matrices.asSparseMatrix(contextSet);
// Cluster the context set.
LOG.info("Clustering term: " + senseName);
Assignments assignments = (numClusters > 0)
? clustering.cluster(contexts, numClusters, props)
: clustering.cluster(contexts, props);
LOG.info("Finished clustering term: " + senseName);
SparseDoubleVector[] centroids = assignments.getSparseCentroids();
// Add the centroids to the wordSpace map: the first centroid is stored under the term itself, later ones under "term-index".
for (int index = 0; index < centroids.length; ++index) {
String sense = (index > 0)
? senseName + "-" + index
: senseName;
wordSpace.put(sense, centroids[index]);
}
LOG.info("Finished creating centroids for term: " + senseName);
// Empty out the stored contexts to free up memory for later processes.
contextSet.clear();
// If the reporter is null, avoid making any report.
if (reporter == null)
return;
// Generate the secondary context labels for each data point.
String[] contextLabels = reporter.contextLabels(senseName);
if (contextLabels.length == 0)
return;
LOG.info("Making assignment report: " + senseName);
// Report the assignments for each clustered data point. Note that some
// data points might not have been clustered (Cluto based clustering
// does this on occasion) so we must check for the number of assignments
// first.
for (int i = 0; i < assignments.size(); ++i)
if (assignments.get(i).assignments().length > 0)
reporter.updateAssignment(senseName, contextLabels[i],
assignments.get(i).assignments()[0]);
LOG.info("Finished making assignment report: " + senseName);
}