return Collections.singletonList(mostSimilarCommittee);
}
else {
// Make a copy of the row because we will be changing the vector as we
// assign it to more committees
SparseDoubleVector copy = new CompactSparseVector(row);
// let C be a list of clusters initially empty
List<Integer> assignedClusters = new ArrayList<Integer>();
// let S be the top-200 similar clusters to e
MultiMap<Double,Duple<Committee,Integer>> mostSimilarCommittees =
new BoundedSortedMultiMap<Double,Duple<Committee,Integer>>(200);
// for (Committee c : committees)
for (int i = 0; i < committees.size(); ++i) {
Committee c = committees.get(i);
mostSimilarCommittees.put(
Similarity.cosineSimilarity(row, c.centroid()),
new Duple<Committee,Integer>(c, i));
}
// System.out.println("Most similar committees: " +
// mostSimilarCommittees);
// while S is not empty {
// let c be the most similar cluster to e
for (Duple<Committee,Integer> p : mostSimilarCommittees.values()) {
Committee c = p.x;
Integer comId = p.y;
SparseDoubleVector centroid = c.centroid();
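// if the similarity(e, c) < SIGMA, exit the loop
// (the check below uses 0 as the threshold and compares against the
// reduced copy of e rather than the original row)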
if (Similarity.cosineSimilarity(copy, centroid) < 0) {
// NOTE: we intentionally don't exit the loop
continue;
}
// if c is not similar to any cluster in C {
boolean isSimilar = false;
for (Integer committeeId : assignedClusters) {
Committee c2 = committees.get(committeeId);
if (Similarity.cosineSimilarity(c2.centroid(), centroid)
>= softClusteringThresh) {
isSimilar = true;
break;
}
}
if (!isSimilar) {
// assign e to c
assignedClusters.add(comId);
// remove from e its features that overlap with the features of
// c; remove c from S
for (int i : centroid.getNonZeroIndices()) {
copy.set(i, 0);
}
}
}
return assignedClusters;
}