final Set<Document> partitionDocuments = documentsByPartition.get(partition);
final int partitionDocumentsCount = partitionDocuments.size();
double partitionFMeasure = 0;
double partitionPrecision = 0;
double partitionRecall = 0;
Cluster bestFMeasureCluster = null;
for (Cluster cluster : clusters)
{
final List<Document> clusterDocuments = cluster.getAllDocuments();
if (cluster.isOtherTopics() || clusterDocuments.size() == 0)
{
continue;
}
final Set<Document> commonDocuments = Sets.newHashSet(partitionDocuments);
commonDocuments.retainAll(clusterDocuments);
final double precision = commonDocuments.size()
/ (double) clusterDocuments.size();
final double recall = commonDocuments.size()
/ (double) partitionDocumentsCount;
final double fMeasure = MathUtils.harmonicMean(precision, recall);
if (fMeasure > partitionFMeasure)
{
partitionFMeasure = fMeasure;
partitionPrecision = precision;
partitionRecall = recall;
bestFMeasureCluster = cluster;
}
}
recallSum += partitionRecall * partitionDocumentsCount;
precisionSum += partitionPrecision * partitionDocumentsCount;
fMeasureSum += partitionFMeasure * partitionDocumentsCount;
partitionDocumentsCountSum += partitionDocumentsCount;
recallByPartition.put(partition, partitionRecall);
precisionByPartition.put(partition, partitionPrecision);
fMeasureByPartition.put(partition, partitionFMeasure);
if (bestFMeasureCluster != null)
{
bestFMeasureCluster.setAttribute(BEST_F_MEASURE_PARTITION, partition);
}
}
// Dividing by partitionDocumentsCountSum rather than by the number of documents
// because partitionDocumentsCountSum can be larger than the number of documents