* @param distFunc the distance function
* @param clusterOrder the cluster order to extract the clusters from
* @return the extracted clusters
*/
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), logger) : null;
int processed = 0;
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
for(Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
V object = database.get(entry.getID());
BitSet preferenceVector = entry.getReachability().getCommonPreferenceVector();
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
if(parallelClusters == null) {
parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
Pair<BitSet, ArrayModifiableDBIDs> cluster = null;
for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
V c_centroid = DatabaseUtil.centroid(database, c.second, c.first);
PreferenceVectorBasedCorrelationDistance dist = distFunc.correlationDistance(object, c_centroid, preferenceVector, preferenceVector);
if(dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
double d = distFunc.weightedDistance(object, c_centroid, dist.getCommonPreferenceVector());
if(d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
if(cluster == null) {
cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
entryToClusterMap.put(entry.getID(), cluster);
if(progress != null) {
progress.setProcessed(++processed, logger);
}
}
if(progress != null) {
progress.ensureCompleted(logger);
}
if(logger.isDebuggingFiner()) {
StringBuffer msg = new StringBuffer("Step 0");
for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {