// Distance query for the relation, using the distance function returned by getDistanceFunction().
final DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
// Two-step progress ("Prepare histogram." / "Build histogram."); only created when the logger is verbose, otherwise null.
final StepProgress stepprog = logger.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
// determine binning ranges.
// Replaced below by the exact or sampled min/max; left in its initial state when neither mode is selected.
DoubleMinMax gminmax = new DoubleMinMax();
// Cluster by labels
// The resulting clusters drive both the in-cluster and the other-cluster passes below.
Collection<Cluster<Model>> split = (new ByLabelClustering()).run(database).getAllClusters();
// global in-cluster min/max
// Fed with each object's in-cluster minimum and maximum distance.
DoubleMinMax giminmax = new DoubleMinMax();
// global other-cluster min/max
// Fed with each object's other-cluster minimum and maximum distance.
DoubleMinMax gominmax = new DoubleMinMax();
// in-cluster distances
// One sample per object: its minimum, maximum and (max - min style) getDiff() value, respectively.
MeanVariance mimin = new MeanVariance();
MeanVariance mimax = new MeanVariance();
MeanVariance midif = new MeanVariance();
// other-cluster distances
// Same three per-object statistics, for distances to objects of all other clusters.
MeanVariance momin = new MeanVariance();
MeanVariance momax = new MeanVariance();
MeanVariance modif = new MeanVariance();
// Histogram
// Each bin holds a pair of counters; the increments defined further down add to the
// first component for in-cluster distances and to the second for other-cluster distances.
final AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>> histogram;
if(stepprog != null) {
stepprog.beginStep(1, "Prepare histogram.", logger);
}
if(exact) {
// Fixed-range histogram spanning the min/max returned by exactMinMax.
gminmax = exactMinMax(relation, distFunc);
histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
}
else if(sampling) {
// Fixed-range histogram spanning the min/max returned by sampleMinMax.
// NOTE(review): a sampled range may not cover every distance seen later — confirm how the histogram treats out-of-range values.
gminmax = sampleMinMax(relation, distFunc);
histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
}
else {
// No range is determined up front; only the number of bins is given.
// Presumably FlexiHistogram adapts its range to the data — TODO confirm.
histogram = FlexiHistogram.LongSumLongSumHistogram(numbin);
}
if(stepprog != null) {
stepprog.beginStep(2, "Build histogram.", logger);
}
// Per-object progress (total = relation.size()); only created when the logger is verbose, otherwise null.
final FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), logger) : null;
// iterate per cluster
// Constant increments reused for every histogram update:
// incFirst = (1, 0) counts an in-cluster distance, incSecond = (0, 1) an other-cluster distance.
final Pair<Long, Long> incFirst = new Pair<Long, Long>(1L, 0L);
final Pair<Long, Long> incSecond = new Pair<Long, Long>(0L, 1L);
// Build the histogram and the per-object statistics, one cluster at a time.
// For every object id1 two passes are made:
// 1. distances to the other members of its own cluster (counted via incFirst),
// 2. distances to the members of every other cluster (counted via incSecond).
// The per-object min/max/diff of each pass are fed into the MeanVariance
// accumulators, and the per-object min and max into the global min/max trackers.
for(Cluster<?> c1 : split) {
  for(DBID id1 : c1.getIDs()) {
    // in-cluster distances
    DoubleMinMax iminmax = new DoubleMinMax();
    for(DBID id2 : c1.getIDs()) {
      // skip the point itself.
      // Compare by value, not by reference: the DBID objects handed out by two
      // separate iterations are not guaranteed to be the same instance, and a
      // failed identity check would record the zero self-distance.
      if(id1.equals(id2)) {
        continue;
      }
      double d = distFunc.distance(id1, id2).doubleValue();
      histogram.aggregate(d, incFirst);
      iminmax.put(d);
    }
    // aggregate
    // NOTE(review): if c1 contains only id1, nothing was put into iminmax, so
    // whatever an empty DoubleMinMax reports is folded into the statistics
    // below — confirm that this is intended.
    mimin.put(iminmax.getMin());
    mimax.put(iminmax.getMax());
    midif.put(iminmax.getDiff());
    // min/max
    giminmax.put(iminmax.getMin());
    giminmax.put(iminmax.getMax());
    // other-cluster distances
    DoubleMinMax ominmax = new DoubleMinMax();
    for(Cluster<?> c2 : split) {
      // Identity is sufficient here: both loops iterate the same collection.
      if(c2 == c1) {
        continue;
      }
      for(DBID id2 : c2.getIDs()) {
        // skip the point itself (shouldn't happen though)
        // Value comparison for the same reason as in the in-cluster pass.
        if(id1.equals(id2)) {
          continue;
        }
        double d = distFunc.distance(id1, id2).doubleValue();
        histogram.aggregate(d, incSecond);
        ominmax.put(d);
      }
    }
    // aggregate
    // NOTE(review): with a single cluster ominmax stays empty as well; same
    // caveat as for the in-cluster statistics above.
    momin.put(ominmax.getMin());
    momax.put(ominmax.getMax());
    modif.put(ominmax.getDiff());
    // min/max
    gominmax.put(ominmax.getMin());
    gominmax.put(ominmax.getMax());
    // One progress tick per processed object.
    if(progress != null) {
      progress.incrementProcessed(logger);
    }
  }
}