public OutlierResult run(Database database) throws IllegalStateException {
Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
for(DBID id : relation.iterDBIDs()) {
List<DistanceResultPair<D>> neighbors = rangeQuery.getRangeForDBID(id, rmax);
// build list of critical distances
ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
{
for(int i = 0; i < neighbors.size(); i++) {
DistanceResultPair<D> r = neighbors.get(i);
if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
continue;
}
cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i));
final double ri = r.getDistance().doubleValue() / alpha;
if(ri <= rmax.doubleValue()) {
cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE));
}
}
}
Collections.sort(cdist);
// fill the gaps to have fast lookups of number of neighbors at a given
// distance.
int lastk = 0;
for(DoubleIntPair c : cdist) {
if(c.second == Integer.MIN_VALUE) {
c.second = lastk;
}
else {
lastk = c.second;
}
}
interestingDistances.put(id, cdist);
if(progressPreproc != null) {
progressPreproc.incrementProcessed(logger);
}
}
if(progressPreproc != null) {
progressPreproc.ensureCompleted(logger);
}
// LOCI main step
FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for(DBID id : relation.iterDBIDs()) {
final List<DoubleIntPair> cdist = interestingDistances.get(id);
final double maxdist = cdist.get(cdist.size() - 1).first;
final int maxneig = cdist.get(cdist.size() - 1).second;
double maxmdefnorm = 0.0;
double maxnormr = 0;
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(id, range);
// Ensure the set is sorted. Should be a no-op with most indexes.
Collections.sort(maxneighbors);
// For any critical distance, compute the normalized MDEF score.
for(DoubleIntPair c : cdist) {
// Only start when minimum size is fulfilled
if (c.second < nmin) {
continue;
}
final double r = c.first;
final double alpha_r = alpha * r;
// compute n(p_i, \alpha * r) from list (note: alpha_r is different from c!)
final int n_alphar = elementsAtRadius(cdist, alpha_r);
// compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
MeanVariance mv_n_r_alpha = new MeanVariance();
for(DistanceResultPair<D> ne : maxneighbors) {
// Stop at radius r
if(ne.getDistance().doubleValue() > r) {
break;
}
int rn_alphar = elementsAtRadius(interestingDistances.get(ne.getDBID()), alpha_r);
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
final double nhat_r_alpha = mv_n_r_alpha.getMean();
final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
// Redundant divisions removed.
final double mdef = (nhat_r_alpha - n_alphar); // / nhat_r_alpha;
final double sigmamdef = sigma_nhat_r_alpha; // / nhat_r_alpha;
final double mdefnorm = mdef / sigmamdef;
if(mdefnorm > maxmdefnorm) {
maxmdefnorm = mdefnorm;
maxnormr = r;
}
}
}
else {
// FIXME: when nmin was not fulfilled - what is the proper value then?
maxmdefnorm = 1.0;
maxnormr = maxdist;
}
mdef_norm.putDouble(id, maxmdefnorm);
mdef_radius.putDouble(id, maxnormr);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
progressLOCI.incrementProcessed(logger);
}
}
if(progressLOCI != null) {
progressLOCI.ensureCompleted(logger);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));