public OutlierResult run(Database database) throws IllegalStateException {
Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
for(DBID id : relation.iterDBIDs()) {
List<DistanceResultPair<D>> neighbors = rangeQuery.getRangeForDBID(id, rmax);
// build list of critical distances
ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
{
for(int i = 0; i < neighbors.size(); i++) {
DistanceResultPair<D> r = neighbors.get(i);
if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
continue;
}
cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i));
final double ri = r.getDistance().doubleValue() / alpha;
if(ri <= rmax.doubleValue()) {
cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE));
}
}
}
Collections.sort(cdist);
// fill the gaps to have fast lookups of number of neighbors at a given
// distance.
int lastk = 0;
for(DoubleIntPair c : cdist) {
if(c.second == Integer.MIN_VALUE) {
c.second = lastk;
}
else {
lastk = c.second;
}
}
interestingDistances.put(id, cdist);
if(progressPreproc != null) {
progressPreproc.incrementProcessed(logger);
}
}
if(progressPreproc != null) {
progressPreproc.ensureCompleted(logger);
}
// LOCI main step
FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class, Double.class);
WritableDataStore<Double> mdef_norm = store.getStorage(0, Double.class);
WritableDataStore<Double> mdef_radius = store.getStorage(1, Double.class);
for(DBID id : relation.iterDBIDs()) {
double maxmdefnorm = 0.0;
double maxnormr = 0;
List<DoubleIntPair> cdist = interestingDistances.get(id);
double maxdist = cdist.get(cdist.size() - 1).first;
int maxneig = cdist.get(cdist.size() - 1).second;
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(id, range);
for(DoubleIntPair c : cdist) {
double alpha_r = alpha * c.first;
// compute n(p_i, \alpha * r) from list
int n_alphar = 0;
for(DoubleIntPair c2 : cdist) {
if(c2.first <= alpha_r) {
n_alphar = c2.second;
}
else {
break;
}
}
// compute \hat{n}(p_i, r, \alpha)
double nhat_r_alpha = 0.0;
double sigma_nhat_r_alpha = 0.0;
// Build the sublist from maxneighbors to match the radius c.first
List<DistanceResultPair<D>> rneighbors = null;
for(int i = nmin; i < maxneighbors.size(); i++) {
DistanceResultPair<D> ne = maxneighbors.get(i);
if(ne.getDistance().doubleValue() > c.first) {
rneighbors = maxneighbors.subList(1, i);
break;
}
}
if(rneighbors == null) {
continue;
}
for(DistanceResultPair<D> rn : rneighbors) {
List<DoubleIntPair> rncdist = interestingDistances.get(rn.getDBID());
int rn_alphar = 0;
for(DoubleIntPair c2 : rncdist) {
if(c2.first <= alpha_r) {
rn_alphar = c2.second;
}
else {
break;
}
}
nhat_r_alpha = nhat_r_alpha + rn_alphar;
sigma_nhat_r_alpha = sigma_nhat_r_alpha + (rn_alphar * rn_alphar);
}
// finalize average and deviation
nhat_r_alpha = nhat_r_alpha / rneighbors.size();
sigma_nhat_r_alpha = Math.sqrt(sigma_nhat_r_alpha / rneighbors.size() - nhat_r_alpha * nhat_r_alpha);
double mdef = 1.0 - (n_alphar / nhat_r_alpha);
double sigmamdef = sigma_nhat_r_alpha / nhat_r_alpha;
double mdefnorm = mdef / sigmamdef;
if(mdefnorm > maxmdefnorm) {
maxmdefnorm = mdefnorm;
maxnormr = c.first;
}
}
}
else {
// FIXME: when nmin was never fulfilled - what is the proper value then?
maxmdefnorm = 1.0;
maxnormr = maxdist;
}
mdef_norm.put(id, maxmdefnorm);
mdef_radius.put(id, maxnormr);
if(progressLOCI != null) {
progressLOCI.incrementProcessed(logger);
}
}
if(progressLOCI != null) {
progressLOCI.ensureCompleted(logger);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
// TODO: actually provide min and max?
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(Double.NaN, Double.NaN, 0.0, Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);