* Performs the LoOP algorithm on the given database.
*/
public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
final double sqrt2 = Math.sqrt(2.0);
StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O, D> knnComp = pair.getFirst();
KNNQuery<O, D> knnReach = pair.getSecond();
// Assert we got something
if(knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
if(knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
// Probabilistic distances
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
{// computing PRDs
if(stepprog != null) {
stepprog.beginStep(3, "Computing pdists", logger);
}
FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
final KNNResult<D> neighbors = knnReach.getKNNForDBID(id, kreach);
double sqsum = 0.0;
// use the first kreach neighbors as reference set
int ks = 0;
for(DistanceResultPair<D> neighbor : neighbors) {
if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
double d = neighbor.getDistance().doubleValue();
sqsum += d * d;
ks++;
if(ks >= kreach) {
break;
}
}
}
double pdist = lambda * Math.sqrt(sqsum / ks);
pdists.putDouble(id, pdist);
if(prdsProgress != null) {
prdsProgress.incrementProcessed(logger);
}
}
}
// Compute PLOF values.
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
MeanVariance mvplof = new MeanVariance();
{// compute LOOP_SCORE of each db object
if(stepprog != null) {
stepprog.beginStep(4, "Computing PLOF", logger);
}
FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
final KNNResult<D> neighbors = knnComp.getKNNForDBID(id, kcomp);
MeanVariance mv = new MeanVariance();
// use the first kcomp neighbors as comparison set.
int ks = 0;
for(DistanceResultPair<D> neighbor1 : neighbors) {
if(objectIsInKNN || !neighbor1.getDBID().equals(id)) {
mv.put(pdists.doubleValue(neighbor1.getDBID()));
ks++;
if(ks >= kcomp) {
break;
}
}
}
double plof = Math.max(pdists.doubleValue(id) / mv.getMean(), 1.0);
if(Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
plofs.putDouble(id, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
if(progressPLOFs != null) {
progressPLOFs.incrementProcessed(logger);
}
}
}
double nplof = lambda * Math.sqrt(mvplof.getMean());
if(logger.isDebugging()) {
logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
}
// Compute final LoOP values.
WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
{// compute LOOP_SCORE of each db object
if(stepprog != null) {
stepprog.beginStep(5, "Computing LoOP scores", logger);
}
FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
for(DBID id : relation.iterDBIDs()) {
loops.putDouble(id, NormalDistribution.erf((plofs.doubleValue(id) - 1) / (nplof * sqrt2)));
if(progressLOOPs != null) {
progressLOOPs.incrementProcessed(logger);
}
}
}
if(stepprog != null) {
stepprog.setCompleted(logger);
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();