Package de.lmu.ifi.dbs.elki.algorithm.outlier

Source Code of de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI$Parameterizer

package de.lmu.ifi.dbs.elki.algorithm.outlier;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableRecordStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;

/**
* Fast Outlier Detection Using the "Local Correlation Integral".
*
* Exact implementation only, not aLOCI.
*
* TODO: add aLOCI
*
* Outlier detection using multiple epsilon neighborhoods.
*
* Based on: S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:
* LOCI: Fast Outlier Detection Using the Local Correlation Integral. In: Proc.
* 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003.
*
* @author Erich Schubert
*
* @apiviz.has RangeQuery
*
* @param <O> Object type
* @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(LOCI.class);

  /**
   * Parameter to specify the maximum radius of the neighborhood to be
   * considered, must be suitable to the distance function specified.
   */
  public static final OptionID RMAX_ID = OptionID.getOrCreateOptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");

  /**
   * Parameter to specify the minimum neighborhood size
   */
  public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");

  /**
   * Parameter to specify the averaging neighborhood scaling.
   */
  public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");

  /**
   * Holds the value of {@link #RMAX_ID}.
   */
  private D rmax;

  /**
   * Holds the value of {@link #NMIN_ID}.
   */
  private int nmin;

  /**
   * Holds the value of {@link #ALPHA_ID}.
   */
  private double alpha;

  /**
   * Constructor.
   *
   * @param distanceFunction Distance function
   * @param rmax Maximum radius
   * @param nmin Minimum neighborhood size
   * @param alpha Alpha value
   */
  public LOCI(DistanceFunction<? super O, D> distanceFunction, D rmax, int nmin, double alpha) {
    super(distanceFunction);
    this.rmax = rmax;
    this.nmin = nmin;
    this.alpha = alpha;
  }

  /**
   * Runs the algorithm in the timed evaluation part.
   */
  @Override
  public OutlierResult run(Database database) throws IllegalStateException {
    Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
    DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);

    FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
    // LOCI preprocessing step
    WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
    for(DBID id : relation.iterDBIDs()) {
      List<DistanceResultPair<D>> neighbors = rangeQuery.getRangeForDBID(id, rmax);
      // build list of critical distances
      ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
      {
        for(int i = 0; i < neighbors.size(); i++) {
          DistanceResultPair<D> r = neighbors.get(i);
          if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
            continue;
          }
          cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i));
          final double ri = r.getDistance().doubleValue() / alpha;
          if(ri <= rmax.doubleValue()) {
            cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE));
          }
        }
      }
      Collections.sort(cdist);
      // fill the gaps to have fast lookups of number of neighbors at a given
      // distance.
      int lastk = 0;
      for(DoubleIntPair c : cdist) {
        if(c.second == Integer.MIN_VALUE) {
          c.second = lastk;
        }
        else {
          lastk = c.second;
        }
      }

      interestingDistances.put(id, cdist);
      if(progressPreproc != null) {
        progressPreproc.incrementProcessed(logger);
      }
    }
    if(progressPreproc != null) {
      progressPreproc.ensureCompleted(logger);
    }
    // LOCI main step
    FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
    WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class, Double.class);
    WritableDataStore<Double> mdef_norm = store.getStorage(0, Double.class);
    WritableDataStore<Double> mdef_radius = store.getStorage(1, Double.class);
    for(DBID id : relation.iterDBIDs()) {
      double maxmdefnorm = 0.0;
      double maxnormr = 0;
      List<DoubleIntPair> cdist = interestingDistances.get(id);
      double maxdist = cdist.get(cdist.size() - 1).first;
      int maxneig = cdist.get(cdist.size() - 1).second;
      if(maxneig >= nmin) {
        D range = distFunc.getDistanceFactory().fromDouble(maxdist);
        // Compute the largest neighborhood we will need.
        List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(id, range);
        for(DoubleIntPair c : cdist) {
          double alpha_r = alpha * c.first;
          // compute n(p_i, \alpha * r) from list
          int n_alphar = 0;
          for(DoubleIntPair c2 : cdist) {
            if(c2.first <= alpha_r) {
              n_alphar = c2.second;
            }
            else {
              break;
            }
          }
          // compute \hat{n}(p_i, r, \alpha)
          double nhat_r_alpha = 0.0;
          double sigma_nhat_r_alpha = 0.0;
          // Build the sublist from maxneighbors to match the radius c.first
          List<DistanceResultPair<D>> rneighbors = null;
          for(int i = nmin; i < maxneighbors.size(); i++) {
            DistanceResultPair<D> ne = maxneighbors.get(i);
            if(ne.getDistance().doubleValue() > c.first) {
              rneighbors = maxneighbors.subList(1, i);
              break;
            }
          }
          if(rneighbors == null) {
            continue;
          }
          for(DistanceResultPair<D> rn : rneighbors) {
            List<DoubleIntPair> rncdist = interestingDistances.get(rn.getDBID());
            int rn_alphar = 0;
            for(DoubleIntPair c2 : rncdist) {
              if(c2.first <= alpha_r) {
                rn_alphar = c2.second;
              }
              else {
                break;
              }
            }
            nhat_r_alpha = nhat_r_alpha + rn_alphar;
            sigma_nhat_r_alpha = sigma_nhat_r_alpha + (rn_alphar * rn_alphar);
          }
          // finalize average and deviation
          nhat_r_alpha = nhat_r_alpha / rneighbors.size();
          sigma_nhat_r_alpha = Math.sqrt(sigma_nhat_r_alpha / rneighbors.size() - nhat_r_alpha * nhat_r_alpha);
          double mdef = 1.0 - (n_alphar / nhat_r_alpha);
          double sigmamdef = sigma_nhat_r_alpha / nhat_r_alpha;
          double mdefnorm = mdef / sigmamdef;

          if(mdefnorm > maxmdefnorm) {
            maxmdefnorm = mdefnorm;
            maxnormr = c.first;
          }
        }
      }
      else {
        // FIXME: when nmin was never fulfilled - what is the proper value then?
        maxmdefnorm = 1.0;
        maxnormr = maxdist;
      }
      mdef_norm.put(id, maxmdefnorm);
      mdef_radius.put(id, maxnormr);
      if(progressLOCI != null) {
        progressLOCI.incrementProcessed(logger);
      }
    }
    if(progressLOCI != null) {
      progressLOCI.ensureCompleted(logger);
    }
    Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
    // TODO: actually provide min and max?
    OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(Double.NaN, Double.NaN, 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
    return result;
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
    protected D rmax = null;

    protected int nmin = 0;

    protected double alpha = 0.5;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
      final DistanceParameter<D> rmaxP = new DistanceParameter<D>(RMAX_ID, distanceFactory);
      if(config.grab(rmaxP)) {
        rmax = rmaxP.getValue();
      }

      final IntParameter nminP = new IntParameter(NMIN_ID, 20);
      if(config.grab(nminP)) {
        nmin = nminP.getValue();
      }

      final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5);
      if(config.grab(alphaP)) {
        alpha = alphaP.getValue();
      }
    }

    @Override
    protected LOCI<O, D> makeInstance() {
      return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.