Package de.lmu.ifi.dbs.elki.algorithm.outlier

Source Code of de.lmu.ifi.dbs.elki.algorithm.outlier.SOD$Parameterizer

package de.lmu.ifi.dbs.elki.algorithm.outlier;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.BitSet;
import java.util.Iterator;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.iterator.IterableIterator;
import de.lmu.ifi.dbs.elki.utilities.iterator.IterableUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;

/**
* @author Arthur Zimek
*
* @apiviz.has SODModel oneway - - computes
* @apiviz.has SharedNearestNeighborSimilarityFunction
*
* @param <V> the type of NumberVector handled by this Algorithm
*/
// todo arthur comment
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
public class SOD<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(SOD.class);

  /**
   * Parameter to specify the number of shared nearest neighbors to be
   * considered for learning the subspace properties., must be an integer
   * greater than 0.
   */
  public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of shared nearest neighbors to be considered for learning the subspace properties.");

  /**
   * Parameter to indicate the multiplier for the discriminance value for
   * discerning small from large variances.
   */
  public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");

  /**
   * Holds the value of {@link #KNN_ID}.
   */
  private int knn;

  /**
   * Holds the value of {@link #ALPHA_ID}.
   */
  private double alpha;

  /**
   * The similarity function.
   */
  private SharedNearestNeighborSimilarityFunction<V> similarityFunction;

  /**
   * Constructor with parameters.
   *
   * @param knn knn value
   * @param alpha Alpha parameter
   * @param similarityFunction Shared nearest neighbor similarity function
   */
  public SOD(int knn, double alpha, SharedNearestNeighborSimilarityFunction<V> similarityFunction) {
    super();
    this.knn = knn;
    this.alpha = alpha;
    this.similarityFunction = similarityFunction;
  }

  /**
   * Performs the SOD algorithm on the given database.
   *
   * @param relation Data relation to process
   */
  public OutlierResult run(Relation<V> relation) throws IllegalStateException {
    SimilarityQuery<V, IntegerDistance> snnInstance = similarityFunction.instantiate(relation);
    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null;
    WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
    DoubleMinMax minmax = new DoubleMinMax();
    for(Iterator<DBID> iter = relation.iterDBIDs(); iter.hasNext();) {
      DBID queryObject = iter.next();
      if(progress != null) {
        progress.incrementProcessed(logger);
      }
      DBIDs knnList = getKNN(relation, snnInstance, queryObject).asDBIDs();
      SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(queryObject));
      sod_models.put(queryObject, model);
      minmax.put(model.getSod());
    }
    if(progress != null) {
      progress.ensureCompleted(logger);
    }
    // combine results.
    Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    OutlierResult sodResult = new OutlierResult(meta, new SODProxyScoreResult(models, relation.getDBIDs()));
    // also add the models.
    sodResult.addChildResult(models);
    return sodResult;
  }

  /**
   * Provides the k nearest neighbors in terms of the shared nearest neighbor
   * distance.
   * <p/>
   * The query object is excluded from the knn list.
   *
   * @param database the database holding the objects
   * @param snnInstance similarity function
   * @param queryObject the query object for which the kNNs should be determined
   * @return the k nearest neighbors in terms of the shared nearest neighbor
   *         distance without the query object
   */
  private KNNList<DoubleDistance> getKNN(Relation<V> database, SimilarityQuery<V, IntegerDistance> snnInstance, DBID queryObject) {
    // similarityFunction.getPreprocessor().getParameters();
    KNNHeap<DoubleDistance> kNearestNeighbors = new KNNHeap<DoubleDistance>(knn, new DoubleDistance(Double.POSITIVE_INFINITY));
    for(Iterator<DBID> iter = database.iterDBIDs(); iter.hasNext();) {
      DBID id = iter.next();
      if(!id.equals(queryObject)) {
        double distance = 1.0 / snnInstance.similarity(queryObject, id).doubleValue();
        kNearestNeighbors.add(new DoubleDistanceResultPair(distance, id));
      }
    }
    return kNearestNeighbors.toKNNList();
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  /**
   *
   *
   * @author Arthur Zimek
   * @param <O> the type of DatabaseObjects handled by this Result
   */
  // TODO: arthur comment
  public static class SODModel<O extends NumberVector<O, ?>> implements TextWriteable, Comparable<SODModel<?>> {
    private double[] centerValues;

    private O center;

    private double[] variances;

    private double expectationOfVariance;

    private BitSet weightVector;

    private double sod;

    /**
     * Initialize SOD Model
     *
     * @param database Database
     * @param neighborhood Neighborhood
     * @param alpha Alpha value
     * @param queryObject Query object
     */
    public SODModel(Relation<O> database, DBIDs neighborhood, double alpha, O queryObject) {
      // TODO: store database link?
      centerValues = new double[DatabaseUtil.dimensionality(database)];
      variances = new double[centerValues.length];
      for(DBID id : neighborhood) {
        O databaseObject = database.get(id);
        for(int d = 0; d < centerValues.length; d++) {
          centerValues[d] += databaseObject.doubleValue(d + 1);
        }
      }
      for(int d = 0; d < centerValues.length; d++) {
        centerValues[d] /= neighborhood.size();
      }
      for(DBID id : neighborhood) {
        O databaseObject = database.get(id);
        for(int d = 0; d < centerValues.length; d++) {
          // distance
          double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
          // variance
          variances[d] += distance * distance;
        }
      }
      expectationOfVariance = 0;
      for(int d = 0; d < variances.length; d++) {
        variances[d] /= neighborhood.size();
        expectationOfVariance += variances[d];
      }
      expectationOfVariance /= variances.length;
      weightVector = new BitSet(variances.length);
      for(int d = 0; d < variances.length; d++) {
        if(variances[d] < alpha * expectationOfVariance) {
          weightVector.set(d, true);
        }
      }
      center = DatabaseUtil.assumeVectorField(database).getFactory().newInstance(centerValues);
      sod = subspaceOutlierDegree(queryObject, center, weightVector);
    }

    /**
     *
     *
     * @param queryObject
     * @param center
     * @param weightVector
     * @return sod value
     */
    private double subspaceOutlierDegree(O queryObject, O center, BitSet weightVector) {
      final DimensionsSelectingEuclideanDistanceFunction df = new DimensionsSelectingEuclideanDistanceFunction(weightVector);
      double distance = df.distance(queryObject, center).doubleValue();
      distance /= weightVector.cardinality();
      return distance;
    }

    /**
     * Return the SOD of the point.
     *
     * @return sod value
     */
    public double getSod() {
      return this.sod;
    }

    @Override
    public void writeToText(TextWriterStream out, String label) {
      out.inlinePrint(label + "=" + this.sod);
      out.commentPrintLn(this.getClass().getSimpleName() + ":");
      out.commentPrintLn("relevant attributes (counting starts with 0): " + this.weightVector.toString());
      out.commentPrintLn("center of neighborhood: " + out.normalizationRestore(center).toString());
      out.commentPrintLn("subspace outlier degree: " + this.sod);
      out.commentPrintSeparator();
    }

    @Override
    public int compareTo(SODModel<?> o) {
      return Double.compare(this.getSod(), o.getSod());
    }

  }

  /**
   * Proxy class that converts a model result to an actual SOD score result.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  protected static class SODProxyScoreResult implements Relation<Double> {
    /**
     * Model result this is a proxy for.
     */
    Relation<SODModel<?>> models;
   
    /**
     * The IDs we are defined for
     */
    DBIDs dbids;

    /**
     * Constructor.
     *
     * @param models Models result
     * @param dbids IDs we are defined for
     */
    public SODProxyScoreResult(Relation<SODModel<?>> models, DBIDs dbids) {
      super();
      this.models = models;
      this.dbids = dbids;
    }

    @Override
    public Double get(DBID objID) {
      return models.get(objID).getSod();
    }

    @Override
    public String getLongName() {
      return "Subspace Outlier Degree";
    }

    @Override
    public String getShortName() {
      return "sod-outlier";
    }

    @Override
    public DBIDs getDBIDs() {
      return dbids;
    }
   
    @Override
    public IterableIterator<DBID> iterDBIDs() {
      return IterableUtil.fromIterator(dbids.iterator());
    }

    @Override
    public Database getDatabase() {
      return null; // FIXME
    }

    @Override
    public void set(DBID id, Double val) {
      throw new UnsupportedOperationException();
    }

    @Override
    public void delete(DBID id) {
      throw new UnsupportedOperationException();
    }

    @Override
    public SimpleTypeInformation<Double> getDataTypeInformation() {
      return TypeUtil.DOUBLE;
    }

    @Override
    public int size() {
      return dbids.size();
    }

    @Override
    public ResultHierarchy getHierarchy() {
      return models.getHierarchy();
    }

    @Override
    public void setHierarchy(ResultHierarchy hierarchy) {
      models.setHierarchy(hierarchy);
    }
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
    /**
     * Holds the value of {@link #KNN_ID}.
     */
    private int knn = 1;

    /**
     * Holds the value of {@link #ALPHA_ID}.
     */
    private double alpha = 1.1;

    /**
     * The similarity function.
     */
    private SharedNearestNeighborSimilarityFunction<V> similarityFunction;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0), 1);
      if(config.grab(knnP)) {
        knn = knnP.getValue();
      }

      final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 1.1);
      if(config.grab(alphaP)) {
        alpha = alphaP.getValue();
      }

      Class<SharedNearestNeighborSimilarityFunction<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(SharedNearestNeighborSimilarityFunction.class);
      similarityFunction = config.tryInstantiate(cls);
    }

    @Override
    protected SOD<V> makeInstance() {
      return new SOD<V>(knn, alpha, similarityFunction);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.outlier.SOD$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.