Package de.lmu.ifi.dbs.elki.math.linearalgebra.pca

Source Code of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.ProgressiveEigenPairFilter$Parameterizer

package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.ArrayList;
import java.util.List;

import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;

/**
* The ProgressiveEigenPairFilter sorts the eigenpairs in descending order of
* their eigenvalues and marks the first eigenpairs, whose sum of eigenvalues is
* higher than the given percentage of the sum of all eigenvalues as strong
* eigenpairs. In contrast to the PercentageEigenPairFilter, it will use a
* percentage which changes linearly with the subspace dimensionality. This
* makes the parameter more consistent for different dimensionalities and often
* gives better results when clusters of different dimensionality exist, since
* different percentage alpha levels might be appropriate for different
* dimensionalities.
*
* Example calculations of alpha levels:
*
* In a 3D space, a progressive alpha value of 0.5 equals:
*
* - 1D subspace: 50 % + 1/3 of remainder = 0.667
*
* - 2D subspace: 50 % + 2/3 of remainder = 0.833
*
* In a 4D space, a progressive alpha value of 0.5 equals:
*
* - 1D subspace: 50% + 1/4 of remainder = 0.625
*
* - 2D subspace: 50% + 2/4 of remainder = 0.750
*
* - 3D subspace: 50% + 3/4 of remainder = 0.875
*
* Reasoning why this improves over PercentageEigenPairFilter:
*
* In a 100 dimensional space, a single Eigenvector representing over 85% of the
* total variance is highly significant, whereas the strongest 85 Eigenvectors
* together will by definition always represent at least 85% of the variance.
* PercentageEigenPairFilter can thus not be used with these parameters and
* detect both dimensionalities correctly.
*
* The second parameter introduced here, walpha, serves a different function: It
* prevents the eigenpair filter to use a statistically weak Eigenvalue just to
* reach the intended level, e.g. 84% + 1% >= 85% when 1% is statistically very
* weak.
*
* @author Erich Schubert
*
*/
@Title("Progressive Eigenpair Filter")
@Description("Sorts the eigenpairs in decending order of their eigenvalues and returns the first eigenpairs, whose sum of eigenvalues explains more than the a certain percentage of the unexpected variance, where the percentage increases with subspace dimensionality.")
public class ProgressiveEigenPairFilter implements EigenPairFilter {
  /**
   * Parameter progressive alpha.
   */
  public static final OptionID EIGENPAIR_FILTER_PALPHA = OptionID.getOrCreateOptionID("pca.filter.progressivealpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share.");

  /**
   * The default value for alpha.
   */
  public static final double DEFAULT_PALPHA = 0.5;

  /**
   * The default value for alpha.
   */
  public static final double DEFAULT_WALPHA = 0.95;

  /**
   * The threshold for strong eigenvectors: the strong eigenvectors explain a
   * portion of at least alpha of the total variance.
   */
  private double palpha;

  /**
   * The noise tolerance level for weak eigenvectors
   */
  private double walpha;

  /**
   * Constructor.
   *
   * @param palpha palpha
   * @param walpha walpha
   */
  public ProgressiveEigenPairFilter(double palpha, double walpha) {
    super();
    this.palpha = palpha;
    this.walpha = walpha;
  }

  /**
   * Filter eigenpairs.
   */
  @Override
  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
    // init strong and weak eigenpairs
    List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>();
    List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>();

    // determine sum of eigenvalues
    double totalSum = 0;
    for(int i = 0; i < eigenPairs.size(); i++) {
      EigenPair eigenPair = eigenPairs.getEigenPair(i);
      totalSum += eigenPair.getEigenvalue();
    }
    double expectedVariance = totalSum / eigenPairs.size() * walpha;

    // determine strong and weak eigenpairs
    double currSum = 0;
    boolean found = false;
    int i;
    for(i = 0; i < eigenPairs.size(); i++) {
      EigenPair eigenPair = eigenPairs.getEigenPair(i);
      // weak Eigenvector?
      if(eigenPair.getEigenvalue() < expectedVariance) {
        break;
      }
      currSum += eigenPair.getEigenvalue();
      // calculate progressive alpha level
      double alpha = 1.0 - (1.0 - palpha) * (1.0 - (i + 1) / eigenPairs.size());
      if(currSum / totalSum >= alpha || i == eigenPairs.size() - 1) {
        found = true;
        strongEigenPairs.add(eigenPair);
        break;
      }
    }
    // if we didn't hit our alpha level, we consider all vectors to be weak!
    if(!found) {
      assert (weakEigenPairs.size() == 0);
      weakEigenPairs = strongEigenPairs;
      strongEigenPairs = new ArrayList<EigenPair>();
    }
    for(; i < eigenPairs.size(); i++) {
      EigenPair eigenPair = eigenPairs.getEigenPair(i);
      weakEigenPairs.add(eigenPair);
    }

    // the code using this method doesn't expect an empty strong set,
    // if we didn't find any strong ones, we make all vectors strong
    if(strongEigenPairs.size() == 0) {
      return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs);
    }
    return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * The threshold for strong eigenvectors: the strong eigenvectors explain a
     * portion of at least alpha of the total variance.
     */
    private double palpha;

    /**
     * The noise tolerance level for weak eigenvectors
     */
    private double walpha;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      DoubleParameter palphaP = new DoubleParameter(EIGENPAIR_FILTER_PALPHA, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_PALPHA);
      if(config.grab(palphaP)) {
        palpha = palphaP.getValue();
      }

      DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA);
      if(config.grab(walphaP)) {
        walpha = walphaP.getValue();
      }
    }

    @Override
    protected ProgressiveEigenPairFilter makeInstance() {
      return new ProgressiveEigenPairFilter(palpha, walpha);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.math.linearalgebra.pca.ProgressiveEigenPairFilter$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.