Package de.lmu.ifi.dbs.elki.algorithm.clustering

Source Code of de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi$SteepScanPosition

package de.lmu.ifi.dbs.elki.algorithm.clustering;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.OPTICSModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.IterableResult;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ClassParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;

/**
* Class to handle OPTICS Xi extraction.
*
* @author Erich Schubert
*
* @apiviz.uses OPTICSTypeAlgorithm oneway
* @apiviz.uses ClusterOrderResult oneway
* @apiviz.has SteepAreaResult
*
* @param <N> Number distance used by OPTICS
*/
public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<Clustering<OPTICSModel>> implements ClusteringAlgorithm<Clustering<OPTICSModel>> {
  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(OPTICSXi.class);

  /**
   * Parameter to specify the actual OPTICS algorithm to use.
   */
  public static final OptionID XIALG_ID = OptionID.getOrCreateOptionID("opticsxi.algorithm", "The actual OPTICS-type algorithm to use.");

  /**
   * Parameter to specify the steepness threshold.
   */
  public static final OptionID XI_ID = OptionID.getOrCreateOptionID("opticsxi.xi", "Threshold for the steepness requirement.");

  /**
   * The actual algorithm we use.
   */
  OPTICSTypeAlgorithm<N> optics;

  /**
   * Xi parameter
   */
  double xi;

  /**
   * Constructor.
   *
   * @param optics OPTICS algorithm to use
   * @param xi Xi value
   */
  public OPTICSXi(OPTICSTypeAlgorithm<N> optics, double xi) {
    super();
    this.optics = optics;
    this.xi = xi;
  }

  public Clustering<OPTICSModel> run(Database database, Relation<?> relation) {
    // TODO: ensure we are using the same relation?
    ClusterOrderResult<N> opticsresult = optics.run(database);

    if(!NumberDistance.class.isInstance(optics.getDistanceFactory())) {
      logger.verbose("Xi cluster extraction only supported for number distances!");
      return null;
    }

    if(logger.isVerbose()) {
      logger.verbose("Extracting clusters with Xi: " + xi);
    }
    return extractClusters(opticsresult, relation, 1.0 - xi, optics.getMinPts());
  }

  /**
   * Extract clusters from a cluster order result.
   *
   * @param clusterOrderResult cluster order result
   * @param relation Relation
   * @param ixi Parameter 1 - Xi
   * @param minpts Parameter minPts
   */
  // TODO: resolve handling of the last point in the cluster order
  private Clustering<OPTICSModel> extractClusters(ClusterOrderResult<N> clusterOrderResult, Relation<?> relation, double ixi, int minpts) {
    // TODO: add progress?
    List<ClusterOrderEntry<N>> clusterOrder = clusterOrderResult.getClusterOrder();
    double mib = 0.0;
    // TODO: make it configurable to keep this list; this is mostly useful for
    // visualization
    List<SteepArea> salist = new ArrayList<SteepArea>();
    List<SteepDownArea> sdaset = new java.util.Vector<SteepDownArea>();
    ModifiableHierarchy<Cluster<OPTICSModel>> hier = new HierarchyHashmapList<Cluster<OPTICSModel>>();
    HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<Cluster<OPTICSModel>>();
    HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());

    SteepScanPosition<N> scan = new SteepScanPosition<N>(clusterOrder);
    while(scan.hasNext()) {
      final int curpos = scan.index;
      // Update maximum-inbetween
      mib = Math.max(mib, scan.ecurr.getReachability().doubleValue());
      // The last point cannot be the start of a steep area.
      if(scan.esucc != null) {
        // Xi-steep down area
        if(scan.steepDown(ixi)) {
          // Update mib values with current mib and filter
          updateFilterSDASet(mib, sdaset, ixi);
          final double startval = scan.ecurr.getReachability().doubleValue();
          int startsteep = scan.index;
          int endsteep = Math.min(scan.index + 1, clusterOrder.size());
          {
            while(scan.hasNext()) {
              scan.next();
              // not going downward at all - stop here.
              if(!scan.steepDown(1.0)) {
                break;
              }
              // still steep - continue.
              if(scan.steepDown(ixi)) {
                endsteep = Math.min(scan.index + 1, clusterOrder.size());
              }
              else {
                // Stop looking after minpts "flat" steps.
                if(scan.index - endsteep > minpts) {
                  break;
                }
              }
            }
          }
          mib = clusterOrder.get(endsteep).getReachability().doubleValue();
          final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
          if(logger.isDebuggingFinest()) {
            logger.debugFinest("Xi " + sda.toString());
          }
          sdaset.add(sda);
          if(salist != null) {
            salist.add(sda);
          }
          continue;
        }
        else
        // Xi-steep up area
        if(scan.steepUp(ixi)) {
          // Update mib values with current mib and filter
          updateFilterSDASet(mib, sdaset, ixi);
          final SteepUpArea sua;
          // Compute steep-up area
          {
            int startsteep = scan.index;
            int endsteep = scan.index + 1;
            mib = scan.ecurr.getReachability().doubleValue();
            double esuccr = scan.esucc.getReachability().doubleValue();
            // There is nothing higher than infinity
            if(!Double.isInfinite(esuccr)) {
              // find end of steep-up-area, eventually updating mib again
              while(scan.hasNext()) {
                scan.next();
                // not going upward - stop here.
                if(!scan.steepUp(1.0)) {
                  break;
                }
                // still steep - continue.
                if(scan.steepUp(ixi)) {
                  endsteep = Math.min(scan.index + 1, clusterOrder.size() - 1);
                  mib = scan.ecurr.getReachability().doubleValue();
                  esuccr = scan.esucc.getReachability().doubleValue();
                }
                else {
                  // Stop looking after minpts non-up steps.
                  if(scan.index - endsteep > minpts) {
                    break;
                  }
                }
              }
            }
            sua = new SteepUpArea(startsteep, endsteep, esuccr);
            if(logger.isDebuggingFinest()) {
              logger.debugFinest("Xi " + sua.toString());
            }
            if(salist != null) {
              salist.add(sua);
            }
          }
          // Validate and computer clusters
          // logger.debug("SDA size:"+sdaset.size()+" "+sdaset);
          ListIterator<SteepDownArea> sdaiter = sdaset.listIterator(sdaset.size());
          // Iterate backwards for correct hierarchy generation.
          while(sdaiter.hasPrevious()) {
            SteepDownArea sda = sdaiter.previous();
            // logger.debug("Comparing: eU="+mib.doubleValue()+" SDA: "+sda.toString());
            // Condition 3b: end-of-steep-up > maximum-in-between lower
            if(mib * ixi < sda.getMib()) {
              continue;
            }
            // By default, clusters cover both the steep up and steep down area
            int cstart = sda.getStartIndex();
            int cend = sua.getEndIndex();
            // However, we sometimes have to adjust this (Condition 4):
            {
              // Case b)
              if(sda.getMaximum() * ixi >= sua.getMaximum()) {
                while(cstart < sda.getEndIndex()) {
                  if(clusterOrder.get(cstart + 1).getReachability().doubleValue() > sua.getMaximum()) {
                    cstart++;
                  }
                  else {
                    break;
                  }
                }
              }
              // Case c)
              else if(sua.getMaximum() * ixi >= sda.getMaximum()) {
                while(cend > sua.getStartIndex()) {
                  if(clusterOrder.get(cend - 1).getReachability().doubleValue() > sda.getMaximum()) {
                    cend--;
                  }
                  else {
                    break;
                  }
                }
              }
              // Case a) is the default
            }
            // Condition 3a: obey minpts
            if(cend - cstart + 1 < minpts) {
              continue;
            }
            // Build the cluster
            ModifiableDBIDs dbids = DBIDUtil.newArray();
            for(int idx = cstart; idx <= cend; idx++) {
              final DBID dbid = clusterOrder.get(idx).getID();
              // Collect only unclaimed IDs.
              if(unclaimedids.remove(dbid)) {
                dbids.add(dbid);
              }
            }
            if(logger.isDebuggingFine()) {
              logger.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cstart - cend + 1));
            }
            OPTICSModel model = new OPTICSModel(cstart, cend);
            Cluster<OPTICSModel> cluster = new Cluster<OPTICSModel>("Cluster_" + cstart + "_" + cend, dbids, model, hier);
            // Build the hierarchy
            {
              Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
              while(iter.hasNext()) {
                Cluster<OPTICSModel> clus = iter.next();
                OPTICSModel omodel = clus.getModel();
                if(model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
                  hier.add(cluster, clus);
                  iter.remove();
                }
              }
            }
            curclusters.add(cluster);
          }
        }
      }
      // Make sure to advance at least one step
      if(curpos == scan.index) {
        scan.next();
      }
    }
    if(curclusters.size() > 0 || unclaimedids.size() > 0) {
      final Clustering<OPTICSModel> clustering = new Clustering<OPTICSModel>("OPTICS Xi-Clusters", "optics");
      if(unclaimedids.size() > 0) {
        final Cluster<OPTICSModel> allcluster;
        if(clusterOrder.get(clusterOrder.size() - 1).getReachability().isInfiniteDistance()) {
          allcluster = new Cluster<OPTICSModel>("Noise", unclaimedids, true, new OPTICSModel(0, clusterOrder.size() - 1), hier);
        }
        else {
          allcluster = new Cluster<OPTICSModel>("Cluster", unclaimedids, new OPTICSModel(0, clusterOrder.size() - 1), hier);
        }
        for(Cluster<OPTICSModel> cluster : curclusters) {
          hier.add(allcluster, cluster);
        }
        clustering.addCluster(allcluster);
      }
      else {
        for(Cluster<OPTICSModel> cluster : curclusters) {
          clustering.addCluster(cluster);
        }
      }
      clustering.addChildResult(clusterOrderResult);
      if(salist != null) {
        clusterOrderResult.addChildResult(new SteepAreaResult(salist));
      }
      return clustering;
    }
    return null;
  }

  /**
   * Update the mib values of SteepDownAreas, and remove obsolete areas.
   *
   * @param mib Maximum in-between value
   * @param sdaset Set of steep down areas.
   */
  private static void updateFilterSDASet(double mib, List<SteepDownArea> sdaset, double ixi) {
    Iterator<SteepDownArea> iter = sdaset.iterator();
    while(iter.hasNext()) {
      SteepDownArea sda = iter.next();
      if(sda.getMaximum() * ixi <= mib) {
        iter.remove();
      }
      else {
        // Update
        sda.setMib(Math.max(sda.getMib(), mib));
      }
    }
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return optics.getInputTypeRestriction();
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  /**
   * Position when scanning for steep areas
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   *
   * @param <N> Distance type
   */
  private static class SteepScanPosition<N extends NumberDistance<N, ?>> {
    /**
     * Cluster order
     */
    List<ClusterOrderEntry<N>> co;

    /**
     * Current position
     */
    int index = 0;

    /**
     * Current entry
     */
    ClusterOrderEntry<N> ecurr = null;

    /**
     * Next entry
     */
    ClusterOrderEntry<N> esucc = null;

    /**
     * Constructor.
     *
     * @param co Cluster order
     */
    public SteepScanPosition(List<ClusterOrderEntry<N>> co) {
      super();
      this.co = co;
      this.ecurr = (co.size() >= 1) ? co.get(0) : null;
      this.esucc = (co.size() >= 2) ? co.get(1) : null;
    }

    /**
     * Advance to the next entry
     */
    public void next() {
      index++;
      ecurr = esucc;
      if(index + 1 < co.size()) {
        esucc = co.get(index + 1);
      }
    }

    /**
     * Test whether there is a next value.
     *
     * @return end of cluster order
     */
    public boolean hasNext() {
      return index < co.size();
    }

    /**
     * Test for a steep up point.
     *
     * @param ixi steepness factor (1-xi)
     * @return truth value
     */
    public boolean steepUp(double ixi) {
      if(ecurr.getReachability().isInfiniteDistance()) {
        return false;
      }
      if(esucc == null) {
        return true;
      }
      return ecurr.getReachability().doubleValue() <= esucc.getReachability().doubleValue() * ixi;
    }

    /**
     * Test for a steep down area.
     *
     * @param ixi Steepness factor (1-xi)
     * @return truth value
     */
    public boolean steepDown(double ixi) {
      if(esucc == null) {
        return false;
      }
      if(esucc.getReachability().isInfiniteDistance()) {
        return false;
      }
      return ecurr.getReachability().doubleValue() * ixi >= esucc.getReachability().doubleValue();
    }
  }

  /**
   * Data structure to represent a steep-down-area for the xi method.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public abstract static class SteepArea {
    /**
     * Start index of down-area
     */
    private int startindex;

    /**
     * End index of down-area
     */
    private int endindex;

    /**
     * Maximum value
     */
    private double maximum;

    /**
     * Constructor.
     *
     * @param startindex Start index
     * @param endindex End index
     * @param maximum Maximum value
     */
    public SteepArea(int startindex, int endindex, double maximum) {
      super();
      this.startindex = startindex;
      this.endindex = endindex;
      this.maximum = maximum;
    }

    /**
     * Start index
     *
     * @return the start index
     */
    public int getStartIndex() {
      return startindex;
    }

    /**
     * End index
     *
     * @return the end index
     */
    public int getEndIndex() {
      return endindex;
    }

    /**
     * Get the start value = maximum value
     *
     * @return the starting value
     */
    public double getMaximum() {
      return maximum;
    }

  }

  /**
   * Data structure to represent a steep-down-area for the xi method.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class SteepDownArea extends SteepArea {
    /**
     * Maximum in-between value
     */
    private double mib;

    /**
     * Constructor
     *
     * @param startindex Start index
     * @param endindex End index
     * @param startDouble Start value (= maximum)
     * @param mib Maximum inbetween value (for Xi extraction; modifiable)
     */
    public SteepDownArea(int startindex, int endindex, double startDouble, double mib) {
      super(startindex, endindex, startDouble);
      this.mib = mib;
    }

    /**
     * Get the maximum in-between value
     *
     * @return the mib value
     */
    public double getMib() {
      return mib;
    }

    /**
     * Update the maximum in-between value
     *
     * @param mib the mib to set
     */
    public void setMib(double mib) {
      this.mib = mib;
    }

    @Override
    public String toString() {
      return "SteepDownArea(" + getStartIndex() + " - " + getEndIndex() + ", max=" + getMaximum() + ", mib=" + mib + ")";
    }
  }

  /**
   * Data structure to represent a steep-down-area for the xi method.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class SteepUpArea extends SteepArea {
    /**
     * Constructor
     *
     * @param startindex Starting index
     * @param endindex End index
     * @param endDouble End value (= maximum)
     */
    public SteepUpArea(int startindex, int endindex, double endDouble) {
      super(startindex, endindex, endDouble);
    }

    @Override
    public String toString() {
      return "SteepUpArea(" + getStartIndex() + " - " + getEndIndex() + ", max=" + getMaximum() + ")";
    }
  }

  /**
   * Result containing the chi-steep areas.
   *
   * @author Erich Schubert
   *
   * @apiviz.has SteepArea
   */
  public static class SteepAreaResult implements IterableResult<SteepArea> {
    /**
     * Storage
     */
    Collection<SteepArea> areas;

    /**
     * Constructor.
     *
     * @param areas Areas
     */
    public SteepAreaResult(Collection<SteepArea> areas) {
      super();
      this.areas = areas;
    }

    @Override
    public String getLongName() {
      return "Xi-Steep areas";
    }

    @Override
    public String getShortName() {
      return "xi-steep-areas";
    }

    @Override
    public Iterator<SteepArea> iterator() {
      return areas.iterator();
    }
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<D extends NumberDistance<D, ?>> extends AbstractParameterizer {
    protected OPTICSTypeAlgorithm<D> optics;

    protected double xi = 0.0;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      DoubleParameter xiP = new DoubleParameter(XI_ID);
      xiP.addConstraint(new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.CLOSE, 1.0, IntervalConstraint.IntervalBoundary.OPEN));
      if(config.grab(xiP)) {
        xi = xiP.getValue();
      }

      ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<OPTICSTypeAlgorithm<D>>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
      if(config.grab(opticsP)) {
        optics = opticsP.instantiateClass(config);
      }
    }

    @Override
    protected OPTICSXi<D> makeInstance() {
      return new OPTICSXi<D>(optics, xi);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi$SteepScanPosition

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
ndling/constraints/GreaterConstraint.html" title="Examples of de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint">de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.