Package de.jungblut.clustering

Source Code of de.jungblut.clustering.CanopyClustering

package de.jungblut.clustering;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.common.base.Preconditions;

import de.jungblut.distance.DistanceMeasurer;
import de.jungblut.math.DoubleVector;

/**
* Sequential canopy clusterer.
*
* @author thomas.jungblut
*/
public final class CanopyClustering {

  private static final Log LOG = LogFactory.getLog(CanopyClustering.class);

  private CanopyClustering() {
    throw new IllegalAccessError();
  }

  /**
   * Creates a list of canopies. Make sure that t1 > t2!
   *
   * @param points the points to cluster.
   * @param measure the distance measurer to use.
   * @param t1 the outer cluster distance (fuzzy).
   * @param t2 the inner cluster distance (exclusive).
   * @param verbose if true, output about timinings and number of clusters.
   * @return a list of canopy centers.
   */
  public static List<DoubleVector> createCanopies(List<DoubleVector> pPoints,
      DistanceMeasurer measure, double t1, double t2, boolean verbose) {
    Preconditions.checkArgument(t1 > t2, "t1 must be > t2!");

    // use a linked structure, so we can remove the head fast
    LinkedList<DoubleVector> points = new LinkedList<>(pPoints);

    // do the clustering
    List<DoubleVector> canopyList = new ArrayList<>();
    long start = System.currentTimeMillis();
    while (!points.isEmpty()) {
      DoubleVector p1 = points.get(0);
      points.remove(0);
      DoubleVector canopy = p1.deepCopy();
      int assigned = 1;
      // one can speed this up by an inverted index or a kd-tree
      Iterator<DoubleVector> iterator = points.iterator();
      while (iterator.hasNext()) {
        DoubleVector p2 = iterator.next();
        double dist = measure.measureDistance(p1, p2);
        // Put all points that are within distance threshold T1 into the
        // canopy
        if (dist < t1) {
          assigned++;
          canopy.add(p2);
        }
        // Remove from the list all points that are within distance
        // threshold T2 (strongly bound)
        if (dist < t2) {
          iterator.remove();
        }
      }
      // average it
      if (assigned > 1) {
        canopy = canopy.divide(assigned);
      }
      canopyList.add(canopy);

      if (verbose) {
        LOG.info(points.size()
            + " vectors remaining to cluster | Found canopies: "
            + canopyList.size() + " | Took "
            + (System.currentTimeMillis() - start) + "ms!");
      }
      start = System.currentTimeMillis();
    }
    return canopyList;
  }

}
TOP

Related Classes of de.jungblut.clustering.CanopyClustering

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.