// Package stallone.cluster
//
// Source Code of stallone.cluster.KCenterClustering

package stallone.cluster;

import java.util.Iterator;
import stallone.api.cluster.IClustering;
import stallone.api.datasequence.DataSequence;
import stallone.api.datasequence.IDataInput;
import stallone.api.datasequence.IDataList;
import stallone.api.datasequence.IDataSequence;
import stallone.api.discretization.IDiscretization;
import stallone.api.doubles.IDoubleArray;
import stallone.api.doubles.IMetric;
import stallone.api.ints.IIntArray;
import stallone.api.ints.Ints;
import stallone.doubles.fastutils.DoubleArrayList;
import stallone.doubles.fastutils.IntArrayList;

/**
* Implementation based on the work of "A Fast Geometric Clustering Method on
* Conformatino Space of Biomolecules" J. Sun and Y. Yao and X. Huang and V.
* Pande and G. Carlsson and L. J. Guibas To appear Correspondence to
* "guibas@cs.stanford.edu"
*
* @author Martin Senne
*/
public class KCenterClustering implements IClustering, IDiscretization
{

    private IDataList clusters = DataSequence.create.list();
    /**
     * data points which are going to be clustered.
     */
    private Iterable<IDoubleArray> data; // P
    private int size = 0;
    //int dimension;

    /*
     * Metric which is used as distance measure @see
     * biocomp.phaseprofiler.clustering.interfaces.IMetric
     */
    private IMetric<IDoubleArray> distanceMetric;
    /**
     * total number of clusters.
     */
    private int numberOfClusters;
    /**
     * stores the assignment of point to cluster numbers e.g.
     * assignedClusters[2] = 4 says, the third datapoint belongs to cluster 5
     */
    private IntArrayList assignedClusters;
    /**
     * seed of the random generator.
     */
    private long randomSeed;

    /**
     * @see biocomp.phaseprofiler.clustering.interfaces.IClustering
     */
    @Override
    public void perform()
    {
        // these are the minimal distances of all data points to any cluster center
        DoubleArrayList minDistances = new DoubleArrayList();


        // first cluster center is randomly selected
        // Random rnd = new Random(randomSeed);
        // clusterCenterIndexes[0] = rnd.nextInt(dataPoints.size());

        // first cluster center is equal to first frame
        Iterator<IDoubleArray> it = data.iterator();
        if (!it.hasNext())
        {
            throw (new RuntimeException("Trying to cluster an empty data set"));
        }
        IDoubleArray v_0 = it.next().copy();
        clusters.add(v_0);
        assignedClusters.add(0);
        minDistances.add(0);
        //clusterCenterIndexes[0]=0;

        // consider second data point
        /*
         * if (!it.hasNext()) throw(new RuntimeException("Trying to cluster a
         * data set with one data point")); IVector v_i = it.next();
         * assignedClusters.add(0); minDistances.add(distanceMetric.measure(v_i,
         * v_0));
         */

        // first iteration: go over data, calculate distance to first center and assign first center
        System.out.println("KCenter: iteration 1/" + numberOfClusters + ".");
        IDoubleArray v_i = null;
        double maxMinDistance = 0;
        int count = 0;
        while (it.hasNext())
        {
            IDoubleArray p_j = it.next().copy();

            // calculate distance to cluster 0
            double d = distanceMetric.distance(p_j, v_0);
            minDistances.add(d);

            // is this the largest distance so far? Then memorize as next center candidate
            if (d >= maxMinDistance)
            {
                count++;
                maxMinDistance = d;
                v_i = p_j;
            }

            // assign every data point p_j to cluster 0
            assignedClusters.add(0);
        }


        // for all other clusters, add the most distance point as center and the reassign
        // for k clusters do k-2 further passes to the data
        for (int i = 1; i < numberOfClusters; i++)
        {
            System.out.println("KCenter: iteration " + (i + 1) + "/" + numberOfClusters + ".");

            // add most distance point as a new center
            clusters.add(v_i);

            int j = 0;
            maxMinDistance = 0;
            IDoubleArray v_next = null;
            for (it = data.iterator(); it.hasNext(); j++) //for (int j = 0; j < dataPoints.size(); j++)
            {
                IDoubleArray p_j = it.next().copy();

                // get minimal distance of p_j to cluster centers
                double currentDistance = minDistances.get(j);

                // calculate new distance of p_j to the possible new cluster center
                double newDistance = distanceMetric.distance(p_j, v_i);

                // if new cluster center closer, then reassign
                if (newDistance < currentDistance)
                {
                    assignedClusters.set(j, i);
                    minDistances.set(j, newDistance);

                }

                if (minDistances.get(j) >= maxMinDistance)
                {
                    maxMinDistance = minDistances.get(j);
                    v_next = p_j;
                }
            }

            v_i = v_next;
        }
    }

    @Override
    public void setInput(IDataInput _data)
    {
        this.data = _data.singles();
        this.size = _data.size();
        assignedClusters = new IntArrayList();
    }

    @Override
    public void setInput(IDataSequence datapoints)
    {
        this.data = datapoints;
        this.size = datapoints.size();
        //this.dimension = datapoints.dimension();
        assignedClusters = new IntArrayList();
    }

    @Override
    public void setMetric(IMetric<IDoubleArray> metric)
    {
        this.distanceMetric = metric;
    }

    public void setNumberOfClusters(int numberOfClusters)
    {

        // initialize variables
        this.numberOfClusters = numberOfClusters;
        //this.clusterCenterIndexes = new int[numberOfClusters];
    }

    @Override
    public int getNumberOfClusters()
    {
        return numberOfClusters;
    }

    //@Override
    /*
     * public int getClusterIndex(int i) { return pointPBelongsToCluster[i];
    }
     */
    //@Override
    /*
     * public IDoubleArray getMembership(int i) {
     *
     * // here we need a factory for vector creation IDoubleArray membership =
     * Doubles.create.array(numberOfClusters);
     *
     * for (int j = 0; j < membership.size(); j++) { membership.set(j, 0.0d); }
     *
     * membership.set(pointPBelongsToCluster[i], 1.0d);
     *
     * return membership;
    }
     */
    /**
     * Sets the random seed for the pseudo random number generator in the
     * initialization. Need for reproducibility of the results.
     *
     * @param randomSeed The random seed for the number generator
     */
    public void setRandomSeed(long randomSeed)
    {
        this.randomSeed = randomSeed;
    }

    @Override
    public IDiscretization getClusterAssignment()
    {
        return this;
    }

    /**
     * Returns index of that cluster which is closest to the given point p.
     *
     * @param p poiint which is assigned to one of the clusters of
     *
     * @return index of cluster
     */
    @Override
    public int assign(IDoubleArray p)
    {
        double minimalDistance = Double.MAX_VALUE;
        int indexOfNearestCluster = -1;

        for (int i = 0; i < numberOfClusters; i++)
        {
            IDoubleArray v_i = clusters.get(i);

            // calculate distance
            double currentDistance = distanceMetric.distance(p, v_i);

            if (currentDistance < minimalDistance)
            {
                minimalDistance = currentDistance;
                indexOfNearestCluster = i;
            }
        }

        if (indexOfNearestCluster == -1)
        {
            throw new RuntimeException("Can not assign cluster.");
        }
        else
        {
            return indexOfNearestCluster;
        }
    }

    @Override
    public IDoubleArray assignFuzzy(IDoubleArray p)
    {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public IDoubleArray getRepresentative(IDoubleArray p)
    {
        return clusters.get(assign(p));
    }

    //@Override
    public String getDescriptiveName()
    {
        return "kcenter";
    }

    //@Override
    /*
     * public IDoubleArray getClusterCenter(int i) { return
     * dataPoints.get(clusterCenterIndexes[i]);
    }
     */
    @Override
    public Iterator<IDoubleArray> clusterCenterIterator()
    {
        return (clusters.iterator());
    }

    @Override
    public IIntArray getClusterIndexes()
    {
        assignedClusters.trim();
        return (Ints.create.arrayFrom(assignedClusters.elements()));
    }

    @Override
    public IDataSequence getClusterCenters()
    {
        return clusters;
    }
}
// TOP
//
// Related Classes of stallone.cluster.KCenterClustering
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.