Package stallone.mc.pcca

Source Code of stallone.mc.pcca.PCCA

package stallone.mc.pcca;

import static stallone.api.API.*;
import stallone.api.IAlgorithm;
import stallone.api.algebra.Algebra;
import stallone.api.algebra.IEigenvalueDecomposition;
import stallone.api.cluster.IClustering;
import stallone.api.doubles.Doubles;
import stallone.api.doubles.IDoubleArray;
import stallone.api.ints.IIntArray;
import stallone.api.ints.Ints;
import stallone.ints.PrimitiveIntArray;

/**
* A java implementation of the Inner Simplex Algorithm (ISA) from Marcus Weber. For details refer to:<br>
* <i>Marcus Weber: Improved Perron Cluster Analysis, ZIB-Report 03-04.<br>
* Marcus Weber and Tobias Galliat: Charakterization of Transition States in Conformational Dynamics using Fuzzy States,
* ZIB-Report 02-12.</i><br>
* See <tt><A HREF="www.zib.de/bib/pub/index.en.html"><CODE>www.zib.de/bib/pub/index.en.html</CODE></A></tt>.
*
* <p>A typical application is the computation of meta stable sets of a Markov chain represented by a given reversible
* transition matrix. This requires the following steps:<br>
* 1) Compute a reversible transition matrix, say <b>P</b>.<br>
* 2) Determine the number of meta stable sets, for instance via the size of the Perron Cluster, say <i>k</i>.<br>
* 3) Compute the <i>k</i> eigenvectors belonging to the <i>k</i> eigenvalues closest to 1.<br>
* 4) Use the eigenvectors as input data to the cluster algorithm.<br>
* 5) With {@link #getClusters} you get an array containing the allocation of each indices to a cluster, which is a
* number between 0,...,k-1.<br>
* 6) Use {@link #getPermutationArray} to obtain a permutation array. With this array you can permute the state space of
* <b>P</b>, such that states belonging to the same cluster are neighbours.</p>
*
* @author  meerbach@math.fu-berlin.de
*/
public final class PCCA implements IAlgorithm
{

    /**
     * The condition of the transformation matrix. The transformation matrix transforms a given dataset with
     * simplex-like structure to a dataset with standard simplex-like structure. Large condition indicates an ill
     * conditioned cluster problem.
     */
    //public double COND_TRANS;
    /**
     * An indikator for the deviation of the data-points from simplex structure. Is this indikator negative, than there
     * are data points outside the computed simplex, so this indikator should be close to zero.
     */
    public double INDIKATOR;
    private int[] CLUSTER;
    private int[] SORTARRAY;
    private int[] CLUSTER_SIZE;
    private IDoubleArray FUZZY;
    private IDoubleArray eigenvectors;

    /**
     * Creates a new instance of ClusterByIsa and computes a clustering of a data set with the Isa-algorithm. The
     * created instance will be immutable. The results will be:<br>
     * <i>Cluster allocation</i>: An integer array containing the allocation of the given data points to the clusters:
     * <code>CLUSTER[i]</code> is the cluster of data point <i>i</i>.<br>
     * <i>Fuzzy allocations</i>: A (number of data points times number of clusters) -matrix containing the fuzzy
     * allocation for each data point.<br>
     * <i>Simplex indikator</i>: An indikator for the deviation of the shape of the set of data-points from a simplex
     * structure.<br>
     * <i>Condition indikator</i>: Large condition indicates an ill conditioned cluster problem.<br>
     * Use the {@link #allData()} method to display all results and the {@link #getClusters()}, resp. {@link #getFuzzy}
     * method to achieve them. The simplex and the condition indikator are public fields.
     *
     * @param   dataSet  the data points in a <code>DoubleMatrix2D</code>. <code>dataSet.rows()</code> is the number of
     *                   data points, while <code>dataSet.columns()</code> is the number of cluster.
     *
     * @throws  IllegalArgumentException  if there are more clusters than data points.
     */
    public PCCA()
    {
    }

    public void setEigenvectors(IDoubleArray dataSet)
    {
        this.eigenvectors = dataSet;
    }

    public void perform()
    {

        if (eigenvectors == null)
        {
            throw new RuntimeException("No eigenvectors set. Aborting.");
        }

        /* initialisation */
        int noOfClusters = eigenvectors.columns();
        int noOfPoints = eigenvectors.rows();
        IDoubleArray fuzzyAllocation;
        IDoubleArray transMatrix;
        double condTrans;
        double indikator = 0;
        int[] clusterAllocation = new int[noOfPoints];
        int[] counter;

        /* Special cases: */
        /* a) less than two clusters. */
        if (noOfClusters < 2)
        {
            fuzzyAllocation = Doubles.create.matrix(noOfPoints, 1, 1);
            transMatrix = Doubles.create.matrix(1, 1, 1 / eigenvectors.get(0, 0));
            condTrans = 0;

            for (int i = 0; i < noOfPoints; i++)
            {
                clusterAllocation[i] = 0;
            }

            counter = new int[2];
            counter[0] = 0;
            counter[1] = noOfPoints;
        }
        /* b) more cluster than states. */
        else if (noOfClusters > noOfPoints)
        {
            throw new IllegalArgumentException("There are more clusters than points given!");
        }
        else if (noOfClusters == noOfPoints)
        {
            fuzzyAllocation = Doubles.create.array(noOfPoints,noOfPoints);
            for (int i=0; i<fuzzyAllocation.rows(); i++)
                fuzzyAllocation.set(i,i,1);
            transMatrix = Algebra.util.inverse(eigenvectors);

            // currently, we do not calculate the condition number because we lack the method
            //condTrans = A.cond(transMatrix);

            counter = new int[noOfPoints + 1];

            for (int i = 0; i < noOfPoints; i++)
            {
                clusterAllocation[i] = i;
                counter[i + 1] = 1;
            }
        }
        /* Start of the Isa-algorithm. */
        else
        {
            double skalar = 0;
            double maxDist = 0;
            double comp = 0;
            double entry = 0;
            int[] index = new int[noOfClusters];
            int[] indexAll = Ints.create.arrayRange(0, noOfClusters).getArray();
            IDoubleArray orthoSys = eigenvectors.copy();
            IDoubleArray dummy;

            /* Compute the two data-points with the largest distance
             *(quantified by the 2-norm)*/
            for (int i = 0; i < noOfPoints; i++)
            {
                for (int j = i + 1; j < noOfPoints; j++)
                {
                    double dij = Algebra.util.distance(eigenvectors.viewRow(i), eigenvectors.viewRow(j));

                    if (dij > maxDist)
                    {
                        maxDist = dij;
                        index[0] = i;
                        index[1] = j;
                    }
                }
            }

            /* compute the other representatives by modified gram-schmidt
             * (i.e. the data points with the
             *largest distance to them computed before).*/
            for (int i = 0; i < noOfPoints; i++)
            {
                for (int j=0; j<orthoSys.columns(); j++)
                    orthoSys.set(i, j, orthoSys.get(i,j)-eigenvectors.get(0, j));
            }

            // divide by norm of index 1.
            double d = Algebra.util.norm(orthoSys.viewRow(index[1]));
            Algebra.util.scale(1.0/d, orthoSys);

            // ??
            for (int i = 2; i < noOfClusters; i++)
            {
                maxDist = 0;
                dummy = orthoSys.viewRow(index[i - 1]).copy();

                for (int j = 0; j < noOfPoints; j++)
                {
                    skalar = Algebra.util.dot(dummy, orthoSys.viewRow(j));
                    for (int k = 0; k<dummy.size(); k++)
                    {
                        orthoSys.set(j, k, orthoSys.get(j,k) - (dummy.get(k) * skalar));
                    }
                    comp = Algebra.util.norm(orthoSys.viewRow(j));

                    if (comp > maxDist)
                    {
                        maxDist = comp;
                        index[i] = j;
                    }
                }

                double normi = Algebra.util.norm(orthoSys.viewRow(index[i]));
                Algebra.util.scale(1.0/normi, orthoSys);
            }

            /* Use the index-array with the representatives to compute the
             * transformation matrix, i.e., the matrix that maps the
             * representative to the edges of the standard simplex.*/
            transMatrix = Algebra.util.inverse(eigenvectors.view(index, indexAll));

            // currently cannot compute condition number
            //condTrans = A.cond(transMatrix);

            /* Transform the data set to a (hopefully) nearly standard simplex
             * structure, by mapping all data points with the computed
             * transformation matrix. */
            fuzzyAllocation = Doubles.create.matrix(Algebra.util.product(eigenvectors, transMatrix).getTable());

            /* Extract from soft (fuzzy) allocation the sharp allocation vektor.*/
            counter = new int[noOfClusters + 1];

            for (int i = 0; i < noOfPoints; i++)
            {
                comp = 0;

                for (int j = 0; j < noOfClusters; j++)
                {
                    entry = fuzzyAllocation.get(i, j);

                    if (entry < indikator)
                    {
                        indikator = entry;
                    }

                    if (entry > comp)
                    {
                        clusterAllocation[i] = j;
                        comp = entry;
                    }
                }

                counter[clusterAllocation[i] + 1]++;
            }
        } // end if-else

        /* Compute a sorting array and an array with the number of data points
         *in each cluster*/
        int[] counter2 = new int[noOfClusters];
        int[] sortarray = new int[noOfPoints];

        for (int i = 1; i < counter.length; i++)
        {
            counter[i] += counter[i - 1];
        }

        for (int i = 0; i < sortarray.length; i++)
        {
            sortarray[counter[clusterAllocation[i]] + counter2[clusterAllocation[i]]] = i;
            counter2[clusterAllocation[i]]++;
        }

        /* define fields */
        INDIKATOR = indikator;
        FUZZY = fuzzyAllocation;
        //COND_TRANS = condTrans;
        CLUSTER = clusterAllocation;
        SORTARRAY = sortarray;
        CLUSTER_SIZE = counter2;
    }

    /**
     * Returns an array with cluster allocations. <code>int[i]</code> is the cluster to which data point i was
     * allocated.
     */
    public IIntArray getClusters()
    {
        return(new PrimitiveIntArray(CLUSTER));
    }

    /**
     * Returns an array containing the number of data points in each cluster.
     */
    public IIntArray getClusterSize()
    {
        return(new PrimitiveIntArray(CLUSTER_SIZE));
    }

    public IDoubleArray getFuzzy()
    {
        return(FUZZY);
    }

    /**
     * Returns the permutation which is needed to arrange the data points according to their cluster.
     */
    public IIntArray getPermutationArray()
    {
        return(new PrimitiveIntArray(SORTARRAY));
    }
}
TOP

Related Classes of stallone.mc.pcca.PCCA

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.