/*
Copyright 2008-2010 Gephi
Authors : Mathieu Bastian <mathieu.bastian@gephi.org>
Website : http://www.gephi.org
This file is part of Gephi.
Gephi is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
Gephi is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Gephi. If not, see <http://www.gnu.org/licenses/>.
*/
package org.gephi.clustering.plugin.mcl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.gephi.clustering.api.Cluster;
import org.gephi.clustering.spi.Clusterer;
import org.gephi.graph.api.Edge;
import org.gephi.graph.api.Graph;
import org.gephi.graph.api.GraphModel;
import org.gephi.graph.api.Node;
import org.gephi.utils.longtask.spi.LongTask;
import org.gephi.utils.progress.Progress;
import org.gephi.utils.progress.ProgressTicket;
/**
* MarkovClustering implements the Markov clustering (MCL) algorithm for graphs,
* using a HashMap-based sparse representation of a Markov matrix, i.e., an
* adjacency matrix m whose columns are normalised to sum to one. The elements
* in a column / node can be interpreted as the decision probabilities of a
* random walker currently at that node. Note: whereas we explain the algorithm
* in terms of columns, the actual implementation works with rows because the
* sparse matrix used has row-major format.
* <p>
* The basic idea underlying the MCL algorithm is that dense regions in sparse
* graphs correspond to regions in which the number of k-length paths is
* relatively large, for small k in N; counting such paths corresponds to
* raising the matrix of transition probabilities to the k-th power. Random
* walks of length k have a higher probability (product) for paths that begin
* and end in the same dense region than for other paths.
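* <p>
* Concretely, if M is the column-stochastic transition matrix, the entry
* (M*M)_ij = sum_k M_ik * M_kj is the probability of reaching node i from
* node j in exactly two steps, so repeated squaring accumulates the
* probability mass of longer walks.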
* <p>
* The algorithm starts by creating a Markov matrix from the graph: diagonal
* elements are first added to the adjacency matrix to include self-loops for
* all nodes, i.e., probabilities that the random walker stays at a particular
* node. After this initialisation, the algorithm works by alternating two
* operations, expansion and inflation, iteratively recomputing the set of
* transition probabilities. The expansion step corresponds to matrix
* multiplication (on stochastic matrices); the inflation step corresponds to a
* parametrised inflation operator Gamma_r, which acts column-wise on (column)
* stochastic matrices (here, we use the analogous row-wise operation).
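* <p>
* In outline (cf. {@link #run}):
* <pre>
* M = normalise(adjacency(G) + loopGain * I)
* while (residual &gt; maxResidual) {
*     M = M * M           // expansion
*     M = Gamma_r(M)      // inflation (with pruning)
* }
* </pre>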
* <p>
* The inflation operator transforms a stochastic matrix into another one by
* raising each element to a positive power p and re-normalising the columns to
* keep the matrix stochastic. The effect is that larger probabilities in each
* column are emphasised and smaller ones deemphasised. On the other hand, the
* matrix multiplication in the expansion step creates new non-zero elements,
* i.e., edges. The algorithm converges very quickly, and the result is an
* idempotent Markov matrix, M = M * M, which represents a hard clustering of
* the graph into components.
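* <p>
* For example, with p = 2 the column (0.5, 0.3, 0.2) becomes (0.25, 0.09,
* 0.04) after exponentiation and approximately (0.66, 0.24, 0.11) after
* re-normalisation: the dominant transition is emphasised further.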
* <p>
* Expansion and inflation have two opposing effects: While expansion flattens
* the stochastic distributions in the columns and thus causes paths of a random
* walker to become more evenly spread, inflation contracts them to favoured
* paths.
* <p>
* This description is based on the introduction of Stijn van Dongen's thesis
* Graph Clustering by Flow Simulation (2000); see there for a mathematical
* treatment of the algorithm and the associated MCL process.
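* <p>
* A minimal usage sketch (assuming a {@code GraphModel} obtained elsewhere,
* e.g., from Gephi's GraphController; the parameter values are illustrative):
* <pre>{@code
* MarkovClustering mcl = new MarkovClustering();
* mcl.setGammaExp(2.0);      // inflation exponent
* mcl.setLoopGain(1.0);      // self-loop weight
* mcl.setMaxResidual(0.001);
* mcl.execute(graphModel);
* Cluster[] result = mcl.getClusters();
* }</pre>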
*/
//Original author Gregor Heinrich
public class MarkovClustering implements Clusterer, LongTask {
private double maxResidual = 0.001;
private double gammaExp = 2.0;
private double loopGain = 0.;
private double zeroMax = 0.001;
private Cluster[] clusters;
//LongTask
private ProgressTicket progressTicket;
private boolean cancelled;
public void execute(GraphModel graphModel) {
cancelled = false;
Progress.start(progressTicket);
Progress.setDisplayName(progressTicket, "MCL Clustering");
HashMap<Integer, Node> nodeMap = new HashMap<Integer, Node>();
HashMap<Node, Integer> intMap = new HashMap<Node, Integer>();
Graph graph = graphModel.getGraphVisible();
graph.readLock();
//Load matrix
SparseMatrix matrix = new SparseMatrix();
int nodeId = 0;
for (Edge e : graph.getEdges()) {
Node source = e.getSource();
Node target = e.getTarget();
Integer sourceId;
Integer targetId;
if ((sourceId = intMap.get(source)) == null) {
sourceId = nodeId++;
intMap.put(source, sourceId);
nodeMap.put(sourceId, source);
}
if ((targetId = intMap.get(target)) == null) {
targetId = nodeId++;
intMap.put(target, targetId);
nodeMap.put(targetId, target);
}
double weight = e.getWeight();
matrix.add(sourceId, targetId, weight);
if (cancelled) {
graph.readUnlockAll();
return;
}
}
graph.readUnlock();
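// Work on the transpose: row-wise operations on it are equivalent to the
// column-wise operations of the textbook MCL description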
matrix = matrix.transpose();
matrix = run(matrix, maxResidual, gammaExp, loopGain, zeroMax);
if (cancelled) {
return;
}
Map<Integer, ArrayList<Integer>> map = getClusters(matrix);
if (cancelled) {
return;
}
int clusterNumber = 1;
List<Cluster> clustersList = new ArrayList<Cluster>();
// map may reference the same cluster list several times; keep each only once
Set<ArrayList<Integer>> seenClusters = new HashSet<ArrayList<Integer>>();
for (ArrayList<Integer> c : map.values()) {
if (seenClusters.add(c)) {
Node[] nodes = new Node[c.size()];
int i = 0;
for (Integer in : c) {
Node node = nodeMap.get(in);
nodes[i] = node;
i++;
}
clustersList.add(new MCLCluster(nodes, clusterNumber));
clusterNumber++;
}
if (cancelled) {
return;
}
}
clusters = clustersList.toArray(new Cluster[0]);
Progress.finish(progressTicket);
}
public Cluster[] getClusters() {
return clusters;
}
public boolean cancel() {
cancelled = true;
return true;
}
public void setProgressTicket(ProgressTicket progressTicket) {
this.progressTicket = progressTicket;
}
private static class MCLCluster implements Cluster {
private Node[] nodes;
private String name;
private Node metaNode;
public MCLCluster(Node[] nodes, int number) {
this.nodes = nodes;
this.name = "Cluster " + number;
}
public Node[] getNodes() {
return nodes;
}
public int getNodesCount() {
return nodes.length;
}
public String getName() {
return name;
}
public Node getMetaNode() {
return metaNode;
}
public void setMetaNode(Node node) {
this.metaNode = node;
}
}
/**
* Run the MCL process.
*
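* A minimal sketch for a 3-node path graph (SparseMatrix.add as used in
* {@link #execute(org.gephi.graph.api.GraphModel)}; the parameter values are
* illustrative):
* <pre>{@code
* SparseMatrix m = new SparseMatrix();
* m.add(0, 1, 1.0); m.add(1, 0, 1.0); // edge 0 -- 1
* m.add(1, 2, 1.0); m.add(2, 1, 1.0); // edge 1 -- 2
* m = run(m, 0.001, 2.0, 1.0, 0.001);
* }</pre>
*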
* @param a matrix
* @param maxResidual maximum difference between row elements and row square
* sum (measure of idempotence)
* @param pGamma inflation exponent for Gamma operator
* @param loopGain weight added to the diagonal (self-loop) elements
* @param maxZero maximum value considered zero for pruning operations
* @return the resulting matrix
*/
public SparseMatrix run(SparseMatrix a, double maxResidual, double pGamma, double loopGain, double maxZero) {
//System.out.println("original matrix\n" + a.transpose().toStringDense());
// add cycles
addLoops(a, loopGain);
// make stochastic
a.normaliseRows();
//System.out.println("normalised\n" + a.transpose().toStringDense());
double residual = 1.;
int i = 0;
if (cancelled) {
return a;
}
// main iteration
while (residual > maxResidual) {
i++;
a = expand(a);
residual = inflate(a, pGamma, maxZero);
System.out.println("residual energy = " + residual);
if (cancelled) {
return a;
}
}
return a;
}
/**
* inflate stochastic matrix by Hadamard (elementwise) exponentiation,
* pruning and normalisation:
* <p>
* result = Gamma ( m, p ) = normalise ( prune ( m .^ p ) ).
* <p>
* By convention, normalisation is done along rows (SparseMatrix has
* row-major representation)
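* <p>
* The returned residuum is computed row-wise as max(m_i) - sum(m_i .^ 2),
* which is 0 exactly when all non-zero elements of a row are equal, i.e.,
* when the row has reached the idempotent MCL limit.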
*
* @param m matrix (mutable)
* @param p exponent as a double
* @param zeromax threshold below which elements are pruned from the sparse
* matrix
* @return residuum value; m is modified in place
*/
public double inflate(SparseMatrix m, double p, double zeromax) {
double res = 0.;
// matlab: m = m .^ p
m.hadamardPower(p);
// matlab: m(find(m < threshold)) = 0
m.prune(zeromax);
// matlab [for cols]: dinv = diag(1./sum(m)); m = m * dinv; return
// sum(m)
SparseVector rowsums = m.normalise(1.);
// check if done: the per-row residuum is the difference between the maximum
// element and the sum of squared elements (0 once the row is idempotent)
for (int i : rowsums.keySet()) {
SparseVector row = m.get(i);
double max = row.max();
double sumsq = row.sum(2.);
res = Math.max(res, max - sumsq);
if (cancelled) {
return res;
}
}
return res;
}
/**
* expand stochastic quadratic matrix by squaring it, i.e., multiplying it
* with itself: result = m * m. Here normalisation is row-wise.
*
* @param m matrix
* @return new matrix (pointer != argument)
*/
public SparseMatrix expand(SparseMatrix m) {
m = m.times(m);
return m;
}
/**
* add loops with specific energy, which corresponds to adding loopGain to
* the diagonal elements.
*
* @param a
* @param loopGain
*/
private void addLoops(SparseMatrix a, double loopGain) {
if (loopGain <= 0) {
return;
}
for (int i = 0; i < a.size(); i++) {
a.add(i, i, loopGain);
if (cancelled) {
return;
}
}
}
public double getGammaExp() {
return gammaExp;
}
/**
* Set inflation exponent for Gamma operator. Default is 2.0
*/
public void setGammaExp(double gammaExp) {
this.gammaExp = gammaExp;
}
public double getLoopGain() {
return loopGain;
}
/**
* Set the loop gain, i.e., the weight added to diagonal (self-loop) elements. Default is 0.0
*/
public void setLoopGain(double loopGain) {
this.loopGain = loopGain;
}
public double getMaxResidual() {
return maxResidual;
}
/**
* Set the maximum difference between row elements and row square sum (measure of idempotence). Default is 0.001
*/
public void setMaxResidual(double maxResidual) {
this.maxResidual = maxResidual;
}
public double getZeroMax() {
return zeroMax;
}
/**
* Set the maximum value considered zero for pruning operations. Default is 0.001
*/
public void setZeroMax(double zeroMax) {
this.zeroMax = zeroMax;
}
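/**
 * Extract the hard clustering from the (near-)idempotent matrix: a non-zero
 * off-diagonal element at (i, j) means rows i and j belong to the same
 * cluster, and clusters sharing a member are merged transitively. A node
 * with no non-zero off-diagonal entry in its row or column is assigned to
 * no cluster.
 */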
private Map<Integer, ArrayList<Integer>> getClusters(SparseMatrix matrix) {
Map<Integer, ArrayList<Integer>> clusters = new HashMap<Integer, ArrayList<Integer>>();
int clusterCount = 0;
double[][] mat = matrix.getDense();
for (int i = 0; i < mat.length; i++) {
for (int j = 0; j < mat[0].length; j++) {
double value = mat[i][j];
if (value != 0.0) {
if (i == j) {
continue;
}
if (clusters.containsKey(j)) {
// Already seen "column" -- get the cluster and add column
ArrayList<Integer> columnCluster = clusters.get(j);
if (clusters.containsKey(i)) {
// We've already seen row also -- join them
ArrayList<Integer> rowCluster = clusters.get(i);
if (rowCluster == columnCluster) {
continue;
}
columnCluster.addAll(rowCluster);
clusterCount--;
} else {
// debugln("Adding "+row+" to "+columnCluster.getClusterNumber());
columnCluster.add(i);
}
for (Integer in : columnCluster) {
clusters.put(in, columnCluster);
}
} else {
ArrayList<Integer> rowCluster;
// First time we've seen "column" -- have we already seen "row"
if (clusters.containsKey(i)) {
// Yes, just add column to row's cluster
rowCluster = clusters.get(i);
// debugln("Adding "+column+" to "+rowCluster.getClusterNumber());
rowCluster.add(j);
} else {
rowCluster = new ArrayList<Integer>();
clusterCount++;
// debugln("Created new cluster "+rowCluster.getClusterNumber()+" with "+row+" and "+column);
rowCluster.add(j);
rowCluster.add(i);
}
for (Integer in : rowCluster) {
clusters.put(in, rowCluster);
}
}
}
if (cancelled) {
return clusters;
}
}
}
return clusters;
}
}