// Package edu.ucla.sspace.graph
//
// Source Code of edu.ucla.sspace.graph.ChineseWhispersClustering

/*
* Copyright 2012 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.graph;

import edu.ucla.sspace.common.Similarity;

import edu.ucla.sspace.clustering.Assignment;
import edu.ucla.sspace.clustering.Merge;
import edu.ucla.sspace.clustering.SoftAssignment;

import edu.ucla.sspace.util.Counter;
import edu.ucla.sspace.util.HashMultiMap;
import edu.ucla.sspace.util.HashIndexer;
import edu.ucla.sspace.util.Indexer;
import edu.ucla.sspace.util.MultiMap;
import edu.ucla.sspace.util.ObjectCounter;
import edu.ucla.sspace.util.Pair;
import edu.ucla.sspace.util.WorkQueue;

import edu.ucla.sspace.util.primitive.IntIterator;
import edu.ucla.sspace.util.primitive.IntSet;
import edu.ucla.sspace.util.primitive.IntIntMultiMap;
import edu.ucla.sspace.util.primitive.IntIntHashMultiMap;
import edu.ucla.sspace.util.primitive.PrimitiveCollections;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;

import java.util.logging.Level;
import java.util.logging.Logger;

import gnu.trove.iterator.TIntDoubleIterator;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;

import static edu.ucla.sspace.util.LoggerUtil.verbose;
import static edu.ucla.sspace.util.LoggerUtil.veryVerbose;

/**
* @author David Jurgens
*/
public class ChineseWhispersClustering implements java.io.Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The logger to which clustering status updates will be written.
     */
    private static final Logger LOGGER =
        Logger.getLogger(ChineseWhispersClustering.class.getName());
      
    private static final String PROPERTY_PREFIX =
        "edu.ucla.sspace.graph.LinkClustering";

    private static final int DEFAULT_MAX_ITERATIONS = 100;

    private static final double DEFAULT_RANDOM_ASSIGNMENT_PROB = 0d;

    private static final Random RANDOM = new Random();

    /**
     *
     * @return a mapping from the cluster index to the set of graph vertices
     *         mapped to that cluster
     */
    public <E extends Edge> MultiMap<Integer,Integer> cluster(Graph<E> graph) {
        return cluster(graph, DEFAULT_MAX_ITERATIONS,
                       DEFAULT_RANDOM_ASSIGNMENT_PROB);
    }

    /**
     *
     * @return a mapping from the cluster index to the set of graph vertices
     *         mapped to that cluster
     */
    public <E extends Edge> MultiMap<Integer,Integer>
                      cluster(Graph<E> graph, int maxIterations) {
        return cluster(graph, maxIterations, DEFAULT_RANDOM_ASSIGNMENT_PROB);
    }

    /**
     *
     *
     * @return a mapping from the cluster index to the set of graph vertices
     *         mapped to that cluster
     */
    public <E extends Edge> MultiMap<Integer,Integer>
                      cluster(Graph<E> graph, int maxIterations,
                              double randomAssignmentProb) {
        if (!areVerticesContiguous(graph))
            throw new IllegalArgumentException(
                "Graph vertex indices must be contiguous");

        int[] vertexAssignments = new int[graph.order()];
        int[] vertices = new int[graph.order()];
        for (int i = 0; i < vertices.length; ++i) {
            vertices[i] = i;
            vertexAssignments[i] = i;
        }

        boolean assignmentsChanged = true;
        double mutationRate = randomAssignmentProb;
        for (int iter = 0; iter < maxIterations && assignmentsChanged; ++iter) {
            assignmentsChanged = false;
           
            // Shuffle the order in which the vertices will be accessed
            PrimitiveCollections.shuffle(vertices);

            for (int i = 0; i < vertices.length; ++i) {
                int vertex = vertices[i];

                // Allow for random mutations
                if (RANDOM.nextDouble() < mutationRate) {
                    int randomClass = RANDOM.nextInt(vertices.length);
                    int oldClass = vertexAssignments[vertex];
                    if (oldClass != randomClass) {
                        vertexAssignments[vertex] = randomClass;
                        assignmentsChanged = true;
                    }                   
                }
                // Otherwise use the regular update procedure
                else {
                    // Get the neighbors of the current vertex and identify
                    // which class label is the maximum from the neighbors
                    int maxClass = (graph instanceof WeightedGraph)
                        ? getMaxClassWeighted(vertex, vertexAssignments,
                            (WeightedGraph<? extends WeightedEdge>)graph)
                        : getMaxClass(vertex, vertexAssignments, graph);
                    int oldClass = vertexAssignments[vertex];
                    if (oldClass != maxClass) {
                        vertexAssignments[vertex] = maxClass;
                        assignmentsChanged = true;
                    }
                }               
               
            }
        }

        MultiMap<Integer,Integer> toReturn =
            new HashMultiMap<Integer,Integer>();
        for (int i = 0; i < vertices.length; ++i)
            toReturn.put(vertexAssignments[i], i);
        return toReturn;
    }

    static <E extends Edge> boolean areVerticesContiguous(Graph<E> g) {
        return true;
    }

    static int getMaxClass(int v, int[] vertexAssignments, Graph g) {
        IntSet neighbors = g.getNeighbors(v);
        IntIterator iter = neighbors.iterator();
        Counter<Integer> classes = new ObjectCounter<Integer>();
        classes.count(vertexAssignments[v]);
        while (iter.hasNext()) {
            int n = iter.nextInt();
            classes.count(vertexAssignments[n]);
        }

        TIntSet ties = new TIntHashSet();
        int max = 0;
        for (Map.Entry<Integer,Integer> e : classes) {
            int clazz = e.getKey();
            int count = e.getValue();
            if (count > max) {
                ties.clear();
                max = count;
            }
            if (count == max)
                ties.add(clazz);
        }

        int[] options = ties.toArray(new int[ties.size()]);
        return (options.length == 1)
            ? options[0]
            : options[RANDOM.nextInt(options.length)];
    }

    static <E extends WeightedEdge> int
                      getMaxClassWeighted(int v, int[] vertexAssignments,
                                          WeightedGraph<E> g) {
        Set<E> edges = g.getAdjacencyList(v);
        TIntDoubleMap classSums = new TIntDoubleHashMap();
        for (WeightedEdge e : edges) {
            int n = (e.to() == v) ? e.from() : e.to();
            int nClass = vertexAssignments[n];
            double weight = e.weight();
            if (classSums.containsKey(nClass)) {
                double curWeight = classSums.get(nClass);
                classSums.put(nClass, weight + curWeight);
            }
            else {
                classSums.put(nClass, weight);
            }
        }

        double maxSum = -1d;
        TIntSet ties = new TIntHashSet();
        TIntDoubleIterator iter = classSums.iterator();
        while (iter.hasNext()) {
            iter.advance();
            double weight = iter.value();
            if (weight > maxSum) {
                maxSum = weight;
                ties.clear();
            }
            if (weight == maxSum)
                ties.add(iter.key());
           
        }
       
        // If there wasn't a tie after all
        int[] options = ties.toArray();
        return (options.length == 1)
            ? options[0]
            : options[RANDOM.nextInt(options.length)];
    }
}
// TOP
//
// Related Classes of edu.ucla.sspace.graph.ChineseWhispersClustering
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.