package cc.mallet.cluster.util;
import cc.mallet.cluster.Clustering;
import cc.mallet.pipe.Noop;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.Randoms;
/**
* Utility functions for Clusterings.
*
* @author "Aron Culotta" <culotta@degas.cs.umass.edu>
* @version 1.0
* @since 1.0
* @see Clustering
*/
public class ClusterUtils {
/**
 * Concatenates two instance lists into a freshly allocated list.
 * The new list is constructed with the pipe of <code>li</code>; the
 * {@link Instance} objects themselves are shared, not copied.
 *
 * @param li list whose instances come first
 * @param lj list whose instances are appended after those of <code>li</code>
 * @return A new {@link InstanceList} where <code>lj</code> is appended to <code>li</code>.
 */
public static InstanceList combineLists (InstanceList li,
                                         InstanceList lj) {
    InstanceList combined = new InstanceList(li.getPipe());
    // Walk the two sources in order so li's instances precede lj's.
    for (InstanceList source : new InstanceList[] {li, lj}) {
        for (int k = 0; k < source.size(); k++) {
            combined.add(source.get(k));
        }
    }
    return combined;
}
/**
 * Relabels the clustering in place to reflect merging clusters
 * <code>labeli</code> and <code>labelj</code>: every instance labeled
 * <code>labelj</code> is relabeled <code>labeli</code>, the number of
 * labels is reduced by one, and every label greater than
 * <code>labelj</code> is shifted down by one to close the gap.
 * When both labels are equal the clustering is returned untouched.
 *
 * @param clustering clustering to modify
 * @param labeli label of the cluster that absorbs the other
 * @param labelj label of the cluster that is absorbed
 * @return Modified Clustering (the same object that was passed in).
 */
public static Clustering mergeClusters (Clustering clustering,
                                        int labeli, int labelj) {
    if (labeli != labelj) {
        final int numInstances = clustering.getInstances().size();

        // Pass 1: move every member of cluster labelj into cluster labeli.
        for (int n = 0; n < numInstances; n++) {
            if (clustering.getLabel(n) == labelj) {
                clustering.setLabel(n, labeli);
            }
        }

        clustering.setNumLabels(clustering.getNumClusters() - 1);

        // Pass 2: label labelj is now unused, so shift every larger
        // label down by one (this includes labeli when labeli > labelj).
        for (int n = 0; n < numInstances; n++) {
            final int current = clustering.getLabel(n);
            if (current > labelj) {
                clustering.setLabel(n, current - 1);
            }
        }
    }
    return clustering;
}
/**
 * Merge clusters containing the specified instances, so that all of
 * them end up in a single cluster. The clustering is modified in place.
 *
 * @param clustering clustering to modify
 * @param instances indices of the instances whose clusters are merged;
 * arrays with fewer than two entries leave the clustering unchanged
 * @return Modified Clustering.
 */
public static Clustering mergeInstances (Clustering clustering,
                                         int[] instances) {
    // Folding each later instance into the cluster of instances[0] is
    // sufficient: once that pass is done every listed instance shares one
    // cluster, so comparing the remaining pairs would only produce no-op
    // merges.
    for (int k = 1; k < instances.length; k++) {
        // Re-read both labels every time because mergeClusters renumbers
        // labels after each successful merge.
        int anchorLabel = clustering.getLabel(instances[0]);
        int otherLabel = clustering.getLabel(instances[k]);
        clustering = mergeClusters(clustering, anchorLabel, otherLabel);
    }
    return clustering;
}
/**
 * Collects the instance indices of two clusters into one array.
 *
 * @param clustering clustering to query
 * @param i label of the first cluster
 * @param j label of the second cluster
 * @return A new array holding the indices reported for label
 * <code>i</code> followed by the indices reported for label <code>j</code>.
 */
public static int[] getCombinedInstances (Clustering clustering, int i, int j) {
    final int[] first = clustering.getIndicesWithLabel(i);
    final int[] second = clustering.getIndicesWithLabel(j);
    // Grow a copy of the first array, then fill the tail with the second.
    final int[] combined = java.util.Arrays.copyOf(first, first.length + second.length);
    System.arraycopy(second, 0, combined, first.length, second.length);
    return combined;
}
/**
 * Merge the clusters containing two instances. Convenience overload
 * that delegates to {@link #mergeInstances(Clustering, int[])}.
 *
 * @param clustering clustering to modify
 * @param i index of the first instance
 * @param j index of the second instance
 * @return Modified Clustering.
 */
public static Clustering mergeInstances (Clustering clustering,
                                         int i, int j) {
    final int[] pair = {i, j};
    return mergeInstances(clustering, pair);
}
/**
 * Builds a Clustering in which every Instance sits in its own cluster:
 * instance <code>k</code> receives label <code>k</code>, and the number
 * of clusters equals the number of instances.
 *
 * @param instances instances to cluster
 * @return Singleton Clustering.
 */
public static Clustering createSingletonClustering (InstanceList instances) {
    final int count = instances.size();
    final int[] labels = new int[count];
    for (int k = count - 1; k >= 0; k--) {
        labels[k] = k;
    }
    return new Clustering(instances, count, labels);
}
/**
 * Builds a random Clustering by starting from one cluster per Instance
 * and then merging the clusters of randomly chosen instance pairs.
 * For lists with more than two instances the number of merge attempts
 * is drawn from <code>[2, instances.size() - 1]</code>; smaller
 * non-empty lists get exactly two attempts. An empty list yields the
 * (empty) singleton clustering without consulting <code>random</code>.
 * A merge attempt that picks two instances already in the same cluster
 * leaves the clustering unchanged.
 *
 * @param instances instances to cluster
 * @param random source of randomness
 * @return Randomly merged Clustering.
 */
public static Clustering createRandomClustering (InstanceList instances,
                                                 Randoms random) {
    Clustering clustering = createSingletonClustering(instances);
    final int size = instances.size();
    if (size == 0) {
        // Nothing to merge, and nextInt(0) is not a valid request.
        return clustering;
    }
    // java.util.Random#nextInt(int) rejects non-positive bounds; Randoms is
    // assumed to follow that contract, so only ask for extra merges when
    // size - 2 is a usable bound.
    final int extraMerges = size > 2 ? random.nextInt(size - 2) : 0;
    final int numMerges = 2 + extraMerges;
    for (int i = 0; i < numMerges; i++) {
        clustering = mergeInstances(clustering,
                                    random.nextInt(size),
                                    random.nextInt(size));
    }
    return clustering;
}
/**
 * Separates the given instances from each other. Every listed instance
 * except the last one is moved into a newly numbered cluster of its own;
 * the last one keeps its current label. The cluster count grows by one
 * per relocated instance, and clusters emptied by a move are not removed
 * or renumbered here. The clustering is modified in place.
 *
 * @param clustering clustering to modify
 * @param indices indices of the instances to pull apart
 * @return A Clustering where no Instances in <code>indices</code>
 * are in the same cluster.
 */
public static Clustering shatterInstances (Clustering clustering, int[] indices) {
    int position = 0;
    // Stop one short of the end: the final instance stays where it is.
    while (position < indices.length - 1) {
        // The current cluster count doubles as the next unused label.
        clustering.setLabel(indices[position], clustering.getNumClusters());
        clustering.setNumLabels(clustering.getNumClusters() + 1);
        position++;
    }
    return clustering;
}
/**
 * Wraps two instances in a new list. The list is backed by a
 * {@link Noop} pipe built from the data and target alphabets of
 * <code>i</code>.
 *
 * @param i first instance; also supplies the alphabets for the pipe
 * @param j second instance
 * @return A new {@link InstanceList} containing the two argument {@link Instance}s.
 */
public static InstanceList makeList (Instance i, Instance j) {
    final Noop pipe = new Noop(i.getDataAlphabet(), i.getTargetAlphabet());
    final InstanceList pair = new InstanceList(pipe);
    pair.add(i);
    pair.add(j);
    return pair;
}
/**
 * Creates a Clustering that shares the instance list of the argument
 * but owns a separate copy of the label array, so relabeling the copy
 * does not write into the original's label array.
 *
 * @param clustering clustering to copy
 * @return A shallow copy of the argument where new objects are only
 * allocated for the cluster assignment.
 */
public static Clustering copyWithNewLabels (Clustering clustering) {
    final int[] labelCopy = clustering.getLabels().clone();
    return new Clustering(clustering.getInstances(),
                          clustering.getNumClusters(),
                          labelCopy);
}
/**
 * Merges clusters so that instances whose labelings are equal (as
 * judged by <code>getLabeling().equals(...)</code>) end up in the same
 * cluster. The clustering is modified in place.
 *
 * @param clustering clustering to modify
 * @return Modified Clustering.
 */
public static Clustering mergeInstancesWithSameLabel (Clustering clustering) {
    InstanceList list = clustering.getInstances();
    for (int i = 0; i < list.size(); i++) {
        Instance ii = list.get(i);
        for (int j = i + 1; j < list.size(); j++) {
            Instance ij = list.get(j);
            // Both cluster labels must be re-read on every iteration:
            // mergeClusters shifts labels greater than the absorbed one down
            // by one, so a value cached before an earlier merge may no longer
            // identify the cluster that instance i belongs to.
            int li = clustering.getLabel(i);
            int lj = clustering.getLabel(j);
            if (li != lj && ii.getLabeling().equals(ij.getLabeling())) {
                clustering = ClusterUtils.mergeClusters(clustering, li, lj);
            }
        }
    }
    return clustering;
}
/**
 * Non-destructive variant of
 * {@link #mergeClusters(Clustering, int, int)}: the merge is applied
 * to a copy obtained from {@link #copyWithNewLabels(Clustering)}.
 *
 * @param clustering clustering to copy
 * @param i label of the cluster that absorbs the other
 * @param j label of the cluster that is absorbed
 * @return A new copy of <code>clustering</code> in which clusters
 * with labels <code>i</code> and <code>j</code> have been merged.
 */
public static Clustering copyAndMergeClusters (Clustering clustering, int i, int j) {
    final Clustering duplicate = copyWithNewLabels(clustering);
    return mergeClusters(duplicate, i, j);
}
/**
 * Two-instance convenience overload of
 * {@link #copyAndMergeInstances(Clustering, int[])}.
 *
 * @param clustering clustering to copy
 * @param i index of the first instance
 * @param j index of the second instance
 * @return A new copy of <code>clustering</code> in which {@link
 * Instance}s <code>i</code> and <code>j</code> have been put in the
 * same cluster.
 */
public static Clustering copyAndMergeInstances (Clustering clustering, int i, int j) {
    final int[] pair = {i, j};
    return copyAndMergeInstances(clustering, pair);
}
/**
 * Non-destructive variant of
 * {@link #mergeInstances(Clustering, int[])}: the merge is applied to
 * a copy obtained from {@link #copyWithNewLabels(Clustering)}.
 *
 * @param clustering clustering to copy
 * @param instances indices of the instances whose clusters are merged
 * @return A new copy of <code>clustering</code> in which the
 * clusters containing the specified {@link Instance}s have been
 * merged together into one cluster.
 */
public static Clustering copyAndMergeInstances (Clustering clustering, int[] instances) {
    final Clustering duplicate = copyWithNewLabels(clustering);
    return mergeInstances(duplicate, instances);
}
}