Package cc.mallet.cluster.util

Source Code of cc.mallet.cluster.util.ClusterUtils

package cc.mallet.cluster.util;

import cc.mallet.cluster.Clustering;
import cc.mallet.pipe.Noop;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.Randoms;

/**
* Utility functions for Clusterings.
*
* @author "Aron Culotta" <culotta@degas.cs.umass.edu>
* @version 1.0
* @since 1.0
* @see Clustering
*/
public class ClusterUtils {
 
  /**
   * @param li
   * @param lj
   * @return A new {@link InstanceList} where <code>lj</code> is appended to <code>li</code>.
   */
  public static InstanceList combineLists (InstanceList li,
                                           InstanceList lj) {
    InstanceList newList = new InstanceList(li.getPipe());
    for (int i = 0; i < li.size(); i++)
      newList.add(li.get(i));
    for (int i = 0; i < lj.size(); i++)
      newList.add(lj.get(i));
    return newList;
  }

  /**
   * Relabels the clustering to reflect merging clusters i and
   * j. Relabels all of Instances with label j to label i.
   * @param clustering
   * @param i
   * @param j
   * @return Modified Clustering.
   */
  public static Clustering mergeClusters (Clustering clustering,
                                          int labeli, int labelj) {
    if (labeli == labelj)
      return clustering;
   
    // Set all labelj labels to labeli.
    InstanceList instances = clustering.getInstances();   
    for (int i = 0; i < instances.size(); i++) {
      int idx = clustering.getLabel(i);
      if (idx == labelj)
        clustering.setLabel(i, labeli);
    }
    clustering.setNumLabels(clustering.getNumClusters() - 1);

    // Decrement cluster indices that are greater than the number of clusters.
    for (int i = 0; i < instances.size(); i++) {
      int idx = clustering.getLabel(i);
      if (idx > labelj)
        clustering.setLabel(i, idx - 1);
    }
   
    return clustering;
  }
 
  /**
   * Merge clusters containing the specified instances.
   * @param clustering
   * @param instances
   * @return Modified Clustering.
   */
  public static Clustering mergeInstances (Clustering clustering,
                                           int[] instances) {
    for (int i = 0; i < instances.length; i++) {
      for (int j = i + 1; j < instances.length; j++) {
        int labeli = clustering.getLabel(instances[i]);
        int labelj = clustering.getLabel(instances[j]);
        clustering = mergeClusters(clustering, labeli, labelj);
      }
    }   
    return clustering;
  }

  public static int[] getCombinedInstances (Clustering clustering, int i, int j) {
    int[] ci = clustering.getIndicesWithLabel(i);
    int[] cj = clustering.getIndicesWithLabel(j);
    int[] merged = new int[ci.length + cj.length];
    System.arraycopy(ci, 0, merged, 0, ci.length);
    System.arraycopy(cj, 0, merged, ci.length, cj.length);
    return merged;
  }
 
  public static Clustering mergeInstances (Clustering clustering,
                                           int i, int j) {
    return mergeInstances(clustering, new int[]{i, j});
  }

  /**
   * Initializes Clustering to one Instance per cluster.
   * @param instances
   * @return Singleton Clustering.
   */
  public static Clustering createSingletonClustering (InstanceList instances) {
    int[] labels = new int[instances.size()];
    for (int i = 0; i < labels.length; i++)
      labels[i] = i;
     return new Clustering(instances,
                          labels.length,
                          labels);
  }

  public static Clustering createRandomClustering (InstanceList instances,
                                                   Randoms random) {
    Clustering clustering = createSingletonClustering(instances);
    int numMerges = 2 + random.nextInt(instances.size() - 2);
    for (int i = 0; i < numMerges; i++)
      clustering = mergeInstances(clustering,
                                  random.nextInt(instances.size()),
                                  random.nextInt(instances.size()));
    return clustering;   
  }

  /**
   *
   * @param clustering
   * @param indices
   * @return A Clustering where no Instances in <code>indices</code>
   * are in the same cluster.
   */
  public static Clustering shatterInstances (Clustering clustering, int[] indices) {
    for (int i = 0; i < indices.length - 1; i++) {
      clustering.setLabel(indices[i], clustering.getNumClusters());
      clustering.setNumLabels(clustering.getNumClusters() + 1);     
    }
    return clustering;
  }
 
  /**
   *
   * @param i
   * @param j
   * @return A new {@link InstanceList} containing the two argument {@link Instance}s.
   */
  public static InstanceList makeList (Instance i, Instance j) {
    InstanceList list = new InstanceList(new Noop(i.getDataAlphabet(), i.getTargetAlphabet()));
    list.add(i);
    list.add(j);
    return list;
  }

  /**
   * @param clustering
   * @return A shallow copy of the argument where new objects are only
   * allocated for the cluster assignment.
   */
  public static Clustering copyWithNewLabels (Clustering clustering) {
    int[] oldLabels = clustering.getLabels();
    int[] newLabels = new int[oldLabels.length];
    System.arraycopy(oldLabels, 0, newLabels, 0, oldLabels.length);
    return new Clustering(clustering.getInstances(),
                          clustering.getNumClusters(),
                          newLabels);
  }
 
  public static Clustering mergeInstancesWithSameLabel (Clustering clustering) {
    InstanceList list = clustering.getInstances();
    for (int i = 0; i < list.size(); i++) {
      Instance ii = list.get(i);
      int li = clustering.getLabel(i);
      for (int j = i + 1; j < list.size(); j++) {
        Instance ij = list.get(j);
        int lj = clustering.getLabel(j);
        if (li != lj && ii.getLabeling().equals(ij.getLabeling()))
          clustering = ClusterUtils.mergeClusters(clustering, li, lj);
      }
    } 
    return clustering;
  }


  /**
   *
   * @param clustering
   * @param i
   * @param j
   * @return A new copy of <code>clustering</code> in which clusters
   * with labels <code>i</code> and <code>j</code> have been merged.
   */
  public static Clustering copyAndMergeClusters (Clustering clustering, int i, int j) {
     return mergeClusters(copyWithNewLabels(clustering), i, j);
  }

  /**
   *
   * @param clustering
   * @param i
   * @param j
   * @return A new copy of <code>clustering</code> in which {@link
   * Instance}s <code>i</code> and <code>j</code> have been put in the
   * same cluster.
   */
  public static Clustering copyAndMergeInstances (Clustering clustering, int i, int j) {
     return copyAndMergeInstances(clustering, new int[]{i, j});
  }

  /**
   *
   * @param clustering
   * @param instances
   * @return A new copy of <code>clustering</code> in which the
   * clusters containing the specified {@link Instance}s have been
   * merged together into one cluster.
   */
  public static Clustering copyAndMergeInstances (Clustering clustering, int[] instances) {
     return mergeInstances(copyWithNewLabels(clustering), instances);   
  }

}
TOP

Related Classes of cc.mallet.cluster.util.ClusterUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.