Package syntaxLearner

Source Code of syntaxLearner.Cluster

package syntaxLearner;

import java.util.Set;
import java.util.TreeSet;

import syntaxLearner.corpus.Vocabulary;
import syntaxLearner.corpus.Word;

public class Cluster {

  public final short ID;
  public int totalSize;  //size of this portion of the corpus
  public int wordCount;  //individual word
  public Set<Integer> words;
  //private Map<Context,Double> distribution;
  private double[][] distribution;
  private final Vocabulary vocab;
  private final Learner learner;
  private boolean isNew = false;

  public Cluster(Vocabulary v, Learner l){
    this.vocab=v;
    this.learner=l;
    this.ID = learner.newClusterID();
    words = new TreeSet<Integer>();
    reset();
  }

  public Cluster (Vocabulary v, Learner l, boolean isGround){
    this.vocab=v;
    this.learner=l;
    this.ID = isGround? -1 : learner.newClusterID();
    words = new TreeSet<Integer>();
    reset();
  }



  /**
   * Calculates the distribution of an entire Cluster.
   * Similar to calculating a centroid with k-means.
   * @return a "Map" vector
   */
  public double[][] clusterDistribution(){
    if (learner.isClusterUpdated(this)) {
      return distribution;
    } else {
      /* Initialize */
      distribution = new double[learner.NUMBER_OF_CLUSTERS+1][learner.NUMBER_OF_CLUSTERS+1];
      Set<ClusterContext> clusterContexts = learner.getClusterContexts();

      /* Sum every context in every word */

      for (int i : words){
        Word w = vocab.getWord(i);
        double weight =  (1.0*w.frequency)/(totalSize*wordCount);
        for (ClusterContext cc : clusterContexts){
          distribution[cc.type1 + 1][cc.type2 + 1]+= (w.clusterDistribution(cc)*weight);
        }
        learner.registerClusterUpdate(this);
      }
      return distribution;
    }
  }

  /* Sets parenthood */
  public void add(Cluster c){
    //Actually asserting that intersection (this, c) is empty.
    for (int i: c.words){
      vocab.getWord(i).setParent(this);
      learner.setParent(i,this.ID);
    }
    words.addAll(c.words);
    totalSize+=c.totalSize;
    wordCount+=c.wordCount;
  }

  /* Adds, counts the values and sets parenthood */
  public void add(int i){
    words.add(i);
    wordCount++;
    Word w = vocab.getWord(i);
    totalSize+=w.frequency;
    w.setParent(this);
  }

  public void remove(int i){
    if (words.contains(i)){
      words.remove(i);
      wordCount--;
      totalSize-=vocab.getWord(i).frequency;
      assert (totalSize>=0 && wordCount>=0);
      }
  }

  /**
   * Wipes all data apart from the parent vocabulary.
   * Done this way instead of destructing the object
   * altogether in order to keep a constant number of
   * clusters in the count.
   */
  public void reset(){
    words.clear();
    wordCount = 0;
    totalSize=0
  }

  public short getID(){
    return ID;
  }
 
  public boolean isNew(){
    return isNew;
  }
 
  public void setNew(boolean b){
    isNew=b;
  }
}
TOP

Related Classes of syntaxLearner.Cluster

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.