Source Code of joshua.corpus.AbstractPhrase

/* This file is part of the Joshua Machine Translation System.
 * 
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.corpus;


import joshua.corpus.vocab.SymbolTable;




/**
 * This class provides a skeletal implementation of the base methods
 * likely to be common to most or all implementations of the
 * <code>Phrase</code> interface.
 * 
 * @author Lane Schwartz
 * @author Chris Callison-Burch
 */
public abstract class AbstractPhrase implements Phrase {


  //===============================================================
  // Constants
  //===============================================================


  /** seed used in hash code generation */
  public static final int HASH_SEED = 17;


  /** offset used in has code generation */
  public static final int HASH_OFFSET = 37;
  
  /**
   * Splits a sentence (on white space), then looks up the
   * integer representations of each word using the supplied
   * symbol table.
   * 
   * @param sentence   White-space separated String of words.
   * @param vocabulary Symbol table for mapping tokens to
   *                   integers.
   * @return Array of integers corresponding to the words in
   *         the sentence.
   */
  protected int[] splitSentence(String sentence, SymbolTable vocabulary) {
    String[] w      = sentence.split("\\s+");
    int[] words      = new int[w.length];
    for (int i = 0; i < w.length; i++)
      words[i] = vocabulary.addTerminal(w[i]);
    return words;
  }
  
  /**
   * Uses the standard java approach of calculating hashCode.
   * Start with a seed, add in every value multiplying the
   * exsiting hash times an offset.
   *
   * @return int hashCode for the list
   */
  public int hashCode() {
    int result = HASH_SEED;
    for (int i=0; i < size(); i++) {
      result = HASH_OFFSET*result + getWordID(i);
    }
    return result;
  }
  


  /**
   * Two phrases are their word IDs are the same. Note that
   * this could give a false positive if their Vocabularies
   * were different but their IDs were somehow the same.
   */
  public boolean equals(Object o) {
    
    if (o instanceof Phrase) {
      Phrase other = (Phrase) o;
      
      if(this.size() != other.size()) return false;
      for (int i=0; i < size(); i++) {
        if(this.getWordID(i) != other.getWordID(i)) return false;
      }
      return true;
    } else {
      return false;
    }
    
  }
  
  
  /**
   * Compares the two strings based on the lexicographic order
   * of words defined in the Vocabulary.
   *
   * @param other the object to compare to
   * @return -1 if this object is less than the parameter, 0
   *         if equals, 1 if greater
   * @exception ClassCastException if the passed object is
   *                               not of type Phrase
   */
  public int compareTo(Phrase other) {
    int length = size();
    int otherLength = other.size();
    for (int i = 0; i < length; i++) {
      if (i < otherLength) {
        int difference = getWordID(i) - other.getWordID(i);
        if (difference != 0) return difference;
      } else {
        //same but other is shorter, so we are after
        return 1;
      }
    }
    if (length < otherLength) {
      return -1;
    } else {
      return 0;
    }
  }
  
  
  
  /**
   * Returns a string representation of the phrase.
   *
   * @return a space-delimited string of the words in the
   *         phrase.
   */
  public String toString() {
    SymbolTable vocab = getVocab();
    StringBuffer buf = new StringBuffer();
        for (int i=0; i<size(); i++) {
      String word = vocab.getWord(getWordID(i));
      if (i != 0) buf.append(' ');
            buf.append(word);
        }
        return buf.toString();
  }
  
}
Source Code of joshua.corpus.AbstractPhrase

Related Classes of joshua.corpus.AbstractPhrase