Package joshua.corpus

Source Code of joshua.corpus.AbstractPhrase

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus;

import joshua.corpus.vocab.SymbolTable;


/**
* This class provides a skeletal implementation of the base methods
* likely to be common to most or all implementations of the
* <code>Phrase</code> interface.
*
* @author Lane Schwartz
* @author Chris Callison-Burch
*/
public abstract class AbstractPhrase implements Phrase {

  //===============================================================
  // Constants
  //===============================================================

  /** seed used in hash code generation */
  public static final int HASH_SEED = 17;

  /** offset used in has code generation */
  public static final int HASH_OFFSET = 37;
 
  /**
   * Splits a sentence (on white space), then looks up the
   * integer representations of each word using the supplied
   * symbol table.
   *
   * @param sentence   White-space separated String of words.
   * @param vocabulary Symbol table for mapping tokens to
   *                   integers.
   * @return Array of integers corresponding to the words in
   *         the sentence.
   */
  protected int[] splitSentence(String sentence, SymbolTable vocabulary) {
    String[] w      = sentence.split("\\s+");
    int[] words      = new int[w.length];
    for (int i = 0; i < w.length; i++)
      words[i] = vocabulary.addTerminal(w[i]);
    return words;
  }
 
  /**
   * Uses the standard java approach of calculating hashCode.
   * Start with a seed, add in every value multiplying the
   * exsiting hash times an offset.
   *
   * @return int hashCode for the list
   */
  public int hashCode() {
    int result = HASH_SEED;
    for (int i=0; i < size(); i++) {
      result = HASH_OFFSET*result + getWordID(i);
    }
    return result;
  }
 

  /**
   * Two phrases are their word IDs are the same. Note that
   * this could give a false positive if their Vocabularies
   * were different but their IDs were somehow the same.
   */
  public boolean equals(Object o) {
   
    if (o instanceof Phrase) {
      Phrase other = (Phrase) o;
     
      if(this.size() != other.size()) return false;
      for (int i=0; i < size(); i++) {
        if(this.getWordID(i) != other.getWordID(i)) return false;
      }
      return true;
    } else {
      return false;
    }
   
  }
 
 
  /**
   * Compares the two strings based on the lexicographic order
   * of words defined in the Vocabulary.
   *
   * @param other the object to compare to
   * @return -1 if this object is less than the parameter, 0
   *         if equals, 1 if greater
   * @exception ClassCastException if the passed object is
   *                               not of type Phrase
   */
  public int compareTo(Phrase other) {
    int length = size();
    int otherLength = other.size();
    for (int i = 0; i < length; i++) {
      if (i < otherLength) {
        int difference = getWordID(i) - other.getWordID(i);
        if (difference != 0) return difference;
      } else {
        //same but other is shorter, so we are after
        return 1;
      }
    }
    if (length < otherLength) {
      return -1;
    } else {
      return 0;
    }
  }
 
 
 
  /**
   * Returns a string representation of the phrase.
   *
   * @return a space-delimited string of the words in the
   *         phrase.
   */
  public String toString() {
    SymbolTable vocab = getVocab();
    StringBuffer buf = new StringBuffer();
        for (int i=0; i<size(); i++) {
      String word = vocab.getWord(getWordID(i));
      if (i != 0) buf.append(' ');
            buf.append(word);
        }
        return buf.toString();
  }
 
}
TOP

Related Classes of joshua.corpus.AbstractPhrase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.