Package joshua.corpus

Source Code of joshua.corpus.BasicPhrase

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
/*
* This file is based on the edu.umd.clip.mt.Phrase class from the
* University of Maryland's umd-hadoop-mt-0.01 project. That project
* is released under the terms of the Apache License 2.0, but with
* special permission for the Joshua Machine Translation System to
* release modifications under the LGPL version 2.1. LGPL version
* 3 requires no special permission since it is compatible with
* Apache License 2.0
*/
package joshua.corpus;

import java.util.ArrayList;

import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;



/**
* The simplest concrete implementation of Phrase.
*
* @author wren ng thornton <wren@users.sourceforge.net>
* @version $LastChangedDate: 2009-09-04 15:09:50 -0500 (Fri, 04 Sep 2009) $
*/
public class BasicPhrase extends AbstractPhrase {
  private byte       language;
  private SymbolTable vocabulary;
  private int[]      words;
 
 
  public BasicPhrase(byte language, String sentence) {
    this.language   = language;
    this.vocabulary = new Vocabulary();
    this.words = splitSentence(sentence, vocabulary);
  }
 
  /** Note that the Vocabulary is shared, not cloned. */
  public BasicPhrase(byte language, String sentence, Vocabulary vocabulary) {
    this.language   = language;
    this.vocabulary = vocabulary;
    this.words = splitSentence(sentence, vocabulary);
  }
 
 
  private BasicPhrase() {}
 
  public int[] getWordIDs()  {
    return words;
  }
 
  /* See Javadoc for Phrase interface. */
  public BasicPhrase subPhrase(int start, int end) {
    BasicPhrase that = new BasicPhrase();
    that.language    = this.language;
    that.vocabulary  = this.vocabulary;
    that.words       = new int[end-start+1];
    System.arraycopy(this.words, start, that.words, 0, end-start+1);
    return that;
  }
 
  /* See Javadoc for Phrase interface. */
  public ArrayList<Phrase> getSubPhrases() {
    return this.getSubPhrases(this.size());
  }
 
  /* See Javadoc for Phrase interface. */
  public ArrayList<Phrase> getSubPhrases(int maxLength) {
    ArrayList<Phrase> phrases = new ArrayList<Phrase>();
    int len = this.size();
    for (int n = 1; n <= maxLength; n++)
      for (int i = 0; i <= len-n; i++)
        phrases.add(this.subPhrase(i, i + n - 1));
    return phrases;
  }
 
  /* See Javadoc for Phrase interface. */
  public int size() { return (words == null ? 0 : words.length); }
 
  /* See Javadoc for Phrase interface. */
  public int getWordID(int position) { return words[position]; }
 
  /* See Javadoc for Phrase interface. */
  public SymbolTable getVocab()       { return vocabulary; }
 
  /**
   * Returns a human-readable String representation of the
   * phrase.
   * <p>
   * The implementation of this method is slightly more
   * efficient than that inherited from <code>AbstractPhrase</code>.
   *
   * @return a human-readable String representation of the
   *         phrase.
   */
  public String toString() {
    StringBuffer sb = new StringBuffer();
    if (words != null) {
      for (int i = 0; i < words.length; ++i) {
        if (i != 0) sb.append(' ');
        sb.append(vocabulary.getWord(words[i]));
      }
    }
    return sb.toString();
  }
}
TOP

Related Classes of joshua.corpus.BasicPhrase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.