/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
/*
* This file is based on the edu.umd.clip.mt.Phrase class from the
* University of Maryland's umd-hadoop-mt-0.01 project. That project
* is released under the terms of the Apache License 2.0, but with
* special permission for the Joshua Machine Translation System to
* release modifications under the LGPL version 2.1. LGPL version
* 3 requires no special permission since it is compatible with
* Apache License 2.0
*/
package joshua.corpus;
import java.util.ArrayList;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;
/**
* The simplest concrete implementation of Phrase.
*
* @author wren ng thornton <wren@users.sourceforge.net>
* @version $LastChangedDate: 2009-09-04 15:09:50 -0500 (Fri, 04 Sep 2009) $
*/
public class BasicPhrase extends AbstractPhrase {
private byte language;
private SymbolTable vocabulary;
private int[] words;
public BasicPhrase(byte language, String sentence) {
this.language = language;
this.vocabulary = new Vocabulary();
this.words = splitSentence(sentence, vocabulary);
}
/** Note that the Vocabulary is shared, not cloned. */
public BasicPhrase(byte language, String sentence, Vocabulary vocabulary) {
this.language = language;
this.vocabulary = vocabulary;
this.words = splitSentence(sentence, vocabulary);
}
private BasicPhrase() {}
public int[] getWordIDs() {
return words;
}
/* See Javadoc for Phrase interface. */
public BasicPhrase subPhrase(int start, int end) {
BasicPhrase that = new BasicPhrase();
that.language = this.language;
that.vocabulary = this.vocabulary;
that.words = new int[end-start+1];
System.arraycopy(this.words, start, that.words, 0, end-start+1);
return that;
}
/* See Javadoc for Phrase interface. */
public ArrayList<Phrase> getSubPhrases() {
return this.getSubPhrases(this.size());
}
/* See Javadoc for Phrase interface. */
public ArrayList<Phrase> getSubPhrases(int maxLength) {
ArrayList<Phrase> phrases = new ArrayList<Phrase>();
int len = this.size();
for (int n = 1; n <= maxLength; n++)
for (int i = 0; i <= len-n; i++)
phrases.add(this.subPhrase(i, i + n - 1));
return phrases;
}
/* See Javadoc for Phrase interface. */
public int size() { return (words == null ? 0 : words.length); }
/* See Javadoc for Phrase interface. */
public int getWordID(int position) { return words[position]; }
/* See Javadoc for Phrase interface. */
public SymbolTable getVocab() { return vocabulary; }
/**
* Returns a human-readable String representation of the
* phrase.
* <p>
* The implementation of this method is slightly more
* efficient than that inherited from <code>AbstractPhrase</code>.
*
* @return a human-readable String representation of the
* phrase.
*/
public String toString() {
StringBuffer sb = new StringBuffer();
if (words != null) {
for (int i = 0; i < words.length; ++i) {
if (i != 0) sb.append(' ');
sb.append(vocabulary.getWord(words[i]));
}
}
return sb.toString();
}
}