/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.corpus.Corpus;
import joshua.corpus.alignment.Alignments;
import joshua.corpus.alignment.mm.MemoryMappedAlignmentGrids;
import joshua.corpus.mm.MemoryMappedCorpusArray;
import joshua.corpus.suffix_array.ParallelCorpusGrammarFactory;
import joshua.corpus.suffix_array.Suffixes;
import joshua.corpus.suffix_array.mm.MemoryMappedSuffixArray;
import joshua.corpus.vocab.BuildinSymbol;
import joshua.corpus.vocab.SrilmSymbol;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;
import joshua.decoder.ff.ArityPhrasePenaltyFF;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.PhraseModelFF;
import joshua.decoder.ff.SourcePathFF;
import joshua.decoder.ff.WordPenaltyFF;
import joshua.decoder.ff.lm.LanguageModelFF;
import joshua.decoder.ff.lm.NGramLanguageModel;
import joshua.decoder.ff.lm.bloomfilter_lm.BloomFilterLanguageModel;
import joshua.decoder.ff.lm.buildin_lm.LMGrammarJAVA;
import joshua.decoder.ff.lm.buildin_lm.TrieLM;
import joshua.decoder.ff.lm.distributed_lm.LMGrammarRemote;
import joshua.decoder.ff.lm.srilm.LMGrammarSRILM;
import joshua.decoder.ff.state_maintenance.NgramStateComputer;
import joshua.decoder.ff.state_maintenance.StateComputer;
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.GrammarFactory;
import joshua.decoder.ff.tm.hiero.MemoryBasedBatchGrammar;
import joshua.discriminative.DiscriminativeSupport;
import joshua.discriminative.feature_related.feature_function.BLEUOracleModel;
import joshua.discriminative.feature_related.feature_function.FeatureTemplateBasedFF;
import joshua.ui.hypergraph_visualizer.HyperGraphViewer;
import joshua.util.FileUtility;
import joshua.util.Regex;
import joshua.util.io.BinaryIn;
import joshua.util.io.LineReader;
/**
* Implements decoder initialization,
* including interaction with <code>JoshuaConfiguration</code>
* and <code>DecoderThread</code>.
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @author wren ng thornton <wren@users.sourceforge.net>
* @author Lane Schwartz <dowobeha@users.sourceforge.net>
* @version $LastChangedDate: 2010-02-03 09:20:31 -0600 (Wed, 03 Feb 2010) $
*/
public class JoshuaDecoder {
/*
* Many of these objects themselves are global objects. We
* pass them in when constructing other objects, so that
* they all share pointers to the same object. This is good
* because it reduces overhead, but it can be problematic
* because of unseen dependencies (for example, in the
* SymbolTable shared by language model, translation grammar,
* etc).
*/
/**
* The DecoderFactory is the main thread of decoding.
* It is created at the end of {@link #initialize(String)} and stays
* <code>null</code> until then.
*/
private DecoderFactory decoderFactory;
/**
* Grammar factories registered during initialization: the glue
* grammar plus either the main translation grammar or the
* suffix-array parallel corpus.
*/
private List<GrammarFactory> grammarFactories;
/** Feature functions built from the model lines of the configuration file. */
private ArrayList<FeatureFunction> featureFunctions;
/**
* Language model set by <code>initializeLanguageModel()</code>;
* that method is only called when
* <code>JoshuaConfiguration.have_lm_model</code> is set.
*/
private NGramLanguageModel languageModel;
/** State computers set by <code>initializeStateComputers</code> (a single n-gram state computer). */
private List<StateComputer> stateComputers;
/**
* Table filled through <code>obtainRulesIDTable</code> of the loaded
* grammars. It is created lazily, and only when
* <code>JoshuaConfiguration.useRuleIDName</code> is set.
*/
private Map<String,Integer> ruleStringToIDTable;
/**
* Shared symbol table for source language terminals, target
* language terminals, and shared nonterminals.
*/
private SymbolTable symbolTable;
/** Logger for this class. */
private static final Logger logger =
Logger.getLogger(JoshuaDecoder.class.getName());
//===============================================================
// Constructors
//===============================================================
/**
 * Creates a decoder and immediately initializes it from the given
 * configuration file.
 *
 * @param configFile Name of configuration file.
 */
public JoshuaDecoder(String configFile) {
    // Delegate to the private constructor first so that the grammar
    // factory list exists before initialization starts filling it.
    this();
    initialize(configFile);
}
/**
 * Creates a decoder whose only prepared state is an empty list of
 * grammar factories; nothing is read from a configuration file.
 * <p>
 * The constructor is private: outside code obtains such an instance
 * through {@link #getUninitalizedDecoder()}, for example in tests.
 */
private JoshuaDecoder() {
    grammarFactories = new ArrayList<GrammarFactory>();
}
/**
 * Returns a fresh decoder on which {@link #initialize(String)} has
 * not been called.
 * <p>
 * Intended for unit tests and for outside packages (e.g., MERT)
 * that rely on the decoder.
 *
 * @return a new, uninitialized decoder
 */
public static JoshuaDecoder getUninitalizedDecoder() {
    JoshuaDecoder uninitialized = new JoshuaDecoder();
    return uninitialized;
}
//===============================================================
// Public Methods
//===============================================================
/**
 * Replaces the feature weights without loading a discriminative model.
 *
 * @param weights new weight values, forwarded to
 *                {@link #changeFeatureWeightVector(double[], String)}
 */
public void changeBaselineFeatureWeights(double[] weights) {
    // A null model file means "do not load a discriminative model".
    this.changeFeatureWeightVector(weights, null);
}
/**
 * Loads a new discriminative model and leaves the feature weights
 * as they are.
 *
 * @param discrminativeModelFile model file, forwarded to
 *                {@link #changeFeatureWeightVector(double[], String)}
 */
public void changeDiscrminativeModelOnly(String discrminativeModelFile) {
    // A null weight array means "do not change the weights".
    this.changeFeatureWeightVector(null, discrminativeModelFile);
}
/**
 * Updates the feature weights and/or the discriminative model used
 * by the decoder, then re-sorts the grammars.
 * <p>
 * The weights are applied positionally to the decoder's feature
 * functions, so their order is assumed to be the same as the order
 * of the models in the decoder's configuration file.
 *
 * @param weights Feature weight values; <code>null</code> leaves
 *                the current weights unchanged
 * @param discrminativeModelFile discriminative model file to load;
 *                <code>null</code> leaves the current model unchanged
 * @throws IllegalArgumentException if <code>weights</code> is given
 *                and its length differs from the number of feature
 *                functions
 */
public void changeFeatureWeightVector(double[] weights, String discrminativeModelFile) {
    if (weights != null) {
        final int featureCount = this.featureFunctions.size();
        if (featureCount != weights.length) {
            throw new IllegalArgumentException("number of weights does not match number of feature functions");
        }
        for (int idx = 0; idx < featureCount; idx++) {
            FeatureFunction feature = this.featureFunctions.get(idx);
            double previousWeight = feature.getWeight();
            feature.setWeight(weights[idx]);
            logger.info("Feature function : " + feature.getClass().getSimpleName()
                + "; weight changed from " + previousWeight + " to " + feature.getWeight());
        }
    }

    if (discrminativeModelFile != null) {
        changeDiscrminativeModelWeights(discrminativeModelFile);
    }

    // The grammars are re-sorted with the updated feature functions.
    // FIXME: this works for Batch grammar only; not for sentence-specific grammars
    for (GrammarFactory factory : this.grammarFactories) {
        factory.getGrammarForSentence(null).sortGrammar(this.featureFunctions);
    }
}
/**
 * Reloads the discriminative model into every
 * {@link FeatureTemplateBasedFF} among the feature functions.
 * <p>
 * The model file is loaded once per matching feature function, so
 * each of them receives its own model table.
 *
 * @param discrminativeModelFile model file to load; when
 *                <code>null</code> no feature function is modified
 */
private void changeDiscrminativeModelWeights(String discrminativeModelFile) {
    final boolean haveModelFile = (discrminativeModelFile != null);
    for (FeatureFunction feature : this.featureFunctions) {
        if (!haveModelFile || !(feature instanceof FeatureTemplateBasedFF)) {
            continue;
        }
        //== set the discriminative model
        HashMap<String, Double> loadedModel = new HashMap<String, Double>();
        DiscriminativeSupport.loadModel(discrminativeModelFile, loadedModel, this.ruleStringToIDTable);
        FeatureTemplateBasedFF templateFeature = (FeatureTemplateBasedFF) feature;
        templateFeature.setModel(loadedModel);
    }
}
/**
 * Decodes a whole test set by handing the three file names to the
 * decoder factory. This may be parallel.
 *
 * @param testFile   file name passed on as the test set
 * @param nbestFile  file name passed on as the n-best output
 * @param oracleFile file name passed on as the oracle file;
 *                   {@link #main(String[])} passes <code>null</code>
 *                   when none was given
 * @throws IOException declared for callers
 */
public void decodeTestSet(String testFile, String nbestFile, String oracleFile) throws IOException {
    decoderFactory.decodeTestSet(testFile, nbestFile, oracleFile);
}
/**
 * Decodes a whole test set without an oracle file.
 *
 * @param testFile  file name passed on as the test set
 * @param nbestFile file name passed on as the n-best output
 */
public void decodeTestSet(String testFile, String nbestFile) {
    // The oracle file argument is left out by passing null.
    decoderFactory.decodeTestSet(testFile, nbestFile, null);
}
/**
* Decode a sentence. This must be non-parallel.
* <p>
* Not implemented yet: the body is empty, so calling this method
* has no effect and both arguments are ignored.
*
* @param testSentence the sentence to decode (currently unused)
* @param nbests presumably the receiver for the n-best translations
*               (currently unused) - TODO confirm once implemented
*/
public void decodeSentence(String testSentence, String[] nbests) {
//TODO
}
/**
* Hook called after decoding (see {@link #main(String[])}).
* <p>
* Currently a no-op: the only candidate statement, shutting down
* the language model, is commented out.
*/
public void cleanUp() {
//TODO
//this.languageModel.end_lm_grammar(); //end the threads
}
/**
 * Obtains the hypergraph for a sentence from the decoder factory
 * and shows it in a frame, using the decoder's symbol table.
 *
 * @param sentence sentence whose hypergraph is requested
 */
public void visualizeHyperGraphForSentence(String sentence) {
    HyperGraphViewer.visualizeHypergraphInFrame(
        decoderFactory.getHyperGraphForSentence(sentence),
        symbolTable);
}
/**
 * Writes a new configuration file derived from a template file.
 * <p>
 * Every template line is trimmed. Comment lines, empty lines, and
 * lines containing <code>=</code> (parameter lines) are copied as
 * they are. All other lines are treated as model lines: they are
 * split on whitespace, their last field must be a floating point
 * number, and they are written back with single spaces between the
 * fields, with the following substitutions:
 * <ul>
 * <li>if <code>newWeights</code> is given, the last field of the
 *     k-th model line is replaced by <code>newWeights[k]</code>;</li>
 * <li>if <code>newDiscriminativeModel</code> is given and the first
 *     field is <code>discriminative</code>, the second field is
 *     replaced by <code>newDiscriminativeModel</code>.</li>
 * </ul>
 * An {@link IOException} is reported on standard error and not
 * propagated; in that case the output file may be incomplete.
 *
 * @param newWeights new model weights in template order, or
 *                   <code>null</code> to keep the template's weights
 * @param template   name of the template configuration file
 * @param outputFile name of the configuration file to write
 * @param newDiscriminativeModel new discriminative model file name,
 *                   or <code>null</code> to keep the template's
 * @throws IllegalArgumentException if the last field of a model line
 *                   is not a number, or if the number of model lines
 *                   differs from the number of weights
 */
public static void writeConfigFile(double[] newWeights, String template, String outputFile, String newDiscriminativeModel) {
    try {
        int columnID = 0;
        BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
        // Reader and writer each get their own try/finally so that the
        // writer is closed even if opening or closing the reader fails.
        try {
            LineReader reader = new LineReader(template);
            try {
                for (String line : reader) {
                    line = line.trim();
                    if (Regex.commentOrEmptyLine.matches(line)
                            || line.indexOf("=") != -1) {
                        //comment, empty line, or parameter lines: just copy
                        writer.write(line);
                        writer.newLine();
                        continue;
                    }

                    //models: replace the weight
                    String[] fds = Regex.spaces.split(line);
                    String lastField = fds[fds.length - 1];
                    if (!Regex.floatingNumber.matches(lastField)) {
                        throw new IllegalArgumentException("last field is not a number; the field is: " + lastField);
                    }

                    StringBuilder newSent = new StringBuilder();
                    int firstCopiedField = 0;
                    if (newDiscriminativeModel != null && "discriminative".equals(fds[0])) {
                        newSent.append(fds[0]).append(' ');
                        newSent.append(newDiscriminativeModel).append(' ');//change the file name
                        firstCopiedField = 2; // the old file name in fds[1] is dropped
                    }
                    for (int i = firstCopiedField; i < fds.length - 1; i++) {
                        newSent.append(fds[i]).append(' ');
                    }

                    if (newWeights != null) {
                        // More model lines than weights: fail with the same
                        // error as the count check below instead of an
                        // ArrayIndexOutOfBoundsException.
                        if (columnID >= newWeights.length) {
                            throw new IllegalArgumentException("number of models does not match number of weights");
                        }
                        newSent.append(newWeights[columnID++]);//change the weight
                    } else {
                        newSent.append(lastField);//do not change
                    }
                    writer.write(newSent.toString());
                    writer.newLine();
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }

        // Fewer model lines than weights.
        if (newWeights != null && columnID != newWeights.length) {
            throw new IllegalArgumentException("number of models does not match number of weights");
        }
    } catch (IOException e) {
        // Kept as best-effort reporting: callers do not expect an
        // exception for I/O problems.
        e.printStackTrace();
    }
}
//===============================================================
// Initialization Methods
//===============================================================
/**
 * Initialize all parts of the JoshuaDecoder.
 * <p>
 * Reads the configuration, then builds the symbol table, language
 * model, grammars, feature functions and state computers. Which
 * path is taken depends on the translation model file name: a name
 * ending in <code>.josh</code> selects the corpus-based (suffix
 * array) grammar, anything else a grammar read from file. Finally
 * the grammars are sorted and the decoder factory is created.
 * <p>
 * An {@link IOException} raised on the way is printed to standard
 * error and not rethrown; the decoder factory is then not created.
 *
 * @param configFile File containing configuration options
 * @return this decoder
 * @throws RuntimeException if the configuration names no
 *         translation model file
 */
public JoshuaDecoder initialize(String configFile) {
    try {
        JoshuaConfiguration.readConfigFile(configFile);

        if (null == JoshuaConfiguration.tm_file) {
            throw new RuntimeException("No translation grammar or suffix array grammar was specified.");
        }

        //TODO: should not use file suffix to decide which kind of grammar we are using
        if (!JoshuaConfiguration.tm_file.endsWith(".josh")) {
            // Sets: symbolTable, defaultNonterminals
            // (the symbol table may grow on the fly during decoding)
            initializeSymbolTable(null);

            // Needs: symbolTable; Sets: languageModel
            if (JoshuaConfiguration.have_lm_model) {
                initializeLanguageModel();
            }

            // initialize and load grammar
            initializeGlueGrammar();
            initializeMainTranslationGrammar();

            // The features require that the LM has been initialized;
            // the config file is read a second time to build them.
            initializeFeatureFunctions(configFile);
            initializeStateComputers(this.symbolTable, JoshuaConfiguration.lmOrder, JoshuaConfiguration.ngramStateID);
        } else {
            try {
                // Corpus-based grammar: getParallelCorpus also sets up the
                // symbol table, language model and feature functions.
                ParallelCorpusGrammarFactory corpusGrammar = getParallelCorpus(configFile);
                this.grammarFactories.add(corpusGrammar);
            } catch (Exception cause) {
                IOException wrapped = new IOException("Error reading suffix array grammar.");
                wrapped.initCause(cause);
                throw wrapped;
            }
        }

        // Sort the TM grammars (needed to do cube pruning)
        for (GrammarFactory factory : this.grammarFactories) {
            if (factory instanceof Grammar) {
                ((Grammar) factory).sortGrammar(this.featureFunctions);
            }
        }

        this.decoderFactory = new DecoderFactory(
            this.grammarFactories,
            JoshuaConfiguration.have_lm_model,
            this.featureFunctions,
            this.stateComputers,
            this.symbolTable);
    } catch (IOException failure) {
        failure.printStackTrace();
    }
    return this;
}
// TODO: maybe move to JoshuaConfiguration to enable moving the featureFunction parsing there (Sets: symbolTable, defaultNonterminals)
/**
 * Selects the symbol table implementation according to the
 * configured language model type and registers the default
 * nonterminal in it.
 *
 * @param existingSymbols symbols to start from, or <code>null</code>
 *        to start with a new table. With SRILM a new SRILM table is
 *        built from these symbols; otherwise the given table itself
 *        becomes the decoder's symbol table.
 */
private void initializeSymbolTable(SymbolTable existingSymbols) {
    final boolean startFresh = (null == existingSymbols);

    if (JoshuaConfiguration.use_remote_lm_server) {
        // Within the decoder, we assume BuildinSymbol when using the remote LM
        this.symbolTable = startFresh
            ? new BuildinSymbol(JoshuaConfiguration.remote_symbol_tbl)
            : existingSymbols;
    } else if (JoshuaConfiguration.use_srilm) {
        logger.finest("Using SRILM symbol table");
        if (startFresh) {
            this.symbolTable = new SrilmSymbol(JoshuaConfiguration.lmOrder);
        } else {
            logger.finest("Populating SRILM symbol table with symbols from existing symbol table");
            this.symbolTable = new SrilmSymbol(existingSymbols, JoshuaConfiguration.lmOrder);
        }
    } else {
        this.symbolTable = startFresh ? new BuildinSymbol() : existingSymbols;
    }

    // Add the default nonterminal
    this.symbolTable.addNonterminal(JoshuaConfiguration.default_non_terminal);
}
// TODO: maybe move to JoshuaConfiguration to enable moving the featureFunction parsing there (Needs: symbolTable; Sets: languageModel)
// TODO: check we actually have a feature that requires a language model
/**
 * Creates the language model selected by the configuration flags
 * and stores it in <code>languageModel</code>.
 * <p>
 * The flags are checked in this order: remote LM server, SRILM,
 * Bloom filter LM, trie LM; if none is set the built-in Java
 * implementation is used. Only the built-in implementation is
 * given the left/right equivalent state options; every other
 * choice rejects them.
 *
 * @throws IllegalArgumentException if left or right equivalent
 *         state is requested together with a language model other
 *         than the built-in one
 * @throws IOException declared for callers
 */
private void initializeLanguageModel() throws IOException {
    // BUG: All these different boolean LM fields should just be an enum.
    // FIXME: And we should check only once for the default (which supports left/right equivalent state) vs everything else (which doesn't)
    // TODO: maybe have a special exception type for BadConfigfileException instead of using IllegalArgumentException?
    final boolean equivalentStateRequested =
        JoshuaConfiguration.use_left_equivalent_state
        || JoshuaConfiguration.use_right_equivalent_state;

    if (JoshuaConfiguration.use_remote_lm_server) {
        if (equivalentStateRequested) {
            throw new IllegalArgumentException("using remote LM, we cannot use suffix/prefix stuff");
        }
        this.languageModel = new LMGrammarRemote(
            this.symbolTable,
            JoshuaConfiguration.lmOrder,
            JoshuaConfiguration.f_remote_server_list,
            JoshuaConfiguration.num_remote_lm_servers);
        return;
    }

    if (JoshuaConfiguration.use_srilm) {
        if (equivalentStateRequested) {
            throw new IllegalArgumentException("using SRILM, we cannot use suffix/prefix stuff");
        }
        this.languageModel = new LMGrammarSRILM(
            (SrilmSymbol) this.symbolTable,
            JoshuaConfiguration.lmOrder,
            JoshuaConfiguration.lm_file);
        return;
    }

    if (JoshuaConfiguration.use_bloomfilter_lm) {
        if (equivalentStateRequested) {
            throw new IllegalArgumentException("using Bloomfilter LM, we cannot use suffix/prefix stuff");
        }
        this.languageModel = new BloomFilterLanguageModel(
            this.symbolTable,
            JoshuaConfiguration.lmOrder,
            JoshuaConfiguration.lm_file);
        return;
    }

    if (JoshuaConfiguration.use_trie_lm) {
        if (equivalentStateRequested) {
            throw new IllegalArgumentException("using Trie LM, we cannot use suffix/prefix stuff");
        }
        this.languageModel = new TrieLM(
            this.symbolTable,
            JoshuaConfiguration.lm_file);
        return;
    }

    // using the built-in JAVA implementation of LM, may not be as scalable as SRILM
    this.languageModel = new LMGrammarJAVA(
        this.symbolTable,
        JoshuaConfiguration.lmOrder,
        JoshuaConfiguration.lm_file,
        JoshuaConfiguration.use_left_equivalent_state,
        JoshuaConfiguration.use_right_equivalent_state);
}
/**
 * Loads the glue grammar named in the configuration and registers
 * it as a grammar factory.
 * <p>
 * When <code>JoshuaConfiguration.useRuleIDName</code> is set, the
 * rule ID table is created if necessary and handed to the grammar's
 * <code>obtainRulesIDTable</code>.
 *
 * @throws IOException declared for callers
 */
private void initializeGlueGrammar() throws IOException {
    logger.info("Constructing glue grammar...");

    MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar(
        JoshuaConfiguration.glue_format,
        JoshuaConfiguration.glue_file,
        this.symbolTable,
        JoshuaConfiguration.glue_owner,
        JoshuaConfiguration.default_non_terminal,
        -1, // the main grammar passes span_limit here; presumably -1 means "no limit" - TODO confirm
        JoshuaConfiguration.oovFeatureCost);
    this.grammarFactories.add(glueGrammar);

    if (!JoshuaConfiguration.useRuleIDName) {
        return;
    }
    if (null == this.ruleStringToIDTable) {
        this.ruleStringToIDTable = new HashMap<String,Integer>();
    }
    glueGrammar.obtainRulesIDTable(this.ruleStringToIDTable, this.symbolTable);
}
/**
 * Loads the main translation grammar from the configured
 * translation model file and registers it as a grammar factory.
 * <p>
 * When <code>JoshuaConfiguration.useRuleIDName</code> is set, the
 * rule ID table is created if necessary and handed to the grammar's
 * <code>obtainRulesIDTable</code>.
 *
 * @throws IOException declared for callers
 */
private void initializeMainTranslationGrammar() throws IOException {
    if (logger.isLoggable(Level.INFO)) {
        logger.info("Using grammar read from file " + JoshuaConfiguration.tm_file);
    }

    MemoryBasedBatchGrammar translationGrammar = new MemoryBasedBatchGrammar(
        JoshuaConfiguration.tm_format,
        JoshuaConfiguration.tm_file,
        this.symbolTable,
        JoshuaConfiguration.phrase_owner,
        JoshuaConfiguration.default_non_terminal,
        JoshuaConfiguration.span_limit,
        JoshuaConfiguration.oovFeatureCost);
    this.grammarFactories.add(translationGrammar);

    if (!JoshuaConfiguration.useRuleIDName) {
        return;
    }
    if (null == this.ruleStringToIDTable) {
        this.ruleStringToIDTable = new HashMap<String,Integer>();
    }
    translationGrammar.obtainRulesIDTable(this.ruleStringToIDTable, this.symbolTable);
}
/**
 * Sets up the decoder for a corpus-based (suffix array) grammar and
 * returns the resulting grammar factory.
 * <p>
 * The configured translation model file name is used as a
 * directory holding the files <code>common.vocab</code>,
 * <code>source.corpus</code>, <code>source.suffixes</code>,
 * <code>target.corpus</code>, <code>target.suffixes</code> and
 * <code>alignment.grids</code>. Besides building the parallel
 * corpus, this method also initializes the symbol table (from the
 * common vocabulary), the glue grammar, the language model (if one
 * is configured), the feature functions and the state computers.
 *
 * @param configFile configuration file, read again to build the
 *        feature functions
 * @return the parallel corpus that will serve as a grammar
 * @throws IOException declared for callers
 * @throws ClassNotFoundException declared for callers
 */
private ParallelCorpusGrammarFactory getParallelCorpus(String configFile)
throws IOException, ClassNotFoundException {
    final int ruleCacheSize = JoshuaConfiguration.sa_rule_cache_size;

    final String joshDirPrefix = JoshuaConfiguration.tm_file + File.separator;
    final String vocabPath          = joshDirPrefix + "common.vocab";
    final String sourceCorpusPath   = joshDirPrefix + "source.corpus";
    final String sourceSuffixesPath = joshDirPrefix + "source.suffixes";
    final String targetCorpusPath   = joshDirPrefix + "target.corpus";
    final String targetSuffixesPath = joshDirPrefix + "target.suffixes";
    final String alignmentPath      = joshDirPrefix + "alignment.grids";

    {
        // The vocabulary read from disk lives in its own block so
        // that it cannot accidentally be used further down; from
        // here on only this.symbolTable is used.
        if (logger.isLoggable(Level.INFO)) {
            logger.info("Reading common vocabulary from " + vocabPath);
        }
        Vocabulary diskVocabulary = new Vocabulary();
        diskVocabulary.readExternal(BinaryIn.vocabulary(vocabPath));

        // Initialize symbol table using suffix array's vocab
        initializeSymbolTable(diskVocabulary);
    }

    initializeGlueGrammar();

    // Needs: symbolTable; Sets: languageModel
    if (JoshuaConfiguration.have_lm_model) {
        initializeLanguageModel();
    }

    // The features require that the LM has been initialized; the
    // config file is read a second time to build them.
    initializeFeatureFunctions(configFile);
    initializeStateComputers(this.symbolTable, JoshuaConfiguration.lmOrder, JoshuaConfiguration.ngramStateID);

    if (logger.isLoggable(Level.INFO)) {
        logger.info("Reading source language corpus from " + sourceCorpusPath);
    }
    Corpus sourceCorpus = new MemoryMappedCorpusArray(this.symbolTable, sourceCorpusPath);

    if (logger.isLoggable(Level.INFO)) {
        logger.info("Reading source language suffix array from " + sourceSuffixesPath);
    }
    Suffixes sourceSuffixes = new MemoryMappedSuffixArray(sourceSuffixesPath, sourceCorpus, ruleCacheSize);

    if (logger.isLoggable(Level.INFO)) {
        logger.info("Reading target language corpus from " + targetCorpusPath);
    }
    Corpus targetCorpus = new MemoryMappedCorpusArray(this.symbolTable, targetCorpusPath);

    if (logger.isLoggable(Level.INFO)) {
        logger.info("Reading target language suffix array from " + targetSuffixesPath);
    }
    Suffixes targetSuffixes = new MemoryMappedSuffixArray(targetSuffixesPath, targetCorpus, ruleCacheSize);

    if (logger.isLoggable(Level.INFO)) {
        logger.info("Reading alignment grid data from " + alignmentPath);
    }
    Alignments alignmentGrids = new MemoryMappedAlignmentGrids(alignmentPath, sourceCorpus, targetCorpus);

    // Finally, build the parallel corpus that will serve as a grammar
    return new ParallelCorpusGrammarFactory(
        sourceSuffixes,
        targetSuffixes,
        alignmentGrids,
        this.featureFunctions,
        JoshuaConfiguration.sa_rule_sample_size,
        JoshuaConfiguration.sa_max_phrase_span,
        JoshuaConfiguration.sa_max_phrase_length,
        JoshuaConfiguration.sa_max_nonterminals,
        JoshuaConfiguration.sa_min_nonterminal_span,
        JoshuaConfiguration.sa_lex_floor_prob,
        JoshuaConfiguration.phrase_owner,
        JoshuaConfiguration.default_non_terminal,
        JoshuaConfiguration.oovFeatureCost);
}
/**
 * Replaces the list of state computers with a new list holding a
 * single {@link NgramStateComputer}.
 *
 * @param symbolTable  symbol table given to the n-gram state computer
 * @param nGramOrder   n-gram order given to the n-gram state computer
 * @param ngramStateID state ID given to the n-gram state computer
 */
private void initializeStateComputers(SymbolTable symbolTable, int nGramOrder, int ngramStateID) {
    this.stateComputers = new ArrayList<StateComputer>();
    this.stateComputers.add(new NgramStateComputer(symbolTable, nGramOrder, ngramStateID));
}
// BUG: why are we re-reading the configFile? JoshuaConfiguration should do this. (Needs: languageModel, symbolTable, (logger?); Sets: featureFunctions)
/**
 * Builds the list of feature functions from the model lines of the
 * configuration file.
 * <p>
 * Comment lines, empty lines and lines containing <code>=</code>
 * are skipped. Every other line is split on whitespace and must
 * have one of these forms (the number of fields is checked):
 * <pre>
 * lm weight
 * oracle referenceFile... weight
 * discriminative modelFile weight
 * latticecost weight
 * phrasemodel owner column weight
 * arityphrasepenalty owner start_arity end_arity weight
 * wordpenalty weight
 * </pre>
 * Each feature function is given its position in the list as its
 * feature ID, so the order of the lines determines the IDs.
 *
 * @param configFile configuration file to read the model lines from
 * @throws IllegalArgumentException if a model line has none of the
 *         forms above, or if an <code>lm</code>,
 *         <code>oracle</code> or <code>discriminative</code> line
 *         is found while no language model has been initialized
 * @throws IOException declared for callers
 */
private void initializeFeatureFunctions(String configFile)
throws IOException {
    this.featureFunctions = new ArrayList<FeatureFunction>();
    LineReader reader = new LineReader(configFile);
    try {
        for (String line : reader) {
            line = line.trim();
            if (Regex.commentOrEmptyLine.matches(line)) {
                continue;
            }
            if (line.indexOf("=") != -1) { // ignore lines with "="
                continue;
            }

            String[] fds = Regex.spaces.split(line);

            if ("lm".equals(fds[0]) && fds.length == 2) { // lm weight
                if (null == this.languageModel) {
                    throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
                }
                double weight = Double.parseDouble(fds[1].trim());
                this.featureFunctions.add(
                    new LanguageModelFF(
                        JoshuaConfiguration.ngramStateID,
                        this.featureFunctions.size(),
                        JoshuaConfiguration.lmOrder,
                        this.symbolTable, this.languageModel, weight));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Line: %s\nAdd LM, order: %d; weight: %.3f;",
                        line, JoshuaConfiguration.lmOrder, weight));
                }

            } else if ("oracle".equals(fds[0]) && fds.length >= 3) { //oracle files weight
                if (null == this.languageModel) {
                    throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
                }
                // Everything between the keyword and the weight is a reference file.
                String[] referenceFiles = new String[fds.length-2];
                for (int i = 0; i < referenceFiles.length; i++) {
                    referenceFiles[i] = fds[i+1].trim();
                }
                double weight = Double.parseDouble(fds[fds.length-1].trim());
                this.featureFunctions.add(
                    new BLEUOracleModel(JoshuaConfiguration.ngramStateID, JoshuaConfiguration.lmOrder,
                        this.featureFunctions.size(), this.symbolTable, weight, referenceFiles, JoshuaConfiguration.linearCorpusGainThetas));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Line: %s\nAdd BLEUOracleModel, order: %d; weight: %.3f;",
                        line, JoshuaConfiguration.lmOrder, weight));
                }

            } else if ("discriminative".equals(fds[0]) && fds.length == 3) { //discriminative weight modelFile
                if (null == this.languageModel) {
                    throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
                }
                String featureFile = null;//TODO???????
                String modelFile = fds[1].trim();
                double weight = Double.parseDouble(fds[2].trim());
                this.featureFunctions.add(DiscriminativeSupport.setupRerankingFeature(this.featureFunctions.size(), weight, symbolTable,
                    JoshuaConfiguration.useTMFeat, JoshuaConfiguration.useLMFeat, JoshuaConfiguration.useEdgeNgramOnly, JoshuaConfiguration.useTMTargetFeat,
                    JoshuaConfiguration.useMicroTMFeat, JoshuaConfiguration.wordMapFile,
                    JoshuaConfiguration.ngramStateID,
                    JoshuaConfiguration.lmOrder, JoshuaConfiguration.startNgramOrder, JoshuaConfiguration.endNgramOrder, featureFile, modelFile, this.ruleStringToIDTable));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Line: %s\nAdd FeatureTemplateBasedFF, order: %d; weight: %.3f;",
                        line, JoshuaConfiguration.lmOrder, weight));
                }

            } else if ("latticecost".equals(fds[0]) && fds.length == 2) {
                double weight = Double.parseDouble(fds[1].trim());
                this.featureFunctions.add(
                    new SourcePathFF(
                        this.featureFunctions.size(), weight));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Line: %s\nAdd Source lattice cost, weight: %.3f",
                        line, weight));
                }

            } else if ("phrasemodel".equals(fds[0]) && fds.length == 4) { // phrasemodel owner column(0-indexed) weight
                int owner = this.symbolTable.addTerminal(fds[1]);
                int column = Integer.parseInt(fds[2].trim());
                double weight = Double.parseDouble(fds[3].trim());
                this.featureFunctions.add(
                    new PhraseModelFF(
                        this.featureFunctions.size(),
                        weight, owner, column));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Process Line: %s\nAdd PhraseModel, owner: %s; column: %d; weight: %.3f",
                        line, owner, column, weight));
                }

            } else if ("arityphrasepenalty".equals(fds[0]) && fds.length == 5) { // arityphrasepenalty owner start_arity end_arity weight
                int owner = this.symbolTable.addTerminal(fds[1]);
                int startArity = Integer.parseInt(fds[2].trim());
                int endArity = Integer.parseInt(fds[3].trim());
                double weight = Double.parseDouble(fds[4].trim());
                this.featureFunctions.add(
                    new ArityPhrasePenaltyFF(
                        this.featureFunctions.size(),
                        weight, owner, startArity, endArity));
                // The guard matches the level of the message it protects,
                // like every other branch, so the message is not formatted
                // when FINEST is disabled.
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Process Line: %s\nAdd ArityPhrasePenalty, owner: %s; startArity: %d; endArity: %d; weight: %.3f",
                        line, owner, startArity, endArity, weight));
                }

            } else if ("wordpenalty".equals(fds[0]) && fds.length == 2) { // wordpenalty weight
                double weight = Double.parseDouble(fds[1].trim());
                this.featureFunctions.add(
                    new WordPenaltyFF(
                        this.featureFunctions.size(), weight));
                if (logger.isLoggable(Level.FINEST)) {
                    logger.finest(String.format(
                        "Process Line: %s\nAdd WordPenalty, weight: %.3f",
                        line, weight));
                }

            } else {
                throw new IllegalArgumentException("Wrong config line: " + line);
            }
        }
    } finally {
        reader.close();
    }
}
//===============================================================
// Main
//===============================================================
/**
 * Command line entry point: initializes a decoder from a
 * configuration file, decodes a test set and cleans up.
 * <p>
 * Expects three or four arguments: <code>configFile testFile
 * outputFile (oracleFile)</code>. With any other number of
 * arguments the usage and the received arguments are printed and
 * the JVM exits with status 1. Loading time and total running time
 * are logged at INFO level.
 *
 * @param args command line arguments
 * @throws IOException declared by the test set decoding step
 */
public static void main(String[] args) throws IOException {
    logger.finest("Starting decoder");

    // The clock is only read when the timings will actually be logged.
    final long startTime = logger.isLoggable(Level.INFO)
        ? System.currentTimeMillis()
        : 0;

    final int argCount = args.length;
    if (argCount < 3 || argCount > 4) {
        System.out.println("Usage: java " +
            JoshuaDecoder.class.getName() +
            " configFile testFile outputFile (oracleFile)");
        System.out.println("num of args is " + argCount);
        for (String arg : args) {
            System.out.println("arg is: " + arg);
        }
        System.exit(1);
    }

    String configFile = args[0].trim();
    String testFile = args[1].trim();
    String nbestFile = args[2].trim();
    String oracleFile = null;
    if (args.length == 4) {
        oracleFile = args[3].trim();
    }

    /* Step-1: initialize the decoder, test-set independent */
    JoshuaDecoder decoder = new JoshuaDecoder(configFile);
    if (logger.isLoggable(Level.INFO)) {
        double loadingSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
        logger.info("Before translation, loading time is "
            + loadingSeconds
            + " seconds");
    }

    /* Step-2: Decoding */
    decoder.decodeTestSet(testFile, nbestFile, oracleFile);

    /* Step-3: clean up */
    decoder.cleanUp();
    if (logger.isLoggable(Level.INFO)) {
        double totalSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
        logger.info("Total running time is "
            + totalSeconds
            + " seconds");
    }
}
}