Package joshua.discriminative.monolingual_parser

Source Code of joshua.discriminative.monolingual_parser.MonolingualDecoderThread

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/

package joshua.discriminative.monolingual_parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.Chart;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.GrammarFactory;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.lattice.Lattice;
import joshua.util.FileUtility;



/**
* Base class for a decoder thread: it reads sentences from a test file and
* runs each one through the chart parser, handing the resulting hypergraph
* to the subclass for post-processing.
*
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2008-10-20 00:12:30 -0400 $
*/

//TODO: known synchronization problem: LM cache; srilm call;

public abstract class MonolingualDecoderThread extends Thread {
  //these variables may be the same across all threads (e.g., just copy from DecoderFactory), or differ from thread to thread
  private final GrammarFactory[] grammarFactories;// = null;
  private final boolean haveLMModel;// = false;
  protected final List<FeatureFunction> featFunctions;// = null;
  private final List<Integer> defaultNonterminals;// = null;
  protected final SymbolTable symbolTable;// = null;
 
  //more test set specific
  private final String          testFile;
  private final int             startSentID; //start sent id
  private BufferedReader testReader;
   
  private static final Logger logger = Logger.getLogger(MonolingualDecoderThread.class.getName());
 
 
  public MonolingualDecoderThread(GrammarFactory[] grammarFactories,
    boolean haveLMModel, List<FeatureFunction> featFunctions,
    List<Integer> defaultNonterminals, SymbolTable symbolTable,
    String testFile,
    int startSentID)
  throws IOException {
   
    this.grammarFactories    = grammarFactories;
    this.haveLMModel          = haveLMModel;
    this.featFunctions     = featFunctions;
    this.defaultNonterminals = defaultNonterminals;
    this.symbolTable          = symbolTable;
   
    this.testFile       = testFile;
    this.startSentID   = startSentID;
    this.testReader =  FileUtility.getReadFileStream(testFile);
  }
 

  public abstract void  postProcessHypergraph(HyperGraph hypergraph, int sentenceID) throws IOException;
 
  public abstract void postProcess() throws IOException;
 
 
  // DecoderThread.run() cannot throw anything
  public void run() {
    try {
      decodeFile();
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(1);
    }
  }
 

//  TODO: log file is not properly handled for parallel decoding
  public void decodeFile()
  throws IOException {   
    String cn_sent;
    int sent_id = startSentID; // if no sent tag, then this will be used
    while ((cn_sent = FileUtility.read_line_lzf(testReader)) != null) {
      if (logger.isLoggable(Level.FINE))
        logger.fine("now translate\n" + cn_sent);
      int[] tem_id = new int[1];
      cn_sent = get_sent_id(cn_sent, tem_id);
      if (tem_id[0] > 0) {
        sent_id = tem_id[0];
      }
     
      translate(
        this.grammarFactories,
        this.featFunctions,
        cn_sent,
        this.defaultNonterminals,
        sent_id
        );
      sent_id++;
    }
   
    testReader.close();
    postProcess();
  }
 
 
 
  /**
   * Translate a sentence.
   *
   * @param grammars Translation grammars to be used during translation.
   * @param models   Models to be used when scoring rules.
   * @param sentence The sentence to be translated.
   * @param defaultNonterminals
   * @param sentenceID
   * @param topN
   * @param diskHyperGraph
   * @param kbestExtractor
   */
  private void translate(GrammarFactory[] grammarFactories,
    List<FeatureFunction> models,
    String sentence,
    List<Integer> defaultNonterminals,
    int sentenceID
  ) throws IOException {
    long  start            = System.currentTimeMillis();
    int[] sentence_numeric = this.symbolTable.getIDs(sentence);
   
    Integer[] input = new Integer[sentence_numeric.length];
    for (int i = 0; i < sentence_numeric.length; i++) {
      input[i] = sentence_numeric[i];
    }
    Lattice<Integer> inputLattice = new Lattice<Integer>(input);
   
    Grammar[] grammars = new Grammar[grammarFactories.length];
    for (int i = 0; i < grammarFactories.length; i++) {
      grammars[i] = grammarFactories[i].getGrammarForSentence(null);//??????????????????????????????????????????????????????????????????????
//      grammars[i].sortGrammar(models);//TODO: for batch grammar, we do not want to sort it every time
    }
   
    //==========================seeding: the chart only sees the grammars, not the grammarFactories
    Chart chart = new Chart(
      inputLattice,
      models,
      null,
      this.symbolTable,
      sentenceID,
      grammars,
      this.haveLMModel,
      JoshuaConfiguration.goal_symbol,
      null);
    if (logger.isLoggable(Level.FINER))
      logger.finer("after seed, time: "
        + (System.currentTimeMillis() - start) / 1000);
   
    //=========================parsing
    HyperGraph p_hyper_graph = chart.expand();
    if (logger.isLoggable(Level.FINER))
      logger.finer("after expand, time: "  + (System.currentTimeMillis() - start) / 1000);   
   
    postProcessHypergraph(p_hyper_graph, sentenceID);
  }

 
  //return sent without the tag
  //if no sent id, then return -1 in sent_id[]
  private static String get_sent_id(String sent, int[] sent_id) {
    if (sent.matches("^<seg\\s+id=.*$")) { // havd sent id
      String res_sent = sent.replaceAll("^<seg\\s+id=\"", "");
      String str_id   = "";
      for (int i = 0; i < res_sent.length(); i++) {
        char cur = res_sent.charAt(i);
        if (cur != '"') {
          str_id += cur;
        } else {
          break;
        }
      }
      int res_id = Integer.parseInt(str_id);
      res_sent   = res_sent.replaceFirst(str_id + "\">", "");
      res_sent   = res_sent.replaceAll("</seg>", "");
      sent_id[0] = res_id;
      return res_sent;
    } else {
      sent_id[0] = -1;
      return sent;
    }
  }
}
TOP

Related Classes of joshua.discriminative.monolingual_parser.MonolingualDecoderThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.