Package joshua.corpus.lexprob

Source Code of joshua.corpus.lexprob.WriteLexProbs

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.corpus.lexprob;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.OutputStreamWriter;

import joshua.corpus.AlignedParallelCorpus;
import joshua.corpus.Corpus;
import joshua.corpus.ParallelCorpus;
import joshua.corpus.alignment.Alignments;
import joshua.corpus.alignment.mm.MemoryMappedAlignmentGrids;
import joshua.corpus.mm.MemoryMappedCorpusArray;
import joshua.corpus.vocab.Vocabulary;
import joshua.util.io.BinaryIn;

/**
* Ant task to export a human-readable lexical probabilities table
* to disk from a binary josh directory.
*
* @author Lane Schwartz
*/
public class WriteLexProbs {

  private String encoding = "UTF-8";
  private int cacheSize = 1000;
  private String joshDir;
  private String output;
 
  public void setEncoding(String encoding) {
    this.encoding = encoding;
  }
 
  public void setCacheSize(int cacheSize) {
    this.cacheSize = cacheSize;
  }
 
  public void setJoshDir(String joshDir) {
    System.out.println("Setting " + joshDir);
    this.joshDir = joshDir;
  }
 
  public void setOutput(String output) {
    this.output = output;
  }
 
    public void execute() throws IOException, ClassNotFoundException {
     
        System.out.println("Getting parallel corpus");
      ParallelCorpus parallelCorpus = getParallelCorpus(joshDir, cacheSize);
   
      System.out.println("Getting lexprobs");
      LexicalProbabilities lexProbs =
      new LexProbs(parallelCorpus, Float.MIN_VALUE);
   
      FileOutputStream stream = new FileOutputStream(output);
      OutputStreamWriter out = new OutputStreamWriter(stream, encoding);
      try {
       
        String s = lexProbs.toString();

        System.out.println("Writing lexprobs from " + joshDir + " to " + output);
          out.write(s)
           
        } catch (IOException e) {
          System.out.println("Failure");
        } finally {
          out.close();
        }
       
    }

  private static ParallelCorpus getParallelCorpus(String joshDir, int cacheSize) throws IOException, ClassNotFoundException {
   
    Vocabulary commonVocab = new Vocabulary();
      String binaryVocabFileName = joshDir + "/common.vocab";
      ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
    commonVocab.readExternal(in);
   
    String sourceFileName = joshDir + "/source.corpus";
    Corpus sourceCorpusArray = new MemoryMappedCorpusArray(commonVocab, sourceFileName);

    String targetFileName = joshDir + "/target.corpus";
    Corpus targetCorpusArray = new MemoryMappedCorpusArray(commonVocab, targetFileName);
 
    String alignmentFileName = joshDir + "/alignment.grids";
    Alignments alignments = new MemoryMappedAlignmentGrids(alignmentFileName, sourceCorpusArray, targetCorpusArray);
 
    return new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, alignments);
  }
 
 

  /**
   * Takes a directory containing a compiled suffix array and writes LexProb file to disk.
   * @param args
   * @throws IOException
   */
  public static void main(String[] args) throws IOException, ClassNotFoundException
  {
    if(args.length != 2) {
      System.err.println("Usage: java LexProbs joshDir outputFile");
      System.exit(0);
    }
   
    String joshDir = args[0];
    String outputFile = args[1];
   
    WriteLexProbs lexProbWriter = new WriteLexProbs();
    lexProbWriter.setJoshDir(joshDir);
    lexProbWriter.setOutput(outputFile);
    lexProbWriter.execute();
   
  }
}
TOP

Related Classes of joshua.corpus.lexprob.WriteLexProbs

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.