Package joshua.discriminative.training

Source Code of joshua.discriminative.training.NbestMerger

package joshua.discriminative.training;

import java.io.BufferedWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


import joshua.discriminative.bleu_approximater.NbestReader;
import joshua.util.FileUtility;
import joshua.util.Regex;

public class NbestMerger {
 
  public static int mergeNbest(String nbestFile1, String nbestFile2, String nbestOutFile){
    int totalNumHyp = 0;
    try {
      NbestReader nbestReader1 = new NbestReader(nbestFile1);
      NbestReader nbestReader2 = new NbestReader(nbestFile2);
      BufferedWriter outWriter = FileUtility.getWriteFileStream(nbestOutFile);
     
      while(nbestReader1.hasNext()){
        List<String> nbest1 = nbestReader1.next();
        List<String> nbest2 = nbestReader2.next();
    
        List<String> newNbest = processOneSentence(nbest1, nbest2);
        for(String hyp : newNbest){
          outWriter.write(hyp+"\n");
        }
        totalNumHyp += newNbest.size();
      }
      outWriter.close();
   
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    System.out.println("totalNumHyp="+totalNumHyp);
    return totalNumHyp;
  }
 
  private static List<String> processOneSentence(List<String> nbest1, List<String> nbest2){
   
    List<String> newNbest = new ArrayList<String>();
    Set<String> uniqueNbests = new HashSet<String>();
    processOneNbest(nbest1, uniqueNbests, newNbest);
    processOneNbest(nbest2, uniqueNbests, newNbest);
    return newNbest;
  }
 
  private static void processOneNbest(List<String> nbest, Set<String> uniqueNbests, List<String> newNbest){
    for(String line : nbest){
      String[] fds = Regex.threeBarsWithSpace.split(line);
      String hypItself = fds[1];
     
      if(uniqueNbests.contains(hypItself)){
        //skip
      }else{
        uniqueNbests.add(hypItself);
        newNbest.add(line);
      }
    }
  }
 
}
TOP

Related Classes of joshua.discriminative.training.NbestMerger

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.