Package joshua.discriminative.feature_related

Source Code of joshua.discriminative.feature_related.FeatureSelectionHG

package joshua.discriminative.feature_related;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import joshua.corpus.vocab.BuildinSymbol;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.hypergraph.DiskHyperGraph;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.discriminative.DiscriminativeSupport;
import joshua.discriminative.FileUtilityOld;
import joshua.discriminative.feature_related.feature_template.EdgeBigramFT;
import joshua.discriminative.feature_related.feature_template.FeatureTemplate;
import joshua.discriminative.feature_related.feature_template.NgramFT;
import joshua.discriminative.feature_related.feature_template.TMFT;



public class FeatureSelectionHG {
 
 
  /*for 919 sent, time_on_reading: 148797
  time_on_orc_extract: 580286*/
  public static void main(String[] args) { 
    //long start_time = System.currentTimeMillis();
     
    if(args.length<8){
      System.out.println("wrong command, correct command should be: java FeatureSelection f_l_test_items f_l_test_rules f_l_num_sents use_tm_feat use_lm_feat use_edge_ngram_only use_joint_tm_lm_feature f_feature_set_out [threshold]");
      System.out.println("num of args is "+ args.length);
      for(int i=0; i <args.length; i++)System.out.println("arg is: " + args[i]);
      System.exit(0);   
    }
   
    String f_l_test_items= args[0].trim();
    String f_l_test_rules= args[1].trim();
    String f_l_num_sents= args[2].trim();   
    boolean use_tm_feat = new Boolean(args[3].trim());
    boolean use_lm_feat = new Boolean(args[4].trim());
    boolean use_edge_ngram_only = new Boolean(args[5].trim());
    boolean use_joint_tm_lm_feature = new Boolean(args[6].trim());
    String f_feature_set_out= args[7].trim();//output file
    int baseline_lm_order = new Integer(args[8].trim());
    double threshold = 0.0;
    if(args.length>9) threshold = new Double(args[9].trim());
   
    boolean saveModelScore = true;//diskHG have costs stored
   
//    ????????????????????????????????????????????????????
    int ngramStateID = 0;
    //??????????????????????????????????????
   
    boolean addBaselineFeature = true;//TODO
    String baselineFeatureName = "baseline_lzf";//TODO
   
   
    SymbolTable p_symbol = new BuildinSymbol(null);
   
//    ##setup feature templates list
    ArrayList<FeatureTemplate> featureTemplates =  new ArrayList<FeatureTemplate>();
   
    boolean useIntegerString = false;
    boolean useRuleIDName = false;
   
    if(use_tm_feat==true){
      FeatureTemplate ft = new TMFT(p_symbol, useIntegerString, useRuleIDName);
      featureTemplates.add(ft);
    }
   
    if(use_lm_feat==true){
      FeatureTemplate ft = new NgramFT(p_symbol, false, ngramStateID, baseline_lm_order, 1 ,2);//TODO: unigram and bi gram
      featureTemplates.add(ft);
    }else if(use_edge_ngram_only){//exclusive with use_lm_feat
      FeatureTemplate ft = new EdgeBigramFT(p_symbol, ngramStateID, baseline_lm_order, useIntegerString);
      featureTemplates.add(ft);
    }
   
    if(use_joint_tm_lm_feature){
      //TODO: not implement
      System.out.println("not implemented"); System.exit(0);
    }
   
   
    List<String> testItemsFiles = DiscriminativeSupport.readFileList(f_l_test_items);
    List<String> testRulesFiles = DiscriminativeSupport.readFileList(f_l_test_rules);
    List<String> l_num_sents = DiscriminativeSupport.readFileList(f_l_num_sents);       
 
    //#### extract feat tbl
    HashMap<String, Double> tbl_feats = new HashMap<String, Double>();
       
    for(int fid=0; fid < testItemsFiles.size(); fid++){
      System.out.println("############Process file id " + fid);
      DiskHyperGraph dhg_train = new DiskHyperGraph(p_symbol, ngramStateID, saveModelScore, null);
      dhg_train.initRead((String)testItemsFiles.get(fid), (String)testRulesFiles.get(fid),null);   
      int total_num_sent = new Integer((String)l_num_sents.get(fid));
      for(int sent_id=0; sent_id < total_num_sent; sent_id ++){
        System.out.println("############Process sentence " + sent_id);
        HyperGraph hg_train = dhg_train.readHyperGraph();       
        FeatureExtractionHG.featureExtractionOnHG(hg_train, tbl_feats, null, featureTemplates);
      }
    }
    System.out.println("===feature table size is " + tbl_feats.size());
    //#### write hashtable
    boolean useZeroValue = true;
    boolean keyOnly = false;   
    FileUtilityOld.printHashTblAboveThreshold(tbl_feats, f_feature_set_out, keyOnly, threshold, useZeroValue, addBaselineFeature, baselineFeatureName);
   
 
 
 
 
 
}
TOP

Related Classes of joshua.discriminative.feature_related.FeatureSelectionHG

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.