Package joshua.util.io

Examples of joshua.util.io.LineReader


   
    this.parallelThreads = new DecoderThread[JoshuaConfiguration.num_parallel_decoders];
   
    //==== compute number of lines for each decoder
    int n_lines = 0; {
      LineReader testReader = new LineReader(testFile);
      try {
        n_lines = testReader.countLines();
      } finally { testReader.close(); }
    }
   
    double num_per_thread_double = n_lines * 1.0 / JoshuaConfiguration.num_parallel_decoders;
    int    num_per_thread_int    = (int) num_per_thread_double;
   
    if (logger.isLoggable(Level.INFO))
      logger.info("num_per_file_double: " + num_per_thread_double
        + "; num_per_file_int: " + num_per_thread_int);
   
   
    //==== Initialize all threads and their input files
    int decoder_i = 1;
    String cur_test_file  = JoshuaConfiguration.parallel_files_prefix + ".test." + decoder_i;
    String cur_nbest_file = JoshuaConfiguration.parallel_files_prefix + ".nbest." + decoder_i;
    BufferedWriter t_writer_test = 
      FileUtility.getWriteFileStream(cur_test_file);
    int sent_id       = 0;
    int start_sent_id = sent_id;
   
    LineReader testReader = new LineReader(testFile);
    try {
      for (String cn_sent : testReader) {
        sent_id++;
        t_writer_test.write(cn_sent);
        t_writer_test.newLine();
       
        //make the Symbol table finalized before running multiple threads, this is to avoid synchronization among threads
        {
          String words[] = Regex.spaces.split(cn_sent);
          this.symbolTable.addTerminals(words); // TODO
        }
        //logger.info("sent_id="+sent_id);
        // we will include all additional lines into last file
        //prepare current job
        if (0 != sent_id
        && decoder_i < JoshuaConfiguration.num_parallel_decoders
        && sent_id % num_per_thread_int == 0
        ) {
          t_writer_test.flush();
          t_writer_test.close();
         
          DecoderThread pdecoder = new DecoderThread(
            this.grammarFactories,
            this.hasLanguageModel,
            this.featureFunctions,
            this.stateComputers,
            this.symbolTable,
            cur_test_file,
            cur_nbest_file,
            null,
            start_sent_id);
          this.parallelThreads[decoder_i-1] = pdecoder;
         
          // prepare next job
          start_sent_id  = sent_id;
          decoder_i++;
          cur_test_file  = JoshuaConfiguration.parallel_files_prefix + ".test." + decoder_i;
          cur_nbest_file = JoshuaConfiguration.parallel_files_prefix + ".nbest." + decoder_i;
          t_writer_test  = FileUtility.getWriteFileStream(cur_test_file);
        }
      }
    }finally {
      testReader.close();
     
      //==== prepare the last job
      t_writer_test.flush();
      t_writer_test.close();
    }
   
    DecoderThread pdecoder = new DecoderThread(
      this.grammarFactories,
      this.hasLanguageModel,
      this.featureFunctions,
      this.stateComputers,
      this.symbolTable,
      cur_test_file,
      cur_nbest_file,
      null,
      start_sent_id);
    this.parallelThreads[decoder_i-1] = pdecoder;
   
    // End initializing threads and their files
     
   
    //==== run all the jobs
    for (int i = 0; i < this.parallelThreads.length; i++) {
      if (logger.isLoggable(Level.INFO))
        logger.info("##############start thread " + i);
      this.parallelThreads[i].start();
    }
   
    //==== wait for the threads to finish
    for (int i = 0; i < this.parallelThreads.length; i++) {
      try {
        this.parallelThreads[i].join();
      } catch (InterruptedException e) {
        if (logger.isLoggable(Level.WARNING))
          logger.warning("thread is interupted for server " + i);
      }
    }
   
    //==== merge the nbest files, and remove tmp files
    BufferedWriter nbestWriter =  FileUtility.getWriteFileStream(nbestFile);
    BufferedWriter itemsWriter = null;
    if (JoshuaConfiguration.save_disk_hg) {
      itemsWriter = FileUtility.getWriteFileStream(nbestFile + ".hg.items");
    }
    for (DecoderThread decoder : this.parallelThreads) {
      //merge nbest
      LineReader nbestReader = new LineReader(decoder.nbestFile);
      try {
        for (String sent : nbestReader) {
          nbestWriter.write(sent);
          nbestWriter.newLine();
        }
      } finally {
        nbestReader.close();
      }

      //remove the temporary nbest file and the temporary test file
      FileUtility.deleteFile(decoder.nbestFile);
      FileUtility.deleteFile(decoder.testFile);
     
      //merge hypergraph items
      if (JoshuaConfiguration.save_disk_hg) {
        LineReader itemReader = new LineReader(decoder.nbestFile + ".hg.items");
        try {
          for (String sent : itemReader) {         
            itemsWriter.write(sent);
            itemsWriter.newLine();
          }
        } finally {
          itemReader.close();
          decoder.hypergraphSerializer.closeItemsWriter();
        }
        //remove the temporary item file
        FileUtility.deleteFile(decoder.nbestFile + ".hg.items");
      }
View Full Code Here


  public static void writeConfigFile(double[] newWeights, String template, String outputFile, String newDiscriminativeModel) {
    try {
      int columnID = 0;
     
      BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
      LineReader     reader = new LineReader(template);
      try { for (String line : reader) {
        line = line.trim();
        if (Regex.commentOrEmptyLine.matches(line)
        || line.indexOf("=") != -1) {
          //comment, empty line, or parameter lines: just copy
          writer.write(line);
          writer.newLine();
         
        } else { //models: replace the weight
          String[] fds = Regex.spaces.split(line);
          StringBuffer newSent = new StringBuffer();
          if (! Regex.floatingNumber.matches(fds[fds.length-1])) {
            throw new IllegalArgumentException("last field is not a number; the field is: " + fds[fds.length-1]);
          }
         
          if(newDiscriminativeModel!=null && "discriminative".equals(fds[0])){
            newSent.append(fds[0]).append(' ');
            newSent.append(newDiscriminativeModel).append(' ');//change the file name
            for (int i = 2; i < fds.length-1; i++) {
              newSent.append(fds[i]).append(' ');
            }
          }else{//regular
            for (int i = 0; i < fds.length-1; i++) {
              newSent.append(fds[i]).append(' ');
            }
          }
          if(newWeights!=null)
            newSent.append(newWeights[columnID++]);//change the weight
          else
            newSent.append(fds[fds.length-1]);//do not change
         
          writer.write(newSent.toString());
          writer.newLine();
        }
      } } finally {
        reader.close();
        writer.close();
      }
     
      if (newWeights!=null && columnID != newWeights.length) {
        throw new IllegalArgumentException("number of models does not match number of weights");
View Full Code Here

  // BUG: why are we re-reading the configFile? JoshuaConfiguration should do this. (Needs: languageModel, symbolTable, (logger?); Sets: featureFunctions)
  private void initializeFeatureFunctions(String configFile)
  throws IOException {
    this.featureFunctions = new ArrayList<FeatureFunction>();
   
    LineReader reader = new LineReader(configFile);
    try { for (String line : reader) {
      line = line.trim();
      if (Regex.commentOrEmptyLine.matches(line))
        continue;
     
      if (line.indexOf("=") == -1) { // ignore lines with "="
        String[] fds = Regex.spaces.split(line);
       
        if ("lm".equals(fds[0]) && fds.length == 2) { // lm weight
          if (null == this.languageModel) {
            throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
          }
          double weight = Double.parseDouble(fds[1].trim());
          this.featureFunctions.add(
            new LanguageModelFF(
              JoshuaConfiguration.ngramStateID, 
              this.featureFunctions.size(),
              JoshuaConfiguration.lmOrder,
              this.symbolTable, this.languageModel, weight));
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Line: %s\nAdd LM, order: %d; weight: %.3f;",
              line, JoshuaConfiguration.lmOrder, weight));
         
        } else if ("oracle".equals(fds[0]) && fds.length >= 3) { //oracle files weight
          if (null == this.languageModel) {
            throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
          }         
          String[] referenceFiles = new String[fds.length-2];
          for(int i=0; i< referenceFiles.length; i++)
            referenceFiles[i] =  fds[i+1].trim();     
          double weight = Double.parseDouble(fds[fds.length-1].trim());
         
          this.featureFunctions.add(
            new BLEUOracleModel(JoshuaConfiguration.ngramStateID, JoshuaConfiguration.lmOrder,
                this.featureFunctions.size(), this.symbolTable, weight, referenceFiles, JoshuaConfiguration.linearCorpusGainThetas));
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Line: %s\nAdd BLEUOracleModel, order: %d; weight: %.3f;",
              line, JoshuaConfiguration.lmOrder, weight));
         
        } else if ("discriminative".equals(fds[0]) && fds.length == 3) { //discriminative weight modelFile
          if (null == this.languageModel) {
            throw new IllegalArgumentException("LM model has not been properly initialized before setting order and weight");
          }
         
          String featureFile = null;//TODO???????         
          String modelFile = fds[1].trim();
         
          double weight = Double.parseDouble(fds[2].trim());
         
          this.featureFunctions.add (DiscriminativeSupport.setupRerankingFeature(this.featureFunctions.size(), weight, symbolTable,
              JoshuaConfiguration.useTMFeat, JoshuaConfiguration.useLMFeat, JoshuaConfiguration.useEdgeNgramOnly, JoshuaConfiguration.useTMTargetFeat,
              JoshuaConfiguration.useMicroTMFeat, JoshuaConfiguration.wordMapFile,
              JoshuaConfiguration.ngramStateID,
              JoshuaConfiguration.lmOrder, JoshuaConfiguration.startNgramOrder, JoshuaConfiguration.endNgramOrder, featureFile, modelFile, this.ruleStringToIDTable) );
         
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Line: %s\nAdd FeatureTemplateBasedFF, order: %d; weight: %.3f;",
              line, JoshuaConfiguration.lmOrder, weight));
         
        } else if ("latticecost".equals(fds[0]) && fds.length == 2) {
          double weight = Double.parseDouble(fds[1].trim());
          this.featureFunctions.add(
            new SourcePathFF(
              this.featureFunctions.size(), weight));
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Line: %s\nAdd Source lattice cost, weight: %.3f",
              line, weight));
         
        } else if ("phrasemodel".equals(fds[0]) && fds.length == 4) { // phrasemodel owner column(0-indexed) weight
          int    owner  = this.symbolTable.addTerminal(fds[1]);
          int    column = Integer.parseInt(fds[2].trim());
          double weight = Double.parseDouble(fds[3].trim());
          this.featureFunctions.add(
            new PhraseModelFF(
              this.featureFunctions.size(),
              weight, owner, column));
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Process Line: %s\nAdd PhraseModel, owner: %s; column: %d; weight: %.3f",
              line, owner, column, weight));
         
        } else if ("arityphrasepenalty".equals(fds[0]) && fds.length == 5) { // arityphrasepenalty owner start_arity end_arity weight
          int owner      = this.symbolTable.addTerminal(fds[1]);
          int startArity = Integer.parseInt(fds[2].trim());
          int endArity   = Integer.parseInt(fds[3].trim());
          double weight  = Double.parseDouble(fds[4].trim());
          this.featureFunctions.add(
            new ArityPhrasePenaltyFF(
              this.featureFunctions.size(),
              weight, owner, startArity, endArity));
         
          if (logger.isLoggable(Level.INFO))
            logger.finest(String.format(
              "Process Line: %s\nAdd ArityPhrasePenalty, owner: %s; startArity: %d; endArity: %d; weight: %.3f",
              line, owner, startArity, endArity, weight));
         
        } else if ("wordpenalty".equals(fds[0]) && fds.length == 2) { // wordpenalty weight
          double weight = Double.parseDouble(fds[1].trim());
          this.featureFunctions.add(
            new WordPenaltyFF(
              this.featureFunctions.size(), weight));
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format(
              "Process Line: %s\nAdd WordPenalty, weight: %.3f",
              line, weight));
         
        } else {
          throw new IllegalArgumentException("Wrong config line: " + line);
        }
      }
    } } finally {
      reader.close();
    }
  }
View Full Code Here

      // TODO: see documentation for extractOneBest
      // regarding using an n-best SegmentFileParser.
      IndexedReader<String> nbestReader =
        new IndexedReader<String>("line",
          "-".equals(args[0])
            ? new LineReader(System.in)
            : new LineReader(args[0]));
     
      /* TODO: This duplicates FileUtility.getWriteFileStream
       * but with the addition of defaulting to System.out;
       * should fix that (without breaking other clients
       * of that method). We ultimately want something which
View Full Code Here

        segmentParser.parseSegmentFile(
          LineReader.getInputStream(this.testFile),
          new TranslateCoiterator(
            null == this.oracleFile
              ? new NullReader<String>()
              : new LineReader(this.oracleFile)
          )
        );
      } catch (UncheckedIOException e) {
        e.throwCheckedException();
      }
View Full Code Here

//===============================================================
 
  // This is static instead of a constructor because all the fields are static. Yuck.
  public static void readConfigFile(String configFile) throws IOException {
   
    LineReader configReader = new LineReader(configFile);
    try { for (String line : configReader) {
      line = line.trim(); // .toLowerCase();
      if (Regex.commentOrEmptyLine.matches(line)) continue;
     
     
      if (line.indexOf("=") != -1) { // parameters; (not feature function)
        String[] fds = Regex.equalsWithSpaces.split(line);
        if (fds.length != 2) {
          logger.severe("Wrong config line: " + line);
          System.exit(1);
        }
       
        if ("lm_file".equals(fds[0])) {
          lm_file = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("lm file: %s", lm_file));
         
        } else if ("tm_file".equals(fds[0])) {
          tm_file = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("tm file: %s", tm_file));
       
        } else if ("glue_file".equals(fds[0])) {
          glue_file = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("glue file: %s", glue_file));
       
        } else if ("tm_format".equals(fds[0])) {
          tm_format = fds[1].trim();
           
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("tm format: %s", tm_format));

        } else if ("glue_format".equals(fds[0])) {
          glue_format = fds[1].trim();
           
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("glue format: %s", glue_format));
         
//        } else if ("sa_source".equals(fds[0])) {
//          sa_source = fds[1].trim();
//          if (logger.isLoggable(Level.FINEST))
//            logger.finest(String.format("suffix array source file: %s", sa_source));
//         
//        } else if ("sa_target".equals(fds[0])) {
//          sa_target = fds[1].trim();
//          if (logger.isLoggable(Level.FINEST))
//            logger.finest(String.format("suffix array target file: %s", sa_target));
//         
//        } else if ("sa_alignment".equals(fds[0])) {
//          sa_alignment = fds[1].trim();
//          if (logger.isLoggable(Level.FINEST))
//            logger.finest(String.format("suffix array alignment file: %s", sa_alignment));
//         
        } else if ("sa_max_phrase_span".equals(fds[0])) {
          sa_max_phrase_span = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array maximum phrase span: %s", sa_max_phrase_span));
         
        } else if ("sa_max_phrase_length".equals(fds[0])) {
          sa_max_phrase_length = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array maximum phrase length: %s", sa_max_phrase_length));
         
        } else if ("sa_max_phrase_length".equals(fds[0])) {
          sa_max_phrase_length = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array maximum phrase length: %s", sa_max_phrase_length));
         
        } else if ("sa_max_nonterminals".equals(fds[0])) {
          sa_max_nonterminals = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array maximum number of nonterminals: %s", sa_max_nonterminals));
         
        } else if ("sa_min_nonterminal_span".equals(fds[0])) {
          sa_min_nonterminal_span = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array minimun nonterminal span: %s", sa_min_nonterminal_span));
         
        } else if ("sa_lex_sample_size".equals(fds[0])) {
          sa_lex_sample_size = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array sample size for lexical probability calculation: %s", sa_lex_sample_size));
         
        } else if ("sa_precalculate_lexprobs".equals(fds[0])) {
          sa_precalculate_lexprobs = Boolean.valueOf(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("should lexical probabilities be precalculated: %s", sa_precalculate_lexprobs));
         
        } else if ("sa_rule_sample_size".equals(fds[0])) {
          sa_rule_sample_size = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array sample size for rules: %s", sa_rule_sample_size));
         
        } else if ("sa_rule_cache_size".equals(fds[0])) {
          sa_rule_cache_size = Integer.parseInt(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("suffix array cache size for rules: %s", sa_rule_cache_size));
         
        } else if ("sa_sentence_initial_X".equals(fds[0])) {
          sa_sentence_initial_X = Boolean.valueOf(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("should suffix array rule extraction allow rules from sentence-initial X: %s", sa_sentence_initial_X));
         
        } else if ("sa_sentence_final_X".equals(fds[0])) {
          sa_sentence_final_X = Boolean.valueOf(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("should suffix array rule extraction allow rules from sentence-final X: %s", sa_sentence_final_X));
         
        } else if ("sa_edgeXMayViolatePhraseSpan".equals(fds[0])) {
          sa_edgeXMayViolatePhraseSpan = Boolean.valueOf(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("should suffix array rule extraction allow rules where sa_edgeXMayViolatePhraseSpan: %s", sa_edgeXMayViolatePhraseSpan));
        } else if ("sa_lex_floor_prob".equals(fds[0])) {
          sa_lex_floor_prob = Float.valueOf(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("floor value for probabilities returned as lexical transaltion probabilities: %s", sa_lex_floor_prob));
        } else if ("use_srilm".equals(fds[0])) {
          use_srilm = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_srilm: %s", use_srilm));
         
        } else if ("use_bloomfilter_lm".equals(fds[0])) {
          use_bloomfilter_lm = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_bloomfilter_lm: %s", use_bloomfilter_lm));
         
        } else if ("use_trie_lm".equals(fds[0])) {
          use_trie_lm = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_trie_lm: %s", use_trie_lm));
         
        } else if ("lm_ceiling_cost".equals(fds[0])) {
          lm_ceiling_cost = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("lm_ceiling_cost: %s", lm_ceiling_cost));
         
        // BUG: accepting typos in config file is not acceptable
        } else if ("use_left_euqivalent_state".equals(fds[0])) {
          use_left_equivalent_state = Boolean.parseBoolean(fds[1]);
         
          logger.warning("Misspelling in configuration file: 'use_right_euqivalent_state'");
         
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_left_equivalent_state: %s", use_left_equivalent_state));
       
        // BUG: accepting typos in config file is not acceptable
        } else if ("use_right_euqivalent_state".equals(fds[0])) {
          use_right_equivalent_state = Boolean.parseBoolean(fds[1]);
         
          logger.warning("Misspelling in configuration file: 'use_right_euqivalent_state'");
         
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_right_equivalent_state: %s", use_right_equivalent_state));
         
        } else if ("use_left_equivalent_state".equals(fds[0])) {
          use_left_equivalent_state = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_left_equivalent_state: %s", use_left_equivalent_state));
       
        } else if ("use_right_equivalent_state".equals(fds[0])) {
          use_right_equivalent_state = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_right_equivalent_state: %s", use_right_equivalent_state));
         
        } else if ("order".equals(fds[0])) {
          lmOrder = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("g_lm_order: %s", lmOrder));
         
        } else if ("use_sent_specific_lm".equals(fds[0])) {
          use_sent_specific_lm = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_sent_specific_lm: %s", use_sent_specific_lm));
         
        } else if ("sent_lm_file_name_prefix".equals(fds[0])) {
          g_sent_lm_file_name_prefix = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("sent_lm_file_name_prefix: %s", g_sent_lm_file_name_prefix));
         
        } else if ("use_sent_specific_tm".equals(fds[0])) {
          use_sent_specific_tm = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_sent_specific_tm: %s", use_sent_specific_tm));
         
        } else if ("sent_tm_file_name_prefix".equals(fds[0])) {
          g_sent_tm_file_name_prefix = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("sent_tm_file_name_prefix: %s", g_sent_tm_file_name_prefix));
         
        } else if ("span_limit".equals(fds[0])) {
          span_limit = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("span_limit: %s", span_limit));
         
        } else if ("phrase_owner".equals(fds[0])) {
          phrase_owner = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("phrase_owner: %s", phrase_owner));
         
        } else if ("glue_owner".equals(fds[0])) {
          glue_owner = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("glue_owner: %s", glue_owner));
         
        else if ("default_non_terminal".equals(fds[0])) {
          default_non_terminal = "[" + fds[1].trim() + "]";
//          default_non_terminal = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("default_non_terminal: %s", default_non_terminal));
         
        } else if ("goalSymbol".equals(fds[0])) {
          goal_symbol = "[" + fds[1].trim() + "]";
//          goal_symbol = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("goalSymbol: %s", goal_symbol));
         
        } else if ("fuzz1".equals(fds[0])) {
          fuzz1 = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("fuzz1: %s", fuzz1));
         
        } else if ("fuzz2".equals(fds[0])) {
          fuzz2 = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("fuzz2: %s", fuzz2));
         
        } else if ("max_n_items".equals(fds[0])) {
          max_n_items = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("max_n_items: %s", max_n_items));
         
        } else if ("relative_threshold".equals(fds[0])) {
          relative_threshold = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("relative_threshold: %s", relative_threshold));
         
        } else if ("max_n_rules".equals(fds[0])) {
          max_n_rules = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("max_n_rules: %s", max_n_rules));
         
        } else if ("rule_relative_threshold".equals(fds[0])) {
          rule_relative_threshold = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("rule_relative_threshold: %s", rule_relative_threshold));
         
        } else if ("use_unique_nbest".equals(fds[0])) {
          use_unique_nbest = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_unique_nbest: %s", use_unique_nbest));
         
        } else if ("add_combined_cost".equals(fds[0])) {
          add_combined_cost = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("add_combined_cost: %s", add_combined_cost));
         
        } else if ("use_tree_nbest".equals(fds[0])) {
          use_tree_nbest = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_tree_nbest: %s", use_tree_nbest));
         
        } else if ("include_align_index".equals(fds[0])) {
          include_align_index = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("include_align_index: %s", include_align_index));
         
        } else if ("top_n".equals(fds[0])) {
          topN = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("topN: %s", topN));
         
        } else if ("use_remote_lm_server".equals(fds[0])) {
          use_remote_lm_server = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_remote_lm_server: %s", use_remote_lm_server));
         
        } else if ("f_remote_server_list".equals(fds[0])) {
          f_remote_server_list = fds[1];
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("f_remote_server_list: %s", f_remote_server_list));
         
        } else if ("num_remote_lm_servers".equals(fds[0])) {
          num_remote_lm_servers = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("num_remote_lm_servers: %s", num_remote_lm_servers));
         
        } else if ("remote_symbol_tbl".equals(fds[0])) {
          remote_symbol_tbl = fds[1];
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("remote_symbol_tbl: %s", remote_symbol_tbl));
         
        } else if ("remote_lm_server_port".equals(fds[0])) {
          //port = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("remote_lm_server_port: not used"));
         
        } else if ("parallel_files_prefix".equals(fds[0])) {
          Random random = new Random();
                int v = random.nextInt(10000000);//make it random
          parallel_files_prefix = fds[1] + v;
          logger.info(String.format("parallel_files_prefix: %s", parallel_files_prefix));
        } else if ("num_parallel_decoders".equals(fds[0])) {
          num_parallel_decoders = Integer.parseInt(fds[1]);
          if (num_parallel_decoders <= 0) {
            throw new IllegalArgumentException("Must specify a positive number for num_parallel_decoders");
          }
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("num_parallel_decoders: %s", num_parallel_decoders));
         
        } else if ("save_disk_hg".equals(fds[0])) {
          save_disk_hg = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("save_disk_hg: %s", save_disk_hg));
         
        } else if ("use_kbest_hg".equals(fds[0])) {
          use_kbest_hg = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("use_kbest_hg: %s", use_kbest_hg));
         
        } else if ("forest_pruning".equals(fds[0])) {
          forest_pruning = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("forest_pruning: %s", forest_pruning));
         
        } else if ("forest_pruning_threshold".equals(fds[0])) {
          forest_pruning_threshold = Double.parseDouble(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("forest_pruning_threshold: %s", forest_pruning_threshold));
       
        } else if ("visualize_hypergraph".equals(fds[0])) {
          visualize_hypergraph = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("visualize_hypergraph: %s", visualize_hypergraph));
        } else if ("segment_file_parser_class".equals(fds[0])) {
          segmentFileParserClass = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest("segmentFileParserClass: " + segmentFileParserClass);
         
        } else if ("useCubePrune".equals(fds[0])) {
          useCubePrune = Boolean.valueOf(fds[1]);
          if(useCubePrune==false)
            logger.warning("useCubePrune=false");
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useCubePrune: %s", useCubePrune));       
        }else if ("useBeamAndThresholdPrune".equals(fds[0])) {
          useBeamAndThresholdPrune = Boolean.valueOf(fds[1]);
          if(useBeamAndThresholdPrune==false)
            logger.warning("useBeamAndThresholdPrune=false");
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useBeamAndThresholdPrune: %s", useBeamAndThresholdPrune));       
        } else if ("oovFeatureCost".equals(fds[0])) {
          oovFeatureCost = Float.parseFloat(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("oovFeatureCost: %s", oovFeatureCost));
        } else if ("useTMFeat".equals(fds[0])) {
          useTMFeat = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useTMFeat: %s", useTMFeat));
        } else if ("useLMFeat".equals(fds[0])) {
          useLMFeat = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useLMFeat: %s", useLMFeat));
        } else if ("useMicroTMFeat".equals(fds[0])) {
          useMicroTMFeat = new Boolean(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useMicroTMFeat: %s", useMicroTMFeat));         
        } else if ("wordMapFile".equals(fds[0])) {
          wordMapFile = fds[1].trim();
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("wordMapFile: %s", wordMapFile));         
        } else if ("useRuleIDName".equals(fds[0])) {
          useRuleIDName = new Boolean(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useRuleIDName: %s", useRuleIDName));         
        }else if ("startNgramOrder".equals(fds[0])) {
          startNgramOrder = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("startNgramOrder: %s", startNgramOrder));
        } else if ("endNgramOrder".equals(fds[0])) {
          endNgramOrder = Integer.parseInt(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("endNgramOrder: %s", endNgramOrder));
        }else if ("useEdgeNgramOnly".equals(fds[0])) {
          useEdgeNgramOnly = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useEdgeNgramOnly: %s", useEdgeNgramOnly));
        }else if ("useTMTargetFeat".equals(fds[0])) {
          useTMTargetFeat = Boolean.valueOf(fds[1]);
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useTMTargetFeat: %s", useTMTargetFeat));
        } else if ("useGoogleLinearCorpusGain".equals(fds[0])) {
          useGoogleLinearCorpusGain = new Boolean(fds[1].trim());
          if (logger.isLoggable(Level.FINEST))
            logger.finest(String.format("useGoogleLinearCorpusGain: %s", useGoogleLinearCorpusGain));         
        } else if ("googleBLEUWeights".equals(fds[0])) {
          String[] googleWeights = fds[1].trim().split(";");
          if(googleWeights.length!=5){
            logger.severe("wrong line=" + line);
            System.exit(1);
          }
          linearCorpusGainThetas = new double[5];
          for(int i=0; i<5; i++)
            linearCorpusGainThetas[i] = new Double(googleWeights[i]);
         
         
          logger.finest(String.format("googleBLEUWeights: %s", linearCorpusGainThetas));   
         
        } else {
          logger.warning("Maybe Wrong config line: " + line);
        }
       
       
      } else { // feature function
        String[] fds = Regex.spaces.split(line);
        if ("lm".equals(fds[0]) && fds.length == 2) { // lm order weight
          have_lm_model = true;
          logger.info("you use a LM feature function, so make sure you have a LM grammar");
        }
      }
     
     
     
    } } finally { configReader.close(); }
   
    if( useGoogleLinearCorpusGain  ){
      if( linearCorpusGainThetas==null){
        logger.info("linearCorpusGainThetas is null, did you set googleBLEUWeights properly?");
        System.exit(1);
View Full Code Here

    this.symbolTable = symbolTable;
  }

  public void initialize() {
    try {
      this.reader = new LineReader(fileName);
    } catch (IOException e) {
      throw new RuntimeException(
        "Error opening translation model file: " + fileName
        + (null != e.getMessage()
          ? e.getMessage()
View Full Code Here

      new NbestMinRiskReranker(produceRerankedNbest, scalingFactor);
   
    System.out.println("##############running mbr reranking");
   
    int oldSentID = -1;
    LineReader nbestReader = new LineReader(inputNbest);
    List<String> nbest = new ArrayList<String>();

    if (numThreads==1) {
     
      try { for (String line : nbestReader) {
        String[] fds = Regex.threeBarsWithSpace.split(line);
        int newSentID = Integer.parseInt(fds[0]);
        if (oldSentID != -1 && oldSentID != newSentID) {
          String best_hyp = mbrReranker.processOneSent(nbest, oldSentID);//nbest: list of unique strings
          outWriter.write(best_hyp);
          outWriter.newLine();
          outWriter.flush();
          nbest.clear();
        }
        oldSentID = newSentID;
        nbest.add(line);
      } } finally { nbestReader.close(); }

      //last nbest
      String bestHyp = mbrReranker.processOneSent(nbest, oldSentID);
      outWriter.write(bestHyp);
      outWriter.newLine();
View Full Code Here

  }
 

//  ======== read word map
  static public Map<String, String> readWordMap(String mapFile) throws IOException{
    LineReader     mapReader = new LineReader(mapFile);
    Map<String, String> wordMap = new HashMap<String, String>();
    for (String line : mapReader) {
      String[] fds = line.trim().split("\\s+\\|{3}\\s+");
      wordMap.put(fds[0], fds[1]);
    }
    mapReader.close();
    return wordMap;
  }
View Full Code Here

   
    //======== read word map   
    Map<String, String> wordMap = MicroRuleFT.readWordMap(wordMapFile);
   
    //======== read grammar and extract features
    LineReader   grammarReader = new LineReader(grammarFile);
    Map<String, Integer> featMap = new HashMap<String, Integer>();   
    for (String line : grammarReader) {
      Map<String,Integer> tbl = MicroRuleFT.computeTargetNgramFeature(line.trim(), wordMap, startNgramOrder, endNgramOrder);
      featMap.putAll(tbl);
    }   
    grammarReader.close();
   
    //======== write the table out
    BufferedWriter writer = FileUtility.getWriteFileStream(featureFile);
    double initWeight = 0;
    for(String name : featMap.keySet()){
View Full Code Here

TOP

Related Classes of joshua.util.io.LineReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.