Examples of joshua.util.io.LineReader

joshua.util.io.LineReader
This class provides an Iterator interface to a BufferedReader. This covers the most common use cases for reading from files, without ugly code to check whether we got a line or not. Author: wren ng thornton. Version: $LastChangedDate: 2009-03-26 15:06:57 -0400 (Thu, 26 Mar 2009) $

    // The inner try...finally makes sure we close the reader.
    // reader.close() may itself throw an IOException or an
    // unchecked exception, so the outer try...finally ensures
    // that we still finish the coiterator.
    try {
      LineReader reader = new LineReader(in);
      int i = 0;
      try {
        while (reader.hasNext()) {
          
          coit.coNext(new PlainSegment(
            Integer.toString(i),
            Regex.spaces.replaceAll(reader.next(), " ").trim() ));
          
          ++i;
        }
      } finally {
        reader.close();
      }
    } finally {
      coit.finish();
    }
  }

View Full Code Here

//      }
//    }
    
//    int ngramOrder = 1;
    
    LineReader grammarReader = new LineReader(arpaFileName);
    
    try {
      for (String line : grammarReader) {




//    while (scanner.hasNext()) {
//      
//      String line = scanner.nextLine();


        String[] parts = Regex.spaces.split(line);
        if (parts.length > 1) {
          String[] words = Regex.spaces.split(parts[1]);


          for (String word : words) {
            if (logger.isLoggable(Level.FINE)) logger.fine("Adding to vocab: " + word);
            vocab.addTerminal(word);  
          }


        } else {
          logger.info(line);
        }


      }
    } finally { 
      grammarReader.close(); 
    }


//      
//      boolean lineIsHeader = NGRAM_HEADER.matches(line);
//

View Full Code Here

      // Maybe just return immediately instead?
      throw new IllegalArgumentException("null input stream");
    }
    
    try {
      LineReader reader = new LineReader(in);
      this.currentSegmentID = this.startSegmentID;
      try {
        while (reader.hasNext()) {
          String cleanedSentence =
            Regex.spaces.replaceAll(reader.next(), " ").trim();
          
          Matcher m = SEG_START.matcher(cleanedSentence);
          
          final String sentence;
          if (m.find()) {
            String id = m.group(1);
            if (null == id) {
              throw new RuntimeException("Something strange happened with regexes. This is a bug.");
            } else {
              this.currentSegmentID = Integer.parseInt(id);
              sentence =
                SEG_END.matcher(m.replaceFirst("")).replaceAll("");
            }
          } else {
            this.currentSegmentID++;
            sentence = cleanedSentence;
          }
          
          coit.coNext(new PlainSegment(
            Integer.toString(this.currentSegmentID),
            sentence));
        }
      } finally {
        reader.close();
      }
    } finally {
      coit.finish();
    }
  }

View Full Code Here

    this.isReadingFromFile = true;
    //### read file into tbls
    HashMap<String,Integer> localStr2id = new HashMap<String,Integer>();
    HashMap<Integer,String> localId2str = new HashMap<Integer,String>();
    
    LineReader symboltableReader = new LineReader(fname);
    try { for (String line : symboltableReader) {
      String[] fds = Regex.spaces.split(line);
      if (2 != fds.length) {
        logger.warning("read index, bad line: " + line);
        continue;
      }
      String str = fds[0].trim();
      int id = Integer.parseInt(fds[1]);
      
      String uniqueStr;
      if (null != localStr2id.get(str)) { // it is quite possible that java will treat two stings as the same when other language (e.g., C or perl) treat them differently, due to unprintable symbols
        logger.warning("duplicate string (add fake): " + line);
        uniqueStr = str + id;//fake string
        //System.exit(1);//TODO
      } else {
        uniqueStr = str;
      }
      localStr2id.put(uniqueStr, id);
      
      //it is guaranteed that the strings in localId2str are different
      if (null != localId2str.get(id)) {
        throw new RuntimeException("duplicate id, have to exit; " + line);
      } else {
        localId2str.put(id, uniqueStr);
      }
    } } finally { symboltableReader.close(); }
    
    /*if (localId2str.size() >= this.lm_end_sym_id - this.lm_start_sym_id) {
      throw new RuntimeException("read symbol tbl, tlb is too big");
    }*/

View Full Code Here

    int order = 0;
    
    Regex blankLine  = new Regex("^\\s*$");
    Regex ngramsLine = new Regex("^\\\\\\d-grams:\\s*$");
    
    LineReader grammarReader = new LineReader(grammar_file);
    try { for (String line : grammarReader) {
      line = line.trim();
      if (blankLine.matches(line)) {
        continue;
      }
      if (ngramsLine.matches(line)) { // \1-grams:
        start = true;
        order = Integer.parseInt(line.substring(1, 2));
        if (order > ngramOrder) {
          break;
        }
        if (logger.isLoggable(Level.INFO))
          logger.info("begin to read ngrams with order " + order);
        
        continue; //skip this line
      }
      if (start) {
        add_rule(line,order, g_is_add_suffix_infor, g_is_add_prefix_infor);
      }
    } } finally { grammarReader.close(); }
    
    if (logger.isLoggable(Level.FINE)) {
      logger.fine("# of bow nodes: " + g_n_bow_nodes + " ; # of suffix nodes: " + g_n_suffix_nodes);
      logger.fine("add LMHash  " + g_n_bow_nodes);
      logger.fine("##### mem used (kb): " + Support.getMemoryUse());

View Full Code Here

   */
  public static int[] initializeVocabulary(String inputFilename, Vocabulary vocab, boolean fixVocabulary) throws IOException {
    int numSentences = 0;
    int numWords = 0;
    
    LineReader lineReader = new LineReader(inputFilename);
    
    for (String line : lineReader) {
      BasicPhrase sentence = new BasicPhrase(line, vocab);
      numWords += sentence.size();
      numSentences++;

View Full Code Here




  private HashMap<String, Double> loadModel(String file){    
    try {
      
      LineReader reader = new LineReader(file);
      HashMap<String, Double> res =new HashMap<String, Double>();
      while(reader.hasNext()){
        String line = reader.readLine();
        String[] fds = line.split("\\s+\\|{3}\\s+");// feature_key ||| feature vale; the feature_key itself may contain "|||"
        StringBuffer featKey = new StringBuffer();
        for(int i=0; i<fds.length-1; i++){
          featKey.append(fds[i]);
          if(this.useIntegerNgram){
            //TODO???????????????
            
          }
          if(i<fds.length-2) 
            featKey.append(" ||| ");
        }
        double weight = new Double(fds[fds.length-1]);//initial weight
        res.put(featKey.toString(), weight);
      }
      
      reader.close();
      return res;
      
    } catch (IOException ioe) {
      throw new UncheckedIOException(ioe);
    }

View Full Code Here

    this.linearCorpusGainThetas = linearCorpusGainThetas;
    logger.info("linearCorpusGainThetas=" + this.linearCorpusGainThetas);


    //setup reference reader
    this.referenceReaders = new LineReader[1];
    LineReader reader = openOneFile(referenceFile);
    this.referenceReaders[0] = reader;
    this.tblOfReferenceNgramTbls = new HashMap<Integer, Map<String,Integer>>();
    logger.info("number of references used is " + referenceReaders.length);
  }

View Full Code Here

    logger.info("linearCorpusGainThetas=" + this.linearCorpusGainThetas);
    
    //setup reference readers
    this.referenceReaders = new LineReader[referenceFiles.length];
    for(int i=0; i<referenceFiles.length; i++){
      LineReader reader = openOneFile(referenceFiles[i]);
      this.referenceReaders[i] = reader;
    }
    this.tblOfReferenceNgramTbls = new HashMap<Integer, Map<String,Integer>>();
    logger.info("number of references used is " + referenceReaders.length);
  }

View Full Code Here

  }




  private LineReader openOneFile(String file){
    try{      
      return new LineReader(file); 
    }catch  (IOException ioe) {
      throw new UncheckedIOException(ioe);
    }
  }

View Full Code Here

0 1 2 3

TOP

Related Classes of joshua.util.io.LineReader

joshua.corpus.suffix_array.SuffixArrayFactory

joshua.corpus.vocab.DefaultSymbol

joshua.corpus.vocab.Vocabulary

joshua.decoder.DecoderFactory

joshua.decoder.DecoderThread

joshua.decoder.ff.lm.ArpaFile

joshua.decoder.ff.lm.buildin_lm.LMGrammarJAVA

joshua.decoder.ff.lm.distributed_lm.LMGrammarRemote

joshua.decoder.ff.lm.distributed_lm.LMServer

joshua.decoder.ff.tm.GrammarReader

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.