Package joshua.corpus.vocab

Examples of joshua.corpus.vocab.Vocabulary.readExternal()


    int numTargetWords, numTargetSentences;
    String binaryCommonVocabFileName = this.commonVocabFileName;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Initializing common vocabulary from binary file " + binaryCommonVocabFileName);
      ObjectInput in = BinaryIn.vocabulary(binaryCommonVocabFileName);
      commonVocab.readExternal(in);
     
      numSourceWords = Integer.MIN_VALUE;
      numSourceSentences = Integer.MIN_VALUE;
     
      numTargetWords = Integer.MIN_VALUE;
View Full Code Here


  private static ParallelCorpus getParallelCorpus(String joshDir, int cacheSize) throws IOException, ClassNotFoundException {
   
    Vocabulary commonVocab = new Vocabulary();
      String binaryVocabFileName = joshDir + "/common.vocab";
      ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
    commonVocab.readExternal(in);
   
    String sourceFileName = joshDir + "/source.corpus";
    Corpus sourceCorpusArray = new MemoryMappedCorpusArray(commonVocab, sourceFileName);

    String targetFileName = joshDir + "/target.corpus";
View Full Code Here

     
      if (logger.isLoggable(Level.INFO))
        logger.info("Reading common vocabulary from " +
            binaryVocabFileName);
      Vocabulary commonVocab = new Vocabulary();
      commonVocab.readExternal(
          BinaryIn.vocabulary(binaryVocabFileName));

      // Initialize symbol table using suffix array's vocab
      this.initializeSymbolTable(commonVocab);
    }
View Full Code Here

          String binaryAlignmentFileName = joshDirName + File.separator + "alignment.grids";

          logger.fine("Loading vocabulary...");
          Vocabulary commonVocab = new Vocabulary();
          ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
          commonVocab.readExternal(in);

          logger.fine("Loading source corpus...");
          Corpus sourceCorpus = new MemoryMappedCorpusArray(commonVocab, binarySourceFileName);

          logger.fine("Loading source suffix array...");
View Full Code Here

      String binarySuffixArrayFileName = args[2];

      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language vocabulary from binary file " + binarySourceVocabFileName);
      ObjectInput in = BinaryIn.vocabulary(binarySourceVocabFileName);
      symbolTable = new Vocabulary();
      symbolTable.readExternal(in);

      logger.info("Constructing corpus array from file " + binaryCorpusFileName);
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped source language corpus array.");
      corpusArray = new MemoryMappedCorpusArray(symbolTable, binaryCorpusFileName);
View Full Code Here

   
    // Read the provided symbol table
    logger.info("Reading provided symbol table");
    Vocabulary symbolTable = new Vocabulary();
    ObjectInput in = BinaryIn.vocabulary(binaryVocabFilename);
    symbolTable.readExternal(in);
   
    // Read the provided corpus
    logger.info("Reading provided corpus");
    Vocabulary oldSymbolTable = new Vocabulary();
    int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, oldSymbolTable, true);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.