Package joshua.corpus.suffix_array

Examples of joshua.corpus.suffix_array.Suffixes


    }

    //////////////////////////////////
    // Source language suffix array //
    //////////////////////////////////
    Suffixes sourceSuffixArray;
    String binarySourceSuffixArrayFileName = sourceSuffixesFileName;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language suffix array from binary file " + binarySourceSuffixArrayFileName);
      sourceSuffixArray = new MemoryMappedSuffixArray(binarySourceSuffixArrayFileName, sourceCorpusArray, cacheSize);
    } else {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language suffix array from source corpus.");
      sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, cacheSize);
    }
   
   

       
    //////////////////////////////////
    // Target language corpus array //
    //////////////////////////////////
    final Corpus targetCorpusArray;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped target language corpus array.");
      targetCorpusArray = new MemoryMappedCorpusArray(commonVocab, targetFileName);
    } else {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing target language corpus array.");
      targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, commonVocab, numTargetWords, numTargetSentences);
    }
   

    //////////////////////////////////
    // Target language suffix array //
    //////////////////////////////////
    Suffixes targetSuffixArray;
    String binaryTargetSuffixArrayFileName = targetSuffixesFileName;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing target language suffix array from binary file " + binaryTargetSuffixArrayFileName);
      targetSuffixArray = new MemoryMappedSuffixArray(binaryTargetSuffixArrayFileName, targetCorpusArray, cacheSize);
    } else {
View Full Code Here


   
   
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading source language suffix array from " +
        binarySourceSuffixesFileName);
    Suffixes sourceSuffixArray =
      new MemoryMappedSuffixArray(
          binarySourceSuffixesFileName,
          sourceCorpusArray,
          maxCacheSize);

   
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading target language corpus from " +
        binaryTargetCorpusFileName);
    Corpus targetCorpusArray =
      new MemoryMappedCorpusArray(
        this.symbolTable, binaryTargetCorpusFileName);
   
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading target language suffix array from " +
        binaryTargetSuffixesFileName);
    Suffixes targetSuffixArray =
      new MemoryMappedSuffixArray(
          binaryTargetSuffixesFileName,
          targetCorpusArray,
          maxCacheSize);
   
View Full Code Here

  RootNode(PrefixTree tree, int incomingArcValue) {
    super(tree.parallelCorpus, 1);
    this.tree = tree;
    SymbolTable vocab = tree.vocab;
    this.matchedPhrases = HierarchicalPhrases.emptyList(vocab);
    Suffixes suffixArray = tree.suffixArray;
    if (suffixArray != null) {
      setBounds(0, suffixArray.size()-1);
    }
  }
View Full Code Here

    int[] sourceWordsSentences = Vocabulary.initializeVocabulary(sourceFileName, sourceVocab, true);
    numSourceWords = sourceWordsSentences[0];
    numSourceSentences = sourceWordsSentences[1];
   
    Corpus sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceFileName, sourceVocab, numSourceWords, numSourceSentences);
    Suffixes sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, maxCacheSize);
   
    int numTargetWords, numTargetSentences;
    Vocabulary targetVocab = new Vocabulary();
    int[] targetWordsSentences = Vocabulary.initializeVocabulary(targetFileName, targetVocab, true);
    numTargetWords = targetWordsSentences[0];
    numTargetSentences = targetWordsSentences[1];
   
    Corpus targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, targetVocab, numTargetWords, numTargetSentences);
    Suffixes targetSuffixArray = SuffixArrayFactory.createSuffixArray(targetCorpusArray, maxCacheSize);
   
    int trainingSize = sourceCorpusArray.getNumSentences();
    boolean requireTightSpans = true;
    Alignments alignments = new AlignmentGrids(new Scanner(new File(alignmentFileName)), sourceCorpusArray, targetCorpusArray, trainingSize, requireTightSpans);
   
View Full Code Here

          logger.fine("Loading source corpus...");
          Corpus sourceCorpus = new MemoryMappedCorpusArray(commonVocab, binarySourceFileName);

          logger.fine("Loading source suffix array...");
          Suffixes sourceSuffixes = new MemoryMappedSuffixArray(binarySourceSuffixesFileName, sourceCorpus);
         
          logger.fine("Loading target corpus...");   
          Corpus targetCorpus = new MemoryMappedCorpusArray(commonVocab, binaryTargetFileName);
         
          logger.fine("Loading target suffix array...");
          Suffixes targetSuffixes = new MemoryMappedSuffixArray(binarySourceSuffixesFileName, sourceCorpus);

          logger.fine("Loading alignment grids...");
          Alignments alignments = new MemoryMappedAlignmentGrids(binaryAlignmentFileName, sourceCorpus, targetCorpus);
         
//          ParallelCorpusGrammarFactory parallelCorpus = new ParallelCorpusGrammarFactory(
View Full Code Here

TOP

Related Classes of joshua.corpus.suffix_array.Suffixes

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.