Examples of joshua.corpus.alignment.Alignments

joshua.corpus.alignment.Alignments
Represents alignment points for an aligned parallel corpus. @author Lane Schwartz

    
    /////////////////////
    // Alignment data  //
    /////////////////////
    if (logger.isLoggable(Level.INFO)) logger.info("Reading alignment data.");
    final Alignments alignments;
    if ("AlignmentArray".equals(alignmentsType)) {
      if (logger.isLoggable(Level.INFO)) logger.info("Using AlignmentArray");
      alignments = SuffixArrayFactory.createAlignments(alignmentsFileName, sourceSuffixArray, targetSuffixArray);
    } else if ("AlignmentGrids".equals(alignmentsType) || "AlignmentsGrid".equals(alignmentsType)) {
      if (logger.isLoggable(Level.INFO)) logger.info("Using AlignmentGrids");

View Full Code Here

    
    if (logger.isLoggable(Level.FINE)) {
      logger.fine("Counting word co-occurrence from parallel corpus. Using floor probability " + floorProbability);
    }
    
    Alignments alignments = parallelCorpus.getAlignments();
    Corpus sourceCorpus = parallelCorpus.getSourceCorpus();
    Corpus targetCorpus = parallelCorpus.getTargetCorpus();
    int numSentences = parallelCorpus.getNumSentences();
    
    Counts<Integer,Integer> counts = new Counts<Integer,Integer>(floorProbability);
    
    // Iterate over each sentence
    for (int sentenceID=0; sentenceID<numSentences; sentenceID++) {


      int sourceStart = sourceCorpus.getSentencePosition(sentenceID);
      int sourceEnd = sourceCorpus.getSentenceEndPosition(sentenceID);


      int targetStart = targetCorpus.getSentencePosition(sentenceID);
      int targetEnd = targetCorpus.getSentenceEndPosition(sentenceID);


      // Iterate over each word in the source sentence
      for (int sourceIndex=sourceStart; sourceIndex<sourceEnd; sourceIndex++) {


        // Get the token for the current source word
        int sourceWord = sourceCorpus.getWordID(sourceIndex);
        
        // Get the target indices aligned to this source word
        int[] targetPoints = alignments.getAlignedTargetIndices(sourceIndex);
        
        // If the source word is unaligned,
        // then we treat it as being aligned to a special NULL token;
        // we use Java's null to represent the NULL token
        if (targetPoints==null) {
          
          counts.incrementCount(sourceWord, null);
          
        } else {
          
          // If the source word is aligned,
          // then we must iterate over each aligned target point
          for (int targetPoint : targetPoints) {


            int targetWord = targetCorpus.getWordID(targetPoint);


            counts.incrementCount(sourceWord, targetWord);
          }
        }
        
      }
      
      // Iterate over each word in the target sentence
      for (int targetIndex=targetStart; targetIndex<targetEnd; targetIndex++) {


        // Get the token for the current target word
        int targetWord = targetCorpus.getWordID(targetIndex);
        
        // Get the source indices aligned to this target word
        int[] sourcePoints = alignments.getAlignedSourceIndices(targetIndex);
        
        // If the target word is unaligned,
        // then we treat it as being aligned to a special NULL token;
        // we use Java's null to represent the NULL token
        if (sourcePoints==null) {

View Full Code Here

    
    float sourceGivenTarget = 1.0f;
    
    Corpus sourceCorpus = parallelCorpus.getSourceCorpus();
    Corpus targetCorpus = parallelCorpus.getTargetCorpus();
    Alignments alignments = parallelCorpus.getAlignments();
    
    // Iterate over each terminal sequence in the source phrase
    for (int seq=0; seq<sourcePhrases.getNumberOfTerminalSequences(); seq++) {
      
      // Iterate over each source index in the current terminal sequence
      for (int sourceWordIndex=sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, seq),
            end=sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, seq);
          sourceWordIndex<end; 
          sourceWordIndex++) {
        
                
        int sourceWord = sourceCorpus.getWordID(sourceWordIndex);
        int[] targetIndices = alignments.getAlignedTargetIndices(sourceWordIndex);
        
        float sum = 0.0f;
        float average;
        
        if (targetIndices==null) {

View Full Code Here

    
    final boolean LOGGING_FINEST = logger.isLoggable(Level.FINEST);
    
    Corpus sourceCorpus = parallelCorpus.getSourceCorpus();
    Corpus targetCorpus = parallelCorpus.getTargetCorpus();
    Alignments alignments = parallelCorpus.getAlignments();
    
    StringBuilder s;
    if (LOGGING_FINEST) {
      s = new StringBuilder();
      s.append("lexProb( ");
      s.append(sourcePhrases.getPattern().toString());
      s.append(" | ");
      s.append(targetPhrase.toString());
      s.append(" )  =  1.0");
    } else {
      s = null;
    }
    
    float targetGivenSource = 1.0f;


    // Iterate over each terminal sequence in the target phrase
    for (int seq=0; seq<targetPhrase.getNumberOfTerminalSequences(); seq++) {
      
      // Iterate over each target index in the current terminal sequence
      for (int targetWordIndex=targetPhrase.getTerminalSequenceStartIndex(seq),
            end=targetPhrase.getTerminalSequenceEndIndex(seq);
          targetWordIndex<end; 
          targetWordIndex++) {
        
        int targetWord = targetCorpus.getWordID(targetWordIndex);
        int[] sourceIndices = alignments.getAlignedSourceIndices(targetWordIndex);
        
        float sum = 0.0f;
        float average;
        
        if (LOGGING_FINEST) s.append(" * (");

View Full Code Here


    String targetFileName = joshDir + "/target.corpus";
    Corpus targetCorpusArray = new MemoryMappedCorpusArray(commonVocab, targetFileName);
  
    String alignmentFileName = joshDir + "/alignment.grids";
    Alignments alignments = new MemoryMappedAlignmentGrids(alignmentFileName, sourceCorpusArray, targetCorpusArray);
  
    return new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, alignments);
  }

View Full Code Here

    CorpusArray targetCorpusArray =
      SuffixArrayFactory.createCorpusArray(targetFileName);
    SuffixArray targetSuffixArray = 
      SuffixArrayFactory.createSuffixArray(targetCorpusArray, SuffixArray.DEFAULT_CACHE_CAPACITY);


    Alignments alignmentArray = SuffixArrayFactory.createAlignments(alignmentFileName, sourceSuffixArray, targetSuffixArray);


    return new SampledLexProbs(Integer.MAX_VALUE, sourceSuffixArray, targetSuffixArray, alignmentArray, Cache.DEFAULT_CAPACITY, false);
    
  }

View Full Code Here

      JoshuaConfiguration.tm_file + 
      File.separator + "alignment.grids";
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading alignment grid data from " +
        binaryAlignmentFileName);
    Alignments alignments =
      new MemoryMappedAlignmentGrids(
          binaryAlignmentFileName,
          sourceCorpusArray,
          targetCorpusArray);

View Full Code Here

        
        alignedSourceIndices[i] = new int[1];
        alignedSourceIndices[i][0] = i;
      }
    }
    Alignments alignments = new AlignmentArray(alignedTargetIndices, alignedSourceIndices, 1);
    
    CorpusArray targetCorpus = new CorpusArray(sentenceF, sentenceStartPositions, vocab);
    SuffixArray targetSuffixes = new SuffixArray(targetCorpus);


    CorpusArray sourceCorpus = new CorpusArray(sentence, sentenceStartPositions, vocab);

View Full Code Here

    Corpus targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, targetVocab, numTargetWords, numTargetSentences);
    Suffixes targetSuffixArray = SuffixArrayFactory.createSuffixArray(targetCorpusArray, maxCacheSize);
    
    int trainingSize = sourceCorpusArray.getNumSentences();
    boolean requireTightSpans = true;
    Alignments alignments = new AlignmentGrids(new Scanner(new File(alignmentFileName)), sourceCorpusArray, targetCorpusArray, trainingSize, requireTightSpans);
    
//    ParallelCorpus parallelCorpus = 
//      new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, alignments);
    
//    LexicalProbabilities lexProbs =

View Full Code Here

          
          logger.fine("Loading target suffix array...");
          Suffixes targetSuffixes = new MemoryMappedSuffixArray(binarySourceSuffixesFileName, sourceCorpus);


          logger.fine("Loading alignment grids...");
          Alignments alignments = new MemoryMappedAlignmentGrids(binaryAlignmentFileName, sourceCorpus, targetCorpus);
          
//          ParallelCorpusGrammarFactory parallelCorpus = new ParallelCorpusGrammarFactory(
//              sourceSuffixes, 
//              targetSuffixes, 
//              alignments,

View Full Code Here

TOP

Related Classes of joshua.corpus.alignment.Alignments

joshua.corpus.lexprob.LexProbs

joshua.corpus.lexprob.SampledLexProbs

joshua.corpus.lexprob.WriteLexProbs

joshua.decoder.JoshuaDecoder

joshua.prefix_tree.ExtractRuleProfiler

joshua.prefix_tree.ExtractRules

joshua.prefix_tree.PrefixTreeTest

joshua.ui.alignment.GridViewer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.