Package joshua.corpus.suffix_array

Examples of joshua.corpus.suffix_array.HierarchicalPhrase


        for (int i=targetSpan.start; i<targetSpan.end; i++) {
          words[i-targetSpan.start] = targetCorpus.getWordID(i);
        }
       
        return new HierarchicalPhrase(
            words,
            targetSpan,
            Collections.<LabeledSpan>emptyList(),
            targetCorpus);
      }
    }

   
    // Handle the more complex cases...
    List<LabeledSpan> targetNTSpans = new ArrayList<LabeledSpan>();
    int patternSize = targetSpan.size();
   
    int ntIndex = 0;
   
    // For each non terminal in the source, find their corresponding positions in the target span...
   
    // If the source phrase starts with a nonterminal, we have to handle that NT as a special case
    if (sourceStartsWithNT) {
     
      int firstTerminalIndex = sourcePhrases.getFirstTerminalIndex(sourcePhraseIndex);
     
      if (firstTerminalIndex - sourceSpan.start < minNonterminalSpan) {
       
        return null;
       
      } else {
        // If the source phrase starts with NT, then we need to calculate the span of the first NT
        Span nonterminalSourceSpan = new Span(sourceSpan.start, firstTerminalIndex);
        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        // the pattern length will be reduced by the length of the non-terminal, and increased by 1 for the NT itself.
        patternSize = patternSize - nonterminalTargetSpan.size() +1;
      }
    }
   
    // Process all internal nonterminals
    for (int i=0, n=sourcePhrases.getNumberOfTerminalSequences()-1; i<n; i++) {
     
      int nextStartIndex =
        sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, i+1);
     
      int currentEndIndex =
        sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, i);
     
      if (nextStartIndex - currentEndIndex < minNonterminalSpan) {
       
        return null;
       
      } else {
       
        Span nonterminalSourceSpan = new Span(currentEndIndex, nextStartIndex);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;
       
      }
    }
     
    // If the source phrase ends with a nonterminal, we have to handle that NT as a special case
    if (sourceEndsWithNT) {
     
      int lastTerminalIndex = sourcePhrases.getLastTerminalIndex(sourcePhraseIndex);
     
      if (sourceSpan.end - lastTerminalIndex < minNonterminalSpan) {
       
        return null;
       
      } else {

        // If the source phrase ends with NT, then we need to calculate the span of the last NT
        Span nonterminalSourceSpan = new Span(lastTerminalIndex, sourceSpan.end);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
        if (logger.isLoggable(Level.FINEST)) logger.finest("Consistent target span " + nonterminalTargetSpan + " for NT source span " + nonterminalSourceSpan);


        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;

      }
    }
   
    boolean foundAlignedTerminal = false;
   
    // Create the pattern...
    int[] words = new int[patternSize];
    int patterCounter = 0;
   
    Collections.sort(targetNTSpans);
   
    if (targetNTSpans.get(0).getSpan().start == targetSpan.start) {
     
      int ntCumulativeSpan = 0;
     
      for (LabeledSpan span : targetNTSpans) {
        ntCumulativeSpan += span.size();
      }
     
      if (ntCumulativeSpan >= targetSpan.size()) {
        return null;
      }
     
    } else {
      // if we don't start with a non-terminal, then write out all the words
      // until we get to the first non-terminal
      for (int i = targetSpan.start; i < targetNTSpans.get(0).getSpan().start; i++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(i);
        patterCounter++;
      }
    }

    // add the first non-terminal
    words[patterCounter] = targetNTSpans.get(0).getLabel();
    patterCounter++;
   
    // add everything until the final non-terminal
    for(int i = 1; i < targetNTSpans.size(); i++) {
      LabeledSpan NT1 = targetNTSpans.get(i-1);
      LabeledSpan NT2 = targetNTSpans.get(i);
     
      for(int j = NT1.getSpan().end; j < NT2.getSpan().start; j++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(j, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(j);
        patterCounter++;
      }
      words[patterCounter] = NT2.getLabel();
      patterCounter++;
    }
   
    // if we don't end with a non-terminal, then write out all remaining words
    if(targetNTSpans.get(targetNTSpans.size()-1).getSpan().end != targetSpan.end) {
      // the target pattern does not end with a non-terminal, so copy the trailing terminals
      for(int i = targetNTSpans.get(targetNTSpans.size()-1).getSpan().end; i < targetSpan.end; i++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(i);
        patterCounter++;
      }
    }
   
    if (foundAlignedTerminal) {
      return new HierarchicalPhrase(
          words,
          targetSpan,
          targetNTSpans,
          targetCorpus);
    } else {
View Full Code Here


    ArrayList<HierarchicalPhrase> translations = new ArrayList<HierarchicalPhrase>();
   
    // For each sample HierarchicalPhrase
    for (int i=0, n=sourceHierarchicalPhrases.size(); i<n; i+=stepSize) {

      HierarchicalPhrase translation = getTranslation(sourceHierarchicalPhrases, i);
      if (translation != null) {
        translations.add(translation);
      }
    }
   
View Full Code Here

     
      // If target span and source span are consistent
      if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {
       
        // Construct a translation
        HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, sourceSpan, targetSpan, false, false);
       
        if (translation != null) {
          if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 1: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + sourceSpan);

          return translation;
        } else if (logger.isLoggable(Level.FINER)) {
          logger.finer("No valid translation returned from attempt to construct translation for source span " + sourceSpan + ", target span " + targetSpan);
        }
       
      }
     
    }
   
    // Case 2: If sourcePhrase startsWithNT && !endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && !sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 2: Source phrase startsWithNT && !endsWithNT");
     
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the initial source nonterminal starts one word before the first source terminal
      Span possibleSourceSpan = new Span(startOfTerminalSequence-1, endOfTerminalSequence);
     
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence &&
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan &&
          endOfTerminalSequence-possibleSourceSpan.start<=maxPhraseSpan) {
       
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, false);

          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 2: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);

            return translation;
          }

        }
       
        possibleSourceSpan.start--;
       
      }
     
    }
   
    // Case 3: If sourcePhrase !startsWithNT && endsWithNT
    else if (!sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 3: Source phrase !startsWithNT && endsWithNT");
     
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sourcePhrase.getSentenceNumber(sourcePhraseIndex));
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the final source nonterminal ends one word after the last source terminal
      Span possibleSourceSpan =
        new Span(startOfTerminalSequence, endOfTerminalSequence+1);
       
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.end <= endOfSentence &&
          possibleSourceSpan.end - endOfTerminalSequence <= maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
         
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, false, true);

          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 3: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);

            return translation;
          }

        }
       
        possibleSourceSpan.end++;
       
      }
     
    }
   
    // Case 4: If sourcePhrase startsWithNT && endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 4: Source phrase startsWithNT && endsWithNT");
     
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the candidate source span
      //   starts one word before the first source terminal and
      //   ends one word after the last source terminal
      Span possibleSourceSpan =
        new Span(startOfTerminalSequence-1, endOfTerminalSequence+1);
       
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence &&
          possibleSourceSpan.end <= endOfSentence &&
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan &&
          possibleSourceSpan.end-endOfTerminalSequence<=maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
   
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, true);

          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 4: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);

            return translation;
View Full Code Here

    int phraseIndex = 0;

    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 0, 1);
      HierarchicalPhrase targetPhrase = getTargetPhrase("das", 0, 1);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f)// lex P(it | das)
      Assert.assertEquals(targetGivenSource, 0.25f);// lex P(das | it)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("makes", 1, 2);
      HierarchicalPhrase targetPhrase = getTargetPhrase("macht", 1, 2);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(makes | macht)
      Assert.assertEquals(targetGivenSource, 1.0f);// lex P(macht | makes)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 2, 3);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 2, 3);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("and", 3, 4);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und", 3, 4);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f); // P(and | und)
      Assert.assertEquals(targetGivenSource, 1.0f);// P(und | and)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 4, 5);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 4, 5);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f)// lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("mars", 5, 6);
      HierarchicalPhrase targetPhrase = getTargetPhrase("besch\u00E4digt", 5, 6);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 6, 7);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 6, 7);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase(",", 7, 8);
      HierarchicalPhrase targetPhrase = getTargetPhrase(",", 7, 8);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 8, 9);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 8, 9);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f)// lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("sets", 9, 10);
      HierarchicalPhrase targetPhrase = getTargetPhrase("setzt", 9, 10);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 10, 11);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 10, 11);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("on", 11, 12);
      HierarchicalPhrase targetPhrase = getTargetPhrase("auf", 11, 12);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("yet", 12, 13);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und", 12, 13);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f); // P(yet | und)
      Assert.assertEquals(targetGivenSource, 1.0f);// P(und | yet)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 13, 14);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 13, 14);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f)// lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("takes", 14, 15);
      HierarchicalPhrase targetPhrase = getTargetPhrase("f\u00FChrt", 14, 15);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 15, 16);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 15, 16);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("off", 16, 17);
      HierarchicalPhrase targetPhrase = getTargetPhrase("aus", 16, 17);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase(".", 17, 18);
      HierarchicalPhrase targetPhrase = getTargetPhrase(".", 17, 18);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f)
    }

    ///////////

    {
      HierarchicalPhrases phrases = getSourcePhrase("yet it", 12, 14);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und es", 12, 14);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * 1.0f)// lex P(yet it | und es)
      Assert.assertEquals(targetGivenSource, 1.0f * 0.75f);// lex P(und es | yet it)
    }

    ///////////

    {
      HierarchicalPhrases phrases = getSourcePhrase("of the session", 19, 22);
      HierarchicalPhrase targetPhrase = getTargetPhrase("der sitzungsperiode", 19, 21);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * 0.5f * 1.0f)// lex P(of the session | der sitzungsperiode)
      Assert.assertEquals(targetGivenSource, 0.5f*((1.0f/3.0f) + (1.0f/3.0f)) * (1.0f/3.0f));// lex P(der sitzungsperiode | of the session)
    }

    {
      HierarchicalPhrases phrases = getSourcePhrase("thunder ; lightning", 29, 32);
      HierarchicalPhrase targetPhrase = getTargetPhrase("blitzen", 28, 29);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * (1.0f/3.0f) * 0.5f)// lex P(thunder ; lightning | blitzen)
      Assert.assertEquals(targetGivenSource, ((1.0f/2.0f) * (1.0f + 1.0f)));// lex P(blitzen | thunder ; lightning)
    }
View Full Code Here

        SymbolTable.X,
        targetVocab.getID("und"),
        targetVocab.getID("es")
      };
   
    HierarchicalPhrase targetPhrase = new HierarchicalPhrase(
        targetWords,
        new Span(0,5),
        Collections.<LabeledSpan>emptyList(),
        targetCorpusArray);
   
View Full Code Here

    return phrases;
  }
 
  private HierarchicalPhrase getTargetPhrase(String targetPhrase, int startIndex, int endIndex) {
   
    return new HierarchicalPhrase(
        targetVocab.getIDs(targetPhrase),
        new Span(startIndex,endIndex),
        Collections.<LabeledSpan>emptyList(),
        targetCorpusArray);
View Full Code Here

TOP

Related Classes of joshua.corpus.suffix_array.HierarchicalPhrase

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.