Package joshua.corpus

Examples of joshua.corpus.Span


  /**
   * This method determines whether there is a consistent
   * word alignment for the specified source phrase.
   * ccb - debugging
   */
  public boolean hasConsistentAlignment(int startSourceIndex, int endSourceIndex) {
    Span targetSpan = getAlignedTargetSpan(startSourceIndex, endSourceIndex);
    if (targetSpan.start == UNALIGNED) return false;
    // check back to see what sourceSpan the targetSpan
    // aligns back to, so that we can check that it's
    // within bounds
    Span sourceSpan = getAlignedSourceSpan(targetSpan.start, targetSpan.end);
   
    return ! (sourceSpan.start < startSourceIndex
      || sourceSpan.end > endSourceIndex);
  }
View Full Code Here


      if (alignedIndices[i] != null) {
        lowestHighestMin = ( alignedIndices[i][0] < lowestHighestMin) ?  alignedIndices[i][0] : lowestHighestMin; //Math.min(lowestAlignedIndex[i], lowestHighestMin);
        lowestHighestMax = (alignedIndices[i][alignedIndices[i].length-1] > lowestHighestMax) ? alignedIndices[i][alignedIndices[i].length-1] : lowestHighestMax; //Math.max(highestAlignedIndex[i], lowestHighestMax);
      } else if (requireTightSpans && (i==startIndex || i==endIndex-1)) { //XXX Is this the correct way to ensure tight spans?
        // If requiring tight spans
        return new Span(UNALIGNED, UNALIGNED);
      }
    }
   
    lowestHighestMax++;
    return new Span(lowestHighestMin,lowestHighestMax)
  }
View Full Code Here

       
        return null;
       
      } else {
        // If the source phrase starts with NT, then we need to calculate the span of the first NT
        Span nonterminalSourceSpan = new Span(sourceSpan.start, firstTerminalIndex);
        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        // the pattern length will be reduced by the length of the non-terminal, and increased by 1 for the NT itself.
        patternSize = patternSize - nonterminalTargetSpan.size() +1;
      }
    }
   
    // Process all internal nonterminals
    for (int i=0, n=sourcePhrases.getNumberOfTerminalSequences()-1; i<n; i++) {
     
      int nextStartIndex =
        sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, i+1);
     
      int currentEndIndex =
        sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, i);
     
      if (nextStartIndex - currentEndIndex < minNonterminalSpan) {
       
        return null;
       
      } else {
       
        Span nonterminalSourceSpan = new Span(currentEndIndex, nextStartIndex);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;
       
      }
    }
     
    // If the source phrase ends with a nonterminal, we have to handle that NT as a special case
    if (sourceEndsWithNT) {
     
      int lastTerminalIndex = sourcePhrases.getLastTerminalIndex(sourcePhraseIndex);
     
      if (sourceSpan.end - lastTerminalIndex < minNonterminalSpan) {
       
        return null;
       
      } else {

        // If the source phrase ends with NT, then we need to calculate the span of the last NT
        Span nonterminalSourceSpan = new Span(lastTerminalIndex, sourceSpan.end);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
        if (logger.isLoggable(Level.FINEST)) logger.finest("Consistent target span " + nonterminalTargetSpan + " for NT source span " + nonterminalSourceSpan);


        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;

      }
    }
   
    boolean foundAlignedTerminal = false;
View Full Code Here

    if (!sourcePhrase.startsWithNonterminal() && !sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 1: Source phrase !startsWithNT && !endsWithNT");
     
      // Get target span
      Span sourceSpan = sourcePhrase.getSpan(sourcePhraseIndex);

      Span targetSpan = alignments.getConsistentTargetSpan(sourceSpan);
     
      // If target span and source span are consistent
      if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {
       
        // Construct a translation
        HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, sourceSpan, targetSpan, false, false);
       
        if (translation != null) {
          if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 1: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + sourceSpan);

          return translation;
        } else if (logger.isLoggable(Level.FINER)) {
          logger.finer("No valid translation returned from attempt to construct translation for source span " + sourceSpan + ", target span " + targetSpan);
        }
       
      }
     
    }
   
    // Case 2: If sourcePhrase startsWithNT && !endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && !sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 2: Source phrase startsWithNT && !endsWithNT");
     
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the initial source nonterminal starts one word before the first source terminal
      Span possibleSourceSpan = new Span(startOfTerminalSequence-1, endOfTerminalSequence);
     
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence &&
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan &&
          endOfTerminalSequence-possibleSourceSpan.start<=maxPhraseSpan) {
       
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, false);

          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 2: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);

            return translation;
          }

        }
       
        possibleSourceSpan.start--;
       
      }
     
    }
   
    // Case 3: If sourcePhrase !startsWithNT && endsWithNT
    else if (!sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 3: Source phrase !startsWithNT && endsWithNT");
     
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sourcePhrase.getSentenceNumber(sourcePhraseIndex));
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the initial source nonterminal starts one word after the last source terminal
      Span possibleSourceSpan =
        new Span(startOfTerminalSequence, endOfTerminalSequence+1);
       
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.end <= endOfSentence &&
          possibleSourceSpan.end - endOfTerminalSequence <= maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
         
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, false, true);

          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 3: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);

            return translation;
          }

        }
       
        possibleSourceSpan.end++;
       
      }
     
    }
   
    // Case 4: If sourcePhrase startsWithNT && endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
     
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 4: Source phrase startsWithNT && endsWithNT");
     
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
     
      // Start by assuming the initial source nonterminal
      //   starts one word before the first source terminal and
      //   ends one word after the last source terminal
      Span possibleSourceSpan =
        new Span(startOfTerminalSequence-1, endOfTerminalSequence+1);
       
      // Loop over all legal source spans
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence &&
          possibleSourceSpan.end <= endOfSentence &&
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan &&
          possibleSourceSpan.end-endOfTerminalSequence<=maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
   
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);

        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {

          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, true);

          if (translation != null) {
View Full Code Here

    this.requireTightSpans = requireTightSpans;
  }
 
  /* See Javadoc for Alignments interface. */
  public Span getConsistentTargetSpan(Span sourceSpan) {
    Span targetSpan = getAlignedTargetSpan(sourceSpan);
   
    if (targetSpan.start == UNALIGNED) return null;
   
    // check back to see what sourceSpan the targetSpan
    // aligns back to, so that we can check that it's
    // within bounds
    Span correspondingSourceSpan = getAlignedSourceSpan(targetSpan.start, targetSpan.end);
   
    if (correspondingSourceSpan.start < sourceSpan.start
        || correspondingSourceSpan.end > sourceSpan.end) {
      return null;
    } else {
View Full Code Here

       
    int[] sourceIndices = getSourcePoints(sentenceID, normalizedTargetStartIndex, normalizedTargetEndIndex);
   
    if (sourceIndices==null || sourceIndices.length==0) {
   
      return new Span(UNALIGNED, UNALIGNED);
   
    } else {
   
      int startSourceIndex = sourceOffset + sourceIndices[0];
      int endSourceIndex = sourceOffset + sourceIndices[sourceIndices.length-1]+1;
     
      return new Span(startSourceIndex, endSourceIndex);
     
    }
   
  }
View Full Code Here

   
    if (targetIndices==null || targetIndices.length==0 || (requireTightSpans && (
        startPoints==null || startPoints.length==0 ||
        endPoints==null || endPoints.length==0))) {
   
      return new Span(UNALIGNED, UNALIGNED);
   
    } else {
   
      int startTargetIndex = targetOffset + targetIndices[0];
      int endTargetIndex = targetOffset + targetIndices[targetIndices.length-1]+1;
     
      return new Span(startTargetIndex, endTargetIndex);
    }
  }
View Full Code Here

    List<Span> terminalSpans = new ArrayList<Span>();
    {
      int possibleStart = span.start;

      int nonterminalIndex = 0;
      Span nonterminal = span;

      for (LabeledSpan labeledNTSpan : nonterminalSpans) {

        nonterminal = labeledNTSpan.getSpan();

        if (nonterminal.start > possibleStart) {
          terminalSpans.add(new Span(possibleStart, nonterminal.start));
        }

        possibleStart = nonterminal.end;

        nonterminalIndex++;
      }

      if (span.end > possibleStart) {
        terminalSpans.add(new Span(possibleStart, span.end));
      }
    }

    // Initialize the sequence arrays
    this.terminalSequenceStartIndices = new int[terminalSpans.size()];
View Full Code Here

//        Pattern     pattern = new Pattern(vocab, vocab.getIDs("de sesiones del parlamento europeo"));
//        int[]       terminalSequenceStartIndices = {4};
//        int[]       terminalSequenceEndIndices = {9};
//        int         length = 5;

        Span span = new Span(4,9);
       
        int[] words = vocab.getIDs("de sesiones del parlamento europeo");
        List<LabeledSpan> labeledSpans = Collections.<LabeledSpan>emptyList();
       
        HierarchicalPhrase phrase =
          new HierarchicalPhrase(
              words,
              span,
              labeledSpans,
              sourceCorpusArray);
       
//        HierarchicalPhrase phrase =
//          new HierarchicalPhrase(
//              vocab.getIDs("de sesiones del parlamento europeo"),
//              terminalSequenceStartIndices,
//              terminalSequenceEndIndices,
//              sourceCorpusArray,
//              length);

        Assert.assertFalse(phrase.containsTerminalAt(0));
        Assert.assertFalse(phrase.containsTerminalAt(1));
        Assert.assertFalse(phrase.containsTerminalAt(2));
        Assert.assertFalse(phrase.containsTerminalAt(3));

        Assert.assertTrue(phrase.containsTerminalAt(4));
        Assert.assertTrue(phrase.containsTerminalAt(5));
        Assert.assertTrue(phrase.containsTerminalAt(6));
        Assert.assertTrue(phrase.containsTerminalAt(7));
        Assert.assertTrue(phrase.containsTerminalAt(8));

        Assert.assertFalse(phrase.containsTerminalAt(9));
        Assert.assertFalse(phrase.containsTerminalAt(10));
        Assert.assertFalse(phrase.containsTerminalAt(11));

        Assert.assertFalse(phrase.containsTerminalAt(Integer.MAX_VALUE));
        Assert.assertFalse(phrase.containsTerminalAt(-1));
      }
     
      {
//        Pattern     pattern = new Pattern(vocab, vocab.getIDs(","));
//        int[]       terminalSequenceStartIndices = {9};
//        int[]       terminalSequenceEndIndices = {10};
//        int         length = 1;

        HierarchicalPhrase phrase =
          new HierarchicalPhrase(
              vocab.getIDs(","),
              new Span(9,10),
              Collections.<LabeledSpan>emptyList(),
              sourceCorpusArray);
       
//        HierarchicalPhrase phrase =
//          new HierarchicalPhrase(
View Full Code Here

    int start = this.getStartPosition(phraseIndex, 0);//this.terminalSequenceStartIndices[nthPhraseIndex+0];
    int lastStart = this.getStartPosition(phraseIndex, lastIndex);//this.terminalSequenceStartIndices[nthPhraseIndex+lastIndex];
    int lastLength = this.terminalSequenceLengths[lastIndex];
    int end = lastStart + lastLength;   
   
    return new Span(start, end);
  }
View Full Code Here

TOP

Related Classes of joshua.corpus.Span

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.