Package joshua.corpus

Examples of joshua.corpus.LabeledSpan


        Span nonterminalSourceSpan = new Span(sourceSpan.start, firstTerminalIndex);
        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        // the pattern length will be reduced by the length of the non-terminal, and increased by 1 for the NT itself.
        patternSize = patternSize - nonterminalTargetSpan.size() +1;
      }
    }
   
    // Process all internal nonterminals
    for (int i=0, n=sourcePhrases.getNumberOfTerminalSequences()-1; i<n; i++) {
     
      int nextStartIndex =
        sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, i+1);
     
      int currentEndIndex =
        sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, i);
     
      if (nextStartIndex - currentEndIndex < minNonterminalSpan) {
       
        return null;
       
      } else {
       
        Span nonterminalSourceSpan = new Span(currentEndIndex, nextStartIndex);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);

        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;
       
      }
    }
     
    // If the source phrase ends with a nonterminal, we have to handle that NT as a special case
    if (sourceEndsWithNT) {
     
      int lastTerminalIndex = sourcePhrases.getLastTerminalIndex(sourcePhraseIndex);
     
      if (sourceSpan.end - lastTerminalIndex < minNonterminalSpan) {
       
        return null;
       
      } else {

        // If the source phrase ends with NT, then we need to calculate the span of the last NT
        Span nonterminalSourceSpan = new Span(lastTerminalIndex, sourceSpan.end);

        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
        if (logger.isLoggable(Level.FINEST)) logger.finest("Consistent target span " + nonterminalTargetSpan + " for NT source span " + nonterminalSourceSpan);


        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;

        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;

      }
    }
   
    boolean foundAlignedTerminal = false;
   
    // Create the pattern...
    int[] words = new int[patternSize];
    int patterCounter = 0;
   
    Collections.sort(targetNTSpans);
   
    if (targetNTSpans.get(0).getSpan().start == targetSpan.start) {
     
      int ntCumulativeSpan = 0;
     
      for (LabeledSpan span : targetNTSpans) {
        ntCumulativeSpan += span.size();
      }
     
      if (ntCumulativeSpan >= targetSpan.size()) {
        return null;
      }
     
    } else {
      // if we don't start with a non-terminal, then write out all the words
      // until we get to the first non-terminal
      for (int i = targetSpan.start; i < targetNTSpans.get(0).getSpan().start; i++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(i);
        patterCounter++;
      }
    }

    // add the first non-terminal
    words[patterCounter] = targetNTSpans.get(0).getLabel();
    patterCounter++;
   
    // add everything until the final non-terminal
    for(int i = 1; i < targetNTSpans.size(); i++) {
      LabeledSpan NT1 = targetNTSpans.get(i-1);
      LabeledSpan NT2 = targetNTSpans.get(i);
     
      for(int j = NT1.getSpan().end; j < NT2.getSpan().start; j++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(j, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(j);
        patterCounter++;
      }
      words[patterCounter] = NT2.getLabel();
      patterCounter++;
    }
   
    // if we don't end with a non-terminal, then write out all remaining words
    if(targetNTSpans.get(targetNTSpans.size()-1).getSpan().end != targetSpan.end) {
View Full Code Here

TOP

Related Classes of joshua.corpus.LabeledSpan

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.