Span nonterminalSourceSpan = new Span(sourceSpan.start, firstTerminalIndex);
Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;
targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
ntIndex++;
// the pattern length will be reduced by the length of the non-terminal, and increased by 1 for the NT itself.
patternSize = patternSize - nonterminalTargetSpan.size() +1;
}
}
// Process all internal nonterminals
for (int i=0, n=sourcePhrases.getNumberOfTerminalSequences()-1; i<n; i++) {
int nextStartIndex =
sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, i+1);
int currentEndIndex =
sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, i);
if (nextStartIndex - currentEndIndex < minNonterminalSpan) {
return null;
} else {
Span nonterminalSourceSpan = new Span(currentEndIndex, nextStartIndex);
Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;
targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
ntIndex++;
patternSize = patternSize - nonterminalTargetSpan.size() + 1;
}
}
// If the source phrase ends with a nonterminal, we have to handle that NT as a special case
if (sourceEndsWithNT) {
int lastTerminalIndex = sourcePhrases.getLastTerminalIndex(sourcePhraseIndex);
if (sourceSpan.end - lastTerminalIndex < minNonterminalSpan) {
return null;
} else {
// If the source phrase ends with NT, then we need to calculate the span of the last NT
Span nonterminalSourceSpan = new Span(lastTerminalIndex, sourceSpan.end);
Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
if (logger.isLoggable(Level.FINEST)) logger.finest("Consistent target span " + nonterminalTargetSpan + " for NT source span " + nonterminalSourceSpan);
if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;
targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
ntIndex++;
patternSize = patternSize - nonterminalTargetSpan.size() + 1;
}
}
boolean foundAlignedTerminal = false;
// Create the pattern...
int[] words = new int[patternSize];
int patterCounter = 0;
Collections.sort(targetNTSpans);
if (targetNTSpans.get(0).getSpan().start == targetSpan.start) {
int ntCumulativeSpan = 0;
for (LabeledSpan span : targetNTSpans) {
ntCumulativeSpan += span.size();
}
if (ntCumulativeSpan >= targetSpan.size()) {
return null;
}
} else {
// if we don't start with a non-terminal, then write out all the words
// until we get to the first non-terminal
for (int i = targetSpan.start; i < targetNTSpans.get(0).getSpan().start; i++) {
if (!foundAlignedTerminal) {
foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
}
words[patterCounter] = targetCorpus.getWordID(i);
patterCounter++;
}
}
// add the first non-terminal
words[patterCounter] = targetNTSpans.get(0).getLabel();
patterCounter++;
// add everything until the final non-terminal
for(int i = 1; i < targetNTSpans.size(); i++) {
LabeledSpan NT1 = targetNTSpans.get(i-1);
LabeledSpan NT2 = targetNTSpans.get(i);
for(int j = NT1.getSpan().end; j < NT2.getSpan().start; j++) {
if (!foundAlignedTerminal) {
foundAlignedTerminal = alignments.hasAlignedTerminal(j, sourcePhrases, sourcePhraseIndex);
}
words[patterCounter] = targetCorpus.getWordID(j);
patterCounter++;
}
words[patterCounter] = NT2.getLabel();
patterCounter++;
}
// if we don't end with a non-terminal, then write out all remaining words
if(targetNTSpans.get(targetNTSpans.size()-1).getSpan().end != targetSpan.end) {