// Compute LCS: wrap the string forms of the two paragraphs (pl, pr) in
// comparators and ask RangeDifferencer for the ranges between them.
StringComparator left = new StringComparator(pl.toString());
// NOTE(review): the trailing `settings = new;` on the next line is not a
// complete Java expression, and no declaration of `settings` is visible in
// this part of the file. It looks garbled; confirm what should be passed
// to findRanges before relying on this statement.
StringComparator right = new StringComparator(pr.toString()); settings = new;
RangeDifference[] rd = RangeDifferencer.findRanges(settings, left, right);
// Debug Output
if (log.isDebugEnabled()) {
log.debug("\n\n RangeDifferences \n\n");
for (int x=0; x<rd.length; x++) {
log.debug (
toRangeString( left, rd[x].leftStart(), rd[x].leftLength(), true )
+ rd[x].kindString()
+ toRangeString( right, rd[x].rightStart(), rd[x].rightLength(), true ) );
// Now build appropriate replacement paragraph content
List<Object> pLeftReplacement = new ArrayList<Object>();
List<Object> pRightReplacement = new ArrayList<Object>();
// Which of the _existing_ w:r we are up to
int pLeftIndex = 0;
int pRightIndex = 0;
int[] leftCounts = getParagraphRunTextWordCounts(pl);
// StringBuilder debug = new StringBuilder();
// debug.append("{ ");
// for (int i=0; i < leftCounts.length; i++) {
// try {
// debug.append( leftCounts[i] + ", ");
// } catch (RuntimeException e) {
// }
// }
// System.out.println(debug);
int[] rightCounts = getParagraphRunTextWordCounts(pr);
int leftWordCounter = -1;
int rightWordCounter = -1;
for (int x=0; x<rd.length; x++) {
// The original runs are always longer than
// each range difference (rd[x]), so we will
// definitely require a new run structure
// for each side.
R currentLeftStructure = createRunStructure("",
pl, pLeftIndex );
R currentRightStructure = createRunStructure("",
pr, pRightIndex );
if (rd[x].kind() == RangeDifference.NOCHANGE) {
// These are part of the string LCS,
// (though they might not be part of the
// XML LCS once we've added their rPr
// back in.)
// This is where we focus our efforts.
// Process the words in rd[x] one word at a time
for (int i=rd[x].leftStart(); // left and right are identical
i<(rd[x].leftStart()+rd[x].leftLength()); i++) {
// Our objective is to ensure that both the
// left and right paragraphs end up with
// matching w:r/w:t boundaries.
// So when either of the existing paragraphs
// contains a boundary, it needs to be inserted
// in both results.
String word = left.getLeaf(i);
// log.debug(word);
if ( leftWordCounter < sum(leftCounts, 0, pLeftIndex)
&& rightWordCounter < sum(rightCounts, 0, pRightIndex) ) {
// it is ok to insert into current w:t
addWord(currentLeftStructure, word);
addWord(currentRightStructure, word);
} else {
// log.debug("Hit boundary");
// which boundary have we hit?
if (leftWordCounter == sum(leftCounts, 0, pLeftIndex)
&& rightWordCounter == sum(rightCounts, 0, pRightIndex) ) {
// Quite likely, for example, same formatting in each
// We're now on to each paragraph's next w:t
} else if (leftWordCounter == sum(leftCounts, 0, pLeftIndex) ) {
// We're now on to the left paragraph's next w:t
} else {
// We're now on to the right paragraph's next w:t
currentLeftStructure = createRunStructure(word,
pl, pLeftIndex );
currentRightStructure = createRunStructure(word,
pr, pRightIndex );
} else if (rd[x].kind() == RangeDifference.CHANGE) {
// These aren't part of the string LCS
// (so they shouldn't be part of
// the XML LCS).
// All we need to do is make sure that
// the input is round-tripped.
// Left side: Process the words in rd[x] one word at a time
// NB, can't just copy existing runs into the output
log.debug(".. left side");
for (int i=rd[x].leftStart();
i<(rd[x].leftStart()+rd[x].leftLength()); i++) {
String word = left.getLeaf(i);
// log.debug(word);
if ( leftWordCounter < sum(leftCounts, 0, pLeftIndex) ) {
// it is ok to insert into left's current w:t
addWord(currentLeftStructure, word);
} else {
// boundary hit
// We're now on to the left paragraph's next w:t
currentLeftStructure = createRunStructure(word,
pl, pLeftIndex );
// Right side
log.debug(".. right side");
for (int i=rd[x].rightStart();
i<(rd[x].rightStart()+rd[x].rightLength()); i++) {
String word = right.getLeaf(i);
if ( rightWordCounter < sum(rightCounts, 0, pRightIndex) ) {
// it is ok to insert into right's current w:t