// First traversal pass: start a traversal with tm.startTraversal() and keep
// resuming from each batch's checkpoint until no DocumentList is returned,
// recording every document ID seen along the way. IDs for which
// docIdListFirstTraversal.add() returns false are collected separately in
// duplicatesFirstTraversal.
// NOTE(review): docIdListFirstTraversal is declared above this excerpt;
// presumably a Set<String> like docIdListSecondTraversal, so a false return
// from add() would mean the ID was already present - confirm.
List<String> duplicatesFirstTraversal = new ArrayList<String>(100);
// Get the first set of documents.
// The batch hint is (re)applied before every start/resume call.
// NOTE(review): presumably 20 bounds the number of documents per batch -
// confirm against the declaration of tm's type.
tm.setBatchHint(20);
DocumentList docList = tm.startTraversal();
// A null DocumentList from startTraversal()/resumeTraversal() ends the pass.
while (docList != null) {
Document doc = null;
// Drain the current batch; nextDocument() returning null marks its end.
while (null != (doc = docList.nextDocument())) {
// The ID is the string form of the first value returned for the
// PROPNAME_DOCID property of the document.
String docId = doc.findProperty(SpiConstants.PROPNAME_DOCID).
nextValue().toString();
if (!docIdListFirstTraversal.add(docId)) {
duplicatesFirstTraversal.add(docId);
}
}
// Every batch must yield a checkpoint, since it is the only input used to
// fetch the next batch.
String checkpoint = docList.checkpoint();
assertNotNull("Checkpoint was null", checkpoint);
// Resume traversal.
tm.setBatchHint(20);
docList = tm.resumeTraversal(checkpoint);
}
// The pass is only meaningful if at least one document ID was recorded.
assertTrue("No documents traversed", docIdListFirstTraversal.size() > 0);
// TODO: do we want to investigate the presence of duplicates?
//if (duplicatesFirstTraversal.size() > 0) {
// System.out.println("Found duplicates during first traversal: " +
// duplicatesFirstTraversal.size());
//}
Set<String> docIdListSecondTraversal = new HashSet<String>(100);
List<String> duplicatesSecondTraversal = new ArrayList<String>(100);
// Get the second set of documents.
tm.setBatchHint(20);
docList = tm.startTraversal();
while (docList != null) {
Document doc = null;
while (null != (doc = docList.nextDocument())) {
String docId = doc.findProperty(SpiConstants.PROPNAME_DOCID).
nextValue().toString();
if (!docIdListSecondTraversal.add(docId)) {
duplicatesSecondTraversal.add(docId);
}
}