throws IOException
{
final String docno = key.toString();
currentReporter = reporter;
reporter.setStatus("Currently indexing "+docno);
final Document doc = value.getObject();
if (start) {
splitnum = value.getSplitIndex();
System.out.println(splitnum);
//RunData.writeInt(splitnum);
start = false;
}
this.outputPostingListCollector = _outputPostingListCollector;
/* setup for parsing */
createDocumentPostings();
String term;//term we're currently processing
numOfTokensInDocument = 0;
//numberOfDocuments++;
//get each term in the document
while (!doc.endOfDocument()) {
reporter.progress();
if ((term = doc.getNextTerm())!=null && !term.equals("")) {
termFields = doc.getFields();
/* pass term into TermPipeline (stop, stem etc) */
pipeline_first.processTerm(term);
/* the term pipeline will eventually add the term to this object. */
}
if (MAX_TOKENS_IN_DOCUMENT > 0 &&
numOfTokensInDocument > MAX_TOKENS_IN_DOCUMENT)
break;
}
//if we didn't index all tokens from document,
//we need to get to the end of the document.
while (!doc.endOfDocument()){
doc.getNextTerm();
}
/* we now have all terms in the DocumentTree, so we save the document tree */
if (termsInDocument.getDocumentLength() == 0)
{ /* this document is empty, add the minimum to the document index */
// Nothing in the ifile
indexEmpty(doc.getAllProperties());
}
else
{ /* index this document */
try{
indexDocument(doc.getAllProperties(), termsInDocument);
numberOfTokens += numOfTokensInDocument;
reporter.incrCounter(Counters.INDEXED_TOKENS, numOfTokensInDocument);
reporter.incrCounter(Counters.INDEXED_POINTERS, termsInDocument.getNumberOfPointers());
} catch (IOException ioe) {
throw ioe;