if (interestingWordNeighbors > 0)
printWordNeighbors(dateString, fdTri);
} catch (IOException ioe) {
// rethrow
throw new IOError(ioe);
}
// Pick the earliest start time available as the new starting
// time for the s-space
assert futureStartTimes.size() > 0;
Long ssStart = new TreeSet<Long>(futureStartTimes).first();
futureStartTimes.clear();
// lastly, update the s-space start time with the newly selected time
curSSpaceStartTime.set(ssStart);
}
};
// Barrier for the document processing threads. When a thread's next
// document falls outside of the current time range, the thread waits on
// this barrier. Because the barrier is created with numThreads parties, it
// only trips once every processing thread is waiting; at that point the
// serializeTimeSpan action runs (writing out the current time span's
// .sspace) before any of the waiting threads are released.
final CyclicBarrier exceededTimeSpanBarrier =
new CyclicBarrier(numThreads, serializeTimeSpan);
for (int i = 0; i < numThreads; ++i) {
Thread processingThread = new Thread() {
    /**
     * Pulls documents from {@code docIter} until none remain and passes
     * each document's reader to {@code fdTri.processDocument}.
     *
     * <p>Before a document is processed, its time stamp is checked against
     * the current s-space start time.  While the document lies outside the
     * allowed time span, this thread records the document's time in
     * {@code futureStartTimes} and waits on
     * {@code exceededTimeSpanBarrier}.
     *
     * <p>The thread stops early if it is interrupted, or if the barrier is
     * broken, while waiting.
     *
     * @throws IOError if processing a document raises an
     *         {@link IOException}
     */
    @Override
    public void run() {
        // Keep processing documents for as long as some remain.
        //
        // NOTE(review): a thread that leaves this loop (no documents
        // left, or one of the early returns below) never arrives at
        // exceededTimeSpanBarrier again.  If other threads are still
        // waiting on that barrier, they may block indefinitely --
        // confirm how end-of-input is handled by the surrounding code.
        while (docIter.hasNext()) {
            TemporalDocument doc = docIter.next();
            int docNumber = count.incrementAndGet();
            long docTime = doc.timeStamp();

            // Special case for the first document: its time stamp
            // becomes the initial s-space start time, and the start
            // flag is raised so that the other threads may proceed.
            if (docNumber == 1) {
                curSSpaceStartTime.set(docTime);
                startBarrier.set(true);
            }

            // Spin until the thread holding the first document has set
            // the initial start time.  We deliberately spin instead of
            // blocking, because another thread is expected to set the
            // flag almost immediately, making this a quick no-op.
            while (!startBarrier.get())
                ;

            // Check whether this document's time would exceed the
            // maximum time span of the current partition.  This is a
            // loop rather than a single test: after the barrier
            // releases, the new start time may come from another
            // thread's earlier document, in which case this document
            // can still be out of range and the thread must wait again.
            while (!timeSpan.insideRange(
                       curSSpaceStartTime.get(), docTime)) {
                try {
                    // Enqueue this document's time so that the
                    // serialization step can choose the correct start
                    // time for the next s-space, then wait on the
                    // barrier to signal that this thread has reached a
                    // document belonging to a later time span.
                    futureStartTimes.offer(docTime);
                    exceededTimeSpanBarrier.await();
                } catch (InterruptedException ex) {
                    // Restore the interrupt status instead of silently
                    // discarding it, then stop processing.
                    Thread.currentThread().interrupt();
                    return;
                } catch (BrokenBarrierException ex) {
                    // Another party left the barrier abnormally, so the
                    // threads can no longer be coordinated; stop.
                    return;
                }
            }

            try {
                fdTri.processDocument(doc.reader());
            } catch (IOException ioe) {
                // Rethrow as an unchecked error, since run() cannot
                // declare checked exceptions.
                throw new IOError(ioe);
            }
            LOGGER.fine("parsed document #" + docNumber);
        }
    }
};