* efficiently discovered.
*/
public void getChunkFingerprints(InputStream is, ChunkVisitor visitor) throws IOException {
    // The input is consumed through a fixed-size buffer instead of being
    // loaded into memory in one piece, so memory use stays bounded and the
    // long offsets below are usable for inputs larger than a single byte[].
    // Consequence: if reading fails part-way, the visitor may already have
    // been handed the chunks completed before the failure.
    // The stream is not closed here.

    // windowing fingerprinter for finding chunk boundaries. this is only
    // reset at the beginning of the file
    final RabinFingerprintLong window = newWindowedFingerprint();
    // fingerprinter for chunks. this is reset after each chunk
    final RabinFingerprintLong finger = newFingerprint();
    // counters: [chunkStart, chunkEnd) are the byte offsets of the chunk
    // currently being accumulated
    long chunkStart = 0;
    long chunkEnd = 0;

    final byte[] buffer = new byte[8192];
    int read;
    while ((read = is.read(buffer)) != -1) {
        /*
         * fingerprint one byte at a time. we have to use this granularity
         * to ensure that, for example, a one byte offset at the beginning
         * of the file won't affect the chunk boundaries
         */
        for (int i = 0; i < read; i++) {
            final byte b = buffer[i];
            // push byte into fingerprints
            window.pushByte(b);
            finger.pushByte(b);
            chunkEnd++;
            /*
             * if we've reached a boundary (which we will at some
             * probability based on the boundary pattern and the size of
             * the fingerprint window), we report the current chunk
             * fingerprint and reset the chunk fingerprinter.
             */
            if (boundaryDetector.isBoundary(window)) {
                visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
                finger.reset();
                // the next chunk starts where this one ended
                chunkStart = chunkEnd;
            }
        }
    }

    // final chunk. this is always reported, even when it is empty (empty
    // input, or input ending exactly on a boundary), in which case
    // chunkStart == chunkEnd
    visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
}