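// Bloom filter false-positive probability: (1 - e^(-numHashes * numElements / vectorSize))^numHashes.
// Reference: http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html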
return Math.pow(1.0 - Math.exp((double) -numHashes * numElements / vectorSize), numHashes);
}
private static BloomFilter mergeBloomParts(String tapPath, long numBloomBits, long splitSize, int numBloomHashes, long numElems) throws IOException {
FixedSizeBitSet bitSet = new FixedSizeBitSet(numBloomBits);
if (FileSystemHelper.getFS().exists(new Path(tapPath))) {
Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), tapPath);
TupleEntryIterator itr = tap.openForRead(CascadingUtil.get().getFlowProcess());
while (itr.hasNext()) {
TupleEntry cur = itr.next();
long split = cur.getLong(0);
FixedSizeBitSet curSet = new FixedSizeBitSet(splitSize, ((BytesWritable) cur.getObject(1)).getBytes());
for (long i = 0; i < curSet.numBits(); i++) {
if (curSet.get(i)) {
bitSet.set(split * splitSize + i);
}
}
}
itr.close();