// Run the same false-positive evaluation twice: first with ByteBloomFilter's
// fake lookup mode enabled, then with it disabled.
for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
try {
// Suffix appended to every log line and assertion message below, so that a
// failure identifies which of the two modes produced it.
String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
"enabled" : "disabled");
CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
// Presumably turns on the per-chunk counters that are read further down via
// getNumPositivesForTesting / getNumQueriesForTesting -- TODO confirm.
cbf.enableTestingStats();
int numFalsePos = 0;
// A new generator with the fixed evaluation seed is created on every pass,
// so both modes start from the same generator state.
Random rand = new Random(EVALUATION_SEED);
// Ten probes per NUM_KV[t] entry.
int nTrials = NUM_KV[t] * 10;
// Probe the Bloom filter with random keys and count the positive answers.
// NOTE(review): every hit is counted as a false positive, which assumes the
// random queries never coincide with keys that were actually written --
// confirm against the writer side of this test.
for (int i = 0; i < nTrials; ++i) {
byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
if (isInBloom(scanner, query, bt, rand)) {
numFalsePos += 1;
}
}
// Multiplying by 1.0 forces floating-point division instead of integer
// division.
double falsePosRate = numFalsePos * 1.0 / nTrials;
// NOTE(review): testIdMsg is concatenated into the format string itself, so
// a '%' inside it would be interpreted as a format specifier.
// fakeLookupModeStr is appended after formatting and is not affected.
LOG.debug(String.format(testIdMsg
+ " False positives: %d out of %d (%f)",
numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);
// Check for obvious Bloom filter crashes.
assertTrue("False positive is too high: " + falsePosRate + " (greater "
+ "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
falsePosRate < TOO_HIGH_ERROR_RATE);
// Now a more precise check to see if the false positive rate is not
// too high. The reason we use a relaxed restriction for the real-world
// case as opposed to the "fake lookup" case is that our hash functions
// are not completely independent.
// The z-value bound is 1.96 when fake lookup is enabled and the looser 2.5
// when it is disabled.
double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
fakeLookupModeStr);
// For checking the lower bound we need to eliminate the last chunk,
// because it is frequently smaller and the false positive rate in it
// is too low. This does not help if there is only one under-sized
// chunk, though.
int nChunks = cbf.getNumChunks();
if (nChunks > 1) {
// Subtract the last chunk's share from the running totals and recompute the
// rate. numFalsePos, nTrials and falsePosRate are overwritten in place, so
// from here on they no longer describe the full run.
numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
falsePosRate = numFalsePos * 1.0 / nTrials;
LOG.info(testIdMsg + " False positive rate without last chunk is " +
falsePosRate + fakeLookupModeStr);
}