Examples of org.apache.hadoop.hbase.util.CompoundBloomFilter

org.apache.hadoop.hbase.util.CompoundBloomFilter
A Bloom filter implementation built on top of {@link ByteBloomFilter}, encapsulating a set of fixed-size Bloom filters written out at the time of {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the datablock stream, and loaded on demand at query time. This class only provides reading capabilities.

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);


        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);


        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.


        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);


        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

View Full Code Here

TOP

Related Classes of org.apache.hadoop.hbase.util.CompoundBloomFilter

org.apache.hadoop.hbase.io.hfile.HFileBlock

org.apache.hadoop.hbase.regionserver.TestCompoundBloomFilter

java.nio.ByteBuffer

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.