Package org.apache.hadoop.util.bloom

Examples of org.apache.hadoop.util.bloom.BloomFilter


   * m = - n ln p / (ln 2)^2
   *
   * 1.44 log_2 (1/e) = # of bits per inserted element
   **/
  public BloomSet(int nbits, int hashes) {
    bloom = new BloomFilter(nbits, hashes, hashType);
  }
View Full Code Here


  protected BloomFilter deserialize(byte[] serialized) {
    try {
      Preconditions.checkArgument(serialized != null);
      DataInputStream in = new DataInputStream(new ByteArrayInputStream(
          serialized));
      BloomFilter bloom = new BloomFilter(); // empty constructor
      bloom.readFields(in);
      return bloom;
    } catch (IOException e) {
      throw new IllegalArgumentException(e.getMessage());
    }
  }
View Full Code Here

        return keys;
      }
    };
    Map<String, BloomFilter> filterValues = BloomFilterFactory.createFilter(new Path(inputPath), filterFn).getValue();
    assertEquals(1, filterValues.size());
    BloomFilter filter = filterValues.get("shakes.txt");
    assertTrue(filter.membershipTest(new Key("Mcbeth".getBytes())));
    assertTrue(filter.membershipTest(new Key("apples".getBytes())));
  }
View Full Code Here

    // Assigned in configure(JobConf) from the BF_SIZE, BF_NUM_HASHES and BF_HASH_TYPE settings.
    protected BloomFilter bloomFilter;
    // Collector typed for NullWritable keys and BloomFilter values; it is not assigned
    // anywhere in the visible code -- NOTE(review): confirm where it is set.
    protected OutputCollector<NullWritable, BloomFilter> collector;
   
    public void configure(JobConf conf) {
      super.configure(conf);
      bloomFilter = new BloomFilter(
          conf.getInt(BF_SIZE, BF_DFT_SIZE),
          conf.getInt(BF_NUM_HASHES, BF_DFT_NUM_HASHES),
          conf.getInt(BF_HASH_TYPE, BF_DFT_HASH_TYPE));
    }
View Full Code Here

    String filename = "bfngrams/out/part-00000";
    FileSystem fs = FileSystem.get(URI.create(filename), conf);
    Path path = new Path(filename);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    NullWritable nullKey = NullWritable.get();
    BloomFilter bloomFilter = new BloomFilter();
    reader.next(nullKey, bloomFilter);
    reader.close();
   
    System.out.println(bloomFilter.toString());
   
    String[] egs = {
        "activities other",
        "membership organizations",
        "organizations elsewhere",
        "4 0",
        "elsewhere classified",
        "other membership",
        "0 activities",
        "20091128093155 4"
    };
   
    for(String eg : egs) {
      Key k = new Key(eg.getBytes());
      System.out.println(eg+"\t"+bloomFilter.membershipTest(k));
    }
   
  }
View Full Code Here

    String filterName = getConfiguration().get(CRUNCH_FILTER_NAME);
    emitter.emit(Pair.of(filterName, bloomFilter));
  }

  static BloomFilter initializeFilter(int size) {
    return new BloomFilter(size, 5, Hash.MURMUR_HASH);
  }
View Full Code Here

    }
    assertEquals("Analyzer produced too few terms", expected.length, pos);
  }

  private static Filter getFilter(String[] tokens) throws IOException {
    Filter filter = new BloomFilter(100,50, Hash.JENKINS_HASH);
    Key k = new Key();
    for (String s: tokens) {
      setKey(k,s);
      filter.add(k);
    }
    return filter;
  }
View Full Code Here

    }
    assertEquals("Analyzer produced too few terms", expected.length, pos);
  }

  private static Filter getFilter(String[] tokens) throws IOException {
    Filter filter = new BloomFilter(100,50, Hash.JENKINS_HASH);
    Key k = new Key();
    for (String s: tokens) {
      setKey(k,s);
      filter.add(k);
    }
    return filter;
  }
View Full Code Here

    DataFileStream<Object> reader =
        new DataFileStream<Object>(
            is, new GenericDatumReader<Object>());

    reader.hasNext();
    BloomFilter filter = new BloomFilter();
    AvroBytesRecord
        .fromGenericRecord((GenericRecord) reader.next(), filter);
    IOUtils.closeQuietly(is);
    IOUtils.closeQuietly(reader);
View Full Code Here

  public static Filter NewBloomInstance() {
    return NewBloomInstance( DEFAULT_NUM_ELEMENTS, DEFAULT_FALSE_POS_PROB);
  }

  static Filter NewVesselBloom() {
    return new BloomFilter();
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.util.bloom.BloomFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.