Package brickhouse.udf.bloom

Source Code of brickhouse.udf.bloom.BloomUDAF

package brickhouse.udf.bloom;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/


import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.log4j.Logger;


/**
*   Construct a BloomFilter by aggregating on keys
*  
*   Uses hadoop util BloomFilter class
*  Use with bloom_contains( key, bloomfile );
*  insert overwrite local directory bloomfile
*  select bloom( ks_uid )
*   from big_table
*    where premise = true;
*   
*   add file bloomfile;
*  
*  select ks_uid
*  from other_big_table
*  where bloom_contains( key, distributed_bloom('bloomfile') );
*   
*  
* @author jeromebanks
*
*/
@Description(
     name = "bloom",
     value =  " Constructs a BloomFilter by aggregating a set of keys \n " +
              "_FUNC_(string key) \n"
    )
public class BloomUDAF extends UDAF {
  private static final Logger LOG = Logger.getLogger( BloomUDAF.class);
  //// Convert to GenericUDAF .. non-generic is broken ..
 

  public static class BloomUDAFEvaluator implements UDAFEvaluator {
    private Filter bloomFilter;

    /*
     */
    public void init() {
      bloomFilter =  BloomFactory.NewBloomInstance();
      /**
      try {
        ///LOG.info("INIT BLOOM " + BloomFactory.WriteBloomToString(bloomFilter));
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      **/
    }

   
    public boolean iterate( String key) {
      if( key != null) {
        if( bloomFilter == null) {
          init();
        }
        bloomFilter.add( new Key(key.getBytes()));
       
        /**
        try {
        ///LOG.info( "BloomFilter is " + BloomFactory.WriteBloomToString(bloomFilter ) + " after adding Key " +key);
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      **/
       
      }
      return true;
    }
   
    public String terminatePartial() throws HiveException {
      /**
      try {
        ///LOG.info(" Terminate Partial " + BloomFactory.WriteBloomToString(bloomFilter) );
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      **/
      return terminate();
    }

    public String terminate() throws HiveException {
      try {
        if( bloomFilter != null) {
          return BloomFactory.WriteBloomToString(bloomFilter);
        } else {
          return null;
        }
      } catch (IOException e) {
        LOG.error(" Error while evaluating Bloom ", e);
        throw new HiveException( "Error while evaluating Bloom");
      }
    }
   
    public boolean merge( String partial) {
      try {
        if( bloomFilter == null) {
          bloomFilter = BloomFactory.ReadBloomFromString(partial);
          ///LOG.info(" read bloom from partial " + BloomFactory.WriteBloomToString(bloomFilter));
          return true;
        } else{
          ///LOG.info(" ORng with merged before " + BloomFactory.WriteBloomToString(bloomFilter) );
          Filter other = BloomFactory.ReadBloomFromString(partial);
          ///LOG.info("ORng with merged other " + BloomFactory.WriteBloomToString(other) );
          bloomFilter.or(other);
          ///LOG.info(" ORing with merged after " + BloomFactory.WriteBloomToString(bloomFilter) );
          return true;
        }
      } catch (IOException e) {
        LOG.error(" Error while evaluating Bloom ", e);
        return false;
      }
    }

  }

}
TOP

Related Classes of brickhouse.udf.bloom.BloomUDAF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.