Source Code of brickhouse.udf.hll.HyperLogLogUDAFTest

package brickhouse.udf.hll;

import java.io.IOException;
import java.util.HashMap;
import java.util.UUID;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.log4j.Logger;
import org.junit.Ignore;
import org.junit.Test;

import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;

import org.junit.Assert;

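/**
 * Unit tests for the HyperLogLog UDAF. Each input row supplies two arguments:
 * the value to be hashed (string) and an integer HyperLogLog precision. The
 * tests cover null handling for single-row and merged multi-row aggregations,
 * and check that cardinality estimates over random UUIDs stay within the
 * standard HyperLogLog error bound of 1.04 / sqrt(2^precision).
 */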
@Ignore("not ready yet")
public class HyperLogLogUDAFTest {
  private static final Logger LOG = Logger.getLogger(HyperLogLogUDAFTest.class);
 
  private static String TEST_HEADER = "\n************************************************************************\nRunning Test: ";
 
  @Test
  public void testSingleRowNullReturnsNull() throws HiveException {
    LOG.info(TEST_HEADER + "testSingleRowNullReturnsNull");
   
    HyperLogLogUDAF udaf = new HyperLogLogUDAF();
    ObjectInspector[] inputOiList = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector
        };
   
    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputOiList, false, false);
    GenericUDAFEvaluator udafEvaluator = udaf.getEvaluator(paramInfo);
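    // COMPLETE mode runs the whole aggregation in one pass: iterate over the raw
    // rows and call terminate() directly, with no partial/merge phase.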
   
    Mode m = Mode.COMPLETE;
    ObjectInspector outputOi = udafEvaluator.init(m, inputOiList);
   
    Object[] parameters = new Object[] { null , 12 };
    AggregationBuffer agg = udafEvaluator.getNewAggregationBuffer();
    udafEvaluator.reset(agg);
    udafEvaluator.iterate(agg, parameters);
    Object result = udafEvaluator.terminate(agg);
   
    LOG.info("result = " + result);
   
    Assert.assertNull(result);
  }
 
  @Test
  public void testMultipleRowNullReturnsNull() throws HiveException {
    LOG.info(TEST_HEADER + "testMultipleRowNullReturnsNull");
   
    HyperLogLogUDAF udaf = new HyperLogLogUDAF();
    ObjectInspector[] inputOiList1 = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector
        };
   
    ObjectInspector[] inputOiList2 = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector
        };
   
    GenericUDAFParameterInfo paramInfo1 = new SimpleGenericUDAFParameterInfo(inputOiList1, false, false);
    GenericUDAFEvaluator udafEvaluator1 = udaf.getEvaluator(paramInfo1);
   
    GenericUDAFParameterInfo paramInfo2 = new SimpleGenericUDAFParameterInfo(inputOiList2, false, false);
    GenericUDAFEvaluator udafEvaluator2 = udaf.getEvaluator(paramInfo2);
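    // Simulate a two-stage aggregation: each evaluator produces a map-side
    // partial in PARTIAL1 mode, and the partials are later merged and finalized
    // in FINAL mode.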
   
    Mode m1 = Mode.PARTIAL1;
    ObjectInspector partialOutputOi1 = udafEvaluator1.init(m1, inputOiList1);
    AggregationBuffer agg1 = udafEvaluator1.getNewAggregationBuffer();
    udafEvaluator1.reset(agg1);
    udafEvaluator1.iterate(agg1, new Object[] { null , 12 });
    Object res1 = udafEvaluator1.terminatePartial(agg1);
   
    Mode m2 = Mode.PARTIAL1;
    ObjectInspector partialOutputOi2 = udafEvaluator2.init(m2, inputOiList2);
    AggregationBuffer agg2 = udafEvaluator2.getNewAggregationBuffer();
    udafEvaluator2.reset(agg2);
    udafEvaluator2.iterate(agg2, new Object[] { null , 12 });
    Object res2 = udafEvaluator2.terminatePartial(agg2);
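    // Re-initialize the second evaluator in FINAL mode; from this point its
    // input is the partial-result ObjectInspector rather than the raw columns.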
   
    ObjectInspector finalOutputOi = udafEvaluator2.init(Mode.FINAL, new ObjectInspector[] {partialOutputOi1});
   
    AggregationBuffer agg3 = udafEvaluator2.getNewAggregationBuffer();
    udafEvaluator2.reset(agg3);
    // merge() expects the serialized partial results, not the aggregation buffers.
    udafEvaluator2.merge(agg3, res1);
    udafEvaluator2.merge(agg3, res2);
   
    Object result = udafEvaluator2.terminate(agg3);
   
    LOG.info("result = " + result);
   
    Assert.assertNull(result);
  }
 
  @Test
  public void testSingleRowNonNullReturnsNonNull() throws HiveException {
    LOG.info(TEST_HEADER + "testSingleRowNonNullReturnsNonNull");
   
    HyperLogLogUDAF udaf = new HyperLogLogUDAF();
    ObjectInspector[] inputOiList = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector
        };
   
    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputOiList, false, false);
    GenericUDAFEvaluator udafEvaluator = udaf.getEvaluator(paramInfo);
   
    Mode m = Mode.COMPLETE;
    ObjectInspector finalOutputOi = udafEvaluator.init(m, inputOiList);
   
    Object[] parameters = new Object[] { "foo" , 12 };
    AggregationBuffer agg = udafEvaluator.getNewAggregationBuffer();
    udafEvaluator.reset(agg);
    udafEvaluator.iterate(agg, parameters);
    Object result = udafEvaluator.terminate(agg);
   
    LOG.info("result = " + result);
   
    Assert.assertNotNull(result);
  }
 
  private void testCardinalityEstimateWithinBounds(Integer precision, Long uniqueCount) throws HiveException, IOException {
    LOG.info("testCardinalityEstimateWithinBounds - precision = " + precision + " - uniqueCount = " + uniqueCount);
   
    HyperLogLogUDAF udaf = new HyperLogLogUDAF();
    ObjectInspector[] inputOiList = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector
        };
   
    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputOiList, false, false);
    GenericUDAFEvaluator udafEvaluator = udaf.getEvaluator(paramInfo);
   
    Mode m = Mode.COMPLETE;
    ObjectInspector finalOutputOi = udafEvaluator.init(m, inputOiList);
   
    AggregationBuffer agg = udafEvaluator.getNewAggregationBuffer();
    udafEvaluator.reset(agg);
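    // Feed uniqueCount random UUIDs through the aggregation; the HashMap tracks
    // the exact number of distinct values for comparison with the estimate.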
   
    String uuid;
    HashMap<String, Integer> h = new HashMap<String, Integer>();
    for (int i = 0; i < uniqueCount; i++) {
      uuid = UUID.randomUUID().toString();
      h.put(uuid, 1);
      udafEvaluator.iterate(agg, new Object[] { uuid , precision });
    }
   
    Object result = udafEvaluator.terminate(agg);
    Assert.assertNotNull(result);
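    // terminate() returns the serialized sketch as bytes; rebuild the
    // HyperLogLogPlus estimator from them to read off the cardinality estimate.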
   
    byte[] b = ((JavaBinaryObjectInspector) finalOutputOi).getPrimitiveJavaObject(result);
    HyperLogLogPlus hll = HyperLogLogPlus.Builder.build( b );
    Long cardEst = hll.cardinality();
   
    LOG.info("cardEst = " + cardEst);
   
    int actualUniques = h.keySet().size();
    LOG.info("actualUniques = " + actualUniques);
   
    Long absDiff = Math.abs(cardEst - actualUniques);
    LOG.info("absDiff = " + absDiff);
   
    Double relDiff = absDiff.doubleValue()/uniqueCount.doubleValue();
    LOG.info("relDiff = " + relDiff);
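    // Standard HyperLogLog error bound: relative error is about 1.04 / sqrt(m),
    // where m = 2^precision is the number of registers.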
   
    Double maxError = 1.04d/Math.sqrt(Math.pow(2, precision));
    LOG.info("maxError = " + maxError);
   
    Assert.assertTrue( relDiff < maxError);
  }
 
  @Test
  public void testCardinalityEstimateWithinBounds12() throws HiveException, IOException {
    LOG.info(TEST_HEADER + "testCardinalityEstimateWithinBounds12");
   
    testCardinalityEstimateWithinBounds(12, 1000000L);
  }
 
  @Test
  public void testCardinalityEstimateWithinBounds16() throws HiveException, IOException {
    LOG.info(TEST_HEADER + "testCardinalityEstimateWithinBounds16");
   
    testCardinalityEstimateWithinBounds(16, 1000000L);
  }
 
}