package net.agkn.hll;
/*
* Copyright 2013 Aggregate Knowledge, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.powermock.reflect.Whitebox.getInternalState;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import java.util.HashSet;
import java.util.Random;
import net.agkn.hll.serialization.ISchemaVersion;
import net.agkn.hll.serialization.SerializationUtil;
import org.testng.annotations.Test;
/**
 * Tests {@link HLL} of type {@link HLLType#EXPLICIT}.
 *
 * @author timon
 */
public class ExplicitHLLTest {
    /**
     * Tests basic set semantics of {@link HLL#addRaw(long)}.
     */
    @Test
    public void addBasicTest() {
        { // Adding a single positive value to an empty set should work.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(1L/*positive*/);
            assertEquals(hll.cardinality(), 1L);
        }
        { // Adding a single negative value to an empty set should work.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(-1L/*negative*/);
            assertEquals(hll.cardinality(), 1L);
        }
        { // Adding a duplicate value to a set should be a no-op.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(1L/*positive*/);
            assertEquals(hll.cardinality(), 1L/*arbitrary*/);
            // The same value has to be added a second time, otherwise the
            // duplicate path is never actually exercised.
            hll.addRaw(1L/*dupe*/);
            assertEquals(hll.cardinality(), 1L/*dupe*/);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link HLL#union(HLL)}.
     */
    @Test
    public void unionTest() {
        {// Unioning two distinct sets should work
            final HLL hllA = newHLL(128/*arbitrary*/);
            final HLL hllB = newHLL(128/*arbitrary*/);
            hllA.addRaw(1L);
            hllA.addRaw(2L);
            hllB.addRaw(3L);

            hllA.union(hllB);
            assertEquals(hllA.cardinality(), 3);
        }
        {// Unioning two sets whose union doesn't exceed the cardinality cap should not promote
            final HLL hllA = newHLL(128/*arbitrary*/);
            final HLL hllB = newHLL(128/*arbitrary*/);
            hllA.addRaw(1L);
            hllA.addRaw(2L);
            hllB.addRaw(1L);

            hllA.union(hllB);
            assertEquals(hllA.cardinality(), 2);
            // Verify the "should not promote" half of the claim as well.
            assertEquals(hllA.getType(), HLLType.EXPLICIT);
        }
        {// unioning two sets whose union exceeds the cardinality cap should promote
            final int explicitThreshold = 128;
            final HLL hllA = newHLL(explicitThreshold);
            final HLL hllB = newHLL(explicitThreshold);

            // fill up sets to explicitThreshold; hllB's values are offset so
            // that the two sets are disjoint
            for(long i=0; i<explicitThreshold; i++) {
                hllA.addRaw(i);
                hllB.addRaw(i + explicitThreshold);
            }

            hllA.union(hllB);
            assertEquals(hllA.getType(), HLLType.SPARSE);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link HLL#clear()}
     */
    @Test
    public void clearTest() {
        final HLL hll = newHLL(128/*arbitrary*/);
        hll.addRaw(1L);
        assertEquals(hll.cardinality(), 1L);
        hll.clear();
        assertEquals(hll.cardinality(), 0L);
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link HLL#toBytes(ISchemaVersion)} and
     * {@link HLL#fromBytes(byte[])} for {@link HLLType#EXPLICIT} instances:
     * the serialized length must be the schema's padding plus eight bytes per
     * element, and the deserialized instance must hold the same elements.
     */
    @Test
    public void toFromBytesTest() {
        final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
        final HLLType type = HLLType.EXPLICIT;
        final int padding = schemaVersion.paddingBytes(type);
        final int bytesPerWord = 8;

        {// Should work on an empty set
            final HLL hll = newHLL(128/*arbitrary*/);

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding/*no elements, just padding*/);

            final HLL inHLL = HLL.fromBytes(bytes);
            assertElementsEqual(hll, inHLL);
        }
        {// Should work on a partially filled set
            final HLL hll = newHLL(128/*arbitrary*/);
            for(int i=0; i<3; i++) {
                hll.addRaw(i);
            }

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding + (bytesPerWord * 3/*elements*/));

            final HLL inHLL = HLL.fromBytes(bytes);
            assertElementsEqual(hll, inHLL);
        }
        {// Should work on a full set
            final int explicitThreshold = 128;
            final HLL hll = newHLL(explicitThreshold);
            for(int i=0; i<explicitThreshold; i++) {
                hll.addRaw(27 + i/*arbitrary*/);
            }

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding + (bytesPerWord * explicitThreshold/*elements*/));

            final HLL inHLL = HLL.fromBytes(bytes);
            assertElementsEqual(hll, inHLL);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests correctness against {@link java.util.HashSet}.
     */
    @Test
    public void randomValuesTest() {
        final int explicitThreshold = 4096;
        final HashSet<Long> canonical = new HashSet<Long>();
        final HLL hll = newHLL(explicitThreshold);

        final long seed = 1L/*constant so results are reproducible*/;
        final Random random = new Random(seed);
        for(int i=0;i<explicitThreshold;i++){
            final long randomLong = random.nextLong();
            // Long.valueOf instead of the deprecated Long(long) constructor
            canonical.add(Long.valueOf(randomLong));
            hll.addRaw(randomLong);
        }

        final int canonicalCardinality = canonical.size();
        assertEquals(hll.cardinality(), canonicalCardinality);
    }

    // ------------------------------------------------------------------------
    /**
     * Tests promotion to {@link HLLType#SPARSE} and {@link HLLType#FULL}.
     */
    @Test
    public void promotionTest() {
        { // locally scoped for sanity
            final int explicitThreshold = 128;
            final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold*/, HLLType.EXPLICIT);

            // one value more than the explicit threshold
            for(int i=0;i<explicitThreshold + 1;i++){
                hll.addRaw(i);
            }
            assertEquals(hll.getType(), HLLType.SPARSE);
        }
        { // locally scoped for sanity
            final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, 4/*expthresh => explicitThreshold = 8*/, false/*sparseon*/, HLLType.EXPLICIT);

            for(int i=0;i<9/* > explicitThreshold */;i++){
                hll.addRaw(i);
            }
            assertEquals(hll.getType(), HLLType.FULL);
        }
    }

    // ************************************************************************
    // assertion helpers
    /**
     * Asserts that the explicit storage of both {@link HLL}s holds exactly
     * the same values. The storage is read reflectively from the
     * <code>explicitStorage</code> field of each instance.
     *
     * @param hllA the first {@link HLL} to compare.
     * @param hllB the second {@link HLL} to compare.
     */
    private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
        final LongOpenHashSet internalSetA = (LongOpenHashSet)getInternalState(hllA, "explicitStorage");
        final LongOpenHashSet internalSetB = (LongOpenHashSet)getInternalState(hllB, "explicitStorage");

        // Set equality (order-insensitive); the message makes a failure diagnosable.
        assertTrue(internalSetA.equals(internalSetB),
                   "Explicit storage differs: " + internalSetA + " vs. " + internalSetB);
    }

    /**
     * Builds a {@link HLLType#EXPLICIT} {@link HLL} instance with the specified
     * explicit threshold.
     *
     * @param  explicitThreshold explicit threshold to use for the constructed
     *         {@link HLL}. This must be greater than zero.
     * @return a default-sized {@link HLLType#EXPLICIT} empty {@link HLL} instance.
     *         This will never be <code>null</code>.
     */
    private static HLL newHLL(final int explicitThreshold) {
        return new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold, arbitrary, unused*/, HLLType.EXPLICIT);
    }
}