/*
* Copyright 2012, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.LinkBench;
import java.util.Properties;
import java.util.Random;
import junit.framework.TestCase;
import org.junit.Test;
import com.facebook.LinkBench.distributions.ProbabilityDistribution;
/**
* This test implements generic unit tests for different implementations of
* ProbabilityDistribution.
*
* Most of these tests are either sanity tests (checks that output is within
* the expected range and obeys basic invariants) or consistency tests
* (checks that the output of two different methods is consistent).
*
* While these tests go a long way towards checking the consistency of the
* behavior of a ProbabilityDistribution, they cannot check that the
* specific correct values are generated: it is helpful to implement
* additional tests for each concrete implementation.
*
* @author tarmstrong
*/
public abstract class DistributionTestBase extends TestCase {
protected abstract ProbabilityDistribution getDist();
protected Properties getDistParams() {
return new Properties();
}
/** Number of cdf checks to perform */
protected int cdfChecks() {
return 50000;
}
/** Number of pdf checks */
protected int pdfChecks() {
return 50000;
}
protected Bucketer getBucketer() {
return new UniformBucketer(cdfChecks());
}
/** Percentage difference between cdf and choose() to tolerate */
protected double tolerance() {
return 0.002;
}
public Random initRandom(String testName) {
long seed = System.currentTimeMillis();
System.err.println("Choose seed " + seed + " for test " + testName);
return new Random(seed);
}
/**
* Check a few invariants cdf should adhere to
*/
@Test
public void testCDFSanity() {
ProbabilityDistribution dist = getDist();
long min = 453, max = 26546454;
dist.init(min, max, getDistParams(), "");
assertEquals(dist.cdf(min-1), 0.0);
assertEquals(dist.cdf(min-234321), 0.0);
assertEquals(dist.cdf(max), 1.0);
assertEquals(dist.cdf(max+2343242224234L), 1.0);
// Check cdf is monotonically increasing
double last = 0.0;
long step = (max - min) / cdfChecks();
for (long id = min; id < max; id += step) {
double p = dist.cdf(id);
assertTrue(p >= last);
last = p;
}
}
@Test
public void testPDFSanity() {
ProbabilityDistribution dist = getDist();
long min = 453, max = 26546454;
dist.init(min, max, getDistParams(), "");
assertEquals(0.0, dist.pdf(min-1));
assertEquals(0.0, dist.pdf(min-234321));
assertEquals(0.0, dist.pdf(max));
assertEquals(0.0, dist.pdf(max+2343242224234L));
// Check pdf is in correct range
double total = 0.0;
long step = (max - min) / pdfChecks();
for (long id = min; id < max; id += step) {
double p = dist.pdf(id);
if ((id - min) < step * 100) {
System.err.println("p(X=" + id + ") = " + p);
}
assertTrue(p >= 0.0);
assertTrue(p <= 1.0);
total += p;
}
assert(total <= 1.0);
}
@Test
public void testPDFSum() {
ProbabilityDistribution dist = getDist();
long min = 1, max = 50;
dist.init(min, max, getDistParams(), "");
// Check sum of pdf over small range
// Order of least to most probably to minimize sum error
double total = 0.0;
for (long id = max - 1; id >= min; id--) {
double p = dist.pdf(id);
assertTrue(p >= 0.0);
assertTrue(p <= 1.0);
System.err.println("p(X=" + id + ") = " + p);
total += p;
}
System.err.println("Total = " + total);
// Give significant tolerance due to rounding errors
assertTrue(total <= 1.05);
assertTrue(total >= 0.95 );
}
@Test
public void testChooseSanity() {
ProbabilityDistribution dist = getDist();
long min = 453, max = 26546454;
dist.init(min, max, getDistParams(), "");
Random rng = initRandom("testChooseSanity");
for (int i = 0; i < 100000; i++) {
long id = dist.choose(rng);
assertTrue(id >= min);
assertTrue(id < max);
}
}
/**
* Check that choose() and cdf() are returning consistent results
* (i.e. that the result of choose are distributed according to cdf)
*/
@Test
public void testCDFChooseConsistency() {
long min = 100, max = 100000;
Bucketer bucketer = getBucketer();
int bucketCount = bucketer.getBucketCount();
int buckets[] = new int[bucketCount];
long n = max - min;
Random rng = initRandom("testCDFChooseConsistency");
ProbabilityDistribution dist = getDist();
dist.init(min, max, getDistParams(), "");
int trials = 1000000;
for (int i = 0; i < trials; i++) {
long id = dist.choose(rng);
long off = id - min;
int bucket = bucketer.chooseBucket(off, n);
buckets[bucket]++;
}
int totalCount = 0;
boolean fail = false;
for (int b = 0; b < bucketCount; b++) {
totalCount += buckets[b];
long bucketTop = bucketer.bucketMax(b, n) + min;
double actCDF = ((double)totalCount) / trials;
double expCDF = dist.cdf(bucketTop);
// 0.2% error
if (Math.abs(expCDF - actCDF) > tolerance()) {
System.err.println(String.format("Divergence between CDF and " +
"choose function: P(X <= %d) act: %f exp: %f", bucketTop,
actCDF, expCDF));
fail = true;
}
}
if (fail) {
fail("Divergence between cdf and choose methods: see preceding output " +
"for details");
}
}
@Test
public void testCDFPDFConsistency() {
long min = 252352, max = 6544543;
ProbabilityDistribution dist = getDist();
dist.init(min, max, getDistParams(), "");
long step = (max - min) / cdfChecks();
for (long id = min + 1; id < max; id += step) {
double c = dist.cdf(id);
double c1 = dist.cdf(id - 1);
double p = dist.pdf(id);
double err = Math.abs((c - c1) - p);
if (err > 0.0001) {
fail(String.format("Error > 0.001: cdf(%d) - cdf(%d) = %f, pdf(%d) = %f",
id, id -1, c1 - c, id, p));
}
}
}
@Test
public void testQuantileSanity() {
long min = 0, max = 1000;
ProbabilityDistribution dist = getDist();
dist.init(min, max, getDistParams(), "");
long last = dist.quantile(0.0);
for (double q = 0.0; q <= 1.0; q += 0.125) {
long id = dist.quantile(q);
System.err.format("quantile(%f) = %d\n", q, id);
assertTrue(id >= min);
assertTrue(id < max);
assertTrue(id >= last);
last = id;
}
// min should be most probable, and therefore should definitely
// be returned by quantile
assertEquals(min, dist.quantile(0.0));
}
/**
* Different distributions should be bucketed in different ways
* to test their fit. For example, the zipf distribution treats
* lower keys specially so we want to have better resolution for
* those
*/
static interface Bucketer {
public int getBucketCount();
public int chooseBucket(long i, long n);
public long bucketMax(int bucket, long n);
}
static class UniformBucketer implements Bucketer {
final int bucketCount;
public UniformBucketer(int bucketCount) {
this.bucketCount = bucketCount;
}
public int getBucketCount() {
return bucketCount;
}
public int chooseBucket(long i, long n) {
return (int)((i * bucketCount) / n);
}
public long bucketMax(int bucket, long n) {
return ((long)((((double)bucket+1)/bucketCount)*n)) - 1;
}
}
}