/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
package me.lemire.integercompression.benchmarktools;
import com.kamikaze.pfordelta.PForDelta;
import me.lemire.integercompression.BinaryPacking;
import me.lemire.integercompression.ByteIntegerCODEC;
import me.lemire.integercompression.Composition;
import me.lemire.integercompression.DeltaZigzagBinaryPacking;
import me.lemire.integercompression.DeltaZigzagVariableByte;
import me.lemire.integercompression.FastPFOR;
import me.lemire.integercompression.FastPFOR128;
import me.lemire.integercompression.IntWrapper;
import me.lemire.integercompression.IntegerCODEC;
import me.lemire.integercompression.JustCopy;
import me.lemire.integercompression.NewPFD;
import me.lemire.integercompression.NewPFDS16;
import me.lemire.integercompression.NewPFDS9;
import me.lemire.integercompression.OptPFD;
import me.lemire.integercompression.OptPFDS16;
import me.lemire.integercompression.OptPFDS9;
import me.lemire.integercompression.Simple9;
import me.lemire.integercompression.VariableByte;
import me.lemire.integercompression.differential.Delta;
import me.lemire.integercompression.differential.IntegratedBinaryPacking;
import me.lemire.integercompression.differential.IntegratedByteIntegerCODEC;
import me.lemire.integercompression.differential.IntegratedComposition;
import me.lemire.integercompression.differential.IntegratedIntegerCODEC;
import me.lemire.integercompression.differential.IntegratedVariableByte;
import me.lemire.integercompression.differential.XorBinaryPacking;
import me.lemire.integercompression.synth.ClusteredDataGenerator;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
/**
*
* Simple class meant to compare the speed of different schemes.
*
* @author Daniel Lemire
*
*/
public class Benchmark {
/**
 * Standard benchmark for an {@link IntegerCODEC}.
 *
 * Non-integrated codecs are fed delta-coded input: the first value of each
 * array is stored verbatim and compression starts at offset 1, which is why
 * sizes and positions below carry a +1/-1 adjustment. Integrated codecs do
 * their own differential coding, so they receive the raw data.
 *
 * @param csvLog
 *            Writer for CSV log.
 * @param sparsity
 *            sparsity level of the test data (recorded in the CSV log)
 * @param c
 *            the codec
 * @param data
 *            arrays of input data
 * @param repeat
 *            How many times to repeat the test
 * @param verbose
 *            whether to output result on screen
 */
private static void testCodec(PrintWriter csvLog, int sparsity,
        IntegerCODEC c, int[][] data, int repeat, boolean verbose) {
    if (verbose) {
        System.out.println("# " + c.toString());
        System.out
                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
    }
    int N = data.length;
    int totalSize = 0;
    int maxLength = 0;
    for (int k = 0; k < N; ++k) {
        totalSize += data[k].length;
        if (data[k].length > maxLength) {
            maxLength = data[k].length;
        }
    }
    // 4x + 1024 to account for the possibility of some negative
    // compression.
    int[] compressBuffer = new int[4 * maxLength + 1024];
    int[] decompressBuffer = new int[maxLength + 1024];
    // These variables hold time in microseconds (10^-6).
    long compressTime = 0;
    long decompressTime = 0;
    int size = 0;
    IntWrapper inpos = new IntWrapper();
    IntWrapper outpos = new IntWrapper();
    for (int r = 0; r < repeat; ++r) {
        size = 0;
        for (int k = 0; k < N; ++k) {
            // work on a copy: Delta.delta mutates its argument
            int[] backupdata = Arrays.copyOf(data[k], data[k].length);
            // compress data.
            long beforeCompress = System.nanoTime() / 1000;
            inpos.set(1);
            outpos.set(0);
            if (!(c instanceof IntegratedIntegerCODEC)) {
                Delta.delta(backupdata);
            }
            c.compress(backupdata, inpos, backupdata.length
                    - inpos.get(), compressBuffer, outpos);
            long afterCompress = System.nanoTime() / 1000;
            // measure time of compression.
            compressTime += afterCompress - beforeCompress;
            // +1 accounts for the first value kept outside the codec
            final int thiscompsize = outpos.get() + 1;
            size += thiscompsize;
            // extract (uncompress) data
            long beforeDecompress = System.nanoTime() / 1000;
            inpos.set(0);
            outpos.set(1);
            decompressBuffer[0] = backupdata[0];
            c.uncompress(compressBuffer, inpos, thiscompsize - 1,
                    decompressBuffer, outpos);
            if (!(c instanceof IntegratedIntegerCODEC))
                Delta.fastinverseDelta(decompressBuffer);
            long afterDecompress = System.nanoTime() / 1000;
            // measure time of extraction (uncompression).
            decompressTime += afterDecompress - beforeDecompress;
            if (outpos.get() != data[k].length)
                throw new RuntimeException(
                        "we have a bug (diff length) " + c
                                + " expected " + data[k].length
                                + " got " + outpos.get());
            // verify: compare original array with compressed and
            // uncompressed.
            for (int m = 0; m < outpos.get(); ++m) {
                if (decompressBuffer[m] != data[k][m]) {
                    throw new RuntimeException(
                            "we have a bug (actual difference), expected "
                                    + data[k][m] + " found "
                                    + decompressBuffer[m] + " at "
                                    + m);
                }
            }
        }
    }
    if (verbose) {
        double bitsPerInt = size * 32.0 / totalSize;
        // widen before multiplying to avoid int overflow on large
        // inputs, and guard against a zero elapsed time (fast machines
        // with tiny data) which would throw ArithmeticException.
        long compressSpeed = (long) totalSize * repeat
                / Math.max(1L, compressTime);
        long decompressSpeed = (long) totalSize * repeat
                / Math.max(1L, decompressTime);
        System.out.println(String.format("\t%1$.2f\t%2$d\t%3$d",
                bitsPerInt, compressSpeed, decompressSpeed));
        csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                c.toString(), sparsity, bitsPerInt, compressSpeed,
                decompressSpeed);
        csvLog.flush();
    }
}
/**
 * Standard benchmark for byte-aligned schemes ({@link ByteIntegerCODEC}).
 *
 * Mirrors {@code testCodec}: non-integrated codecs receive delta-coded
 * input with the first value of each array stored verbatim, hence the
 * +1/-1 offset adjustments below. The compressed output is measured in
 * bytes, so bits-per-int uses a factor of 8 rather than 32.
 *
 * @param csvLog
 *            Writer for CSV log.
 * @param sparsity
 *            sparsity level of the test data (recorded in the CSV log)
 * @param c
 *            the codec
 * @param data
 *            arrays of input data
 * @param repeat
 *            How many times to repeat the test
 * @param verbose
 *            whether to output result on screen
 */
private static void testByteCodec(PrintWriter csvLog, int sparsity,
        ByteIntegerCODEC c, int[][] data, int repeat, boolean verbose) {
    if (verbose) {
        System.out.println("# " + c.toString());
        System.out
                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
    }
    int N = data.length;
    int totalSize = 0;
    int maxLength = 0;
    for (int k = 0; k < N; ++k) {
        totalSize += data[k].length;
        if (data[k].length > maxLength) {
            maxLength = data[k].length;
        }
    }
    // 8 bytes per int + 1024 to account for possible negative compression.
    byte[] compressBuffer = new byte[8 * maxLength + 1024];
    int[] decompressBuffer = new int[maxLength + 1024];
    // These variables hold time in microseconds (10^-6).
    long compressTime = 0;
    long decompressTime = 0;
    int size = 0;
    IntWrapper inpos = new IntWrapper();
    IntWrapper outpos = new IntWrapper();
    for (int r = 0; r < repeat; ++r) {
        size = 0;
        for (int k = 0; k < N; ++k) {
            // work on a copy: Delta.delta mutates its argument
            int[] backupdata = Arrays.copyOf(data[k], data[k].length);
            // compress data.
            long beforeCompress = System.nanoTime() / 1000;
            inpos.set(1);
            outpos.set(0);
            if (!(c instanceof IntegratedByteIntegerCODEC)) {
                Delta.delta(backupdata);
            }
            c.compress(backupdata, inpos, backupdata.length
                    - inpos.get(), compressBuffer, outpos);
            long afterCompress = System.nanoTime() / 1000;
            // measure time of compression.
            compressTime += afterCompress - beforeCompress;
            // +1 accounts for the first value kept outside the codec
            final int thiscompsize = outpos.get() + 1;
            size += thiscompsize;
            // extract (uncompress) data
            long beforeDecompress = System.nanoTime() / 1000;
            inpos.set(0);
            outpos.set(1);
            decompressBuffer[0] = backupdata[0];
            c.uncompress(compressBuffer, inpos, thiscompsize - 1,
                    decompressBuffer, outpos);
            if (!(c instanceof IntegratedByteIntegerCODEC))
                Delta.fastinverseDelta(decompressBuffer);
            long afterDecompress = System.nanoTime() / 1000;
            // measure time of extraction (uncompression).
            decompressTime += afterDecompress - beforeDecompress;
            if (outpos.get() != data[k].length)
                throw new RuntimeException(
                        "we have a bug (diff length) " + c
                                + " expected " + data[k].length
                                + " got " + outpos.get());
            // verify: compare original array with compressed and
            // uncompressed.
            for (int m = 0; m < outpos.get(); ++m) {
                if (decompressBuffer[m] != data[k][m]) {
                    throw new RuntimeException(
                            "we have a bug (actual difference), expected "
                                    + data[k][m] + " found "
                                    + decompressBuffer[m] + " at "
                                    + m);
                }
            }
        }
    }
    if (verbose) {
        // size counts bytes here, so 8 bits per compressed unit.
        double bitsPerInt = size * 8.0 / totalSize;
        // widen before multiplying to avoid int overflow on large
        // inputs, and guard against a zero elapsed time (fast machines
        // with tiny data) which would throw ArithmeticException.
        long compressSpeed = (long) totalSize * repeat
                / Math.max(1L, compressTime);
        long decompressSpeed = (long) totalSize * repeat
                / Math.max(1L, decompressTime);
        System.out.println(String.format("\t%1$.2f\t%2$d\t%3$d",
                bitsPerInt, compressSpeed, decompressSpeed));
        csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                c.toString(), sparsity, bitsPerInt, compressSpeed,
                decompressSpeed);
        csvLog.flush();
    }
}
/**
 * Main method.
 *
 * Prints a short description of the data model, runs the full benchmark
 * suite, and records the results in a timestamped CSV file in the
 * working directory.
 *
 * @param args
 *            command-line arguments
 * @throws FileNotFoundException when we fail to create a new file
 */
public static void main(String[] args) throws FileNotFoundException {
    System.out
            .println("# benchmark based on the ClusterData model from:");
    System.out.println("# Vo Ngoc Anh and Alistair Moffat. ");
    System.out.println("# Index compression using 64-bit words.");
    System.out
            .println("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. ");
    System.out.println();
    // timestamped name, e.g. benchmark-20240101T120000.csv
    File csvFile = new File(String.format(
            "benchmark-%1$tY%1$tm%1$tdT%1$tH%1$tM%1$tS.csv",
            System.currentTimeMillis()));
    // try-with-resources guarantees the writer is closed on any exit path
    try (PrintWriter writer = new PrintWriter(csvFile)) {
        System.out
                .println("# Results will be written into a CSV file: "
                        + csvFile.getName());
        System.out.println();
        test(writer, 20, 18, 10);
        System.out.println();
        System.out.println("Results were written into a CSV file: "
                + csvFile.getName());
    }
}
/**
 * Standard test for the Kamikaze library (PForDelta).
 *
 * Each input array is delta-coded and compressed in blocks of 128
 * integers. NOTE(review): array lengths appear to be assumed multiples
 * of 128 (copyOfRange would zero-pad a partial tail block and the length
 * check below would then fail) — confirm with callers.
 *
 * @param data
 *            input data
 * @param repeat
 *            how many times to repeat
 * @param verbose
 *            whether to output data on screen
 */
public static void testKamikaze(int[][] data, int repeat,
        boolean verbose) {
    DecimalFormat df = new DecimalFormat("0.00");
    DecimalFormat dfspeed = new DecimalFormat("0");
    if (verbose)
        System.out.println("# kamikaze PForDelta");
    if (verbose)
        System.out
                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
    long bef, aft;
    String line = "";
    int N = data.length;
    int totalsize = 0;
    int maxlength = 0;
    for (int k = 0; k < N; ++k) {
        totalsize += data[k].length;
        if (data[k].length > maxlength)
            maxlength = data[k].length;
    }
    int size = 0;
    // These variables hold time in microseconds (10^-6); both are long
    // so accumulated timings cannot overflow.
    long comptime = 0;
    long decomptime = 0;
    for (int r = 0; r < repeat; ++r) {
        size = 0;
        for (int k = 0; k < N; ++k) {
            int outpos = 0;
            // work on a copy: Delta.delta mutates its argument
            int[] backupdata = Arrays.copyOf(data[k], data[k].length);
            // compress data, one 128-integer block at a time.
            bef = System.nanoTime() / 1000;
            Delta.delta(backupdata);
            ArrayList<int[]> dataout = new ArrayList<int[]>(
                    data[k].length / 128);
            for (int K = 0; K < data[k].length; K += 128) {
                final int[] compressedbuf = PForDelta
                        .compressOneBlockOpt(Arrays.copyOfRange(
                                backupdata, K, K + 128), 128);
                dataout.add(compressedbuf);
                outpos += compressedbuf.length;
            }
            aft = System.nanoTime() / 1000;
            comptime += aft - bef;
            final int thiscompsize = outpos;
            size += thiscompsize;
            // extract (uncompress) data; deltaoffset carries the last
            // reconstructed value across block boundaries.
            bef = System.nanoTime() / 1000;
            ArrayList<int[]> datauncomp = new ArrayList<int[]>(
                    dataout.size());
            int deltaoffset = 0;
            for (int[] compbuf : dataout) {
                int[] tmpbuf = new int[128];
                PForDelta.decompressOneBlock(tmpbuf, compbuf, 128);
                tmpbuf[0] += deltaoffset;
                Delta.fastinverseDelta(tmpbuf);
                deltaoffset = tmpbuf[127];
                datauncomp.add(tmpbuf);
            }
            aft = System.nanoTime() / 1000;
            decomptime += aft - bef;
            if (datauncomp.size() * 128 != data[k].length)
                throw new RuntimeException(
                        "we have a bug (diff length) " + " expected "
                                + data[k].length + " got "
                                + datauncomp.size() * 128);
            // verify: compare original array with the decompressed data.
            for (int m = 0; m < data[k].length; ++m)
                if (datauncomp.get(m / 128)[m % 128] != data[k][m]) {
                    // report the value actually decompressed (the old
                    // code printed an unrelated, never-written buffer)
                    throw new RuntimeException(
                            "we have a bug (actual difference), expected "
                                    + data[k][m] + " found "
                                    + datauncomp.get(m / 128)[m % 128]
                                    + " at " + m);
                }
        }
    }
    line += "\t" + df.format(size * 32.0 / totalsize);
    // widen before multiplying to avoid int overflow, and guard against
    // a zero elapsed time which would throw ArithmeticException.
    line += "\t"
            + dfspeed.format((long) totalsize * repeat
                    / Math.max(1L, comptime));
    line += "\t"
            + dfspeed.format((long) totalsize * repeat
                    / Math.max(1L, decomptime));
    if (verbose)
        System.out.println(line);
}
/**
 * Generate test data.
 *
 * @param dataGen
 *            generator producing clustered integer arrays
 * @param N
 *            How many input arrays to generate
 * @param nbr
 *            How big (in log2) should the arrays be
 * @param sparsity
 *            How sparse test data generated
 * @return N arrays, each holding 2^nbr clustered values drawn from
 *         the range [0, 2^(nbr + sparsity))
 */
private static int[][] generateTestData(ClusteredDataGenerator dataGen,
        int N, int nbr, int sparsity) {
    final int valueRange = 1 << (nbr + sparsity);
    final int arrayLength = 1 << nbr;
    final int[][] result = new int[N][];
    for (int idx = 0; idx < result.length; ++idx) {
        result[idx] = dataGen.generateClustered(arrayLength, valueRange);
    }
    return result;
}
/**
 * Generates data and calls other tests.
 *
 * For every sparsity level, each scheme is run three times: twice
 * silently to warm up the JIT, then once with results reported.
 *
 * @param csvLog
 *            Writer for CSV log.
 * @param N
 *            How many input arrays to generate
 * @param nbr
 *            how big (in log2) should the arrays be
 * @param repeat
 *            How many times should we repeat tests.
 */
private static void test(PrintWriter csvLog, int N, int nbr, int repeat) {
    csvLog.format("\"Algorithm\",\"Sparsity\",\"Bits per int\",\"Compress speed (MiS)\",\"Decompress speed (MiS)\"\n");
    ClusteredDataGenerator cdg = new ClusteredDataGenerator();
    final int max_sparsity = 31 - nbr;
    for (int sparsity = 1; sparsity < max_sparsity; ++sparsity) {
        System.out.println("# sparsity " + sparsity);
        System.out.println("# generating random data...");
        int[][] data = generateTestData(cdg, N, nbr, sparsity);
        System.out.println("# generating random data... ok.");
        runTrials(csvLog, sparsity, new Composition(new FastPFOR128(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new FastPFOR(),
                new VariableByte()), data, repeat);
        // TODO: support CSV log output.
        testKamikaze(data, repeat, false);
        testKamikaze(data, repeat, false);
        testKamikaze(data, repeat, true);
        System.out.println();
        runTrials(csvLog, sparsity, new IntegratedComposition(
                new IntegratedBinaryPacking(),
                new IntegratedVariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new JustCopy(), data, repeat);
        runByteTrials(csvLog, sparsity, new VariableByte(), data,
                repeat);
        runByteTrials(csvLog, sparsity, new IntegratedVariableByte(),
                data, repeat);
        runTrials(csvLog, sparsity, new Composition(
                new BinaryPacking(), new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new NewPFD(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new NewPFDS9(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new NewPFDS16(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new OptPFD(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new OptPFDS9(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Composition(new OptPFDS16(),
                new VariableByte()), data, repeat);
        runTrials(csvLog, sparsity, new Simple9(), data, repeat);
        runTrials(csvLog, sparsity, new Composition(
                new XorBinaryPacking(), new VariableByte()), data,
                repeat);
        runTrials(csvLog, sparsity, new Composition(
                new DeltaZigzagBinaryPacking(),
                new DeltaZigzagVariableByte()), data, repeat);
    }
}

/**
 * Runs an integer codec twice silently (JIT warm-up), then once with
 * verbose reporting, followed by a blank line on screen.
 */
private static void runTrials(PrintWriter csvLog, int sparsity,
        IntegerCODEC c, int[][] data, int repeat) {
    testCodec(csvLog, sparsity, c, data, repeat, false);
    testCodec(csvLog, sparsity, c, data, repeat, false);
    testCodec(csvLog, sparsity, c, data, repeat, true);
    System.out.println();
}

/**
 * Runs a byte-aligned codec twice silently (JIT warm-up), then once
 * with verbose reporting, followed by a blank line on screen.
 */
private static void runByteTrials(PrintWriter csvLog, int sparsity,
        ByteIntegerCODEC c, int[][] data, int repeat) {
    testByteCodec(csvLog, sparsity, c, data, repeat, false);
    testByteCodec(csvLog, sparsity, c, data, repeat, false);
    testByteCodec(csvLog, sparsity, c, data, repeat, true);
    System.out.println();
}
}