// Number of work slices: one per available device, capped at MAX_PARRALLELISM_LEVEL.
slices = min(devices.length, MAX_PARRALLELISM_LEVEL);
// Set up per-slice state: command queues, kernels, an event list and colormap buffers.
// The arrays below are only sized here (one slot per slice); their elements are
// still null until the code that follows assigns them.
queues = new CLCommandQueue[slices];
kernels = new CLKernel[slices];
// NOTE(review): `slices` is presumably the capacity of the event list -- confirm against CLEventList.
probes = new CLEventList(slices);
colorMap = new CLBuffer[slices];
for (int i = 0; i < slices; i++) {
colorMap[i] = clContext.createIntBuffer(32*2, READ_ONLY);