// Prepare the host-side NIO buffers and wrap them as OpenCL buffers for the kernel.
// (array1, array2, A, B and context are declared outside this excerpt.)

// Direct float buffer holding 10 floats, created with the context's byte order; it backs the output buffer b3 below.
// NOTE(review): the capacity is fixed at 10 while the kernel launch and the print loop below use A.length — confirm A.length <= 10.
FloatBuffer resultArray = NIOUtils.directFloats(10, context.getByteOrder());
// Fill the host input buffers with the contents of A and B.
array1.put(A);
array2.put(B);
// Input buffers built from array1 / array2. The last argument is true for the inputs and false for the output
// (NOTE(review): presumably the "copy host data" flag — confirm against the JavaCL createFloatBuffer documentation).
CLFloatBuffer b1 = context.createFloatBuffer(Usage.Input, array1, true);
CLFloatBuffer b2 = context.createFloatBuffer(Usage.Input, array2, true);
// Output buffer created from resultArray.
CLFloatBuffer b3 = context.createFloatBuffer(Usage.Output, resultArray, false);
// Declared here and assigned inside the try block that follows.
CLProgram program;
// Build the program, launch "simpleKernel" on the three buffers, then read back and print the results.
// (The matching catch/finally for this try lies outside this excerpt.)
try {
// Create the program from myKernelSource and build it.
program = context.createProgram(myKernelSource).build();
// Create the kernel named "simpleKernel", handing it b1, b2 and b3 at creation time.
CLKernel kernel = program.createKernel(
"simpleKernel",
b1,
b2,
b3
);
// Event returned by the enqueue call below; used to order the read of b3 after the kernel run.
CLEvent kernelCompletion;
// The same kernel can safely be used by different threads, as long as setArgs + enqueueNDRange happen inside one synchronized block.
synchronized (kernel) {
// setArgs is left commented out here: b1, b2 and b3 were already passed to createKernel above.
//kernel.setArgs(b1,b2,b3);
// Enqueue the kernel with sizes { A.length } and { 1 } (presumably the global and local work sizes — confirm against CLKernel.enqueueNDRange).
kernelCompletion = kernel.enqueueNDRange(queue, new int[] { A.length }, new int[] { 1 } );
}
kernelCompletion.waitFor(); // Waits for the kernel event. It is better not to wait here, but to pass the event as a dependency to another queueable operation (CLBuffer.read, for instance), as the next line already does.
// Read b3 back on the queue, passing kernelCompletion as the event to wait for.
FloatBuffer f = b3.read(queue, kernelCompletion);
// Print one line per element: the two inputs and the value read back from the output buffer.
for(int i=0;i<A.length;i++) {
System.out.println( A[i] + " * " + B[i] + " = " + f.get(i));
}