// select the fastest device (by theoretical peak FLOPS)
CLDevice device = context.getMaxFlopsDevice();
out.println("using "+device);
// create a command queue on the selected device.
CLCommandQueue queue = device.createCommandQueue();
int elementCount = 1444477; // Length of arrays to process
int localWorkSize = min(device.getMaxWorkGroupSize(), 256); // local work size, capped at the device's maximum work-group size
int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
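// roundUp is a small helper defined elsewhere in the class; a minimal sketch
// of what it is assumed to do (pad globalSize to the next multiple of groupSize):
//
//     private static int roundUp(int groupSize, int globalSize) {
//         int r = globalSize % groupSize;
//         return r == 0 ? globalSize : globalSize + groupSize - r;
//     }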
// load sources, create and build program
CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();
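// VectorAdd.cl is assumed to contain a kernel along these lines (a sketch, not
// necessarily the shipped file): each work item adds one pair of elements and
// returns early for the padding work items beyond elementCount:
//
//     kernel void VectorAdd(global const float* a, global const float* b,
//                           global float* c, int numElements) {
//         int iGID = get_global_id(0);
//         if (iGID >= numElements)
//             return;
//         c[iGID] = a[iGID] + b[iGID];
//     }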
// A and B are input buffers, C receives the result; all three are sized to the
// padded globalWorkSize rather than elementCount
CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY);
CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY);
CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY);
out.println("used device memory: "
+ (clBufferA.getCLSize()+clBufferB.getCLSize()+clBufferC.getCLSize())/1000000 +"MB");
// fill the input buffers with random numbers
// (just to have test data; the seeds are fixed, so results do not change between runs).
fillBuffer(clBufferA.getBuffer(), 12345);
fillBuffer(clBufferB.getBuffer(), 67890);
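// fillBuffer is another local helper; a plausible sketch (an assumption, defined
// elsewhere in the class) that fills the buffer with seeded random floats via
// java.util.Random and rewinds it so the device write starts at position 0:
//
//     private static void fillBuffer(FloatBuffer buffer, int seed) {
//         Random rnd = new Random(seed);
//         while (buffer.remaining() != 0)
//             buffer.put(rnd.nextFloat() * 100);
//         buffer.rewind();
//     }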
// get a reference to the kernel function named 'VectorAdd'
// and bind the buffers and the element count to its parameters.
CLKernel kernel = program.createCLKernel("VectorAdd");
kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount);
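// putArgs binds the buffers to the kernel's parameters in declaration order;
// elementCount is passed last so the kernel can ignore the padded work items
// between elementCount and globalWorkSize.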
// asynchronous writes of the input data to the device, the kernel launch,
// and a blocking read to fetch the computed results.
long time = nanoTime();
queue.putWriteBuffer(clBufferA, false)
.putWriteBuffer(clBufferB, false)
.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
.putReadBuffer(clBufferC, true);
time = nanoTime() - time;
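// a minimal wrap-up sketch (assumption): report the elapsed time, peek at a few
// results, and release the context; in the full program the release belongs in
// a finally block so resources are also freed on errors
out.println("computation took: " + (time / 1000000) + "ms");
out.println("a+b=c results snapshot: ");
for (int i = 0; i < 10; i++)
    out.print(clBufferC.getBuffer().get() + ", ");
out.println("...; " + clBufferC.getBuffer().remaining() + " more");
context.release(); // also releases the queue, program, kernel, and buffers created from this context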