PDA

View Full Version : Atomic compare and swap



Elias
11-16-2012, 02:01 AM
Hi everyone,

I'm trying to use the atom_cmpxchg (OpenCL version is 1.0, GPU is NVIDIA 9600M GT) function, but I cant manage to get the expected result: the swap dont happen.

I tried to find a example (my code is rather long, so I looked for something simpler), but I can't make it work either (this code can be found on the internet, but I just dont remember the address right now):

The output result:

Old A = 500
New A 500

I may not have understood what this function is supposed to do....
Given the prototype of the function atom_cmpxchg (__global int *p, int cmp, int val): I want to swap the value at *p by val if and only if *p == cmp (store the old value of *p if *p!=cmp). Is that right? Or I am missing something?

Thanks for your help!

The kernel:

__kernel void atomiccmpxchg(__global int *old, __global int *new)
{
__local int v,v1;
v = 500;
v1=10;
*old = atom_cmpxchg(new,v,v1);
}

The host code:

#include <iostream>
#include <cstdlib>
#include <fstream>
#include <string>
#if defined __APPLE__ || defined (MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif


using namespace std;

void err_check( int err, string err_code ) {
if ( err != CL_SUCCESS ) {
cout << "Error: " << err_code << "(" << err << ")" << endl;
exit(-1);
}
}

int main()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem mobj_a = NULL;
cl_mem mobj_b = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int err;

int a, b;
a = 500;
b = 500;


// Get platform/device information
err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms );
err_check( err, "clGetPlatformIDs" );

// Get information about the device
err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices );
err_check( err, "clGetDeviceIDs" );

// Create OpenCL Context
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err );
err_check( err, "clCreateContext" );

// Create Command Queue
command_queue = clCreateCommandQueue( context, device_id, CL_QUEUE_PROFILING_ENABLE, &err );
err_check( err, "clCreateCommandQueue" );

// Create memory objects and tranfer the data to memory buffer
mobj_a = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
err = clEnqueueWriteBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
err_check( err, "clEnqueueWriteBuffer" );

mobj_b = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
err = clEnqueueWriteBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );
err_check( err, "clEnqueueWriteBuffer" );

// Read kernel file
ifstream file("atomic_cmpxchg.cl");
string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
const char *source_str = prog.c_str();

// Create Kernel program from the read in source
program = clCreateProgramWithSource( context, 1, (const char **) &source_str, 0, &err );
err_check( err, "clCreateProgramWithSource" );

// Build Kernel Program
err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
size_t len;
char buffer[2048];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, &len);
printf("--- Build log ---\n%s\n", buffer);
err_check( err, "clBuildProgram" );


// Create OpenCL Kernel
kernel = clCreateKernel( program, "atomiccmpxchg", &err );
err_check( err, "clCreateKernel" );

// Set OpenCL kernel argument
err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &mobj_a );
err_check( err, "clSetKernelArg" );
err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &mobj_b );
err_check( err, "clSetKernelArg" );

// Execute OpenCL kernel in task parallel
clEnqueueTask( command_queue, kernel, 0, NULL, NULL );
err_check( err, "clEnqueueTask" );

// Read (Transfer result) from the memory buffer
err = clEnqueueReadBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
err = clEnqueueReadBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );

// Free objects
err = clFlush( command_queue );
err = clFinish( command_queue );
err = clReleaseKernel( kernel );
err = clReleaseProgram( program );
err = clReleaseMemObject( mobj_a );
err = clReleaseMemObject( mobj_b );
err = clReleaseCommandQueue( command_queue );
err = clReleaseContext( context );

// Display result
cout << "\tOld A = " << a << endl;
cout << "\tNew A " << b << endl;


return 0;
}

chippies
11-18-2012, 05:33 AM
You aren't checking the return code from clEnqueueTask. Your code is

clEnqueueTask( command_queue, kernel, 0, NULL, NULL );
But it should be

err = clEnqueueTask( command_queue, kernel, 0, NULL, NULL );

Elias
11-18-2012, 12:34 PM
Er... well yes, indeed it was missing. It does not help though (I've added the return code check, just in case...).
As I was saying, it is not my code, I was just looking for a minimal example to try this function. The swap wont happen, either in this example or in my own program.

clint3112
11-19-2012, 03:04 AM
Have you enabled atomic operations? Might be disabled?



NOTE: The atomic built-in functions that use the atom_ prefix and are described by the
following extensions
cl_khr_global_int32_base_atomics
cl_khr_global_int32_extended_atomics
cl_khr_local_int32_base_atomics
cl_khr_local_int32_extended_atomics
in sections 9.5 and 9.6 of the OpenCL 1.0 specification are also supported.

utnapishtim
11-19-2012, 04:12 AM
Since you first write to local memory when settings v and v1, then read this local memory in the atom_cmpxchg() function, you should insert a local memory fence before calling atom_cmpxchg.

Elias
11-19-2012, 05:02 AM
Have you enabled atomic operations? Might be disabled?
Yep, atomics operations are enabled (I tripled checked)


Since you first write to local memory when settings v and v1, then read this local memory in the atom_cmpxchg() function, you should insert a local memory fence before calling atom_cmpxchg.
I've added the barrier, no change though, still cant manage to swap the values.
I tried with privates variables instead, so that no barrier is needed, still nothing.

utnapishtim
11-19-2012, 09:39 AM
I can't see any error in your program. In fact, it works fine on a 9600M GS (driver 306.23)

Have you tried with a simple assignment:

*old = *new;
if (*new == v) *new = v1;

Elias
11-19-2012, 10:35 AM
I can't see any error in your program. In fact, it works fine on a 9600M GS (driver 306.23)

Have you tried with a simple assignment:

*old = *new;
if (*new == v) *new = v1;

Yes it could be done that way, but I need to prevent any workitem to interfer in an other workitem's instructions (would cause data inconsistency).

That's really weird...