Hi everyone,

I'm trying to use the atom_cmpxchg function (OpenCL version is 1.0, GPU is an NVIDIA 9600M GT), but I can't manage to get the expected result: the swap doesn't happen.

I tried to find an example (my code is rather long, so I looked for something simpler), but I can't make it work either (this code can be found on the internet, but I just don't remember the address right now):

The output result:
Code :
	Old A = 500
	New A 500

I may not have understood what this function is supposed to do....
Given the prototype of the function, atom_cmpxchg (__global int *p, int cmp, int val), I expect the value at *p to be replaced by val if and only if *p == cmp (and the old value of *p to be kept if *p != cmp). Is that right? Or am I missing something?

Thanks for your help!

The kernel:
Code :
// In OpenCL 1.0 the 32-bit atomics on __global memory (atom_cmpxchg and
// friends) are not part of the core language; they are provided by the
// cl_khr_global_int32_base_atomics extension, which must be enabled before
// the first use.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

// Single compare-and-exchange on a global integer.
//   old - receives the value that *new held before the operation
//   new - the integer that is atomically compared and possibly replaced
// If *new equals 500 it is replaced by 10; otherwise it is left untouched.
// In both cases the previous content of *new is written to *old.
__kernel void atomiccmpxchg(__global int *old, __global int *new)
{
	// Plain private constants: nothing is shared between work-items here,
	// so there is no reason to stage these values in __local memory.
	const int expected = 500;
	const int replacement = 10;

	*old = atom_cmpxchg(new, expected, replacement);
}

The host code:
Code :
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <string>
#if defined __APPLE__ || defined (MACOSX)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
 
 
using namespace std;
 
// Stops the program when an OpenCL call did not report success.
//   err      - status value returned by the OpenCL call
//   err_code - label included in the message (callers pass the API name)
// When err differs from CL_SUCCESS, prints "Error: <label>(<status>)" to
// standard output and terminates with exit status -1; otherwise returns.
void err_check( int err, std::string err_code ) {
	if ( err == CL_SUCCESS ) {
		return;
	}

	std::cout << "Error: " << err_code << "(" << err << ")" << std::endl;
	std::exit(-1);
}
 
int main()
{
	cl_platform_id platform_id = NULL;
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem mobj_a = NULL;
	cl_mem mobj_b = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int err;
 
	int a, b;
	a = 500;
	b = 500;
 
 
	// Get platform/device information 
	err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms );
	err_check( err, "clGetPlatformIDs" );
 
	// Get information about the device
	err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices );
	err_check( err, "clGetDeviceIDs" );
 
	// Create OpenCL Context
	context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err );
	err_check( err, "clCreateContext" );
 
	// Create Command Queue
	command_queue = clCreateCommandQueue( context, device_id, CL_QUEUE_PROFILING_ENABLE, &err );
	err_check( err, "clCreateCommandQueue" );
 
	// Create memory objects and tranfer the data to memory buffer
	mobj_a = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
	err = clEnqueueWriteBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
	err_check( err, "clEnqueueWriteBuffer" );
 
	mobj_b = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err );
	err = clEnqueueWriteBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );
	err_check( err, "clEnqueueWriteBuffer" );	
 
	// Read kernel file
	ifstream file("atomic_cmpxchg.cl");
	string prog( istreambuf_iterator<char>( file ), ( istreambuf_iterator<char>() ) );
	const char *source_str = prog.c_str();
 
	// Create Kernel program from the read in source
	program = clCreateProgramWithSource( context, 1, (const char **) &source_str, 0, &err );
	err_check( err, "clCreateProgramWithSource" );
 
	// Build Kernel Program
	err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
  size_t len;
  char buffer[2048];
  clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                        sizeof(buffer), buffer, &len);
  printf("--- Build log ---\n%s\n", buffer);
	err_check( err, "clBuildProgram" );
 
 
	// Create OpenCL Kernel
	kernel = clCreateKernel( program, "atomiccmpxchg", &err );
	err_check( err, "clCreateKernel" );
 
	//  Set OpenCL kernel argument
	err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &mobj_a );
	err_check( err, "clSetKernelArg" );
	err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &mobj_b );
	err_check( err, "clSetKernelArg" );
 
	//  Execute OpenCL kernel in task parallel
	clEnqueueTask( command_queue, kernel, 0, NULL, NULL );
	err_check( err, "clEnqueueTask" );	
 
	//  Read (Transfer result) from the memory buffer
	err = clEnqueueReadBuffer( command_queue, mobj_a, CL_TRUE, 0, sizeof(int), &a, 0, NULL, NULL );
	err = clEnqueueReadBuffer( command_queue, mobj_b, CL_TRUE, 0, sizeof(int), &b, 0, NULL, NULL );
 
	// Free objects
	err = clFlush( command_queue );
	err = clFinish( command_queue );
	err = clReleaseKernel( kernel );
	err = clReleaseProgram( program );
	err = clReleaseMemObject( mobj_a );
	err = clReleaseMemObject( mobj_b );
	err = clReleaseCommandQueue( command_queue );
	err = clReleaseContext( context );
 
	// Display result
	cout << "\tOld A = " << a << endl;
	cout << "\tNew A " << b << endl;
 
 
	return 0;
}