PDA

View Full Version : Kernel execution's problem



Nibul
01-03-2010, 07:58 AM
Hi !
I'm trying to add 2 vectors but the kernel is not executed. I can't see where the problem is.

I'm running the code on OS X Snow Leopard with the following Makefile:

# Builds the OpenCL vector-addition example.
CC = gcc
SRC = addition.c
DEST = addition
# Link against the OpenCL framework.
LDFLAGS = -framework OpenCL

.PHONY: all
all: $(DEST)

# Rebuild the binary only when the source changes.
# The recipe line below must start with a TAB character, not spaces.
$(DEST): $(SRC)
	$(CC) -o $(DEST) $(SRC) $(LDFLAGS)

Here is the code:


/* OpenCL core library */
#include <OpenCL/opencl.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Runs the "addition" OpenCL kernel over the n-element vectors a and b,
 * using c as the output vector. A GPU context is tried first, with a CPU
 * context as fallback. Returns -1 when one of the checked OpenCL calls fails.
 */
int opencl_call(long* a, long* b, long* c, size_t n);

/*
 * OpenCL C source of the "addition" kernel: each work-item adds one pair of
 * elements, c[i] = a[i] + b[i].
 *
 * The inputs live in __global (read-only) memory rather than __constant
 * memory, because __constant buffers are limited by
 * CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and would cap the usable vector length.
 * The source keeps its line breaks so that compiler build logs report
 * meaningful line numbers.
 */
static const char* opencl_code[1] = {
    "__kernel void addition(__global const long* a,\n"
    "                       __global const long* b,\n"
    "                       __global long* c)\n"
    "{\n"
    "    size_t i = get_global_id(0);\n"
    "    c[i] = a[i] + b[i];\n"
    "}\n"
};

/*
 * Demo driver: adds two fixed 5-element vectors through opencl_call() and
 * prints every element of the result. Exits with a failure status when
 * opencl_call() reports an error, instead of printing an unfilled result.
 */
int main(int argc, char** argv)
{
    int i = 0;
    long a[5] = {1, 2, 3, 4, 5};
    long b[5] = {7, 5, 1, 9, 42};
    long c[5] = {0, 0, 0, 0, 0};

    (void)argc;
    (void)argv;

    if (opencl_call(a, b, c, 5) == -1) {
        printf("Error !\n");
        return EXIT_FAILURE;
    }

    for (i = 0; i < 5; i++) {
        /* i is an int, so it needs %d; passing it for %ld is undefined behaviour. */
        printf("c[%d] = %ld\n", i, c[i]);
    }
    return 0;
}

int opencl_call(long* a, long* b, long* c, size_t n)
{
/* Return Status */
cl_int status = CL_SUCCESS;

size_t device_list_size;
cl_device_id* devices = NULL;
cl_command_queue queue;
cl_command_queue_properties prop = 0;
cl_mem a_buffer, b_buffer, c_buffer;
cl_program program;
cl_kernel kernel;
size_t global_work_size[1], local_work_size[1];


/* Create OpenCL context */
cl_context context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);

/* Check whether we really have a GPU */
if(status != CL_SUCCESS)
{
printf("Sorry, your GPU is not supported, using CPU instead.\n");
context = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
}
if(status != CL_SUCCESS)
return -1;

/* Check how many GPU we have */
status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size);
if(status != CL_SUCCESS)
return -1;
printf("We have %d devices.\n", device_list_size);

/* Get the device list */
devices = (cl_device_id*)malloc(device_list_size);

status = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL);
if(status != CL_SUCCESS)
return -1;

/* Create command queue */
queue = clCreateCommandQueue(context, devices[0], prop, &status);
if(status != CL_SUCCESS)
return -1;


/* Allocate memory buffers */
a_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), a, &status);
b_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), b, &status);
if(status != CL_SUCCESS)
return -1;

/* Output buffer */
c_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, n*sizeof(long), c, &status);
if(status != CL_SUCCESS)
return -1;

/* Create a CL program using the kernel source */
program = clCreateProgramWithSource(context, 1, opencl_code, NULL, &status);
if(status != CL_SUCCESS)
return -1;

/* Build OpenCL program. */
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
if(status != CL_SUCCESS)
return -1;

/* Create OpenCL kernel */
kernel = clCreateKernel(program, "addition", &status);

/* Set kernel arguments */
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer);
if(status != CL_SUCCESS)
return -1;

/* Kernel execution */
global_work_size[0] = n;
local_work_size[0] = 1;
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

status = clEnqueueReadBuffer(queue, c_buffer, CL_TRUE, 0, n*sizeof(long), c, 0, NULL, NULL);

/* Free memory */
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
}


Thanks.

dbs2
01-04-2010, 08:08 AM
What do you mean by the kernel is not executed? Do you get an error? Do you get the wrong values back? Does it crash?

Try running with CL_LOG_ERRORS=stdout in your environment and see if you get an error printed out.

Nibul
01-05-2010, 11:04 AM
I get wrong values back:
c[0] = 0
c[1] = 42949672960
c[2] = 8589934594
c[3] = 8589934602
c[4] = 8589934594

No error is printed out when running with CL_LOG_ERRORS=stdout.

dbs2
01-11-2010, 06:13 AM
Try changing your long to cl_long to make sure you're not having any 64/32 bit issues. Then try reading back your buffers to verify they have what you expect. (E.g., malloc a buffer then read back and verify that it's what you've written.) I'm not coming up with anything else after a quick glance at the code.