PDA

View Full Version : system goes into some indefinite loop - adding two vectors



sayush
11-14-2009, 12:09 AM
Hi, i am being able to compile this code but it doesn't give me any output and goes in some indefinite loop (or something else)... i dont understand whats happening. I'm a newbie to OpenCL and this is my first program.. All i'm trying to do is add two vectors. I get no errors during the compilation. If i am not wrong, i should see the 'End' that i've printed after the gpu portion completes right?

This is the machine detail that i'm running the code on:
Linux gpu02.cluster 2.6.18-92.1.22.el5 #1 SMP Tue Dec 16 11:57:43 EST 2008 x86_64 x86_64 x86_64 GNU/Linux




#include <stdio.h>
#include <CL/cl.h>

#define SIZE 10

int va[SIZE];
int vb[SIZE];
int vc[SIZE];

char* load_program_source(const char*);

int Init(){
int i;
srand(20);
for(i=0;i<SIZE;i++){
va[i]=rand()%10;
vb[i]=rand()%10;
}
}

char* load_program_source(const char *filename)
{
int fileSize = 0;
FILE *pFile = fopen(filename, "r");
rewind(pFile);
fseek(pFile, 0, SEEK_END);
fileSize = ftell(pFile);
rewind(pFile);

char *data = (char*) calloc(sizeof(char), fileSize+1);
fread(data, 1, fileSize, pFile);
data[fileSize]='\0';
fclose(pFile);
return data;
}

int main(){
if(!Init()){
printf("Unable to initialize data");
return 1;
}

cl_context GPUContext = clCreateContextFromType(0,CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
if(!GPUContext){
printf("Error: Failed to create context");
return 1;
}

//Get the list of GPU devices associated with this context
size_t ParmDataBytes;
clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);
cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);
clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes,GPUDevices,NULL);

//Create a command queue on first gpu device
cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext, GPUDevices[0],0,NULL);
if(!GPUCommandQueue){
printf("Error: Failed to create a command queue");
return 1;
}

//Allocate memory
cl_mem GPUva = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*SIZE, va, NULL);
cl_mem GPUvb = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*SIZE, vb, NULL);
cl_mem GPUvc = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, sizeof(int)*SIZE, NULL, NULL);

//Create OCL program reading the source code from the file
char * OclSource = load_program_source("vectoradd.cl");
cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext,1,(const char**)&OclSource,NULL,NULL);

//Build the program
clBuildProgram(OpenCLProgram,0,NULL,NULL,NULL,NULL );

//obtain the handle for the kernel
cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram,"VectorAdd",NULL);

//associate GPU memory with the kernel
clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem), (void*)&GPUvc);
clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPUvb);
clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPUva);

//Launch the kernel in the GPU
size_t WorkSize[1] = {SIZE};
clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd,1,NULL,WorkSize,NULL,0,NULL,NULL);

//copy the result back to the main memory
clEnqueueReadBuffer(GPUCommandQueue, GPUvc, CL_TRUE,0,sizeof(int) * SIZE, vc, 0, NULL, NULL);

//cleanup
free(GPUDevices);
clReleaseKernel(OpenCLVectorAdd);
clReleaseProgram(OpenCLProgram);
clReleaseCommandQueue(GPUCommandQueue);
clReleaseContext(GPUContext);
clReleaseMemObject(GPUva);
clReleaseMemObject(GPUvb);
clReleaseMemObject(GPUvc);

printf("End");
return 0;
}



this is my kernel code:



__kernel void VectorAdd(__global int* vc, __global int* vb, __global int* va){
int i;
for(i=0;i<100;i++){
vc[i]=vb[i]+va[i];
}
}


could anyone please help me with it?

Thanks a lot

PaulS
11-14-2009, 07:57 AM
Size in the c code is 10. Loop in the kernel loops 100 times. Result is that you overflow your memory objects.

With that change, and the addition of a print loop:

0 + 6 = 6
5 + 7 = 12
0 + 2 = 2
0 + 9 = 9
9 + 4 = 13
1 + 0 = 1
8 + 0 = 8
3 + 0 = 3
9 + 0 = 9
1 + 3 = 4

Runs fine on CPU and GPU on my MacBook Pro (10.6.2)

sayush
11-14-2009, 09:17 PM
well how do you run the code? Its just like running a simple .c code isn't it? or is there something else to do, because it's not changing the output when i change that to 10.
I compile the code like:

gcc -o vectoradd vectoradd.c -lOpenCL
vectoradd.c: In function ‘load_program_source’:
vectoradd.c:30: warning: incompatible implicit declaration of built-in function ‘calloc’
vectoradd.c: In function ‘main’:
vectoradd.c:52: warning: incompatible implicit declaration of built-in function ‘malloc’


and i run the code like:

cl]$ ./vectoradd


do i need to specify anything else?

sayush
11-14-2009, 11:44 PM
and one more thing. I'm connecting to a remote server and working on it using ssh, should that cause any problems in producing the output on the screen?

sayush
11-14-2009, 11:49 PM
Sorry for all the trouble folks - it was the stupid PC that causing problems, not the code in itself. Thanks a lot.