PDA

View Full Version : matrix addition problem



bubu
04-14-2011, 07:41 AM
Hi!

I'm having some trouble with this code:



#include <oclUtils.h>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <CL/cl.h>
#include <conio.h>

#define N 10

void write(int A[N][N])
{
int s = 0;
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
if (s > N-1)
{
printf("\n");
s = 0;
}
printf(" %d", A[i][j]);
s++;
}
}
printf("\n \n");
}

const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
" unsigned int x = get_global_id(0);\n",
" unsigned int y = get_global_id(1);\n",
" \n",
" c[x] = a[x] + b[x];\n",
"}\n"
};

int main(int argc, const char** argv)
{
int host_vector1[N][N], host_vector2[N][N];
int host_vector[N][N];

for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
{
host_vector1[i][j] = j;
host_vector2[i][j] = j;
}
}

write(host_vector1);
write(host_vector2);

cl_int error = 0;

cl_uint numPlatforms;
cl_platform_id* clSelectedPlatformID = NULL;
clGetPlatformIDs(0, NULL, &numPlatforms);
clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num Platforms);
error = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
if(error != CL_SUCCESS)
return 0;

cl_uint ciDeviceCount;
cl_device_id* clDevices = NULL;
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 0, NULL, &ciDeviceCount);

clDevices = (cl_device_id*) malloc(sizeof(cl_device_id) * ciDeviceCount);
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, ciDeviceCount, clDevices, &ciDeviceCount);

cl_context GPU_context = clCreateContext(0, 1, clDevices, NULL, NULL, &error);

cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPU_context, clDevices[0], CL_QUEUE_PROFILING_ENABLE, NULL);
cl_event event1;

cl_mem GPU_vector1 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector1, NULL);
cl_mem GPU_vector2 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector2, NULL);
cl_mem GPU_result_vector = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(int) * N*N, NULL, NULL);

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

error = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);

cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "matrixAdd", NULL);
clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPU_result_vector);
clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPU_vector1);
clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPU_vector2);

size_t WorkSize[2] = {N, N};
cl_int temp = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 2, NULL, WorkSize, NULL, 0, NULL, &event1);

clEnqueueReadBuffer(GPUCommandQueue, GPU_result_vector, CL_TRUE, 0, N*N * sizeof(int), host_vector, 0, NULL, NULL);

write(host_vector);

_getch();

return 0;
}


It runs now, but if I remove the
" \n" line form the kernel, the program crashes. Also my original goal is a simple matrix addition, but if I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel
c[x][y] = a[x][y] + b[x][y]; it just won't work. I get a matrix filled with zeros.

Any thoughts?

david.garcia
04-14-2011, 10:01 AM
Where does the program crash? How does it crash (any error messages?)?

As general advice, I would replace:

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

with

[code]
cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);
[code]

As for "If I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel it just won't work", the code you posted is syntactically incorrect. Have you checked whether clBuildProgram() returns any compilation errors?

bubu
04-14-2011, 10:42 AM
Where does the program crash? How does it crash (any error messages?)?
The message was:
Unhandled exception at 0x7593b760 in ocl1.exe: Microsoft C++ exception: std::bad_alloc at memory location 0x0012f878..

cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);
With this, that problem is gone. THX!



I haven't checked clBuildProgram for compilation errors. You mean this is incorrect?


const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
" unsigned int x = get_global_id(0);\n",
" unsigned int y = get_global_id(1);\n",
" c[x][y] = a[x][y] + b[x][y];\n",
"}\n"
};

fangq
04-15-2011, 11:53 AM
I haven't checked clBuildProgram for compilation errors. You mean this is incorrect?

you can use clGetProgramBuildInfo with the CL_PROGRAM_BUILD_LOG option to get detailed compilation error/warning messages.

http://www.khronos.org/registry/cl/sdk/ ... dInfo.html (http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clGetProgramBuildInfo.html)

bubu
04-16-2011, 01:44 AM
I found a solution (which I will post a bit later), thx for the replies. Obviously my kernel was wrong, but my problem was, that I wasn't even sure, if my host code was OK.

paviraj_1989
04-23-2011, 10:27 AM
Hello Bubu,
as per i know, for new line in kernel function us delimiter \ instead of \n; 2 dimensional matrix operation wont work here.


praveenraj
BE student
Pesit

david.garcia
04-23-2011, 01:29 PM
as per i know, for new line in kernel function us delimiter \ instead of \n

That is not correct. Newlines (\n) in OpenCL C work the same as in C99.

Backslash (\) is the line continuation character in the preprocessor, which is a different thing.