# Thread: matrix addition problem

Hi!

I'm having some trouble with this code:

Code :
```#include <oclUtils.h>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <CL/cl.h>
#include <conio.h>

#define N 10

/*
 * Print an N x N integer matrix to stdout.
 *
 * Every element is written as " %d" (a leading space, then the value).
 * Rows are separated by a single newline, and the whole matrix is
 * followed by the trailer "\n \n".
 */
void write(int A[N][N])
{
    for (int row = 0; row < N; ++row) {
        /* Start a new output line before every row except the first. */
        if (row != 0)
            printf("\n");

        for (int col = 0; col < N; ++col)
            printf(" %d", A[row][col]);
    }

    printf("\n \n");
}

/*
 * OpenCL C source of the "matrixAdd" kernel, split into 7 string fragments
 * (one per source line). The array is handed to clCreateProgramWithSource()
 * in main(), which passes the number of fragments as a separate argument.
 *
 * Kernel arguments: c is the output buffer, a and b are the input buffers,
 * all addressed as flat __global int arrays.
 */
const char* OpenCLSource1[] = {
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
/* x and y are the work-item's global ids in dimensions 0 and 1. */
"       unsigned int x = get_global_id(0);\n",
"       unsigned int y = get_global_id(1);\n",
"       \n",
/* Only x is used as the index here; y is read but not used. */
"       c[x] = a[x] + b[x];\n",
"}\n"
};

/*
 * Host program: fills two N x N integer matrices, prints them, runs the
 * "matrixAdd" OpenCL kernel on the first GPU device of the first platform,
 * reads the result back and prints it.
 *
 * Returns 0 on every path, including the early exit taken when the platform
 * list cannot be retrieved.
 */
int main(int argc, const char** argv)
{
/* Host-side inputs (host_vector1, host_vector2) and result (host_vector). */
int host_vector1[N][N], host_vector2[N][N];
int host_vector[N][N];

/* Every element of both inputs is set to its column index j. */
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
{
host_vector1[i][j] = j;
host_vector2[i][j] = j;
}
}

write(host_vector1);
write(host_vector2);

cl_int error = 0;

/* Platform discovery: first call asks for the count, second fills the list. */
/* NOTE(review): the result of the first call and of malloc is not checked. */
cl_uint numPlatforms;
cl_platform_id* clSelectedPlatformID = NULL;
clGetPlatformIDs(0, NULL, &numPlatforms);
clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
error = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
if(error != CL_SUCCESS)
return 0;

/* Device discovery on the first platform, GPU devices only (count, then list). */
/* NOTE(review): error is assigned by both calls but never inspected. */
cl_uint ciDeviceCount;
cl_device_id* clDevices =  NULL;
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 0, NULL, &ciDeviceCount);

clDevices = (cl_device_id*) malloc(sizeof(cl_device_id) * ciDeviceCount);
error = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, ciDeviceCount, clDevices, &ciDeviceCount);

/* Context over exactly one device: the first entry of clDevices. */
cl_context GPU_context = clCreateContext(0, 1, clDevices, NULL, NULL, &error);

/* Command queue on that device with profiling enabled; no error code is requested. */
cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPU_context, clDevices[0], CL_QUEUE_PROFILING_ENABLE, NULL);
cl_event event1;

/* Two read-only device buffers initialised by copying the host inputs, */
/* plus one write-only buffer of N*N ints for the result. */
cl_mem GPU_vector1 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector1, NULL);
cl_mem GPU_vector2 = clCreateBuffer(GPU_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * N*N, host_vector2, NULL);
cl_mem GPU_result_vector = clCreateBuffer(GPU_context, CL_MEM_WRITE_ONLY, sizeof(int) * N*N, NULL, NULL);

/* The literal 7 is the number of strings read from OpenCLSource1; it has to */
/* stay equal to the number of elements in that array. */
cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);

/* Build with no device list and no options. */
/* NOTE(review): the build status stored in error is not checked afterwards. */
error = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);

/* Kernel arguments: 0 = result buffer (c), 1 and 2 = input buffers (a, b). */
cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "matrixAdd", NULL);
clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&GPU_result_vector);
clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPU_vector1);
clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPU_vector2);

/* Launch over a 2-D global range of N x N work-items; the local size is left */
/* as NULL. The returned status (temp) and event1 are not used afterwards. */
size_t WorkSize[2] = {N, N};
cl_int temp = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 2, NULL, WorkSize, NULL, 0, NULL, &event1);

/* Blocking read (CL_TRUE) of all N*N ints of the result into host_vector. */
clEnqueueReadBuffer(GPUCommandQueue, GPU_result_vector, CL_TRUE, 0, N*N * sizeof(int), host_vector, 0, NULL, NULL);

write(host_vector);

/* Wait for a key press so the console output stays visible. */
_getch();

/* NOTE(review): no clRelease* or free() calls are made before returning. */
return 0;
}```

It runs now, but if I remove the
Code :
`"       \n"`
line from the kernel, the program crashes. Also, my original goal is a simple matrix addition, but if I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel
Code :
`c[x][y] = a[x][y] + b[x][y];`
it just won't work. I get a matrix filled with zeros.

Any thoughts?

2. ## Re: matrix addition problem

Where does the program crash? How does it crash (any error messages?)?

As general advice, I would replace:
Code :
`cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, 7, OpenCLSource1, NULL, &error);`

with

Code :
`cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);`

As for "If I add this (instead of the current c[x] = a[x] + b[x]; line) to the kernel it just won't work", the code you posted is syntactically incorrect. Have you checked whether clBuildProgram() returns any compilation errors?

3. ## Re: matrix addition problem

> Where does the program crash? How does it crash (any error messages?)?
The message was:
Unhandled exception at 0x7593b760 in ocl1.exe: Microsoft C++ exception: std::bad_alloc at memory location 0x0012f878..
Code :
`cl_program OpenCLProgram = clCreateProgramWithSource(GPU_context, sizeof(OpenCLSource1)/sizeof(OpenCLSource1[0]), OpenCLSource1, NULL, &error);`
With this, that problem is gone. THX!

I haven't checked clBuildProgram for compilation errors. You mean this is incorrect?

Code :
```const char* OpenCLSource1[] = {
/* Variant of the matrixAdd kernel source that applies two subscripts */
/* ([x][y]) to c, a and b. All three are declared as single-level */
/* __global int* parameters, so c[x] on its own already denotes an int. */
"__kernel void matrixAdd(__global int* c, __global int* a, __global int* b)\n",
"{\n",
"       unsigned int x = get_global_id(0);\n",
"       unsigned int y = get_global_id(1);\n",
"       c[x][y] = a[x][y] + b[x][y];\n",
"}\n"
};```

4. ## Re: matrix addition problem

Originally Posted by bubu
I haven't checked clBuildProgram for compilation errors. You mean this is incorrect?
You can use `clGetProgramBuildInfo` with the `CL_PROGRAM_BUILD_LOG` option to get detailed compilation error/warning messages.

http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clGetProgramBuildInfo.html

5. ## Re: matrix addition problem

I found a solution (which I will post a bit later); thanks for the replies. Obviously my kernel was wrong, but my problem was that I wasn't even sure whether my host code was OK.

6. ## Re: matrix addition problem

Hello Bubu,
As far as I know, for a new line in a kernel function, use the delimiter \ instead of \n; two-dimensional matrix operations won't work here.

praveenraj
BE student
Pesit

7. ## Re: matrix addition problem

> As far as I know, for a new line in a kernel function, use the delimiter \ instead of \n
That is not correct. Newlines (\n) in OpenCL C work the same as in C99.

Backslash (\) is the line continuation character in the preprocessor, which is a different thing.

