PDA

View Full Version : Help: Adding two arrays



Rui
03-20-2010, 09:22 AM
I'm trying to write my first program, which adds two arrays, but it's not working. It fails on the func().wait() call with the error "Access violation reading location 0x00000000", and I don't know why. What am I doing wrong?


#define __NO_STD_VECTOR
#define __NO_STD_STRING

#include <malloc.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cl.hpp>

using namespace std;
using namespace cl;

// OpenCL C source of the element-wise integer addition kernel:
//   resultado[i] = a[i] + b[i]   for each work-item i in dimension 0.
//
// Adjacent string literals are concatenated with nothing inserted between
// them, so every piece must carry its own separator. Without the explicit
// space the source would read "__kernel voidvectorAdd(...)", and without the
// "\n" after the "//" comment the comment would swallow the rest of the
// kernel body. Each line therefore ends in "\n", which also keeps line
// numbers in a compiler build log meaningful.
const char *kernelSrc =
    "__kernel void vectorAdd(__global const int *a,\n"
    "                        __global const int *b,\n"
    "                        __global int *resultado)\n"
    "{\n"
    "    // Vector element index\n"
    "    int nIndex = get_global_id(0);\n"
    "    resultado[nIndex] = a[nIndex] + b[nIndex];\n"
    "}\n";


int main(void)
{
const int size = 4;

// get platform
vector<Platform> platformList;
Platform::get(&platformList);
cout << "Platform number is: " << platformList.size() << endl;

// show platform info
cl::string platformInfo;
platformList[0].getInfo(CL_PLATFORM_VERSION, &platformInfo);
cout << "Platform Info: " << platformInfo.c_str() << endl;

// get available devices
vector<Device> devices;
platformList[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
cout << devices.size() << " device(s) found!" << endl;

// show device info
cl::string name;
devices[0].getInfo(CL_DEVICE_NAME, &name);
cout << "Device name: " << name.c_str() << endl;

// create OpenCL context
Context hContext(devices, 0, NULL, NULL, NULL);

// queue
CommandQueue queue(hContext, devices[0], 0, 0);

// Load source
cl::Program::Sources source(1, std::make_pair(kernelSrc,strlen(kernelSrc)));

// create and build program
Program program(hContext, source, NULL);
program.build(devices, NULL, NULL, NULL);

// Get the kernel so we can use it
Kernel kernel(program, "vectorAdd", NULL);

// arrays on host
int *a = new int[size];
int *b = new int[size];
int *c = new int[size];

// initialize
a[0]=1; a[1]=2; a[2]=20; a[3]=8;
b[0]=29; a[1]=28; a[2]=10; a[3]=22;

// result should be {30, 30, 30, 30}

// Allocate memory on device and copy the arrays there
Buffer bufferA(hContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * size, a, NULL);
Buffer bufferB(hContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * size, b, NULL);

// Create buffer to get output on kernel
Buffer bufferResultado(hContext, CL_MEM_WRITE_ONLY, sizeof(int)*size, NULL, NULL);

// set arguments
kernel.setArg(0, bufferA);
kernel.setArg(1, bufferB);
kernel.setArg(2, bufferResultado);

// execute kernel
NDRange global(size);
NDRange local(2);
KernelFunctor func = kernel.bind(queue, global, local);

func().wait(); // error here.............................................. .................................................. .


cout << "Done!" << endl;

// copy from device to host
queue.enqueueReadBuffer(bufferResultado, CL_TRUE, 0, sizeof(int)*size, c, NULL, NULL);

for(int i=0; i<size; i++)
cout << c[i] << endl;

getchar();
return EXIT_SUCCESS;
}

ssarangi
03-24-2010, 07:57 PM
I'm not sure whether I'm answering this correctly, since I'm a newbie myself, but the one thing I didn't understand is why you set local(2) just before you execute the kernel. You have 4 elements and your global work size is 4, so why is the local work size 2? Maybe changing that to 0 would solve your problem. This is just a thought...