headdoggy

02-10-2013, 02:36 AM

Dear all,

I have spent some time trying to figure out my problem, but haven't succeeded so I am posting here.

Since I thought about OpenCL for the first time, I thought it would be great to implement method of lines for solving PDE equation. So I started from the very basic example - 1D hyperbolic equation (simple string oscillations).

Starting from initialization:

int fixed=1;

for (i = 0; i < loops; i++)

{

u[0][i] = fixed;

u[1][i] = fixed;

u[2][i] = fixed;

u[n-1][i] = fixed;

u[n-2][i] = fixed;

u[n-3][i] = fixed;

y[0] = 0.0;

y[n-1] = 0.0;

}

for (i = 0; i < n; i++)

{

u[i][0] = fosc (i2x (i));

u[i][1] = fosc (i2x (i));

u[i][2] = fosc (i2x (i));

input[i] = u[i][2];

y[i] = (u[i][2] - u[i][0])/2.0/dt;

}

Where function fosc gives me simple Sine function profile from -50 to 50.

float

fosc (float x)

{

float osc;

osc=1+sin(x*3.1415926/100+3.1415926/2.0); //simple string

return osc;

}

I have created following buffers for input, output and derivatives:

cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, n * sizeof(float),(void *) input, NULL);

cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE , n*loops * sizeof(float), NULL, NULL);

cl_mem devs = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, n * sizeof(float),(void *)y, NULL);

Since it is not valid to pass 2D arrays in kernel, I slice the output function with indexing

[x+SizeX*y]

for (j=0; j<loops; j++){

for (i=0; i<n; i++){

u[i][j+2]=output[i+n*j];

if (i%(n/8) == 0) {

cout << u[i][j] << " ";

}

}

cout<<endl;

}

Then I came up with the following kernel:

float

an (float fn2, float fn1, float fn, float fn11, float fn22, float dh)

{

return (-fn2 + 16.0 * fn1 - 30.0 * fn + 16.0 * fn11 - fn22) / 12.0 / dh / dh - func (fn); //4-th space order second derivative

}

__kernel void helloworld(__global float* in, __global float* out, int n, int loops, __global float* devs)

{

int t;

float dh=100.0/n;

float dt=100.0/loops;

int i = get_global_id(0);

float y[1000];

float u[1000];

float k1[1000];float k2[1000];float k3[1000];float k4[1000];

float m1[1000]; float m2[1000]; float m3[1000]; float m4[1000];

if ( (i>-1) && (i<n) ) {

u[i]=in[i]; //receiving initial field values

y[i]=devs[i]; //receiving initial derivatives

}

for (t=0; t < loops; t++) {

if ( (i>1) && (i<n-2) ) {

k1[i] =

an (u[i - 2], u[i - 1], u[i], u[i + 1], u[i + 2], dh) * dt;

m1[i] = y[i] * dt;

k2[i] =

an (u[i - 2] + m1[i - 2] / 2.0,

u[i - 1] + m1[i - 1] / 2.0, u[i] + m1[i] / 2.0,

u[i + 1] + m1[i + 1] / 2.0,

u[i + 2] + m1[i + 2] / 2.0, dh) * dt;

m2[i] = (y[i] + k1[i] / 2.0) * dt;

k3[i] =

an (u[i - 2] + m2[i - 2] / 2.0,

u[i - 1] + m2[i - 1] / 2.0, u[i] + m2[i] / 2.0,

u[i + 1] + m2[i + 1] / 2.0,

u[i + 2] + m2[i + 2] / 2.0, dh) * dt;

m3[i] = (y[i] + k2[i] / 2.0) * dt;

k4[i] =

an (u[i - 2] + m3[i - 2], u[i - 1] + m3[i - 1],

u[i] + m3[i], u[i + 1] + m3[i + 1],

u[i + 2] + m3[i + 2], dh) * dt;

m4[i] = (y[i] + k3[i]) * dt;

y[i] = y[i] + (k1[i] + 2.0 * k2[i] + 2.0 * k3[i] + k4[i]) / 6.0;

u[i] = u[i] + (m1[i] + 2.0 * m2[i] + 2.0 * m3[i] + m4[i]) / 6.0;

out[i+n*t] = u[i];

} else {

out[i+n*t] = 1; }

}

}

Now the problem is that the code is working in usual C++ program but when I moved to OpenCL it doesn't. Instead of harmonic oscillations, I can see random numbers popping up here and there.

The algorithm looks fine to me, I can not figure out the problem.

I would appreciate any help.

I have spent some time trying to figure out my problem, but haven't succeeded so I am posting here.

Since I thought about OpenCL for the first time, I thought it would be great to implement method of lines for solving PDE equation. So I started from the very basic example - 1D hyperbolic equation (simple string oscillations).

Starting from initialization:

int fixed=1;

for (i = 0; i < loops; i++)

{

u[0][i] = fixed;

u[1][i] = fixed;

u[2][i] = fixed;

u[n-1][i] = fixed;

u[n-2][i] = fixed;

u[n-3][i] = fixed;

y[0] = 0.0;

y[n-1] = 0.0;

}

for (i = 0; i < n; i++)

{

u[i][0] = fosc (i2x (i));

u[i][1] = fosc (i2x (i));

u[i][2] = fosc (i2x (i));

input[i] = u[i][2];

y[i] = (u[i][2] - u[i][0])/2.0/dt;

}

Where function fosc gives me simple Sine function profile from -50 to 50.

float

fosc (float x)

{

float osc;

osc=1+sin(x*3.1415926/100+3.1415926/2.0); //simple string

return osc;

}

I have created following buffers for input, output and derivatives:

cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, n * sizeof(float),(void *) input, NULL);

cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE , n*loops * sizeof(float), NULL, NULL);

cl_mem devs = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, n * sizeof(float),(void *)y, NULL);

Since it is not valid to pass 2D arrays in kernel, I slice the output function with indexing

[x+SizeX*y]

for (j=0; j<loops; j++){

for (i=0; i<n; i++){

u[i][j+2]=output[i+n*j];

if (i%(n/8) == 0) {

cout << u[i][j] << " ";

}

}

cout<<endl;

}

Then I came up with the following kernel:

float

an (float fn2, float fn1, float fn, float fn11, float fn22, float dh)

{

return (-fn2 + 16.0 * fn1 - 30.0 * fn + 16.0 * fn11 - fn22) / 12.0 / dh / dh - func (fn); //4-th space order second derivative

}

__kernel void helloworld(__global float* in, __global float* out, int n, int loops, __global float* devs)

{

int t;

float dh=100.0/n;

float dt=100.0/loops;

int i = get_global_id(0);

float y[1000];

float u[1000];

float k1[1000];float k2[1000];float k3[1000];float k4[1000];

float m1[1000]; float m2[1000]; float m3[1000]; float m4[1000];

if ( (i>-1) && (i<n) ) {

u[i]=in[i]; //receiving initial field values

y[i]=devs[i]; //receiving initial derivatives

}

for (t=0; t < loops; t++) {

if ( (i>1) && (i<n-2) ) {

k1[i] =

an (u[i - 2], u[i - 1], u[i], u[i + 1], u[i + 2], dh) * dt;

m1[i] = y[i] * dt;

k2[i] =

an (u[i - 2] + m1[i - 2] / 2.0,

u[i - 1] + m1[i - 1] / 2.0, u[i] + m1[i] / 2.0,

u[i + 1] + m1[i + 1] / 2.0,

u[i + 2] + m1[i + 2] / 2.0, dh) * dt;

m2[i] = (y[i] + k1[i] / 2.0) * dt;

k3[i] =

an (u[i - 2] + m2[i - 2] / 2.0,

u[i - 1] + m2[i - 1] / 2.0, u[i] + m2[i] / 2.0,

u[i + 1] + m2[i + 1] / 2.0,

u[i + 2] + m2[i + 2] / 2.0, dh) * dt;

m3[i] = (y[i] + k2[i] / 2.0) * dt;

k4[i] =

an (u[i - 2] + m3[i - 2], u[i - 1] + m3[i - 1],

u[i] + m3[i], u[i + 1] + m3[i + 1],

u[i + 2] + m3[i + 2], dh) * dt;

m4[i] = (y[i] + k3[i]) * dt;

y[i] = y[i] + (k1[i] + 2.0 * k2[i] + 2.0 * k3[i] + k4[i]) / 6.0;

u[i] = u[i] + (m1[i] + 2.0 * m2[i] + 2.0 * m3[i] + m4[i]) / 6.0;

out[i+n*t] = u[i];

} else {

out[i+n*t] = 1; }

}

}

Now the problem is that the code is working in usual C++ program but when I moved to OpenCL it doesn't. Instead of harmonic oscillations, I can see random numbers popping up here and there.

The algorithm looks fine to me, I can not figure out the problem.

I would appreciate any help.