GPUworker

02-04-2011, 10:38 AM

Dear all,

I am trying to write code for image processing.

I want to write one kernel and call it from another kernel. Is that possible?

I have learned that a kernel can call ordinary functions. I would also like to try calling a kernel from a kernel.

Please help me in this regard.

Here is the situation in which I am now.

//creatematrix.cl

/*
 * createMat: allocate an iRows x iColumns matrix as an array of row pointers
 * and set every element to Initvalue.
 *
 * iRows     - number of row pointers to allocate
 * iColumns  - number of floats allocated for each row
 * Initvalue - value stored in every element
 *
 * Returns the array of row pointers, or NULL if a dimension is negative or
 * any allocation fails (rows allocated so far are released first, so nothing
 * leaks). On success the caller owns the result and must free() each row and
 * then the row-pointer array itself.
 *
 * NOTE(review): this relies on malloc/free from <stdlib.h>. OpenCL C device
 * code has no C standard library or dynamic allocation, so this helper is
 * presumably only usable on the host - confirm before including it from a
 * .cl file.
 */
float ** createMat(int iRows, int iColumns, float Initvalue)
{
    /* A negative count would be converted to a huge size_t below. */
    if (iRows < 0 || iColumns < 0) {
        return NULL;
    }

    const size_t rowCount = (size_t)iRows;
    const size_t colCount = (size_t)iColumns;

    float **pMatrix = malloc(rowCount * sizeof *pMatrix);
    if (pMatrix == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < rowCount; i++) {
        pMatrix[i] = malloc(colCount * sizeof *pMatrix[i]);

        /* malloc(0) may legitimately return NULL, so only treat NULL as a
         * failure when a non-empty row was requested. */
        if (pMatrix[i] == NULL && colCount != 0) {
            while (i > 0) {
                free(pMatrix[--i]);
            }
            free(pMatrix);
            return NULL;
        }

        for (size_t j = 0; j < colCount; j++) {
            pMatrix[i][j] = Initvalue;
        }
    }

    return pMatrix;
}

// Convolution.cl

// IMConvolution: each work-item takes its (x, y) position from
// get_global_id(0) / get_global_id(1), accumulates
//     pFilter[r * nFilterWidth + c] * pInput[(y + r) * nInWidth + (x + c)]
// over an nFilterWidth x nFilterWidth window, and stores the sum at index
// y * nWidth + x of pOutput.
//
// pInput       - input samples, indexed with row stride nInWidth
// pFilter      - filter coefficients, indexed with row stride nFilterWidth
// nInWidth     - row stride used when indexing pInput
// nFilterWidth - number of filter rows and columns iterated over
// nWidth       - row stride used when computing the output index
//
// NOTE(review): pOutput below is a local pointer that is never assigned
// before it is written through and returned, so the final store has no valid
// target.
// NOTE(review): the function is declared __kernel but returns a float
// pointer; the OpenCL C specification requires kernel functions to return
// void. Presumably the output should be a __global float * parameter
// instead - confirm, and rework together with the caller in CNNonGPU.
__kernel float* IMConvolution(const __global float * pInput,

__constant float * pFilter,

const int nInWidth,

const int nFilterWidth,

const int nWidth)

{

// Never initialised; see the review note above.
float *pOutput;

//const int nWidth = get_global_size(0);

const int xOut = get_global_id(0);

const int yOut = get_global_id(1);

// The input window starts at the same coordinates as the output element.
const int xInTopLeft = xOut;

const int yInTopLeft = yOut;

float sum = 0;

for (int r = 0; r < nFilterWidth; r++)

{

// Index of the first coefficient in filter row r.
const int idxFtmp = r * nFilterWidth;

const int yIn = yInTopLeft + r;

// Index of the first input sample in this row of the window.
const int idxIntmp = yIn * nInWidth + xInTopLeft;

for (int c = 0; c < nFilterWidth; c++)

{

const int idxF = idxFtmp + c;

const int idxIn = idxIntmp + c;

sum += pFilter[idxF]*pInput[idxIn];

}

} //for (int r = 0...

/*if (sum > 1)

sum = 1;

if (sum < -1)

sum = -1;*/

const int idxOut = yOut * nWidth + xOut;

pOutput[idxOut] = sum;

return(pOutput);

}

// CNNonGPU.cl

#include "Convolution.cl"

#include "creatematrix.cl"

// CNNonGPU: kernel meant to be launched from the host. As written it:
//   1. calls IMConvolution on pInput with tempB and keeps the result (TBimg);
//   2. for each of `iterations` passes, calls IMConvolution on the current
//      image with tempA, adds TBimg + TAimg + bias element-wise into sum,
//      writes 0.5 * (|s + 1| - |s - 1|) of each sum into tempimg, and copies
//      tempimg back into extYimg for the next pass;
//   3. after the loop, copies the interior of tempimg into pOutput.
//
// pInput       - input image; also aliased by extYimg and written through it
// tempA        - filter passed to IMConvolution inside the iteration loop
// tempB        - filter passed to IMConvolution once before the loop
// pOutput      - destination written after the last pass
// nInWidth     - passed to IMConvolution and createMat; bound of a copy loop
// nInHeight    - passed to createMat; bound of a copy loop
// nFilterWidth - forwarded to IMConvolution
// iterations   - number of passes of the outer loop
//
// NOTE(review): candidates for the reported CL_BUILD_PROGRAM_FAILURE, all
// visible in this block - confirm each against the OpenCL C specification:
//   - `new float[...]` is C++ syntax; OpenCL C has no dynamic allocation.
//   - `bias`, `iHeightExtended` and `iWidthExtended` are not declared in this
//     block or its parameter list.
//   - `n Width` and `nWi dth` in the IMConvolution calls contain a space
//     (possibly introduced when the code was pasted).
//   - `ind` is declared inside the iteration loop but used after it closes.
//   - the tempimg assignment has unbalanced parentheses.
//   - `extYimg = pInput` drops both const and the __global address space.
//   - IMConvolution is itself declared __kernel and returns a pointer.
//   - abs() is applied to float values; presumably fabs() is intended.
__kernel void CNNonGPU(const __global float * pInput,

__constant float * tempA,

__constant float * tempB,

__global float * pOutput,

const int nInWidth,

const int nInHeight,

const int nFilterWidth,

const int iterations)

{

const int nWidth = get_global_size(0);

float *TBimg, *Yimg, *extYimg, *TAimg;

float **tempimg;

int Elements = 0;

// Computed below but never read afterwards.
int inElements = 0;

float *sum;

Elements = nWidth * nWidth;

inElements = nInWidth * nInHeight;

TBimg = new float [Elements];

TAimg = new float [Elements];

sum = new float [Elements];

// Overwrites the TBimg pointer obtained from `new` just above.
TBimg = IMConvolution(pInput,tempB,nInWidth,nFilterWidth,n Width);

extYimg = pInput;

for(int i = 0; i < iterations;i++)

{

Yimg = extYimg;

TAimg = IMConvolution(Yimg,tempA,nInWidth,nFilterWidth,nWi dth);

for(int j = 0; j < Elements;j++)

{

sum[j] = TBimg[j] + TAimg[j] + bias;

}

// A new matrix is created on every pass; nothing in this block frees it.
tempimg = createMat(nInWidth,nInHeight,0);

int ind = 0;

// This `i` shadows the iteration counter of the enclosing loop.
for (int i = 1; i < iHeightExtended-1; i++)

{

for (int j = 1; j < iWidthExtended-1; j++)

{

tempimg[i][j] = 0.5 * ((abs(sum[ind] + 1) - (abs(sum[ind] - 1)));

ind++;

}

}

ind = 0;

// Copies tempimg back into extYimg; extYimg points at pInput's storage.
for(int j = 0; j < nInHeight; j++)

{

for(int i = 0; i < nInWidth;i++)

{

extYimg[ind] = tempimg[i][j];

ind++;

}

}

}

// `ind` was declared inside the loop body that closed on the line above.
ind = 0;

for (int i = 1; i < iHeightExtended-1; i++)

{

for (int j = 1; j < iWidthExtended-1; j++)

{

pOutput[ind] = tempimg[i][j];

ind++;

}

}

}

Here, in CNNonGPU, I want to use the convolution kernel many times. Sometimes it may be 1000 times or even more.

I also want to use the create matrix function once. Is it possible to do so?

I tried to execute the above by calling CNNonGPU from main (on the CPU), but I get an error at clBuildProgram. (The error is CL_BUILD_PROGRAM_FAILURE.)

Thanks in advance.

I am trying to write code for image processing.

I want to write one kernel and call it from another kernel. Is that possible?

I have learned that a kernel can call ordinary functions. I would also like to try calling a kernel from a kernel.

Please help me in this regard.

Here is the situation in which I am now.

//creatematrix.cl

/*
 * createMat: allocate an iRows x iColumns matrix as an array of row pointers
 * and set every element to Initvalue.
 *
 * iRows     - number of row pointers to allocate
 * iColumns  - number of floats allocated for each row
 * Initvalue - value stored in every element
 *
 * Returns the array of row pointers, or NULL if a dimension is negative or
 * any allocation fails (rows allocated so far are released first, so nothing
 * leaks). On success the caller owns the result and must free() each row and
 * then the row-pointer array itself.
 *
 * NOTE(review): this relies on malloc/free from <stdlib.h>. OpenCL C device
 * code has no C standard library or dynamic allocation, so this helper is
 * presumably only usable on the host - confirm before including it from a
 * .cl file.
 */
float ** createMat(int iRows, int iColumns, float Initvalue)
{
    /* A negative count would be converted to a huge size_t below. */
    if (iRows < 0 || iColumns < 0) {
        return NULL;
    }

    const size_t rowCount = (size_t)iRows;
    const size_t colCount = (size_t)iColumns;

    float **pMatrix = malloc(rowCount * sizeof *pMatrix);
    if (pMatrix == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < rowCount; i++) {
        pMatrix[i] = malloc(colCount * sizeof *pMatrix[i]);

        /* malloc(0) may legitimately return NULL, so only treat NULL as a
         * failure when a non-empty row was requested. */
        if (pMatrix[i] == NULL && colCount != 0) {
            while (i > 0) {
                free(pMatrix[--i]);
            }
            free(pMatrix);
            return NULL;
        }

        for (size_t j = 0; j < colCount; j++) {
            pMatrix[i][j] = Initvalue;
        }
    }

    return pMatrix;
}

// Convolution.cl

// IMConvolution: each work-item takes its (x, y) position from
// get_global_id(0) / get_global_id(1), accumulates
//     pFilter[r * nFilterWidth + c] * pInput[(y + r) * nInWidth + (x + c)]
// over an nFilterWidth x nFilterWidth window, and stores the sum at index
// y * nWidth + x of pOutput.
//
// pInput       - input samples, indexed with row stride nInWidth
// pFilter      - filter coefficients, indexed with row stride nFilterWidth
// nInWidth     - row stride used when indexing pInput
// nFilterWidth - number of filter rows and columns iterated over
// nWidth       - row stride used when computing the output index
//
// NOTE(review): pOutput below is a local pointer that is never assigned
// before it is written through and returned, so the final store has no valid
// target.
// NOTE(review): the function is declared __kernel but returns a float
// pointer; the OpenCL C specification requires kernel functions to return
// void. Presumably the output should be a __global float * parameter
// instead - confirm, and rework together with the caller in CNNonGPU.
__kernel float* IMConvolution(const __global float * pInput,

__constant float * pFilter,

const int nInWidth,

const int nFilterWidth,

const int nWidth)

{

// Never initialised; see the review note above.
float *pOutput;

//const int nWidth = get_global_size(0);

const int xOut = get_global_id(0);

const int yOut = get_global_id(1);

// The input window starts at the same coordinates as the output element.
const int xInTopLeft = xOut;

const int yInTopLeft = yOut;

float sum = 0;

for (int r = 0; r < nFilterWidth; r++)

{

// Index of the first coefficient in filter row r.
const int idxFtmp = r * nFilterWidth;

const int yIn = yInTopLeft + r;

// Index of the first input sample in this row of the window.
const int idxIntmp = yIn * nInWidth + xInTopLeft;

for (int c = 0; c < nFilterWidth; c++)

{

const int idxF = idxFtmp + c;

const int idxIn = idxIntmp + c;

sum += pFilter[idxF]*pInput[idxIn];

}

} //for (int r = 0...

/*if (sum > 1)

sum = 1;

if (sum < -1)

sum = -1;*/

const int idxOut = yOut * nWidth + xOut;

pOutput[idxOut] = sum;

return(pOutput);

}

// CNNonGPU.cl

#include "Convolution.cl"

#include "creatematrix.cl"

// CNNonGPU: kernel meant to be launched from the host. As written it:
//   1. calls IMConvolution on pInput with tempB and keeps the result (TBimg);
//   2. for each of `iterations` passes, calls IMConvolution on the current
//      image with tempA, adds TBimg + TAimg + bias element-wise into sum,
//      writes 0.5 * (|s + 1| - |s - 1|) of each sum into tempimg, and copies
//      tempimg back into extYimg for the next pass;
//   3. after the loop, copies the interior of tempimg into pOutput.
//
// pInput       - input image; also aliased by extYimg and written through it
// tempA        - filter passed to IMConvolution inside the iteration loop
// tempB        - filter passed to IMConvolution once before the loop
// pOutput      - destination written after the last pass
// nInWidth     - passed to IMConvolution and createMat; bound of a copy loop
// nInHeight    - passed to createMat; bound of a copy loop
// nFilterWidth - forwarded to IMConvolution
// iterations   - number of passes of the outer loop
//
// NOTE(review): candidates for the reported CL_BUILD_PROGRAM_FAILURE, all
// visible in this block - confirm each against the OpenCL C specification:
//   - `new float[...]` is C++ syntax; OpenCL C has no dynamic allocation.
//   - `bias`, `iHeightExtended` and `iWidthExtended` are not declared in this
//     block or its parameter list.
//   - `n Width` and `nWi dth` in the IMConvolution calls contain a space
//     (possibly introduced when the code was pasted).
//   - `ind` is declared inside the iteration loop but used after it closes.
//   - the tempimg assignment has unbalanced parentheses.
//   - `extYimg = pInput` drops both const and the __global address space.
//   - IMConvolution is itself declared __kernel and returns a pointer.
//   - abs() is applied to float values; presumably fabs() is intended.
__kernel void CNNonGPU(const __global float * pInput,

__constant float * tempA,

__constant float * tempB,

__global float * pOutput,

const int nInWidth,

const int nInHeight,

const int nFilterWidth,

const int iterations)

{

const int nWidth = get_global_size(0);

float *TBimg, *Yimg, *extYimg, *TAimg;

float **tempimg;

int Elements = 0;

// Computed below but never read afterwards.
int inElements = 0;

float *sum;

Elements = nWidth * nWidth;

inElements = nInWidth * nInHeight;

TBimg = new float [Elements];

TAimg = new float [Elements];

sum = new float [Elements];

// Overwrites the TBimg pointer obtained from `new` just above.
TBimg = IMConvolution(pInput,tempB,nInWidth,nFilterWidth,n Width);

extYimg = pInput;

for(int i = 0; i < iterations;i++)

{

Yimg = extYimg;

TAimg = IMConvolution(Yimg,tempA,nInWidth,nFilterWidth,nWi dth);

for(int j = 0; j < Elements;j++)

{

sum[j] = TBimg[j] + TAimg[j] + bias;

}

// A new matrix is created on every pass; nothing in this block frees it.
tempimg = createMat(nInWidth,nInHeight,0);

int ind = 0;

// This `i` shadows the iteration counter of the enclosing loop.
for (int i = 1; i < iHeightExtended-1; i++)

{

for (int j = 1; j < iWidthExtended-1; j++)

{

tempimg[i][j] = 0.5 * ((abs(sum[ind] + 1) - (abs(sum[ind] - 1)));

ind++;

}

}

ind = 0;

// Copies tempimg back into extYimg; extYimg points at pInput's storage.
for(int j = 0; j < nInHeight; j++)

{

for(int i = 0; i < nInWidth;i++)

{

extYimg[ind] = tempimg[i][j];

ind++;

}

}

}

// `ind` was declared inside the loop body that closed on the line above.
ind = 0;

for (int i = 1; i < iHeightExtended-1; i++)

{

for (int j = 1; j < iWidthExtended-1; j++)

{

pOutput[ind] = tempimg[i][j];

ind++;

}

}

}

Here, in CNNonGPU, I want to use the convolution kernel many times. Sometimes it may be 1000 times or even more.

I also want to use the create matrix function once. Is it possible to do so?

I tried to execute the above by calling CNNonGPU from main (on the CPU), but I get an error at clBuildProgram. (The error is CL_BUILD_PROGRAM_FAILURE.)

Thanks in advance.