Hello everyone,

I'm trying to develop a simple program (using clCreateImage2D and read_image/write_image) that reads a BMP from a file, processes it in a kernel (just a copy in this case), and then saves the result to another BMP file. Unfortunately, when I run the program the output file is empty, and the program reports no errors. I have no idea what is causing the problem.

Any ideas what is wrong with my code?

vec.cpp
#include <oclUtils.h>

// File name of the OpenCL kernel source that main() locates, loads and compiles at run time
const char* cSourceFile = "kernel.cl";

// OpenCL state shared by the whole program (all set up inside main)
cl_context cxGPUContext; // OpenCL context, created for CL_DEVICE_TYPE_GPU
cl_command_queue cqCommandQue; // OpenCL command queue, created on cdDevices[0]
cl_device_id* cdDevices; // malloc'd list of the devices attached to the context
cl_program cpProgram; // OpenCL program created from the source in cSourceCL
cl_kernel ckKernel; // OpenCL kernel object for the "copy" kernel
cl_mem cmDevSrcA; // OpenCL device source buffer A (not referenced by the code shown)
cl_mem cmDevSrcB; // OpenCL device source buffer B (not referenced by the code shown)
cl_mem cmDevDst; // OpenCL device destination buffer (not referenced by the code shown)

// 2-D NDRange passed to clEnqueueNDRangeKernel: one work-item per pixel of a 512x512 image,
// grouped into 16x16 work-groups
size_t szGlobalWorkSize[] = {512, 512};
size_t szLocalWorkSize[] = {16, 16};

size_t szParmDataBytes; // Byte size of the device list reported by clGetContextInfo
size_t szKernelLength; // Byte size of the kernel source returned by oclLoadProgSource
cl_int ciErr1, ciErr2; // Scratch error codes returned by the OpenCL calls
char* cPathAndName = NULL; // Full path to the kernel source, as returned by shrFindFilePath
char* cSourceCL = NULL; // Buffer holding the kernel source text for compilation

// Set from the "noprompt" command-line flag at the start of main
shrBOOL bNoPrompt = shrFALSE;


// Main function
// *********************************************************************
int main(int argc, char **argv)
{
// get command line arg for quick test, if provided
bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");

// start logs
shrSetLogFileName ("vec.txt");

void *image = fopen("in.bmp", "rb");
void *image2 = fopen("wynik.bmp", "wb");

image = (void *)malloc(8 * (512*512*3+54));
image2 = (void *)malloc(8 * (512*512*3+54));

// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateContextFromType...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
shrLog(LOGBOTH, 0.0, "clGetContextInfo...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateCommandQueue...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Allocate the OpenCL buffer memory objects for source and result on the device GMEM

size_t width = 512;
size_t height = 512;
size_t rowpitch = 0;

cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage = clCreateImage2D(
cxGPUContext,
flags,
&format,
width,
height,
rowpitch,
image,
&ciErr1
);

cl_mem_flags flags2;
flags2 = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage2 = clCreateImage2D(
cxGPUContext, // a valid OpenCL context
flags2, // option flags [1]
&format, // image format properties [2]
width, // width of the image in pixels
height, // height of the image in pixels
rowpitch, // scan-line pitch in bytes [3]
image2, // pointer to the image data
&ciErr2 // on return, the result code
);

ciErr1 |= ciErr2;
shrLog(LOGBOTH, 0.0, "clCreateBuffer...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Read the OpenCL kernel in from source file
shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)...\n", cSourceFile);
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clBuildProgram...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the kernel
ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Set the Argument values

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);
ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);
shrLog(LOGBOTH, 0.0, "clSetKernelArg 0 - 3...\n\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// --------------------------------------------------------
// Start Core sequence... copy input data to GPU, compute, copy results back

const size_t origin[] = {0, 0, 0};
const size_t region[] = {1, 1, 1};

ciErr1 = clEnqueueWriteImage (
cqCommandQue,
myClImage, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin,// const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Launch kernel
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (copy)...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

//collect results
ciErr1 = clEnqueueReadImage (
cqCommandQue,
myClImage2, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin, // const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image2, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}


FILE *nk = fopen("wynik.bmp", "wb");
fwrite(image2, 1, sizeof(8*(512*512*3+54)), nk);

shrLog(LOGBOTH, 0.0, "END \n\n");
}
kernel.cl

// Copies imageIn to imageOut pixel by pixel; launch with one work-item per pixel (2-D NDRange).
// Both images are accessed as unsigned-integer data, matching read_imageui/write_imageui.
__kernel void copy(__read_only image2d_t imageIn, __write_only image2d_t imageOut)
{
    // Integer pixel coordinates, clamped addressing, no filtering
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
    int gid0 = get_global_id(0);
    int gid1 = get_global_id(1);
    uint4 pixel;

    // Writing outside the image is undefined, so skip any work-items beyond its extent
    if (gid0 >= get_image_width(imageOut) || gid1 >= get_image_height(imageOut))
    {
        return;
    }

    pixel = read_imageui(imageIn, sampler, (int2)(gid0, gid1));
    write_imageui(imageOut, (int2)(gid0, gid1), pixel);
}
As the input file I use the standard Lena image (renamed to in.bmp) from http://www.bilsen.com/aic/tests/lena/lena.bmp