PDA

View Full Version : problems with clcreateimage2d - empty output file



arrghh
02-17-2010, 06:59 AM
hello everyone,

I'm trying to develop a simple program (using clCreateImage2D and read/write_image): read a BMP from a file, process it in a kernel (just a copy in this case), and then save it to another BMP file. Unfortunately, when I run the program the output file is empty, and no errors are reported by the program. I have no idea what is causing the problem.

Any ideas what is wrong with my code?

vec.cpp


#include <oclUtils.h>

const char* cSourceFile = "kernel.cl";

// OpenCL Vars
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmDevSrcA; // OpenCL device source buffer A
cl_mem cmDevSrcB; // OpenCL device source buffer B
cl_mem cmDevDst; // OpenCL device destination buffer

size_t szGlobalWorkSize[] = {512, 512};
size_t szLocalWorkSize[] = {16, 16};

size_t szParmDataBytes; // Byte size of context information
size_t szKernelLength; // Byte size of kernel code
cl_int ciErr1, ciErr2; // Error code var
char* cPathAndName = NULL; // var for full paths to data, src, etc.
char* cSourceCL = NULL; // Buffer to hold source for compilation

shrBOOL bNoPrompt = shrFALSE;


// Main function
// ************************************************** **********
*********
int main(int argc, char **argv)
{
// get command line arg for quick test, if provided
bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");

// start logs
shrSetLogFileName ("vec.txt");

void *image = fopen("in.bmp", "rb");
void *image2 = fopen("wynik.bmp", "wb");

image = (void *)malloc(8 * (512*512*3+54));
image2 = (void *)malloc(8 * (512*512*3+54));

// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateContextFromType...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
shrLog(LOGBOTH, 0.0, "clGetContextInfo...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateCommandQueue...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Allocate the OpenCL buffer memory objects for source and result on the device GMEM

size_t width = 512;
size_t height = 512;
size_t rowpitch = 0;

cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage = clCreateImage2D(
cxGPUContext,
flags,
&format,
width,
height,
rowpitch,
image,
&ciErr1
);

cl_mem_flags flags2;
flags2 = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage2 = clCreateImage2D(
cxGPUContext, // a valid OpenCL context
flags2, // option flags [1]
&format, // image format properties [2]
width, // width of the image in pixels
height, // height of the image in pixels
rowpitch, // scan-line pitch in bytes [3]
image2, // pointer to the image data
&ciErr2 // on return, the result code
);

ciErr1 |= ciErr2;
shrLog(LOGBOTH, 0.0, "clCreateBuffer...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Read the OpenCL kernel in from source file
shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)...\n", cSourceFile);
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clBuildProgram...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the kernel
ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);
shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Set the Argument values

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);
ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);
shrLog(LOGBOTH, 0.0, "clSetKernelArg 0 - 3...\n\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// --------------------------------------------------------
// Start Core sequence... copy input data to GPU, compute, copy results back

const size_t origin[] = {0, 0, 0};
const size_t region[] = {1, 1, 1};

ciErr1 = clEnqueueWriteImage (
cqCommandQue,
myClImage, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin,// const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);

if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Launch kernel
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (copy)...\n");
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

//collect results
ciErr1 = clEnqueueReadImage (
cqCommandQue,
myClImage2, // cl_mem image,
CL_TRUE, // cl_bool blocking_read,
origin, // const size_t origin[3],
region, // const size_t region[3],
0, // size_t row_pitch,
0, // size_t slice_pitch,
image2, // void *ptr,
0, // cl_uint num_events_in_wait_list,
NULL, // const cl_event *event_wait_list,
NULL // cl_event *event)
);
if (ciErr1 != CL_SUCCESS)
{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}


FILE *nk = fopen("wynik.bmp", "wb");
fwrite(image2, 1, sizeof(8*(512*512*3+54)), nk);

shrLog(LOGBOTH, 0.0, "END \n\n");
}


kernel.cl



// Copies one pixel per work-item from imageIn to the same coordinates in imageOut.
// Must be launched over a 2-D NDRange; work-items that fall outside imageOut do
// nothing, so the global size may be rounded up past the image dimensions.
__kernel void copy(__read_only image2d_t imageIn, __write_only image2d_t imageOut)
{
    // Integer pixel coordinates, clamped addressing, no filtering.
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

    int gid0 = get_global_id(0);
    int gid1 = get_global_id(1);

    // Writing outside the image is undefined, so skip surplus work-items.
    if (gid0 >= get_image_width(imageOut) || gid1 >= get_image_height(imageOut)) {
        return;
    }

    uint4 pixel = read_imageui(imageIn, sampler, (int2)(gid0, gid1));
    write_imageui(imageOut, (int2)(gid0, gid1), pixel);
}

As the input file I use the standard Lena image (renamed to in.bmp) from http://www.bilsen.com/aic/tests/lena/lena.bmp

dbs2
02-20-2010, 04:33 AM
It looks like your region for writing and reading the image is 1x1x1, so you'll only get 1 pixel. Also, make sure the image format you are using is supported by the device. I've heard there is a bug in the Nvidia drivers that does not report an error when you try to create an unsupported image type.

arrghh
02-20-2010, 02:55 PM
Thank you for the reply. I've changed the region size to {width, height, 1} and also added a few tweaks, but I still can't solve my problem. Now my output image is filled with random noise in the first 1/3 of its height, and the rest is black. I don't know exactly how to pass an image to the program.
I've tried to read the image as 'unsigned char[width][height]', but as I mentioned before, it's not working properly....

The code below omits the error-code checks, but I've checked them all, and no error is reported.



#include <oclUtils.h>

const char* cSourceFile = "kernel.cl";

// OpenCL Vars
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmDevSrcA; // OpenCL device source buffer A
cl_mem cmDevSrcB; // OpenCL device source buffer B
cl_mem cmDevDst; // OpenCL device destination buffer

size_t szGlobalWorkSize[2];
size_t szLocalWorkSize[2];

size_t szParmDataBytes; // Byte size of context information
size_t szKernelLength; // Byte size of kernel code
cl_int ciErr1, ciErr2; // Error code var
char* cPathAndName = NULL; // var for full paths to data, src, etc.
char* cSourceCL = NULL; // Buffer to hold source for compilation

// Main function
// ************************************************** *******************
int main(int argc, char **argv)
{

size_t result=0;
unsigned char header [54];

FILE *input = fopen("in.bmp", "rb");
// read and store header
result = fread(header,1,54,input);
// move to start of data
fseek (input, 54, SEEK_SET);

unsigned char tab [512][512];
//read data from bmp file
int i, j;
for (i=0; i < 512; i++) {
for (j=0; j < 512; j++) {
result = fread (&tab[i][j],1,3,input);
}
}

fclose(input);
void * image= (char*)fopen("face.bmp", "rb");
//image = (void *)tab;

unsigned char tab2 [512][512];

void *image2;
image2 = (void *)tab2;

// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);

// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);

// Allocate the OpenCL buffer memory objects for source and result on the device GMEM
size_t width = 512;
size_t height = 512;
size_t rowpitch = 0;

cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; // | CL_MEM_ALLOC_HOST_PTR;//CL_MEM_USE_HOST_PTR;

cl_mem myClImage = clCreateImage2D (
cxGPUContext,
flags,
&format,
width,
height,
rowpitch,
image,
&ciErr1
);

cl_mem_flags flags2;
flags2 = CL_MEM_WRITE_ONLY; // | CL_MEM_ALLOC_HOST_PTR;// | CL_MEM_USE_HOST_PTR;

cl_mem myClImage2 = clCreateImage2D (
cxGPUContext, // a valid OpenCL context
flags2, // option flags [1]
&format, // image format properties [2]
width, // width of the image in pixels
height, // height of the image in pixels
rowpitch, // scan-line pitch in bytes [3]
image2, // pointer to the image data
&ciErr1 // on return, the result code
);

// Read the OpenCL kernel in from source file
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);

// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);

// Create the kernel
ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);

// Set the Argument values
ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);
ciErr1 = clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);

// Start Core sequence... copy input data to GPU, compute, copy results back
size_t origin[3];
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;

size_t region[3];
region[0] = width;
region[1] = height;
region[2] = 1;

ciErr1 = clEnqueueWriteImage (
cqCommandQue, //cl_command_queue command_queue,
myClImage, //cl_mem image,
CL_TRUE, //cl_bool blocking_write,
origin, //const size_t origin[3],
region, //const size_t region[3],
width * sizeof(char),//size_t input_row_pitch, width * sizeof(unsigned char) * 4
0, //size_t input_slice_pitch,
image, //const void * ptr,
0, //cl_uint num_events_in_wait_list,
NULL, //const cl_event *event_wait_list,
NULL //cl_event *event
);

szGlobalWorkSize[0] = 512;
szGlobalWorkSize[1] = 512;
szLocalWorkSize[0] = 16;
szLocalWorkSize[1] = 16;

// Launch kernel
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
clFinish(cqCommandQue);

// colect data
ciErr1 = clEnqueueReadImage (
cqCommandQue, //cl_command_queue command_queue,
myClImage2, //cl_mem image,
CL_TRUE, //cl_bool blocking_read,
origin, //const size_t origin[3],
region, //const size_t region[3],
width * sizeof(char) , //size_t row_pitch,
0, //size_t slice_pitch,
image2, //void *ptr,
0, //cl_uint num_events_in_wait_list,
NULL, //const cl_event *event_wait_list,
NULL //cl_event *event
);
// save data to file
FILE *nk = fopen("wynik.bmp", "wb");
fwrite(header,1,54,nk);
fwrite(image2, sizeof(char), (512*512), nk);
fclose(nk);

}

dbs2
02-21-2010, 10:11 AM
Are you sure the image data you're using when you create the image shouldn't be pointing to the tab variable? It seems like you read your data into tab but then use image for the image.

arrghh
02-21-2010, 11:21 AM
Yes, it should be pointing to tab :D. I've solved it already. Now I'm dealing with another issue: http://www.khronos.org/message_boards/viewtopic.php?f=37&t=2495