Results 1 to 3 of 3

Thread: One Code - Three different output images

  1. #1
    Junior Member
    Join Date
    Nov 2010
    Posts
    14

    One Code - Three different output images

    Hello,

    I've just started with OpenCL and a problem I can't figure out how to solve:

    I want to implement a Gauss-Filter in the kernel. I use Image2D-objects for the source- and destination-image.

    My problem is, when I repeatedly execute the same exact code (of course execution is after compiling) I get three different output images:
    1. the source image
    2. the blurred image (that's what always should be the output)
    3. a fully black image

    I would really appreciate your help. Thank you very much.

    Here comes the code:
    device_gauss.cl:
    Code :
    __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
    		| CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
     
    __kernel void gauss(__read_only image2d_t src, __write_only image2d_t dst, __global float *weighter,
    		int offset, int rows, int cols) {
     
    	int i,j;
    	float sum = 0.0f;
    	float norm = 0.0f;
     
    	//Get coordinates
    	int gid0 = get_global_id(0);
    	int gid1 = get_global_id(1);
    	float4 pixel;
    	float4 pixeltemp;
    	int dist;
    	pixel=read_imagef(src,sampler,(int2)(gid0,gid1));
    	for (i = -offset; i <= offset; i++) {
    		for (j = -offset; j <= offset; j++) {
    			// sum up the color values of the neighbour pixels
     
    				pixeltemp = read_imagef(src,sampler,(int2)(gid0+i,gid1 +j));
    				dist = (int) (sqrt((i*i)+(j*j))+0.5);
    				sum += weighter[dist] * pixeltemp.x;
     
    				norm += weighter[dist];
     
    		}
    	}
    	pixel.x = (sum)/(norm);
     
    	if(norm>0.0f)
    		write_imagef (dst,(int2)(gid0, gid1),pixel);
     
     
    }

    host_gauss.c
    Code :
    #include <CL/cl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <math.h>
    #include "load_write_pgm.h"
     
    #define DIMENSION 2
    #define WEIGHTER_SIZE 13
    #define MILLION 1000000.0
     
    void check(cl_int*);
    void copyimg_to_linear(float *src, float *dst, struct imageMatrix* img);
    void save_1dimage(float *src, struct imageMatrix * img, char* filename);
    void do2Dto1D(struct imageMatrix* source, float* destination);
    void do1Dto2D(float* source, struct imageMatrix* destination);
    void init_weight_array(size_t size, float* weighter);
     
    int main() {
    	cl_int err;
    	cl_event event;
    	cl_uint amount;
    	int i;
     
    	//Get platforms
    	clGetPlatformIDs(NULL, NULL, &amount);
    	cl_platform_id *platform = (cl_platform_id*) malloc(amount
    			* sizeof(cl_platform_id));
    	err = clGetPlatformIDs(amount, platform, NULL);
    	check(&err);
     
    	//Get devices of first platform
    	clGetDeviceIDs(*platform, CL_DEVICE_TYPE_GPU, NULL, NULL, &amount);
    	cl_device_id *devices = (cl_device_id*) malloc(amount
    			* sizeof(cl_device_id));
    	err = clGetDeviceIDs(*platform, CL_DEVICE_TYPE_GPU, amount, devices, NULL);
    	check(&err);
     
    	//Get MaxWorkGroupSize
    	size_t workgroup = 0;
    	err = clGetDeviceInfo(*devices, CL_DEVICE_MAX_WORK_GROUP_SIZE,
    			sizeof(workgroup), &workgroup, NULL);
    	check(&err);
    	int workgroupsize = (int) workgroup;
     
    	//Create a context
    	cl_context ctx;
    	cl_context_properties props[] = { CL_CONTEXT_PLATFORM,
    			(cl_context_properties) *platform, NULL };
    	ctx = clCreateContext(props, amount, devices, NULL, NULL, &err);
    	check(&err);
     
    	//Create a command queue for all devices associated with the first platform
    	cl_command_queue *queues = (cl_command_queue*) malloc(amount * sizeof(cl_command_queue));
    	for (i = 0; i < amount; i++) {
    		queues[i] = clCreateCommandQueue(ctx, devices[i],
    				CL_QUEUE_PROFILING_ENABLE, &err);
    	}
     
    	//Read source from file
    	FILE *f;
    	f = fopen("kernels/device_gauss.cl", "r");
    	struct stat finfo;
    	if (f == NULL) {
    		printf("ERROR: Could not load file. \n");
    		exit(EXIT_FAILURE);
    	}
    	stat("kernels/device_gauss.cl", &finfo);
    	char *buffer = (char*) malloc(finfo.st_size + 1);
    	char c;
    	i = 0;
    	while ((c = getc(f)) != EOF) {
    		buffer[i] = c;
    		i++;
    	}
    	buffer[i] = '\0';
     
    	//Create program by source
    	cl_program program;
    	program = clCreateProgramWithSource(ctx, 1, &buffer, NULL, &err);
    	check(&err);
     
    	//Build program
    	err = clBuildProgram(program, NULL, NULL, NULL, NULL, NULL);
    	check(&err);
    	//Create kernel object
    	cl_kernel gauss_kernel;
    	gauss_kernel = clCreateKernel(program, "gauss", &err);
    	check(&err);
     
    	//Start measuring
    	cl_ulong start;
    	clEnqueueMarker(queues[0], &event);
    	clFinish(queues[0]);
    	clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
    			sizeof(cl_ulong), &start, NULL);
     
    	//Create buffers
    	cl_mem d_weighter;
    	float* h_src, *h_dst, *weighter;
    	struct imageMatrix img;
    	int dim[2];
    	char* filename = "pictures/pgm.pgm";
    	int offset = WEIGHTER_SIZE / 2;
    	//method to load .pgm-file
    	load_pgm_image(&img, filename);
    	dim[0] = img.i_rows;
    	dim[1] = img.i_cols;
    	//Host-Source-Image
    	h_src = (float*) malloc(dim[0] * dim[1] * sizeof(float));
    	//Host-Destination-Image
    	h_dst = (float*) malloc(dim[0] * dim[1] * sizeof(float));
    	//Copy 2D-Image to a 1D-Array
    	copyimg_to_linear(h_src, h_dst, &img);
    	//fill destination-image with zeros
    	h_dst = (float*) calloc(dim[0] * dim[1], sizeof(float));
     
    	//********Create IMG Objects**********
    	cl_mem d_src_2d;
    	cl_mem d_dst_2d;
     
    	cl_image_format format;
    	format.image_channel_order = CL_R;
    	format.image_channel_data_type = CL_FLOAT;
    	printf("%i\n", (int) sizeof(CL_UNSIGNED_INT8));
    	int num_channels_per_pixel = 1;
    	int channel_size = (int) sizeof(CL_UNSIGNED_INT8);
    	int pixel_size = num_channels_per_pixel * channel_size;
     
    	//Create Image2D-Objects für source and destination
    	d_src_2d = clCreateImage2D(ctx, CL_MEM_READ_ONLY, &format, dim[1], dim[0],
    			0, NULL, &err);
    	check(&err);
    	d_dst_2d = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &format, dim[1], dim[0],
    			0, NULL, &err);
    	check(&err);
     
    	//create weight for neighbor-pixel
    	weighter = (float *) malloc(WEIGHTER_SIZE * sizeof(float));
    	init_weight_array(WEIGHTER_SIZE, weighter);
    	d_weighter = clCreateBuffer(ctx, CL_MEM_ALLOC_HOST_PTR
    			| CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, WEIGHTER_SIZE
    			* sizeof(float), weighter, &err);
    	check(&err);
     
    	//Set kernel parameter
    	err = clSetKernelArg(gauss_kernel, 0, sizeof(cl_mem), (void*) &d_src_2d);
    	check(&err);
    	err = clSetKernelArg(gauss_kernel, 1, sizeof(cl_mem), (void*) &d_dst_2d);
    	check(&err);
    	err = clSetKernelArg(gauss_kernel, 2, sizeof(cl_mem), &d_weighter);
    	check(&err);
    	err = clSetKernelArg(gauss_kernel, 3, sizeof(int), &offset);
    	check(&err);
    	err = clSetKernelArg(gauss_kernel, 4, sizeof(int), &dim[1]);
    	check(&err);
    	err = clSetKernelArg(gauss_kernel, 5, sizeof(int), &dim[0]);
    	check(&err);
     
    	//Copy image to device
    	size_t origin[] = { 0, 0, 0 };
    	size_t region[] = { dim[1], dim[0], 1 };
    	err = clEnqueueWriteImage(queues[0], d_src_2d, CL_TRUE, origin, region, 0,
    			0, h_src, 0, NULL, &event);
    	check(&err);
     
    	err = clEnqueueWriteImage(queues[0], d_dst_2d, CL_TRUE, origin, region, 0,
    			0, h_dst, 0, NULL, &event);
    	check(&err);
     
    	//Enqueue kernel execution command in command queue
    	int remainder1 = dim[1] % workgroupsize;
    	int remainder2 = dim[0] % workgroupsize;
     
    	size_t global_size[] = { dim[1] + workgroup - remainder1, dim[0]
    			+ workgroup - remainder2 };
    	err = clEnqueueNDRangeKernel(queues[0], gauss_kernel, 2, NULL, global_size,
    			NULL, NULL, NULL, &event);
    	check(&err);
     
    	//Wait for execution of the gauss algorithm
    	err = clWaitForEvents(1, &event);
    	check(&err);
     
    	//Download result from device memory
    	err = clEnqueueReadImage(queues[0], d_dst_2d, CL_TRUE, origin, region, 0,
    			0, h_dst, NULL, NULL, NULL);
    	check(&err);
     
    	//Measure elapsed time
    	cl_ulong end;
    	clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong),
    			&end, NULL);
    	clFinish(queues[0]);
    	float elapsed = (end - start) / MILLION;
    	printf("Elapsed time: \t%f ms\n", elapsed);
     
    	//Write result image to disk
     
    	save_1dimage(h_dst, &img, "pictures/result.pgm");
     
    	clReleaseMemObject(d_dst_2d);
    	clReleaseMemObject(d_src_2d);
    	free(h_src);
    	free(h_dst);
    	free(weighter);
    	destroyImageMatrix(&img);
     
    	return 0;
    }
     
    void copyimg_to_linear(float *src, float *dst, struct imageMatrix* img) {
    	do2Dto1D(img, src);
    	do2Dto1D(img, dst);
    }
     
    void save_1dimage(float *src, struct imageMatrix* img, char* filename) {
    	do1Dto2D(src, img);
    	write_pgm_image(img, filename);
    }
     
    /*Transform a 2D array in a 1D Array*/
    void do2Dto1D(struct imageMatrix* source, float* destination) {
    	int i, j;
    	for (i = 0; i < source->i_rows; i++) {
    		for (j = 0; j < source->i_cols; j++) {
    			destination[j + (i * source->i_cols)] = source->imageMatrix[i][j];
    		}
    	}
    }
     
    /**
     *Transform a 1D array in a 2D Array
     */
    void do1Dto2D(float* source, struct imageMatrix* destination) {
    	int i, j;
    	for (i = 0; i < destination->i_rows; i++) {
    		for (j = 0; j < destination->i_cols; j++) {
    			destination->imageMatrix[i][j] = source[j + (i
    					* destination->i_cols)];
    		}
    	}
    }
     
    /**
     *
     */
    void init_weight_array(size_t size, float* weighter) {
    	int offset = size / 2;
    	int fwhm = 5;
    	/*
    	 * Given as parameter
    	 * FWHM = 2 sqrt(2 ln2) sigma ~ 2.35 sigma
    	 */
    	float a = (fwhm / 2.354);
    	int i;
     
    	/* set up kernel to weight the pixels */
    	/* (KERNEL_SIZE - offset -1) is the CORRECT version */
    	for (i = -offset; i <= (size - offset - 1); i++) {
    		weighter[i + offset] = exp(-i * i / (2 * a * a));
    	}
    }
     
    void check(cl_int *err) {
    	switch (*err) {
    	case CL_SUCCESS:
    		return;
    		break;
    	case CL_DEVICE_NOT_FOUND:
    		printf("Device not found.");
    		break;
    	case CL_DEVICE_NOT_AVAILABLE:
    		printf("Device not available");
    		break;
    	case CL_COMPILER_NOT_AVAILABLE:
    		printf("Compiler not available");
    		break;
    	case CL_MEM_OBJECT_ALLOCATION_FAILURE:
    		printf("Memory object allocation failure");
    		break;
    	case CL_OUT_OF_RESOURCES:
    		printf("Out of resources");
    		break;
    	case CL_OUT_OF_HOST_MEMORY:
    		printf("Out of host memory");
    		break;
    	case CL_PROFILING_INFO_NOT_AVAILABLE:
    		printf("Profiling information not available");
    		break;
    	case CL_MEM_COPY_OVERLAP:
    		printf("Memory copy overlap");
    		break;
    	case CL_IMAGE_FORMAT_MISMATCH:
    		printf("Image format mismatch");
    		break;
    	case CL_IMAGE_FORMAT_NOT_SUPPORTED:
    		printf("Image format not supported");
    		break;
    	case CL_BUILD_PROGRAM_FAILURE:
    		printf("Program build failure");
    		break;
    	case CL_MAP_FAILURE:
    		printf("Map failure");
    		break;
    	case CL_INVALID_VALUE:
    		printf("Invalid value");
    		break;
    	case CL_INVALID_DEVICE_TYPE:
    		printf("Invalid device type");
    		break;
    	case CL_INVALID_PLATFORM:
    		printf("Invalid platform");
    		break;
    	case CL_INVALID_DEVICE:
    		printf("Invalid device");
    		break;
    	case CL_INVALID_CONTEXT:
    		printf("Invalid context");
    		break;
    	case CL_INVALID_QUEUE_PROPERTIES:
    		printf("Invalid queue properties");
    		break;
    	case CL_INVALID_COMMAND_QUEUE:
    		printf("Invalid command queue");
    		break;
    	case CL_INVALID_HOST_PTR:
    		printf("Invalid host pointer");
    		break;
    	case CL_INVALID_MEM_OBJECT:
    		printf("Invalid memory object");
    		break;
    	case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
    		printf("Invalid image format descriptor");
    		break;
    	case CL_INVALID_IMAGE_SIZE:
    		printf("Invalid image size");
    		break;
    	case CL_INVALID_SAMPLER:
    		printf("Invalid sampler");
    		break;
    	case CL_INVALID_BINARY:
    		printf("Invalid binary");
    		break;
    	case CL_INVALID_BUILD_OPTIONS:
    		printf("Invalid build options");
    		break;
    	case CL_INVALID_PROGRAM:
    		printf("Invalid program");
    		break;
    	case CL_INVALID_PROGRAM_EXECUTABLE:
    		printf("Invalid program executable");
    		break;
    	case CL_INVALID_KERNEL_NAME:
    		printf("Invalid kernel name");
    		break;
    	case CL_INVALID_KERNEL_DEFINITION:
    		printf("Invalid kernel definition");
    		break;
    	case CL_INVALID_KERNEL:
    		printf("Invalid kernel");
    		break;
    	case CL_INVALID_ARG_INDEX:
    		printf("Invalid argument index");
    		break;
    	case CL_INVALID_ARG_VALUE:
    		printf("Invalid argument value");
    		break;
    	case CL_INVALID_ARG_SIZE:
    		printf("Invalid argument size");
    		break;
    	case CL_INVALID_KERNEL_ARGS:
    		printf("Invalid kernel arguments");
    		break;
    	case CL_INVALID_WORK_DIMENSION:
    		printf("Invalid work dimension");
    		break;
    	case CL_INVALID_WORK_GROUP_SIZE:
    		printf("Invalid work group size");
    		break;
    	case CL_INVALID_WORK_ITEM_SIZE:
    		printf("Invalid work item size");
    		break;
    	case CL_INVALID_GLOBAL_OFFSET:
    		printf("Invalid global offset");
    		break;
    	case CL_INVALID_EVENT_WAIT_LIST:
    		printf("Invalid event wait list");
    		break;
    	case CL_INVALID_EVENT:
    		printf("Invalid event");
    		break;
    	case CL_INVALID_OPERATION:
    		printf("Invalid operation");
    		break;
    	case CL_INVALID_GL_OBJECT:
    		printf("Invalid OpenGL object");
    		break;
    	case CL_INVALID_BUFFER_SIZE:
    		printf("Invalid buffer size");
    		break;
    	case CL_INVALID_MIP_LEVEL:
    		printf("Invalid mip-map level");
    		break;
    	default:
    		printf("Unknown");
    		break;
    	}
    	printf("\n");
    }

  2. #2
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: One Code - Three different output images

    Code :
    int channel_size = (int) sizeof(CL_UNSIGNED_INT8);

    This is not doing what you think. CL_UNSIGNED_INT8 is simply an arbitrary number. It's a code to represent that you want 8 bits per channel.

    If the channel format is CL_UNSIGNED_INT8 then the channel size is 1 (byte). If you want to use sizeof, then do this:

    Code :
    int channel_size = sizeof(cl_uchar);

    There are other things that look strange:

    Code :
    int remainder1 = dim[1] % workgroupsize;
       int remainder2 = dim[0] % workgroupsize;
     
       size_t global_size[] = { dim[1] + workgroup - remainder1, dim[0]
             + workgroup - remainder2 };

    First, the value stored in workgroupsize is the maximum work-group size supported by the device, not the work-group size that will be used to execute this particular kernel.

    Second, you don't need to know what work-group size will be used to execute this kernel. It's not useful information for you in this case.

    Third, do not try to tweak the global size based on what the work-group size. Do not remove the remainder (why would you do that?). This is what you should do:

    Code :
    size_t global_size[2] = { dim[1], dim[0]};

    This above could be the reason you are seeing different outputs at different times.

    This code is also not needed:
    Code :
    //Wait for execution of the gauss algorithm
       err = clWaitForEvents(1, &event);
       check(&err);

    You don't need to wait for the kernel to finish because you are using an in-order command queue. That means that the queue will make sure that a command does not start executing a command until all the previously enqueued commands are already finished.
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  3. #3
    Junior Member
    Join Date
    Nov 2010
    Posts
    14

    Re: One Code - Three different output images

    Hey david.garcia,

    thanks for your answer.

    I implemented the changes you suggested but still I get the three different output images.

    It sometimes appears to me, that my kernel isn't even executed (mostly when the source image is simply "copied" to the destination image). But I can't see a reason for that...

    Do I maybe have a problem with the global_id leaving the image bounds? I actually can't imagine, especially since I now changed the global_size to the one david.garcia suggested...

    I think I need some more hints...

Similar Threads

  1. Too many images?
    By matilda in forum OpenCL
    Replies: 2
    Last Post: 07-26-2011, 05:28 PM
  2. Replies: 1
    Last Post: 12-06-2010, 10:39 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •