Results 1 to 3 of 3

Thread: OpenCL memory allocation problem

Hybrid View

  1. #1

    OpenCL memory allocation problem

    I'm using OpenCL on Android and am processing video's.
    I can process images and frame's from video's. When I use my inverse filter (no for loops in it), I can process all the frames without initializing the OpenCL objects again. But when I use a filter with a for loop, only the first frame will be processed and all the others will be black.

    Example:
    1)
    init function
    execute kernel with for loops
    remove opencl function
    => all goes fine

    2)
    init function
    execute kernel without for loops
    execute kernel without for loops on next frame
    remove opencl function
    => all goes fine

    3)
    init function
    execute kernel with for loops
    execute kernel with for loops on next frame
    remove opencl function
    => first frame is processed, 2th frame is black


    I'm using OpenCL1.1 and am looking for a way to solve this issue. As far as my knowledge goes, it must be something with memory allocation?

    Init code:
    Code :
    struct OpenCLObjects
    {
    	cl_platform_id platform;
    	cl_device_id device;
    	cl_context context;
    	cl_command_queue queue;
    	cl_program program;
    	cl_kernel kernel;
    	bool isInputBufferInitialized;
    	cl_mem inputBuffer;
    	cl_mem outputBuffer;
    };
     
    static OpenCLObjects openCLObjects;
     
    void initOpenCL
    (
    		JNIEnv* env,
    		jobject thisObject,
    		jstring kernelName,
    		cl_device_type required_device_type,
    		OpenCLObjects& openCLObjects
    )
    {
     
    	using namespace std;
     
     
    	openCLObjects.isInputBufferInitialized = false;
     
    	cl_int err = CL_SUCCESS;
     
    	/* 
    	 * Step 1: Get the first platform
    	 */
    	cl_platform_id platform;
    	err = clGetPlatformIDs(1, &platform, NULL);
    	SAMPLE_CHECK_ERRORS(err);
     
    	cl_uint i = 0;
    	size_t platform_name_length = 0;
    	err = clGetPlatformInfo(
    			platform,
    			CL_PLATFORM_NAME,
    			0,
    			0,
    			&platform_name_length
    	);
    	SAMPLE_CHECK_ERRORS(err);
     
    	openCLObjects.platform = platform;
    	/* 
    	 * Step 2: Create context with a device of the specified type (required_device_type).
    	 */
     
    	cl_context_properties context_props[] = {
    			CL_CONTEXT_PLATFORM,
    			cl_context_properties(openCLObjects.platform),
    			0
    	};
     
    	openCLObjects.context =
    			clCreateContextFromType
    			(
    					context_props,
    					required_device_type,
    					0,
    					0,
    					&err
    			);
    	SAMPLE_CHECK_ERRORS(err);
    	/* 
    	 * Step 3: Query for OpenCL device that was used for context creation.
    	 */
    	err = clGetContextInfo
    			(
    					openCLObjects.context,
    					CL_CONTEXT_DEVICES,
    					sizeof(openCLObjects.device),
    					&openCLObjects.device,
    					0
    			);
    	SAMPLE_CHECK_ERRORS(err);
     
    	/*  
    	 * Step 4: Create OpenCL program from its source code.
    	 * The file name is passed by java.
    	 * Convert the jstring to const char* and append the needed directory path.
    	 */
    	const char* fileName = env->GetStringUTFChars(kernelName, 0);
    	std::string fileDir;
    	fileDir.append("/data/data/com.denayer.ovsr/app_execdir/");
    	fileDir.append(fileName);
    	fileDir.append(".cl");
    	std::string kernelSource = loadProgram(fileDir);
    	const char* kernelSourceChar = kernelSource.c_str();
     
    	openCLObjects.program =
    			clCreateProgramWithSource
    			(
    					openCLObjects.context,
    					1,
    					&kernelSourceChar,
    					0,
    					&err
    			);
     
    	SAMPLE_CHECK_ERRORS(err);
     
    	/*
    	 * Build the program with defined BUILDOPT (build optimizations).
    	 */
    	err = clBuildProgram(openCLObjects.program, 0, 0, BUILDOPT, 0, 0);
    	jstring JavaString = (*env).NewStringUTF("Code compiled succesful.");
    	if(err == CL_BUILD_PROGRAM_FAILURE)
    	{
    		size_t log_length = 0;
    		err = clGetProgramBuildInfo(
    				openCLObjects.program,
    				openCLObjects.device,
    				CL_PROGRAM_BUILD_LOG,
    				0,
    				0,
    				&log_length
    		);
    		SAMPLE_CHECK_ERRORS(err);
     
    		vector<char> log(log_length);
     
    		err = clGetProgramBuildInfo(
    				openCLObjects.program,
    				openCLObjects.device,
    				CL_PROGRAM_BUILD_LOG,
    				log_length,
    				&log[0],
    				0
    		);
    		SAMPLE_CHECK_ERRORS(err);
     
    		LOGE
    		(
    				"Error happened during the build of OpenCL program.\nBuild log: %s",
    				&log[0]
    		);
    		return;
    	}
     
    	/* 
    	 * Step 6: Extract kernel from the built program.
    	 */
    	fileName = env->GetStringUTFChars(kernelName, 0);
    	char result[100];   // array to hold the result.
    	std::strcpy(result,fileName); // copy string one into the result.
    	std::strcat(result,"Kernel"); // append string two to the result.
    	openCLObjects.kernel = clCreateKernel(openCLObjects.program, result, &err);
    	SAMPLE_CHECK_ERRORS(err);
     
    	/* 
    	 * Step 7: Create command queue.
    	 */
     
    	openCLObjects.queue =
    			clCreateCommandQueue
    			(
    					openCLObjects.context,
    					openCLObjects.device,
    					0,     
    					&err
    			);
    	SAMPLE_CHECK_ERRORS(err);
     
    }

    Execution code:
    Code :
    void nativeImage2DOpenCL
    (
    		JNIEnv* env,
    		jobject thisObject,
    		OpenCLObjects& openCLObjects,
    		jobject inputBitmap,
    		jobject outputBitmap
    )
    {
    	using namespace std;
     
    	timeval start;
    	timeval end;
     
    	gettimeofday(&start, NULL);
     
    	AndroidBitmapInfo bitmapInfo;
    	AndroidBitmap_getInfo(env, inputBitmap, &bitmapInfo);
     
    	size_t bufferSize = bitmapInfo.height * bitmapInfo.stride;
    	cl_uint rowPitch = bitmapInfo.stride / 4;
     
    	cl_int err = CL_SUCCESS;
     
    	void* inputPixels = 0;
    	AndroidBitmap_lockPixels(env, inputBitmap, &inputPixels);
     
    	cl_image_format image_format;
    	image_format.image_channel_data_type=CL_UNORM_INT8;
    	image_format.image_channel_order=CL_RGBA;
     
    	openCLObjects.inputBuffer =
    			clCreateImage2D(openCLObjects.context,
    					CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
    					&image_format,
    					bitmapInfo.width,
    					bitmapInfo.height,
    					0,
    					inputPixels,
    					&err);
    	SAMPLE_CHECK_ERRORS(err);
     
    	openCLObjects.isInputBufferInitialized = true;
     
    	AndroidBitmap_unlockPixels(env, inputBitmap);
     
    	void* outputPixels = 0;
    	AndroidBitmap_lockPixels(env, outputBitmap, &outputPixels);
     
    	cl_mem outputBuffer =
    			clCreateImage2D(openCLObjects.context,
    					CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    					&image_format,
    					bitmapInfo.width,
    					bitmapInfo.height,
    					0,
    					outputPixels,
    					&err);
    	SAMPLE_CHECK_ERRORS(err);
    	err = clSetKernelArg(openCLObjects.kernel, 0, sizeof(openCLObjects.inputBuffer), &openCLObjects.inputBuffer);
    	SAMPLE_CHECK_ERRORS(err);
    	err = clSetKernelArg(openCLObjects.kernel, 1, sizeof(outputBuffer), &outputBuffer);
    	SAMPLE_CHECK_ERRORS(err);
     
    	size_t globalSize[2] = { bitmapInfo.width, bitmapInfo.height };
     
    	err = clEnqueueNDRangeKernel
    			(
    					openCLObjects.queue,
    					openCLObjects.kernel,
    					2,
    					0,
    					globalSize,
    					0,
    					0, 0, 0
    			);
    	SAMPLE_CHECK_ERRORS(err);
     
    	err = clFinish(openCLObjects.queue);
    	SAMPLE_CHECK_ERRORS(err);
     
        const size_t origin[3] = {0, 0, 0};
        const size_t region[3] = {bitmapInfo.width, bitmapInfo.height, 1};
     
    	err = clEnqueueReadImage(
    			openCLObjects.queue,
    			outputBuffer,
    			true,
    			origin,
    			region,
    			0,
    			0,
    			outputPixels,
    			0,
    			0,
    			0);
    	SAMPLE_CHECK_ERRORS(err);
     
     
    	// Call clFinish to guarantee that the output region is updated.
    	err = clFinish(openCLObjects.queue);
    	SAMPLE_CHECK_ERRORS(err);
     
    	err = clReleaseMemObject(outputBuffer);
    	SAMPLE_CHECK_ERRORS(err);
     
    	// Make the output content be visible at the Java side by unlocking
    	// pixels in the output bitmap object.
    	AndroidBitmap_unlockPixels(env, outputBitmap);
     
    }

  2. #2
    Kernel code inverse:

    Code :
    _kernel void inverseKernel(__read_only  image2d_t  srcImage,
                              __write_only image2d_t  dstImage)
    { 
        const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
                                   CLK_ADDRESS_REPEAT        |
                                   CLK_FILTER_NEAREST;
         int x = get_global_id(0);
         int y = get_global_id(1);
         int2 coords = (int2) (x,y);
     
        float4 centerPixel = read_imagef(srcImage,sampler,coords);
        centerPixel.x = 1-centerPixel.x;
        centerPixel.y = 1-centerPixel.y;
        centerPixel.z = 1-centerPixel.z;
        write_imagef(dstImage,coords,centerPixel);	
    }

    Edge kernel (with for loop):
    Code :
    __kernel void edgeKernel(__read_only  image2d_t  srcImage,
                              __write_only image2d_t  dstImage)
    {    
        const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
                                   CLK_ADDRESS_REPEAT         |
                                   CLK_FILTER_NEAREST;
        int x = get_global_id(0);
        int y = get_global_id(1);
    	int2 coords = (int2) (x,y);
     
    	int i = 0;
    	int j = 0;
    	float4 bufferPixel;
    	float4 currentPixel;
    	float sum = 0;
    	int counter = 0;
    	const float edgeKernel[9] = {0.0f,1.0f,0.0f,1.0f,-4.0f,1.0f,0.0f,1.0f,0.0f};
    	currentPixel = read_imagef(srcImage,sampler,coords);
    	for(i=-1;i<=1;i++)
    	{
    		for(j=-1;j<=1;j++)
    		{
    		coords = (int2)((x+i),(y+j));
    	    bufferPixel = read_imagef(srcImage,sampler,coords);
    	    //sum = sum + (bufferPixel.y * edgeKernel[counter]);
    	    sum = mad(bufferPixel.y,edgeKernel[counter],sum);
    	    counter++;
    		}
    	}
    	if(sum>255) sum=255;
    	if(sum<0) sum=0;
     
    	currentPixel.x=sum;
    	currentPixel.y=sum;
    	currentPixel.z=sum;
     
    	write_imagef(dstImage,coords,currentPixel);	                          
     
    }

    All code can be found here:
    https://github.com/degoossez/OVSR
    OpenCL code is in the JNI folder, the kernels are in the assets folder

  3. #3
    Senior Member
    Join Date
    Oct 2012
    Posts
    115
    You are using unnormalized integer coordinates with read_imagef(), so your sampler should be

    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
    CLK_ADDRESS_CLAMP_TO_EDGE |
    CLK_FILTER_NEAREST;

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •