I'm using OpenCL on Android to process videos.
I can process images and frames from videos. When I use my inverse filter (which has no for loops in it), I can process all the frames without initializing the OpenCL objects again. But when I use a filter with a for loop, only the first frame is processed and all the others come out black.

Example:
1)
init function
execute kernel with for loops
remove opencl function
=> all goes fine

2)
init function
execute kernel without for loops
execute kernel without for loops on next frame
remove opencl function
=> all goes fine

3)
init function
execute kernel with for loops
execute kernel with for loops on next frame
remove opencl function
=> first frame is processed, 2nd frame is black


I'm using OpenCL 1.1 and am looking for a way to solve this issue. As far as I can tell, it must have something to do with memory allocation?

Init code:
Code :
/*
 * Bundle of the OpenCL handles shared between initOpenCL() and
 * nativeImage2DOpenCL().
 */
struct OpenCLObjects
{
	// First platform reported by clGetPlatformIDs (set in initOpenCL).
	cl_platform_id platform;
	// Device the context was created for, queried via clGetContextInfo.
	cl_device_id device;
	// Context created with clCreateContextFromType for the requested device type.
	cl_context context;
	// Command queue on `device`, created at the end of initOpenCL.
	cl_command_queue queue;
	// Program created from the ".cl" source file and built in initOpenCL.
	cl_program program;
	// Kernel named "<kernelName>Kernel" extracted from `program`.
	cl_kernel kernel;
	// Reset to false by initOpenCL; set to true once nativeImage2DOpenCL
	// has created `inputBuffer`.
	bool isInputBufferInitialized;
	// 2D image holding a copy of the input bitmap's pixels.
	cl_mem inputBuffer;
	// Not assigned by the functions shown here (nativeImage2DOpenCL uses a
	// local output image) - presumably used elsewhere; verify.
	cl_mem outputBuffer;
};
 
// Single file-local instance of the OpenCL state.
static OpenCLObjects openCLObjects;
 
/*
 * Creates all long-lived OpenCL objects needed to run one filter kernel and
 * stores them in `openCLObjects`: platform, context, device, program, kernel
 * and command queue.
 *
 * env                  - JNI environment, used to read `kernelName`.
 * thisObject           - calling Java object (not used here).
 * kernelName           - base name of the filter; the source is loaded from
 *                        "/data/data/com.denayer.ovsr/app_execdir/<name>.cl"
 *                        and the kernel function is expected to be called
 *                        "<name>Kernel".
 * required_device_type - OpenCL device type the context is created for.
 * openCLObjects        - receives the created handles;
 *                        isInputBufferInitialized is reset to false.
 *
 * OpenCL errors are reported through SAMPLE_CHECK_ERRORS (defined elsewhere;
 * presumably it logs and returns from this function - confirm). A failed
 * program build logs the compiler output and returns.
 */
void initOpenCL
(
		JNIEnv* env,
		jobject thisObject,
		jstring kernelName,
		cl_device_type required_device_type,
		OpenCLObjects& openCLObjects
)
{
	openCLObjects.isInputBufferInitialized = false;

	cl_int err = CL_SUCCESS;

	/*
	 * Step 1: Get the first platform.
	 */
	err = clGetPlatformIDs(1, &openCLObjects.platform, NULL);
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 2: Create context with a device of the specified type (required_device_type).
	 */
	cl_context_properties context_props[] = {
			CL_CONTEXT_PLATFORM,
			cl_context_properties(openCLObjects.platform),
			0
	};

	openCLObjects.context =
			clCreateContextFromType
			(
					context_props,
					required_device_type,
					0,
					0,
					&err
			);
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 3: Query for OpenCL device that was used for context creation.
	 */
	err = clGetContextInfo
			(
					openCLObjects.context,
					CL_CONTEXT_DEVICES,
					sizeof(openCLObjects.device),
					&openCLObjects.device,
					0
			);
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 4: Create OpenCL program from its source code.
	 * The file name is passed by java.
	 * The JNI characters are copied into a std::string and released right
	 * away, so no later early return can leak them.
	 */
	const char* kernelNameChars = env->GetStringUTFChars(kernelName, 0);
	if(kernelNameChars == 0)
	{
		LOGE("Could not read the kernel name passed from Java.");
		return;
	}
	const std::string baseName(kernelNameChars);
	env->ReleaseStringUTFChars(kernelName, kernelNameChars);

	const std::string fileDir =
			"/data/data/com.denayer.ovsr/app_execdir/" + baseName + ".cl";
	const std::string kernelSource = loadProgram(fileDir);
	const char* kernelSourceChar = kernelSource.c_str();

	openCLObjects.program =
			clCreateProgramWithSource
			(
					openCLObjects.context,
					1,
					&kernelSourceChar,
					0,
					&err
			);
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 5: Build the program with defined BUILDOPT (build optimizations).
	 */
	err = clBuildProgram(openCLObjects.program, 0, 0, BUILDOPT, 0, 0);
	if(err == CL_BUILD_PROGRAM_FAILURE)
	{
		size_t log_length = 0;
		err = clGetProgramBuildInfo(
				openCLObjects.program,
				openCLObjects.device,
				CL_PROGRAM_BUILD_LOG,
				0,
				0,
				&log_length
		);
		SAMPLE_CHECK_ERRORS(err);

		// One extra zeroed element keeps the buffer non-empty and
		// NUL-terminated even if the driver reports a zero-length log.
		std::vector<char> log(log_length + 1, '\0');

		if(log_length > 0)
		{
			err = clGetProgramBuildInfo(
					openCLObjects.program,
					openCLObjects.device,
					CL_PROGRAM_BUILD_LOG,
					log_length,
					&log[0],
					0
			);
			SAMPLE_CHECK_ERRORS(err);
		}

		LOGE
		(
				"Error happened during the build of OpenCL program.\nBuild log: %s",
				&log[0]
		);
		return;
	}
	// Any other build error (invalid options, compiler unavailable, ...)
	// must not be ignored either.
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 6: Extract kernel from the built program.
	 * std::string is used so that a long kernel name cannot overflow a
	 * fixed-size buffer.
	 */
	const std::string kernelFunctionName = baseName + "Kernel";
	openCLObjects.kernel =
			clCreateKernel(openCLObjects.program, kernelFunctionName.c_str(), &err);
	SAMPLE_CHECK_ERRORS(err);

	/*
	 * Step 7: Create command queue.
	 */
	openCLObjects.queue =
			clCreateCommandQueue
			(
					openCLObjects.context,
					openCLObjects.device,
					0,
					&err
			);
	SAMPLE_CHECK_ERRORS(err);
}

Execution code:
Code :
void nativeImage2DOpenCL
(
		JNIEnv* env,
		jobject thisObject,
		OpenCLObjects& openCLObjects,
		jobject inputBitmap,
		jobject outputBitmap
)
{
	using namespace std;
 
	timeval start;
	timeval end;
 
	gettimeofday(&start, NULL);
 
	AndroidBitmapInfo bitmapInfo;
	AndroidBitmap_getInfo(env, inputBitmap, &bitmapInfo);
 
	size_t bufferSize = bitmapInfo.height * bitmapInfo.stride;
	cl_uint rowPitch = bitmapInfo.stride / 4;
 
	cl_int err = CL_SUCCESS;
 
	void* inputPixels = 0;
	AndroidBitmap_lockPixels(env, inputBitmap, &inputPixels);
 
	cl_image_format image_format;
	image_format.image_channel_data_type=CL_UNORM_INT8;
	image_format.image_channel_order=CL_RGBA;
 
	openCLObjects.inputBuffer =
			clCreateImage2D(openCLObjects.context,
					CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
					&image_format,
					bitmapInfo.width,
					bitmapInfo.height,
					0,
					inputPixels,
					&err);
	SAMPLE_CHECK_ERRORS(err);
 
	openCLObjects.isInputBufferInitialized = true;
 
	AndroidBitmap_unlockPixels(env, inputBitmap);
 
	void* outputPixels = 0;
	AndroidBitmap_lockPixels(env, outputBitmap, &outputPixels);
 
	cl_mem outputBuffer =
			clCreateImage2D(openCLObjects.context,
					CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
					&image_format,
					bitmapInfo.width,
					bitmapInfo.height,
					0,
					outputPixels,
					&err);
	SAMPLE_CHECK_ERRORS(err);
	err = clSetKernelArg(openCLObjects.kernel, 0, sizeof(openCLObjects.inputBuffer), &openCLObjects.inputBuffer);
	SAMPLE_CHECK_ERRORS(err);
	err = clSetKernelArg(openCLObjects.kernel, 1, sizeof(outputBuffer), &outputBuffer);
	SAMPLE_CHECK_ERRORS(err);
 
	size_t globalSize[2] = { bitmapInfo.width, bitmapInfo.height };
 
	err = clEnqueueNDRangeKernel
			(
					openCLObjects.queue,
					openCLObjects.kernel,
					2,
					0,
					globalSize,
					0,
					0, 0, 0
			);
	SAMPLE_CHECK_ERRORS(err);
 
	err = clFinish(openCLObjects.queue);
	SAMPLE_CHECK_ERRORS(err);
 
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {bitmapInfo.width, bitmapInfo.height, 1};
 
	err = clEnqueueReadImage(
			openCLObjects.queue,
			outputBuffer,
			true,
			origin,
			region,
			0,
			0,
			outputPixels,
			0,
			0,
			0);
	SAMPLE_CHECK_ERRORS(err);
 
 
	// Call clFinish to guarantee that the output region is updated.
	err = clFinish(openCLObjects.queue);
	SAMPLE_CHECK_ERRORS(err);
 
	err = clReleaseMemObject(outputBuffer);
	SAMPLE_CHECK_ERRORS(err);
 
	// Make the output content be visible at the Java side by unlocking
	// pixels in the output bitmap object.
	AndroidBitmap_unlockPixels(env, outputBitmap);
 
}