Hello to everyone,
I have a problem with this piece of code:
Code :
int f_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
						const unsigned char *in_arg, unsigned int nbytes) {
	int result = d_true, error;
	size_t global_work_size[1] = {nbytes}, local_work_size[1] = {16};
	void *pointer;
	memcpy(out_arg, in_arg, nbytes);
	v_cl_out_buffer = clCreateBuffer(v_cl_context, (CL_MEM_READ_WRITE|
														CL_MEM_USE_HOST_PTR),
									 nbytes, out_arg, &error);
	if ((error = clSetKernelArg(v_cl_kernel, 1, sizeof(v_cl_out_buffer),
								(void *)&v_cl_out_buffer)) == CL_SUCCESS) {
		if (v_space_initialized == d_false) {
			v_cl_key_buffer = clCreateBuffer(v_cl_context, CL_MEM_READ_ONLY,
											 const_b, NULL, &error);
			clEnqueueWriteBuffer(v_cl_command_queue, v_cl_key_buffer,
								 CL_TRUE, 0, const_b, expanded_key, 0,
								 NULL, NULL);
			if (((error = clSetKernelArg(v_cl_kernel, 0,
										 sizeof(v_cl_key_buffer),
										 (void *)&v_cl_key_buffer))
				 != CL_SUCCESS) ||
				((error = clSetKernelArg(v_cl_kernel, 2,
									 sizeof(int),
									 (void *)&const_a))
				 != CL_SUCCESS)) {
				d_debug("can't hook parameters to the kernel");
				result = d_false;
			}
			v_space_initialized = d_true;
		}
	}
	if (error == CL_SUCCESS) {
		if ((error = clEnqueueNDRangeKernel(v_cl_command_queue,
											v_cl_kernel, 1, NULL,
											global_work_size,
											local_work_size,
											0, NULL, NULL))
			 == CL_SUCCESS) {
			pointer = clEnqueueMapBuffer(v_cl_command_queue,
										 v_cl_out_buffer, CL_TRUE,
										 CL_MAP_READ, nbytes, 0, 0, NULL,
										 NULL, NULL);
			memcpy(out_arg, pointer, nbytes);
			clEnqueueUnmapMemObject(v_cl_command_queue, v_cl_out_buffer,
									pointer, 0, NULL, NULL);
		}
		clReleaseMemObject(v_cl_out_buffer);
	}
	return result;
}
This piece of code is called many times (in my personal test case only two times) with a different nbytes value (but nbytes%16 is always 0), a different pointer to an in_arg string (initial_in_arg+last_nbytes foreach step) and a different pointer to an already allocated out_arg (out_arg has the same dimension of the in_arg parameter and work in the same way).
expanded_key and const_a are global variables and they don't change their content.
My kernel is very simple (it's just for test):
Code :
__kernel void example(__global uint *rkey, __global uint *odata, uint steps) {
	int executions;
	unsigned int *pointer[4], box[4];
	unsigned char *character;
	character = (unsigned char *)odata[0];
	character[0] = 'T';
	character[1] = 'H';
	character[2] = 'I';
	character[3] = 'S';
	character = (unsigned char *)odata[1];
	character[0] = ' ';
	character[1] = 'I';
	character[2] = 'S';
	character[3] = ' ';
	character = (unsigned char *)odata[2];
	character[0] = 'A';
	character[1] = ' ';
	character[2] = 'T';
	character[3] = 'E';
	character = (unsigned char *)odata[3];
	character[0] = 'S';
	character[1] = 'T';
	character[2] = '!';
	character[3] = '!';
}
Now, when I execute this piece of code on GPU everything freeze (the entire system, Mountain Lion) and I've to force the shutdown pressing the power button for 4/5 seconds.
No logs are written in this time and I've got no relevant informations from my system. So, my question is: what's wrong with this? I can't test my kernel or my kernel's loader code if everything freeze!
Thank you so much for your patience!