Results 1 to 8 of 8

Thread: How to properly create Read and Write Buffers:

  1. #1

    How to properly create Read and Write Buffers:

    i am currently doing a straight implementation of openCL in c++, meaning that I am not using the SDKs. I am experiencing a problem with the writebuffer and readbuffer that I cannot debug, and it has something to do with the amount of data that I am sending to the buffers.

    Here is my code:
    Code :
    err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(data)*dataSize, data, 0, NULL, NULL);
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL);
    err = clEnqueueCopyBuffer(queue, output, input, 0, 0, sizeof(data)*dataSize, 0, NULL, NULL);
    err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(data)*dataSize, dataOut, 0, NULL, NULL );
    As dataSize gets bigger (up into 3000), this code fails and I cannot get into NVCuda.dll to figure out why. Does anyone have any suggestions as to how to fix this code or as to how i can go about doing algorithms on up to a million data sets?

  2. #2
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: How to properly create Read and Write Buffers:

    As dataSize gets bigger (up into 3000), this code fails
    How does it fail? Do you get an error code? Wrong results? A crash? What does the kernel source code look like?

    Code :
    err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(data)*dataSize, data, 0, NULL, NULL);

    This line looks odd. Are you sure you meant to write "sizeof(data)"? Could you show us the declaration of variables "data" and "dataSize"?

    Also, can you show us how you create the buffers "input" and "output", and any calls to clSetKernelArg()?
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  3. #3

    Re: How to properly create Read and Write Buffers:

    The error that I get is Access Violation reading location blahblahblah and when I trace the callstack it sends me to nvcuda.dll.

    I am thinking that I have gone wrong somewhere in my buffers, but maybe it is somewhere else.

    Here is my code: Full source

    Code :
    #include <fcntl.h>
    #include <stdio.h>
    #include <iostream>
    #include <stdlib.h>
    #include <string.h>
    #include <math.h>
    #include <limits.h>
    #include <time.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <CL/opencl.h>
     
     
    const char *kernel_source =
    "__kernel void simple(                                                   \n"
    "   global read_only int* input,                                         \n"
    "   global write_only int* output)									     \n"
    "{                                                                       \n"
    "   int index = get_global_id(0);                                        \n"
    "   output[index] = index;						         			     \n"
    "}                                                                       \n";
     
     
    // Storage for the arrays.
    static cl_mem input;
    static cl_mem output;
    // OpenCL state
    static cl_command_queue queue;
    static cl_kernel kernel;
    static cl_device_id device_ids;
    static cl_context context;
     
    static cl_platform_id platform_id;
    static const size_t dataSize = 3500;
     
    int main(int argc, char** argv)
    {
     
    	int* data = new int[dataSize];
    	int* dataOut = new int[dataSize];
     
    	for(int i = dataSize-1; i >= 0; --i)
    	{
    		data[i] = 0;
    		dataOut[i] = 0;
    	}
     
    	cl_uint numPlatforms;
    	cl_int err = CL_SUCCESS;
    	size_t device_list_size;
     
    	//set up the platforms
    	err = clGetPlatformIDs(0, NULL, &numPlatforms);
    	if(numPlatforms > 0)
    	{
    		//we have at least one
    		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
    		err = clGetPlatformIDs(numPlatforms, platforms, NULL);
    		platform_id = platforms[0];
    		delete[] platforms;
    	}
     
    	cl_context_properties cps[3] = 
    	{
    		CL_CONTEXT_PLATFORM, 
    		(cl_context_properties)platform_id, 
    		0
        };
     
    	context = clCreateContextFromType(
                      cps,
                      CL_DEVICE_TYPE_ALL,
                      NULL,
                      NULL,
                      &err);
     
    	data[4] = 4;
    	dataOut[4] = 5;
    	err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_ids, NULL);
     
    	queue = clCreateCommandQueue(context, device_ids, 0, &err);
     
    	//now we set up our buffers
    	input = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,  sizeof(data)*dataSize, &data, &err);
    	output = clCreateBuffer(context,  CL_MEM_WRITE_ONLY,  sizeof(data)*dataSize, NULL, &err);
     
    	cl_program program = clCreateProgramWithSource(context, 1, &kernel_source , NULL, &err);
    	//OPTIMIZATION OPTIONS FOUND AT [url]http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clBuildProgram.html[/url]
    	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); // build and compile the OpenCL
     
    	cl_kernel kernel = clCreateKernel(program, "simple", &err);
     
    	clReleaseProgram(program); // no longer needed
     
    	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
     
    	err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(data)*dataSize, data, 0, NULL, NULL);
     
    	err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL);
     
     
    	err = clEnqueueCopyBuffer(queue, output, input, 0, 0, sizeof(data)*dataSize, 0, NULL, NULL);
     
    	err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(data)*dataSize, dataOut, 0, NULL, NULL );
     
    	for(int i = dataSize - 1; i >= 0; --i)
    	{
    		std::cout << dataOut[i] << std::endl;
    	}
     
    	clReleaseMemObject(input);
        clReleaseMemObject(output);
        clReleaseKernel(kernel);
        clReleaseCommandQueue(queue);
        clReleaseContext(context);
     
        return 0;
    }

  4. #4
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: How to properly create Read and Write Buffers:

    There are some errors in the code:

    Code :
    const char *kernel_source =
    "__kernel void simple(                                                   \n"
    "   global read_only int* input,                                         \n"
    "   global write_only int* output)                                \n"
    "{                                                                       \n"
    "   int index = get_global_id(0);                                        \n"
    "   output[index] = index;                                         \n"
    "}                                                                       \n";

    read_only and write_only are qualifiers for images, not for pointers. This kernel should not compile successfully.

    Code :
    input = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,  sizeof(data)*dataSize, &data, &err);

    "sizeof(data)" means "sizeof(int*)". That's not what you want. What you want is "sizeof(cl_int)" since the type of the argument "input" in kernel "simple()" is "int". Notice that in the host side you should always use data types with the prefix "cl_", such as "cl_int" to ensure that they have the same bit representation as the kernel data types.

    Also, you are passing "&data" as the host pointer, when you actually want to pass "data". This is probably the reason your program is crashing.

    Finally, notice that since you have create this buffer with the flag CL_MEM_USE_HOST_PTR, the implementation will attempt to reuse the memory allocated in "data" instead of allocating new memory. What that means is that since the kernel will take the buffer "input" as an array of CL int values, the variable "data" should be declared as having type "cl_int*" instead. That ensures that the size of the buffer will match on the host and the device.

    Code :
    output = clCreateBuffer(context,  CL_MEM_WRITE_ONLY,  sizeof(data)*dataSize, NULL, &err);

    Same as before: it should be "sizeof(cl_int)".

    Code :
    err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, sizeof(data)*dataSize, data, 0, NULL, NULL);

    Please replace "sizeof(data)" with "sizeof(cl_int)".

    More importantly, this code is doing something very strange: it's copying the contents of "data" into the buffer "input". Why is this strange? Because "data" and "input" reference exactly the same memory. That's what the CL_MEM_USE_HOST_PTR flag means.

    Code :
    err = clEnqueueCopyBuffer(queue, output, input, 0, 0, sizeof(data)*dataSize, 0, NULL, NULL);

    I don't understand the purpose of this. The kernel that was just executed wrote some values into "output" and now you are overwriting them with the values that were stored in "input" (i.e. zeroes). Is this what you intended?

    Replace sizeof(data) with sizeof(cl_int).

    Code :
    err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(data)*dataSize, dataOut, 0, NULL, NULL );

    Same drill: replace sizeof.
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  5. #5

    Re: How to properly create Read and Write Buffers:

    OK so I fixed the code samples that you gave me. Thank you for your help btw. But there are still problems.
    I changed the kernel to use const instead of read_only and nothing for the write_only qualifier.
    I changed all of my sizeof(data) to be sizeof(cl_int) and got rid of the *datasize;

    I got rid of the copybuffer and the writebuffer lines, and kept the CL_MEM_USE_HOST_PTR. Now I am getting an error with the program? (-11, CL_BUILD_PROGRAM_FAILURE)

    Code :
    #include <fcntl.h>
    #include <stdio.h>
    #include <iostream>
    #include <stdlib.h>
    #include <string.h>
    #include <math.h>
    #include <limits.h>
    #include <time.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <CL/opencl.h>
     
     
    const char *kernel_source =
    "__kernel void simple(                                                   \n"
    "   global int* input,                                                   \n"
    "   global int* output)													 \n"
    "{                                                                       \n"
    "   int index = get_global_id(0);                                        \n"
    "   output[index] = index;						         			     \n"
    "}                                                                       \n";
     
     
    // Storage for the arrays.
    static cl_mem input;
    static cl_mem output;
    // OpenCL state
    static cl_command_queue queue;
    static cl_kernel kernel;
    static cl_device_id device_ids;
    static cl_context context;
     
    static cl_platform_id platform_id;
    static const size_t dataSize = 100;
     
    int main(int argc, char** argv)
    {
     
    	cl_int* data = new cl_int[dataSize];
    	cl_int* dataOut = new cl_int[dataSize];
     
    	for(int i = dataSize-1; i >= 0; --i)
    	{
    		data[i] = 0;
    		dataOut[i] = 0;
    	}
     
    	cl_uint numPlatforms;
    	cl_int err = CL_SUCCESS;
    	size_t device_list_size;
     
    	//set up the platforms
    	err = clGetPlatformIDs(0, NULL, &numPlatforms);
    	if(numPlatforms > 0)
    	{
    		//we have at least one
    		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
    		err = clGetPlatformIDs(numPlatforms, platforms, NULL);
    		platform_id = platforms[0];
    		delete[] platforms;
    	}
     
    	cl_context_properties cps[3] = 
    	{
    		CL_CONTEXT_PLATFORM, 
    		(cl_context_properties)platform_id, 
    		0
        };
     
    	context = clCreateContextFromType(
                      cps,
                      CL_DEVICE_TYPE_ALL,
                      NULL,
                      NULL,
                      &err);
     
    	data[4] = 4;
    	dataOut[4] = 5;
    	err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_ids, NULL);
     
    	queue = clCreateCommandQueue(context, device_ids, 0, &err);
     
    	//now we set up our buffers
    	input = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,  sizeof(cl_int), data, &err);
    	output = clCreateBuffer(context,  CL_MEM_WRITE_ONLY,  sizeof(cl_int), NULL, &err);
     
    	cl_program program = clCreateProgramWithSource(context, 1, &kernel_source , NULL, &err);
    	//OPTIMIZATION OPTIONS FOUND AT [url]http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clBuildProgram.html[/url]
    	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); // build and compile the OpenCL
     
    	cl_kernel kernel = clCreateKernel(program, "simple", &err);
     
    	clReleaseProgram(program); // no longer needed
     
    	err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
     
    	err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL,  NULL);
     
    	err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(cl_int), dataOut, 0, NULL,  NULL );
     
    	for(int i = 0; i < 40; i++)
    	{
    		std::cout << dataOut[i] << std::endl;
    	}
     
    	clReleaseMemObject(input);
        clReleaseMemObject(output);
        clReleaseKernel(kernel);
        clReleaseCommandQueue(queue);
        clReleaseContext(context);
     
        return 0;
    }

  6. #6
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: How to properly create Read and Write Buffers:

    and got rid of the *datasize
    I certainly did not intend to suggest that. It is necessary.
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  7. #7
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: How to properly create Read and Write Buffers:

    Now I am getting an error with the program? (-11, CL_BUILD_PROGRAM_FAILURE)
    You can query the kernel compilation errors with clGetProgramBuildInfo(..., CL_PROGRAM_BUILD_LOG, ...).
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  8. #8

    Re: How to properly create Read and Write Buffers:

    Thank you for your help. I went back through the code and the changes that you provided, and apparently I changed too much last night. I recommitted and the changes work. Thank you so much for your help.

Similar Threads

  1. Unittesting Image Read an write
    By clint3112 in forum OpenCL
    Replies: 1
    Last Post: 02-09-2013, 08:21 AM
  2. read/write support for image2d and image3d
    By nathanpackard in forum Suggestions for next release
    Replies: 0
    Last Post: 03-23-2011, 01:32 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •