Results 1 to 2 of 2

Thread: OpenCL only reads/writes from/to 1/4 of the buffer memory

  1. #1
    Junior Member
    Join Date
    Oct 2011
    Posts
    2

    OpenCL only reads/writes from/to 1/4 of the buffer memory

    I have a problem with OpenCL: it executes the entire command queue, but it reads only 1/4 of the input and writes only 1/4 of the result.
    No matter how many iterations, always 1/4.

    It also sometimes crashes at random. Debugging gives me no information about where it crashes, since there are no debug symbols (0x4c4783f6 in ????, etc.).

    Source code:
    Code :
        #include <cassert>
        #include <cstring>
        #include <iostream>
        #include <vector>
        #include <cl/cl.h>
     
        // OpenCL C source for the `add` kernel, passed to clCreateProgramWithSource.
        // The source enables the cl_intel_printf extension so the kernel can call printf.
        // Each work-item currently stores its own global id in out[tid] (the
        // a[tid]+b[tid] sum is commented out) and prints out[tid], a[tid] and b[tid].
        // The whole program is one string literal spliced with backslash-newlines.
        const char *progsrc[] = {
        "#pragma OPENCL EXTENSION cl_intel_printf : enable\n\
        __kernel void add(__global const int *a, __global const int *b, __global int *out) \
        { \
            int tid = get_global_id(0);\
            out[tid] = tid/*a[tid]+b[tid]*/;\
            printf(\"krnl: %d = %d + %d \\n\", out[tid], a[tid], b[tid]);\
        }"};
     
        // Number of ints in each host array; also used as the global work size
        // (one work-item per element) of the kernel launch.
        const int iterations = 20;
     
        // Checks an OpenCL status code: if it is not CL_SUCCESS, logs the code and
        // the current source line to std::cerr and makes the ENCLOSING function
        // return -1 (so it is only usable inside functions returning int).
        // The argument is evaluated exactly once and is parenthesized, so passing a
        // call or a compound expression (not just a plain variable) is safe.
        #define CLCheck(a) \
        do\
        {\
            const cl_int clcheck_status_ = (a);\
            if(clcheck_status_ != CL_SUCCESS)\
            {\
                std::cerr << "OpenCL Error(" << clcheck_status_ << ") at " << __LINE__ << std::endl;\
                return -1;\
            }\
        } while(0)
     
        int main()
        {
            cl_int err = CL_SUCCESS;
     
            int *aH = NULL;
            int *bH = NULL;
            int *outH = NULL;
     
          	cl_uint platnum, devnum;
          	cl_device_id dev;
          	cl_platform_id plat;
          	err = clGetPlatformIDs(0, 0, &platnum);
          	CLCheck(err);
          	cl_platform_id pfids[platnum];
            err = clGetPlatformIDs(platnum, pfids, &platnum);
            CLCheck(err);
     
            if(!platnum)
            {
                std::cerr << "No platform found." << std::endl;
                return -1;
            }
            else
                std::cout << platnum << " OpenCL platform(s) found.\n" << std::endl;
     
            for(unsigned int i = 0; i != platnum; i++)
            {
                char buf[4096];
     
                err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, 0, 0, &devnum);
                CLCheck(err);
                cl_device_id devids[devnum];
                err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, devnum, devids, &devnum);
                CLCheck(err);
                if(!devnum)
                {
                    std::cerr << "No device found." << std::endl;
                    return -1;
                }
                else
                    std::cout << " " << devnum << " OpenCL device(s) found.\n" << std::endl;
     
                for(unsigned int i2 = 0; i2 != devnum; i2++)
                {
                    char buf[1024];
                    std::cout << ": \n\tName: " << buf;
                    err = clGetDeviceInfo(devids[i2], CL_DEVICE_VENDOR, 1024, buf, NULL);
                    CLCheck(err);
                    if(!strncmp(buf, "Intel", 5))
                    {
                        dev = devids[0];
                        plat = pfids[i];
                        std::cout << "\n\tFound Intel(R) OpenCL device.";
                    }
                }
            }
            cl_context_properties ctxprop[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)plat, 0};
            cl_context ctx = clCreateContext(ctxprop, 1, &dev, NULL, NULL, &err);
            CLCheck(err);
     
            cl_program program = clCreateProgramWithSource(ctx, 1, progsrc, NULL, &err);
            CLCheck(err);
            err = clBuildProgram(program, 1, &dev, "", NULL, NULL);
            if(err != CL_SUCCESS)
            {
                size_t bufsz;
        		err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &bufsz);
        		char buf[bufsz];
        		err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, bufsz, buf, &bufsz);
        		std::cerr << "OpenCL program building failed: " << buf << std::endl;
        		return -1;
        	}
        	err = clUnloadCompiler();
        	CLCheck(err);
     
            aH = new int[iterations];
            bH = new int[iterations];
            outH = new int[iterations];
            memset(outH, 0, iterations*sizeof(int));
            for(int i = 0; i != iterations; i++)
            {
                aH[i] = i;
                bH[i] = i*2;
            }
     
            cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
            cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
            CLCheck(err);
            cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err);
            CLCheck(err);
     
            cl_kernel krnl = clCreateKernel(program, "add", &err);
            CLCheck(err);
     
            err = clSetKernelArg(krnl, 0, sizeof(aCL), &aCL);
            CLCheck(err);
            err = clSetKernelArg(krnl, 1, sizeof(bCL), &bCL);
            CLCheck(err);
            err = clSetKernelArg(krnl, 2, sizeof(outCL), &outCL);
            CLCheck(err);
     
            cl_command_queue cmdqueue = clCreateCommandQueue(ctx, dev, 0, &err);
            cl_event evt;
            size_t global_work_size[1] = { iterations };
            err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL);
            err = clEnqueueWriteBuffer(cmdqueue, bCL, CL_TRUE, 0, iterations, bH, 0, NULL, NULL);
            err = clEnqueueNDRangeKernel(cmdqueue, krnl, 1, NULL, global_work_size, NULL, 0, NULL, &evt);
            err = clWaitForEvents(1, &evt);
            err = clEnqueueReadBuffer(cmdqueue, outCL, CL_TRUE, 0, iterations, outH, 0, NULL, &evt);
     
            for(int i = 0; i != iterations; i++)
            {
                std::cout << outH[i] << std::endl;
            }
     
            err = clReleaseEvent(evt);
            err = clReleaseCommandQueue(cmdqueue);
            err = clReleaseKernel(krnl);
            err = clReleaseMemObject(outCL);
            err = clReleaseMemObject(bCL);
            err = clReleaseMemObject(aCL);
            err = clReleaseProgram(program);
            err = clReleaseContext(ctx);
     
            if(aH)
                delete aH;
            if(bH)
                delete bH;
            if(outH)
                delete outH;
            return 0;
        }


    output:
    Code :
        2 OpenCL platform(s) found.
     
        Platform 0 :
                Name: NVIDIA CUDA
                Vendor: NVIDIA Corporation
                Profile: FULL_PROFILE
                Version: OpenCL 1.1 CUDA 4.0.1
                Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c
        l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c
        l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll
     
         1 OpenCL device(s) found.
     
          Device 0:
                Name: GeForce GT 425M
                Vendor: NVIDIA Corporation
                Profile: FULL_PROFILE
                Driver version: 280.26
                OpenCL version: OpenCL C 1.1
                Version: OpenCL 1.1 CUDA
                Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing c
        l_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_sharing c
        l_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll  cl_khr_g
        lobal_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32
        _base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64
     
        Platform 1 :
                Name: Intel(R) OpenCL
                Vendor: Intel(R) Corporation
                Profile: FULL_PROFILE
                Version: OpenCL 1.1
                Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i
        nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende
        d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl
        _intel_immediate_execution cl_khr_gl_sharing cl_khr_icd
     
         1 OpenCL device(s) found.
     
          Device 0:
                Name: Intel(R) Core(TM) i3 CPU       M 370  @ 2.40GHz
                Found Intel(R) OpenCL device.
                Vendor: Intel(R) Corporation
                Profile: FULL_PROFILE
                Driver version: 1.1
                OpenCL version: OpenCL C 1.1
                Version: OpenCL 1.1 (Build 15293.6650)
                Extensions: cl_khr_fp64 cl_khr_global_int32_base_atomics cl_khr_global_i
        nt32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extende
        d_atomics cl_khr_byte_addressable_store cl_intel_printf cl_ext_device_fission cl
        _intel_immediate_execution cl_khr_gl_sharing
     
        krnl: 0 = 0 + 0
        krnl: 1 = 1 + 2
        krnl: 2 = 2 + 4
        krnl: 3 = 3 + 6
        krnl: 4 = 4 + 8
        krnl: 5 = 0 + 0
        krnl: 6 = 0 + 0
        krnl: 7 = 0 + 0
        krnl: 16 = 0 + 492859489
        krnl: 17 = 0 + -1042621749
        krnl: 18 = 0 + 1310105771
        krnl: 19 = 0 + 134230852
        krnl: 8 = 0 + 0
        krnl: 9 = 0 + 0
        krnl: 10 = 0 + -1094462526
        krnl: 11 = 0 + -1094462526
        krnl: 12 = 0 + -1230120245
        krnl: 13 = 0 + 500723958
        krnl: 14 = 0 + 530164160
        krnl: 15 = 0 + 492859489
        0
        1
        2
        3
        4
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0

    Thanks

  2. #2
    Junior Member
    Join Date
    Oct 2011
    Posts
    2

    Re: OpenCL only reads/writes from/to 1/4 of the buffer memory

    Oh, it seems that I forgot to use iterations*sizeof(int) instead of just iterations when calling the buffer functions. That's solved now, thanks to mystical at http://stackoverflow.com/questions/7627 ... -sometimes

Similar Threads

  1. opencl buffer from premade opengl buffer
    By MohamedSakr in forum OpenCL - parallel programming of heterogeneous systems
    Replies: 0
    Last Post: 11-21-2012, 03:42 PM
  2. memory buffer question
    By t.verstraete in forum OpenCL - parallel programming of heterogeneous systems
    Replies: 7
    Last Post: 03-21-2012, 06:43 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •