Results 1 to 2 of 2

Thread: Passing data from one kernel to another

  1. #1
    Newbie
    Join Date
    Aug 2014
    Posts
    1

    Passing data from one kernel to another

    Hey, I am fairly new to OpenCL and I am currently writing a program using the C++ bindings and "OpenCLUtilities/openCLUtilities.hpp". I know that data stays on the device unless the enqueueReadBuffer method is called, but how do I refer to that data from a second kernel? I'd like to perform some computation in one kernel (grad) and use its output (ang, mag) in a second kernel without copying the buffers back to the host and then to the device again. If you could tell me where and what I am doing wrong in my code, or provide a relevant example, I would be very grateful.


    Code :
    Context context = createCLContextFromArguments(argc, argv);
        Program program = buildProgramFromSource(context, "/Users/Mateusz/Desktop/grad.cl");
        std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
        CommandQueue queue = CommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE);
     
        cl::Device dev0 = devices[1];
        std::string name;
        dev0.getInfo(CL_DEVICE_NAME, &name);
        std::cout << "Used device: " << name << std::endl;
     
        Image2D clImage1 = Image2D(context,
                                   CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
                                   image1.columns(), image1.rows(), 0, image1_pixels);
     
     
        // Create a buffer for the result
        Buffer clResultAng = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
        Buffer clResultMag = Buffer(context, CL_MEM_READ_WRITE, sizeof(double)*image1.rows()*image1.columns());
        Buffer clResultOut = Buffer(context, CL_MEM_READ_WRITE, sizeof(float)*image1.rows()*image1.columns());
     
     
        Kernel gradients = Kernel(program, "grad");
        gradients.setArg(0, clImage1);
        gradients.setArg(1, clResultAng);
        gradients.setArg(2, clResultMag);
     
     
        Event kernel_event, read_event;
        queue.enqueueNDRangeKernel(gradients, NullRange,
                                   NDRange(image1.columns(), image1.rows() ),
                                   NullRange, NULL, &kernel_event);
     
     
        //    Program program1 = buildProgramFromSource(context, "/Users/Mateusz/Desktop/histograms.cl");
            Kernel histograms = Kernel(program, "histograms");
            histograms.setArg(0, clResultAng);
            histograms.setArg(1, clResultMag);
            histograms.setArg(2, clResultOut);
     
     
        //Synchronize?
        kernel_event.wait();
     
        // Transfer image back to host
        //queue.enqueueReadBuffer(clResultMag, CL_TRUE, 0, sizeof(double)*image1.columns()*image1.rows(), mag);
        queue.enqueueReadBuffer(clResultOut, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), test);
        queue.enqueueReadBuffer(clResultAng, CL_TRUE, 0, sizeof(float)*image1.columns()*image1.rows(), ang);
     
     
        //          DEBUGGING OUTPUT
        // std::cout<<std::endl;
     
         for (int i=0; i<image1_size/4; i++) {
         if(ang[i]>0)
         std::cout << "Test: " << test[i] << " vs. Angle: " << ang[i] << std::endl;
         }


    Code :
    // NOTE(review): the kernels in this file use `double`. Devices that only expose
    // doubles through the cl_khr_fp64 extension need the pragma below enabled —
    // confirm against the target device before relying on the double buffers.
    //#pragma OPENCL EXTENSION cl_khr_fp64 : enable
     
    // Sampler shared by the kernels in this file: integer (non-normalised) pixel
    // coordinates, out-of-range coordinates clamped to the edge of the image, and
    // nearest-pixel lookup without interpolation.
    __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
     
    /*
     * Per-pixel image gradient.
     *
     * One work-item per pixel of a 2D range. For the pixel at the work-item's
     * global id the kernel computes horizontal and vertical differences with a
     * [1, 0, -1] filter for each colour channel, keeps the channel whose gradient
     * has the largest norm (Dalal and Triggs: "For colour images, we calculate
     * separate gradients for each colour channel, and take the one with the
     * largest norm as the pixel's gradient vector."), and writes:
     *
     *   ang[i] - gradient direction in degrees, range [-180, 180]
     *   mag[i] - gradient magnitude
     *
     * where i = x + y * get_global_size(0).
     *
     * input_image: read through the file-scope `sampler`; the alpha channel is ignored.
     */
    __kernel void grad(
                       __read_only image2d_t input_image,
                       __global float * ang,
                       __global double * mag
                       ) {

        const int2 pos = {get_global_id(0), get_global_id(1)};
        const size_t idx = pos.x + pos.y * get_global_size(0);

        // The host creates this image with an unsigned 8-bit integer channel type.
        // read_imagef returns undefined values for that channel type, so the
        // pixels are fetched with read_imageui and converted to float afterwards.
        const float4 left  = convert_float4(read_imageui(input_image, sampler, (int2)(pos.x - 1, pos.y)));
        const float4 right = convert_float4(read_imageui(input_image, sampler, (int2)(pos.x + 1, pos.y)));
        const float4 above = convert_float4(read_imageui(input_image, sampler, (int2)(pos.x, pos.y - 1)));
        const float4 below = convert_float4(read_imageui(input_image, sampler, (int2)(pos.x, pos.y + 1)));

        // [1, 0, -1] filter in x, and its transpose in y (the centre tap is zero).
        const float4 dx = left - right;
        const float4 dy = above - below;

        // Squared gradient norm of every channel. Both components are taken from
        // the SAME winning channel; comparing the signed dx and dy values
        // independently could mix channels and favour positive over negative slopes.
        const float4 norm2 = dx * dx + dy * dy;

        float x, y;
        if (norm2.x > norm2.y && norm2.x > norm2.z) {
            x = dx.x; y = dy.x;
        } else if (norm2.y > norm2.z) {
            x = dx.y; y = dy.y;
        } else {
            x = dx.z; y = dy.z;
        }

        const double xx = (double)x * x;
        const double yy = (double)y * y;

        // atan2pi already divides by pi, so its result lies in [-1, 1];
        // scaling by 180 (not 360) converts it to degrees.
        ang[idx] = atan2pi(y, x) * 180.0f;
        mag[idx] = sqrt(xx + yy);

    }
     
    /*
     * Placeholder second stage: each work-item of a 2D range copies one element
     * of `ang` into `out` at the same linear index (x + y * get_global_size(0)).
     * `mag` is accepted but not read.
     */
    __kernel void histograms(
                             __global float * ang,
                             __global double * mag,
                             __global float * out
                             ) {
        const int col = get_global_id(0);
        const int row = get_global_id(1);

        // Row-major offset of this work-item within the 2D range.
        const size_t offset = col + row * get_global_size(0);

        out[offset] = ang[offset];

    }

  2. #2
    Junior Member
    Join Date
    Oct 2011
    Posts
    26
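    You can use the same buffer as an argument in both kernels. There is no need to transfer the data to the host unless you need to do host-side computations on it. Just make sure the first kernel has finished before the second one starts (an in-order command queue, or an event wait list on the second enqueue, takes care of this), and remember to actually enqueue the second kernel.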

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •