Hello guys, I'm new to OpenCL and I'm trying to write a program that simply copies one image2d buffer to another. The problem is that the values in the returned image are totally different from those in the input image.

This is the kernel:
Code :
// Copies a single pixel per work-item: the work-item's global id in
// dimensions 0 and 1 is used as the (x, y) pixel position in both images.
__kernel void image_test(__read_only image2d_t image, __write_only image2d_t output)
{
  // Sampler flags: unnormalized coordinates, clamp addressing, nearest filtering.
  const sampler_t nearest_sampler = CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP | CLK_NORMALIZED_COORDS_FALSE;

  const int column = get_global_id(0);
  const int row = get_global_id(1);
  const int2 position = (int2) (column, row);

  // Read the source pixel and store it unchanged at the same position.
  write_imagef(output, position, read_imagef(image, nearest_sampler, position));
}

And this is the complete OpenCL host program. I'm using the OpenCL C++ wrapper (cl.hpp), by the way:

Code :
#define __CL_ENABLE_EXCEPTIONS
 
#include <highgui.h>
 
#include "cl.hpp"
 
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <string>
#include <vector>
 
#include <sys/stat.h>
 
const char* loadKernel(const char* filename);
 
int main()
{
  int width = 4;
  int height = 2;
  int image_size = width * height * 4;
  float* input = (float*) calloc(image_size, sizeof(float));
  float* output = (float*) calloc(image_size, sizeof(float));
 
  srand (time(NULL));
  for (int a = 0; a < image_size; a++)
    input[a] = (float) (rand() % 100);
 
  for (int a = 0; a < image_size; a++)
    output[a] = 255.0;
 
  // Initialize OpenCL
  cl::Context* context;
  std::vector<cl::Device> devices;
  cl::CommandQueue queue;
  cl::Kernel* kernel;
 
  // The origin is the same as the indices for the first value
  // The z component must be 0 for 2D images
  cl::size_t<3> origin;
  origin.push_back(0);
  origin.push_back(0);
  origin.push_back(0);
 
  // The region is the same as the dimensions of the image
  // The z component must be 1 for 2D images
  cl::size_t<3> region;
  region.push_back(width);
  region.push_back(height);
  region.push_back(1);
 
  // The row pitch is the number of bytes in each row
  size_t row_pitch = width * 4 * sizeof(float);
 
  try
  {
    // Get available platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.size() == 0)
      printf("Err: platform size 0!\n");
 
    // Select the default platform and create
    // a context using this platform and the GPU
    cl_context_properties cps[] = {
      CL_CONTEXT_PLATFORM,
      (cl_context_properties) (platforms[0]) (),
      0
    };
    context = new cl::Context(CL_DEVICE_TYPE_GPU, cps);
 
    // Get a list of devices on this platform
    devices = context->getInfo<CL_CONTEXT_DEVICES>();
 
    // Create a command queue and use the first device
    queue = cl::CommandQueue(*context, devices[0]);
  }
  catch (cl::Error error)
  {
    printf("%s (%d)\n", error.what(), error.err());
  }
 
  // Load and build Program and make kernel
  try
  {
    // Read kernel file
    const char* filename = "kernel.cl";
    std::string source_code(loadKernel(filename));
    cl::Program::Sources source(1, std::make_pair(source_code.data(), source_code.length() + 1));
 
    // Make program of the source code in the context
    cl::Program program(*context, source);
 
    // Build program for these specific devices;
    program.build(devices);
 
    // Make kernel
    kernel = new cl::Kernel(program, "image_test");
  }
  catch (cl::Error error)
  {
    printf("%s (%d)\n", error.what(), error.err());
  }
 
  // Memory buffers
  cl::Image2D* image_a;
  cl::Image2D* image_b;
 
  try
  {
    // Create memory buffers
    image_a = new cl::Image2D(*context,
                              CL_MEM_READ_ONLY,
                              cl::ImageFormat(CL_RGBA, CL_FLOAT),
                              width, height, 0);
    image_b = new cl::Image2D(*context,
                              CL_MEM_WRITE_ONLY,
                              cl::ImageFormat(CL_RGBA, CL_FLOAT),
                              width, height, 0);
 
    // Copy lists a and b to the memory buffers
    queue.enqueueWriteImage(*image_a,
                            CL_TRUE,
                            origin, region,
                            row_pitch, 0,
                            (void*) input);
  }
  catch (cl::Error error)
  {
    printf("%s (%d)\n", error.what(), error.err());
  }
 
  // Run kernel
  try
  {
    // Set arguments to kernel
    kernel->setArg(0, *image_a);
    kernel->setArg(1, *image_b);
 
    // Execute the program
    queue.enqueueNDRangeKernel(*kernel, cl::NullRange, cl::NDRange(width, height), cl::NullRange);
 
    queue.finish();
 
  }
  catch (cl::Error error)
  {
    printf("%s (%d)\n", error.what(), error.err());
  }
 
  // Get answer buffer back
  try
  {
    queue.enqueueReadImage(*image_b,
                           CL_TRUE,
                           origin, region,
                           row_pitch, 0,
                           (void*) output);
  }
  catch (cl::Error error)
  {
    printf("%s (%d)\n", error.what(), error.err());
  }
 
  for (int a = 0; a < image_size; a++)
  {
    if (a % (width * 4) == 0)
      printf("\n");
    printf("%d ", (int) input[a]);
  }
  printf("\n");
 
  for (int a = 0; a < image_size; a++)
  {
    if (a % (width * 4) == 0)
      printf("\n");
    printf("%d ", (int) output[a]);
  }
  printf("\n");
 
  return 0;
}
 
// Reads the file `filename` into a newly allocated, NUL-terminated buffer.
//
// Returns a calloc'ed buffer that the caller must release with free(), or 0
// if `filename` is null, the file cannot be opened or stat'ed, memory cannot
// be allocated, or a read error occurs. The file handle is closed on every
// path.
const char* loadKernel(const char* filename)
{
  if (!filename) return 0;

  FILE* file_handler = fopen(filename, "r");
  if (!file_handler) return 0;

  struct stat file_stat;
  if (stat(filename, &file_stat) != 0 || file_stat.st_size < 0)
  {
    fclose(file_handler);
    return 0;
  }

  const size_t capacity = static_cast<size_t>(file_stat.st_size);
  char* source = static_cast<char*>(calloc(capacity + 1, sizeof(char)));
  if (!source)
  {
    fclose(file_handler);
    return 0;
  }

  // Read byte-wise so the return value is the number of bytes actually
  // obtained; it can be smaller than st_size (e.g. text-mode translation).
  const size_t bytes_read = fread(source, 1, capacity, file_handler);
  const bool read_failed = ferror(file_handler) != 0;
  fclose(file_handler);

  if (read_failed)
  {
    free(source);
    return 0;
  }

  // Terminate right after the data that was really read.
  source[bytes_read] = '\0';

  return source;
}

The program simply creates two float arrays describing an image of width 4 and height 2. Every pixel carries RGBA information (4 floats), so each array holds 4 × 2 × 4 = 32 values. The program fills the input array with random values, then initializes OpenCL, loads the kernel and executes it. At the end I read back the returned array and print both arrays to compare them, but the values have changed. This is an example of the program's output:
Code :
~/opencl_test/src $ ./opencl 
 
4 66 93 64 45 35 59 1 66 77 20 85 32 91 95 45 
97 41 9 22 0 28 29 21 87 92 19 82 59 3 19 64 
 
45 35 59 1 32 91 95 45 0 0 2 0 0 0 -2147483648 0 
0 28 29 21 59 3 19 64 -2147483648 0 0 0 0 -576 0 0

As you guys can see the second array is different from the first one

What's going on?

PS: I would really love to see some examples of image2d_t usage; I can't find much information about it on the internet.

Thanks a lot!