I'm trying to write image processing OpenCL application, but my problem that any attempt to alter input image produces artifacts which look like vertical bars. This does not happen if I copy image pixels without altering them. So for example this line produces artifacts:
Code :
pixel = (uint4)(image1_pixel.x,
                image1_pixel.y,
                image1_pixel.z,
                255);
...but this one works as expected:
Code :
pixel = (uint4)(image1_pixel.x,
                image1_pixel.y,
                image1_pixel.z,
                image1_pixel.w);
Input is opaque 32-bit PNG image, so I expect both code lines to produce the same result (exact copy of the input in this case). In reality, however, only second line works as expected. First line gives output with artifacts.

Here is my kernel:
Code :
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                               CLK_ADDRESS_CLAMP |
                               CLK_FILTER_NEAREST;
 
__kernel void test(__read_only image2d_t image1,
                  __write_only image2d_t out) {
  const int2 pos = (int2)(get_global_id(0), get_global_id(1) );
  uint4 image1_pixel = read_imageui(image1, sampler, pos);
  uint4 pixel = (uint4)(image1_pixel.x,
                        image1_pixel.y,
                        image1_pixel.z,
                        255);
  write_imageui(out, pos, pixel);
}
Here is relevant portion of main.cpp code:
Code :
  CImg<unsigned char> image1("../input.png");
  ...
  Image2D clImage1 = Image2D(context,
    CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
    ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
    image1.width(), image1.height(), 0, image1.data() );
  Image2D clResult = Image2D(context, CL_MEM_WRITE_ONLY,
    ImageFormat(CL_RGBA, CL_UNSIGNED_INT8),
    image1.width(), image1.height(), 0, NULL);
  Kernel test = Kernel(program, "test");
  test.setArg(0, clImage1); test.setArg(1, clResult);
  Event kernel_event, read_event;
  queue.enqueueNDRangeKernel(test, NullRange,
    NDRange(image1.width(), image1.height() ),
    NullRange, NULL, &kernel_event);
  cl::size_t<3> origin;
  origin.push_back(0); origin.push_back(0); origin.push_back(0);
  cl::size_t<3> region;
  region.push_back(image1.width() );
  region.push_back(image1.height() ); region.push_back(1);
  queue.enqueueReadImage(clResult, CL_TRUE,
                         origin, region, 0, 0,
                         image1.data(), NULL, NULL);
  kernel_event.wait();
  image1.save("../output.png");
Here can be downloaded full source code for my test application (it contains short main.cpp under 30 lines, CMakeLists.txt, readme.txt explaining how to compile and run it, input image and the kernel). I use CImg library to load and save images. I double-checked that input opens as 32-bit RGBA image. I tried to run the kernel with AMD or NVidia SDK and got the same result.

Any idea why I get unexpected result?