Hello,

I am relatively new in OpenCL programming and frustrated by this problem I am having.
I know the code is not OpenCL efficient, but I thought it should at least work (i.e. right results).

I thought any element of a global array can be read and written by any work-item.
But the results are terribly wrong. Not in the sense that they are off by little, but total garbage is found (like 0, -1074790400, etc).

Below,
model = double[225*6];
modelBeginIndex = int[225];
xWindowSizeArray = int[225];
yWindowSizeArray = int[225];
I give global work size of 225 and local work size of null in EnqueueNDRange.

Code :
       #pragma OPENCL EXTENSION cl_khr_fp64: require
 
                __kernel void getModel(__global double* model,
                                       __global int* modelBeginIndex,
                                       __global int* xWindowSizeArray,
                                       __global int* yWindowSizeArray,
                                       int xWindowSize,
                                       int yWindowSize)
                {
                    int gid = get_global_id(0);
 
                    int xWindowSizeMax = xWindowSize;
                    int yWindowSizeMax = yWindowSize;
                    int xWindowSizeMin = 3;
                    int yWindowSizeMin = 3;
                    int numberOfXWindows = (xWindowSizeMax - xWindowSizeMin) / 2 + 1;
                    int numberOfYWindows = (yWindowSizeMax - yWindowSizeMin) / 2 + 1;
                    int numberOfElements = 6;
 
                    if (gid == 0)
                    {
                        int previousXYWindowSize = 0;
                        for (int i = xWindowSizeMax; i >= xWindowSizeMin; i = i - 2)
                        {
                            for (int j = yWindowSizeMax; j >= yWindowSizeMin; j = j - 2)
                            {
                                int index = (xWindowSizeMax - i) / 2 * numberOfYWindows + (yWindowSizeMax - j) / 2;
                                modelBeginIndex[index] = index == 0 ? 0 : modelBeginIndex[index - 1] + previousXYWindowSize * numberOfElements;
                                xWindowSizeArray[index] = i;
                                yWindowSizeArray[index] = j;
                                previousXYWindowSize = i * j;
                            }
                        }
                    }
 
                    int nX = xWindowSizeArray[gid];
                    int nY = yWindowSizeArray[gid];
 
                    int beginIndex[6];
                    for (int i = 0; i < numberOfElements; i++)
                    {
                        beginIndex[i] = i == 0 ? modelBeginIndex[gid] : beginIndex[i - 1] + nX * nY;
                    }
 
                    int indices[6];
                    int ii,jj;
                    for (int i = 0; i < nX; i++)
                    {
                        ii = i - nX / 2;
                        for (int j = 0; j < nY; j++)
                        {
                            for (int k = 0; k < numberOfElements; k++)
                            {
                                indices[k] = beginIndex[k] + (j + i * nY);
                            }
                            jj = j - nY / 2;
                            model[indices[0]] = 1.0;
                            model[indices[1]] = (double)ii;
                            model[indices[2]] = (double)jj;
                            model[indices[3]] = (double)(ii * ii);
                            model[indices[4]] = (double)(jj * jj);
                            model[indices[5]] = (double)(ii * jj);
                        }
                    }
                }

I would really appreciate it if anybody can point me in the right direction. I hope I am not totally misunderstanding OpenCL memory concept.