Results 1 to 3 of 3

Thread: Computation values turn out to be incorrect

  1. #1
    Junior Member
    Join Date
    Mar 2010
    Posts
    2

    Computation values turn out to be incorrect

    Hi All,
    I am implementing a simple bounding box algorithm on opencl and I have problem with the computations.
    I have a set of vertices which I pass into the kernel and I compute the bounding box accordingly. I also pass in another array just to read back the vertices back
    from the kernel to find that they are all different. I am running on WinXP with Quadro FX 570M and 197.15 (Nvidia Driver).
    The code for the C part is this.
    Code :
      float bbox[8], results[8];
      memset(results, 0, 8 * sizeof(float));
     
      bbox[0] = (float)mesh->vertex[0].pt[0]; bbox[4] = (float)mesh->vertex[0].pt[0];
      bbox[1] = (float)mesh->vertex[0].pt[1]; bbox[5] = (float)mesh->vertex[0].pt[1];
      bbox[2] = (float)mesh->vertex[0].pt[2]; bbox[6] = (float)mesh->vertex[0].pt[2];
     
      // GPU Calculation
      float* vertices = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));
      float* fnew_verts = static_cast<float *>(malloc(4 * mesh->vertex_count * sizeof(float)));
     
      memset(vertices, 0, 4 * mesh->vertex_count * sizeof(float));
      memset(fnew_verts, 0, 4 * mesh->vertex_count * sizeof(float));
     
      float* vertex = vertices;
      for (int i = 0; i < mesh->vertex_count; i++)
      {
        *vertex++ = (float)mesh->vertex[i].pt[0];
        *vertex++ = (float)mesh->vertex[i].pt[1];
        *vertex++ = (float)mesh->vertex[i].pt[2];
        *vertex++;
      }
     
      ShaderManager* shm = ShaderManager::Instance();
     
      cl_int err;
      cl_mem vertices_mem, bbox_mem, new_verts;
      vertices_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);
      err = clEnqueueWriteBuffer(shm->opencl_command_queue(), vertices_mem, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float), 
                                 (void *)vertices, 0, NULL, NULL);
     
      bbox_mem = clCreateBuffer(shm->opencl_context(), CL_MEM_READ_WRITE, 8 * sizeof(float), NULL, NULL);
      err = clEnqueueWriteBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float), (void *)bbox, 0, NULL, NULL);
     
      new_verts = clCreateBuffer(shm->opencl_context(), CL_MEM_WRITE_ONLY, 4 * mesh->vertex_count * sizeof(float), NULL, NULL);
     
      clFinish(shm->opencl_command_queue());
     
      err = clSetKernelArg(shm->opencl_kernel(), 0, sizeof(cl_mem), &vertices_mem);
      err |= clSetKernelArg(shm->opencl_kernel(), 1, sizeof(cl_mem), &bbox_mem);
      err |= clSetKernelArg(shm->opencl_kernel(), 2, sizeof(cl_mem), &new_verts);
     
      size_t global_work_size = mesh->vertex_count;
      err = clEnqueueNDRangeKernel(shm->opencl_command_queue(), shm->opencl_kernel(), 1, NULL, 
                                   &global_work_size, NULL, 0, NULL, NULL);
     
    #pragma warning(disable:4189)
      const char* str = errorString(err);
      clFinish(shm->opencl_command_queue());
     
     
      err = clEnqueueReadBuffer(shm->opencl_command_queue(), bbox_mem, CL_TRUE, 0, 8 * sizeof(float), 
                                results, 0, NULL, NULL);
      err = clEnqueueReadBuffer(shm->opencl_command_queue(), new_verts, CL_TRUE, 0, 4 * mesh->vertex_count * sizeof(float),
                                fnew_verts, 0, NULL, NULL);
     
      clFinish(shm->opencl_command_queue());
     
      // Verify with input data
      for (int i = 0; i < 4 * mesh->vertex_count; i++)
        assert(fnew_verts[i] == vertices[i]);
     
      for (int i = 0; i < mesh->vertex_count; i++)
      {
        mesh->vertex[i].pt[0] = (double)(fnew_verts[0]);
        mesh->vertex[i].pt[1] = (double)(fnew_verts[1]);
        mesh->vertex[i].pt[2] = (double)(fnew_verts[2]);
        fnew_verts += 4;
      }
     
      free(vertices);

    The kernel is defined like this as follows:
    Code :
     __kernel void bound_mesh(__global float4* vertices, __global float4* bbox, __global float4 new_verts)
     {
      int gid = get_global_id(0);
     
      if (vertices[gid].x < bbox[0].x)
        bbox[0].x = vertices[gid].x;
     
      if (vertices[gid].y < bbox[0].y)
        bbox[0].y = vertices[gid].y;
     
      if (vertices[gid].z < bbox[0].z)
        bbox[0].z = vertices[gid].z;
     
      if (vertices[gid].x > bbox[1].x)
        bbox[1].x = vertices[gid].x;
     
      if (vertices[gid].y > bbox[1].y)
        bbox[1].y = vertices[gid].y;
     
      if (vertices[gid].z > bbox[2].z)
        bbox[1].z = vertices[gid].z;
     
      new_verts[gid = vertices[gid];
     
      bbox[0].w = -1.0; bbox[1].w = -1.0;
     }

    So new_verts should atleast give me the correct vertices considering I am assigning them the same value for the vertices.
    So neither the bounding box nor the new_verts has the correct values in them. Any suggestions as to what I might be doing wrong.
    Thanks for your help.

  2. #2
    Junior Member
    Join Date
    Feb 2010
    Posts
    5

    Re: Computation values turn out to be incorrect

    Hi,

    I didn't tried your kernel, but it might be caused by fact the vertices parameter is defined as:
    __global float4* vertices
    while the new_verts:
    __global float4 new_verts
    (first as pointer, second not)

    Maybe changing it to:
    __global float4* new_verts
    could fix the problem.

    You might also try using:
    new_verts[gid].xyzw = vertices[gid].xyzw;
    If I remember correctly, it helped me to workaround similar problem long time ago with older drivers.


    Petr

  3. #3
    Senior Member
    Join Date
    Jul 2009
    Location
    Northern Europe
    Posts
    311

    Re: Computation values turn out to be incorrect

    It looks like you are setting bbox[0] in all your work-items at the same time. This means they may overwrite each other in a non-determinisitic fashion. (E.g., work-item 302 might replace the value of bbox[0].y at the same time that work-item 303 is trying to check it, resulting in the check getting the old number and the replacement not being noticed.) Effectively this looks like a data-race between all your threads. You need to make sure that they all write to different locations or that you explicitly synchronize your writes.

Similar Threads

  1. iOS - 2d image turn into a 3d
    By mathewi9 in forum OpenGL ES general technical discussions
    Replies: 0
    Last Post: 03-25-2013, 12:55 AM
  2. Problem with uniform boolean var to turn lighting on/off
    By nickw in forum Developers Coding:Beginner
    Replies: 2
    Last Post: 12-17-2009, 03:53 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •