Results 1 to 2 of 2

Thread: Confusion on output

  1. #1
    Junior Member
    Join Date
    Feb 2012
    Posts
    4

    Confusion on output

    I am very new to OpenCL and am trying to learn how to use it to improve the performance of a Python-based project of mine. The performance bottleneck in my project is related to inverse distance trees used to calculate nearest-neighbors, so that's what I'm trying to do in OpenCL.

    The algorithm I'm using as a first pass has been cobbled together from various sources on the net like StackOverflow and "Adventures in OpenCL". When run as the heart of a C++ application, it works fine. When run in OpenCL, it does not. I'm confused!

    Here is the source to the OpenCL portion:

    Code :
    // 2-D integer point with fields x and z, used by the `nearest` kernel for
    // both the sample points (`coords`) and the query points (`base`).
    // Each element is two OpenCL `int`s (32 bits each), so a host array bound to
    // a `struct point *` argument has to supply two 32-bit integers per point.
    struct point {
      int x;
      int z;
    };
     
    // Brute-force nearest-neighbour lookup.
    // The work-item with global id g scans all `lencoords` entries of `coords`
    // and stores in output[g] the `values` entry of the sample whose squared
    // x/z distance to base[g] is smallest. The comparison is a strict `<`, so on
    // a tie the earliest sample wins. If lencoords is 0 the loop body never runs
    // and output[g] is not written.
    __kernel void nearest(__global struct point *coords, __global float *values, __global struct point *base, __global float *output, const unsigned int lencoords) {
      // base index
      int g_dataset_id = get_global_id(0);
     
      // -1 marks "no candidate seen yet"
      int dmin = -1;
      int d;
     
      for (int i=0; i<lencoords; i++) {
     
        // squared distance between sample i and this work-item's base point
        d = (coords[i].x-base[g_dataset_id].x)*(coords[i].x-base[g_dataset_id].x)+(coords[i].z-base[g_dataset_id].z)*(coords[i].z-base[g_dataset_id].z);
     
        // first sample, or strictly closer than the best so far
        if (dmin == -1 || d < dmin) {
          output[g_dataset_id] = values[i];
          dmin = d;
        }
      }
    }

    And here is the Python portion:

    Code :
    import pyopencl as cl
    import numpy
    from itertools import product
    from random import randint
     
    class CL:
        """Small PyOpenCL driver for the ``nearest`` kernel.

        Call order used by the script: loadProgram, popCorn, execute, dumpit.
        """
        def __init__(self):
            """Create an OpenCL context and a command queue on that context."""
            self.ctx = cl.create_some_context()
            self.queue = cl.CommandQueue(self.ctx)
     
        def loadProgram(self, filename):
            """Read OpenCL source text from *filename* and build it into self.program."""
            #read in the OpenCL source file as a string
            # NOTE(review): the file object is never closed explicitly.
            f = open(filename, 'r')
            fstr = "".join(f.readlines())
            #print fstr
            #create the program
            self.program = cl.Program(self.ctx, fstr).build()
     
        def popCorn(self):
            """Create the host-side test arrays and the OpenCL buffers for them."""
            mf = cl.mem_flags
     
            #initialize client side (CPU) arrays
            self.xsize = 4
            self.zsize = 4
            self.lencoords = 4
            # Random sample points, one (a, b) pair per row.
            # NOTE(review): dtype is uint8 (1 byte per component) while the kernel's
            # struct point declares two `int` fields -- the element sizes do not
            # match; confirm whether this is the cause of the scrambled output.
            # NOTE(review): both components are drawn from range(xsize); zsize is unused here.
            self.coords = numpy.array([(randint(0, self.xsize-1),randint(0, self.xsize-1)) for elem in xrange(self.lencoords)], dtype=numpy.uint8)
            # One float32 value per sample point: 4, 5, 6, ...
            self.values = numpy.array([elem+4 for elem in xrange(self.lencoords)], dtype=numpy.float32)
            # Every grid cell as a pair, one row per cell.
            # NOTE(review): pairs are built as (z, x) but struct point lists x first,
            # then z -- confirm the intended field order. Same uint8 concern as coords.
            self.base = numpy.array([(z, x) for (z, x) in product(xrange(self.zsize),xrange(self.xsize))], dtype=numpy.uint8)
            # Host-side template for the result: one float32 per grid cell.
            self.output = numpy.zeros((self.zsize*self.xsize), dtype=numpy.float32)
     
            #create OpenCL buffers
            self.coords_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.coords)
            self.values_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.values)
            self.base_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.base)
            # Only sized from self.output; its contents are not copied to the device.
            self.output_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, size=self.output.nbytes)
     
        def execute(self):
            """Run the ``nearest`` kernel and read its output into self.results."""
            # NOTE(review): self.base.shape is (zsize*xsize, 2), so a 2-D global size
            # is passed although the kernel only uses get_global_id(0) -- confirm
            # whether a 1-D size of zsize*xsize was intended.
            # NOTE(review): the kernel declares lencoords as unsigned int; int32 is passed.
            self.program.nearest(self.queue, (self.base.shape), None, self.coords_buf, self.values_buf, self.base_buf, self.output_buf, numpy.int32(self.lencoords))
            self.results = numpy.empty_like(self.output)
            # Copy the device output into self.results and wait on the returned event.
            cl.enqueue_read_buffer(self.queue, self.output_buf, self.results).wait()
     
        def dumpit(self):
            """Print (sample point, value) pairs, then (base point, result) pairs."""
            print zip(self.coords, self.values)
            print zip(self.base, self.results)
     
    # Script entry point: build the kernel from the relative path "nearest.cl",
    # generate the test data, run the kernel once and print the results.
    if __name__ == "__main__":
        example = CL()
        example.loadProgram("nearest.cl")
        example.popCorn()
        example.execute()
        example.dumpit()

    The output as found in self.results is a mishmash of values from self.values, not ordered correctly at all. I'm afraid I don't even know enough to know how to put this into a debugger or something. Help! Thank you very much in advance.

    Jack.

  2. #2
    Junior Member
    Join Date
    Feb 2012
    Posts
    4

    Re: Confusion on output

    I was able to solve my own problem. My main trouble was argument agreement -- don't send an integer when a float is expected, et cetera. The next step is to figure out how to handle arbitrarily large arrays, but that will be a different thread if necessary.

    Here is the resulting code:

    First the OpenCL code:

    Code :
    // 2-D integer point with fields x and z, used by the `nearest` kernel for
    // both the sample points (`coords`) and the query points (`base`).
    // Each element is two OpenCL `int`s (32 bits each); the host side supplies
    // these as rows of two int32 values.
    struct point {
      int x;
      int z;
    };
     
    // Brute-force nearest-neighbour lookup.
    //
    // The work-item with global id g scans all `lencoords` samples in `coords`
    // and writes to output[g] the `values` entry of the sample with the smallest
    // squared x/z distance to base[g]. The comparison is a strict `<`, so on a
    // tie the earliest sample wins. When lencoords is 0, output[g] is set to 0.
    //
    //   coords    - sample points, `lencoords` entries
    //   values    - one int per sample point
    //   base      - query points, one per work-item (indexed by get_global_id(0))
    //   output    - one int per query point
    //   lencoords - number of entries in coords/values
    __kernel void nearest(__global struct point *coords, __global int *values, __global struct point *base, __global int *output, const unsigned int lencoords) {
      // base index
      const int g_dataset_id = get_global_id(0);

      // Read this work-item's query point once instead of on every iteration.
      const struct point query = base[g_dataset_id];

      // -1 marks "no candidate yet"; a sum of squares is never negative
      // (barring integer overflow), so it cannot collide with a real distance.
      int dmin = -1;
      int best = 0;

      // Unsigned index matches the type of lencoords (no signed/unsigned compare).
      for (unsigned int i = 0; i < lencoords; i++) {
        const int dx = coords[i].x - query.x;
        const int dz = coords[i].z - query.z;
        const int d = dx * dx + dz * dz;
        if (dmin == -1 || d < dmin) {
          best = values[i];
          dmin = d;
        }
      }

      // Single store of the final answer rather than one per improvement.
      output[g_dataset_id] = best;
    }

    Now the Python code:

    Code :
    import pyopencl as cl
    import numpy
    from itertools import product
    from random import randint, uniform
     
    class CL:
        """Small PyOpenCL driver for the ``nearest`` kernel.

        Intended call order: loadProgram, popCorn, execute, dumpit.
        """

        def __init__(self):
            """Create an OpenCL context and a command queue on that context."""
            self.ctx = cl.create_some_context()
            self.queue = cl.CommandQueue(self.ctx)

        def loadProgram(self, filename):
            """Read OpenCL source text from *filename* and build it into self.program."""
            # The with-block closes the file even if reading raises.
            with open(filename, 'r') as f:
                fstr = f.read()
            #create the program
            self.program = cl.Program(self.ctx, fstr).build()

        def popCorn(self, xsize=256, zsize=256, density=0.05):
            """Create the host-side test arrays and the OpenCL buffers for them.

            xsize, zsize -- grid extent along x and z (each must be >= 1).
            density -- number of random sample points as a fraction of the
                number of grid cells; at least one sample is always created.

            Raises ValueError if xsize or zsize is smaller than 1.
            """
            mf = cl.mem_flags

            #initialize client side (CPU) arrays
            self._initHostArrays(xsize, zsize, density)

            #create OpenCL buffers
            self.coords_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.coords)
            self.values_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.values)
            self.base_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.base)
            # Only sized from self.output; nothing is copied to the device here.
            self.output_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, size=self.output.nbytes)

        def _initHostArrays(self, xsize, zsize, density):
            """Fill the numpy arrays for an xsize-by-zsize grid (no OpenCL calls)."""
            if xsize < 1 or zsize < 1:
                raise ValueError("xsize and zsize must both be >= 1")

            self.xsize = xsize
            self.zsize = zsize
            self.lenbase = xsize * zsize
            self.lencoords = max(1, int(self.lenbase * density))

            # Rows of two int32 values match the kernel's struct point {int x; int z;}.
            # x is drawn from range(xsize) and z from range(zsize).
            self.coords = numpy.array(
                [(randint(0, xsize - 1), randint(0, zsize - 1)) for _ in range(self.lencoords)],
                dtype=numpy.int32)
            # One integer value per sample; the random float is truncated explicitly.
            self.values = numpy.array(
                [int(uniform(1, 5)) for _ in range(self.lencoords)],
                dtype=numpy.int32)
            # One (x, z) row per grid cell; cell (x, z) sits at index x * zsize + z.
            self.base = numpy.array(
                [(index // zsize, index % zsize) for index in range(self.lenbase)],
                dtype=numpy.int32)
            self.output = numpy.zeros(self.lenbase, dtype=numpy.int32)

        def execute(self):
            """Run the ``nearest`` kernel and store the result grid in self.results.

            self.results ends up with shape (xsize, zsize), indexed [x, z].
            """
            # One work-item per query point. self.base.shape would be (lenbase, 2)
            # and launch every query twice, since the kernel only reads
            # get_global_id(0).
            global_size = (self.lenbase,)
            # The kernel declares lencoords as unsigned int, hence uint32.
            self.program.nearest(self.queue, global_size, None, self.coords_buf, self.values_buf, self.base_buf, self.output_buf, numpy.uint32(self.lencoords))
            self.results = numpy.empty_like(self.output)
            # Device -> host copy; wait on the returned event before using the data.
            cl.enqueue_copy(self.queue, self.results, self.output_buf).wait()
            # Flat index is x * zsize + z, so x is the row axis and z the column axis.
            self.results = self.results.reshape((self.xsize, self.zsize))

        def dumpit(self):
            """Print the result grid, one line per x row, values concatenated."""
            for row in self.results:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(''.join(str(int(value)) for value in row))
     
     
     
    # Script entry point: build the kernel from the relative path "nearest.cl",
    # generate the test data, run the kernel once and print the result grid.
    if __name__ == "__main__":
        example = CL()
        example.loadProgram("nearest.cl")
        example.popCorn()
        example.execute()
        example.dumpit()

Similar Threads

  1. Confusion with write_imagef()
    By giridhart in forum OpenCL
    Replies: 1
    Last Post: 09-08-2010, 01:19 PM
  2. UST confusion
    By jocphx in forum OpenML Coding & Technical Issues
    Replies: 1
    Last Post: 08-28-2005, 06:33 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •