Results 1 to 3 of 3

Thread: Matrix Addition - what is wrong?

  1. #1

    Matrix Addition - what is wrong?

    Hello,

    I am fairly new to OpenCL and tried to write this small test program for matrix addition (don't be confused by the kernel function name, MatMul).

    The code runs without reporting any error; however, the output matrix is full of zeros instead of the expected value 3 in every element.

    Could You please help me find the mistake in my code?

    Thanks


    #include <stdlib.h>
    #include <stdio.h>

    #include <CL/cl.h>


    /*
     * OpenCL C source of the element-wise addition kernel, one string per source line.
     *
     * The function must carry the __kernel qualifier: without it the function is an
     * ordinary helper, and clCreateKernel() cannot find an entry point named "MatMul".
     * Each string ends in '\n' so that line numbers in the build log match this listing.
     */
    const char *OpenCLSource[] = {
    "__kernel void MatMul(__global int* matAA, __global int* matBB, __global int* matCC)\n",
    "{\n",
    " unsigned int i = get_global_id(0);\n",   /* one work-item per element */
    " matCC[i] = matAA[i] + matBB[i];\n",
    "}\n"
    };


    // Main function
    // *********************************************************************
    int main(int argc, char *argv[])
    {
    int clerror = CL_SUCCESS;

    cl_mem matAA;
    cl_mem matBB;
    cl_mem matCC;

    const int dsize = 16 * 16;

    int matA[dsize];
    int matB[dsize];
    int matC[dsize];

    for(int i = 0; i < dsize; i++)
    {
    //matA[i] = matB[i] = rand();
    matA[i] = 1;
    matB[i] = 2;
    }

    // Query platform ID
    cl_platform_id platform;
    clGetPlatformIDs (1, &platform, NULL);

    // Setup context properties
    cl_context_properties props[3];
    props[0] = (cl_context_properties)CL_CONTEXT_PLATFORM;
    props[1] = (cl_context_properties)platform;
    props[2] = (cl_context_properties)0;

    // Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU
    cl_context GPUContext = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU,NULL, NULL, NULL);

    // Get the list of GPU devices associated with this context
    size_t ParmDataBytes;
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);
    cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);
    clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL);

    // Create a command-queue on the first GPU device
    cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext, GPUDevices[0], 0, NULL);

    // Allocate GPU memory for source vectors AND initialize from CPU memory
    matAA = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * dsize, matA, NULL);
    matBB = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * dsize, matB, NULL);
    matCC = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, sizeof(int) * dsize, NULL, NULL);

    // Create OpenCL program with source code
    cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 18, OpenCLSource, NULL, NULL);

    // Build the program (OpenCL JIT compilation)
    clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);

    // Create a handle to the compiled OpenCL function (Kernel)
    cl_kernel matMulKernel = clCreateKernel(OpenCLProgram, "MatMul", NULL);

    size_t global_work_size[1];
    size_t local_work_size[1];

    global_work_size[0] = dsize;
    local_work_size[0] = dsize;

    // In the next step we associate the GPU memory with the Kernel arguments
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matC);

    // Launch the Kernel on the GPU
    clEnqueueNDRangeKernel(GPUCommandQueue, matMulKernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);

    // Copy the output in GPU memory back to CPU memory
    clEnqueueReadBuffer(GPUCommandQueue, matCC, CL_TRUE, 0, global_work_size[0], matC, 0, NULL, NULL);

    // Print out the results
    for (int i = 0; i < 10; i++)
    {
    printf("%d\n", matC[i]);
    }

    // Cleanup
    free(GPUDevices);
    clReleaseKernel(matMulKernel);
    clReleaseProgram(OpenCLProgram);
    clReleaseCommandQueue(GPUCommandQueue);
    clReleaseContext(GPUContext);
    clReleaseMemObject(matAA);
    clReleaseMemObject(matBB);
    clReleaseMemObject(matCC);

    getchar();

    return 0;
    }

  2. #2
    Senior Member
    Join Date
    May 2010
    Location
    Toronto, Canada
    Posts
    845

    Re: Matrix Addition - what is wrong?

    I'm almost surprised that the code didn't crash.

    This is always the first step you should follow: check the error code returned by every function. Currently the code does not check for any errors.

    Second, the issue appears to be here:

    Code :
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matC);

    Do you see it now? matA, matB and matC are your local arrays, not the CL buffers. What you were trying to do is this:

    Code :
    clSetKernelArg(matMulKernel, 0, sizeof(cl_mem), (void*)&matAA);
    clSetKernelArg(matMulKernel, 1, sizeof(cl_mem), (void*)&matBB);
    clSetKernelArg(matMulKernel, 2, sizeof(cl_mem), (void*)&matCC);

    I recommend using clearly different names for local variables and for CL objects to avoid this error.
    Disclaimer: Employee of Qualcomm Canada. Any opinions expressed here are personal and do not necessarily reflect the views of my employer. LinkedIn profile.

  3. #3

    Re: Matrix Addition - what is wrong?

    Thank you very much for the reply. In the end it crashed, as you said.

    However, there was a mistake a bit earlier. In the kernel definition, the function name was not preceded by the keyword __kernel, so clCreateKernel did not create a valid kernel, and everything afterwards just passed without computing anything.

    Problem solved!

Similar Threads

  1. Non square matrix mulitiplacation return wrong result
    By hatakemisaki in forum Interoperability issues
    Replies: 0
    Last Post: 06-30-2012, 10:24 PM
  2. matrix addition problem
    By bubu in forum OpenCL
    Replies: 6
    Last Post: 04-23-2011, 01:29 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •