Results 1 to 2 of 2

Thread: Segmentation fault (core dumped )

  1. #1
    Junior Member
    Join Date
    Mar 2013
    Posts
    2

    Segmentation fault (core dumped )

    Hi everyone,

    I have been doing parallel programming (using CUDA) for over a year now, but I am new to OpenCL. I am writing my first OpenCL program (matrix multiplication) and wrote the following code:

    [#include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    #include <string.h>
    #include <SDKCommon.hpp>
    #include <SDKApplication.hpp>
    #include <SDKCommandArgs.hpp>
    #include <SDKFile.hpp>
    #include <CL/cl.h>

    #define MAX_SOURCE_SIZE (0x100000)
    #define MATSIZE 16


    void initmat(float *Aa,float *Bb,float *Cc,int row,int colrow,int col);

    /*
     * Fills the three host matrices with fixed test values:
     *   - every element of Aa is set to 1,
     *   - every element of Bb is set to 2,
     *   - every element of Cc is set to 0.
     *
     * Aa must hold row*colrow floats, Bb colrow*col floats and Cc row*col
     * floats. A dimension that is zero or negative is treated as zero, so
     * any matrix that depends on it is left untouched.
     */
    void initmat(float *Aa,float *Bb,float *Cc,int row,int colrow,int col)
    {
        /*
         * Clamp to zero and multiply in size_t. Comparing an unsigned counter
         * against a negative int product would convert the product to a huge
         * unsigned value and run the loop far past the end of the buffer.
         */
        const size_t rows  = row    > 0 ? (size_t)row    : 0;
        const size_t inner = colrow > 0 ? (size_t)colrow : 0;
        const size_t cols  = col    > 0 ? (size_t)col    : 0;
        size_t i;

        for (i = 0; i < rows * inner; i++) {
            Aa[i] = 1.0f;
        }

        for (i = 0; i < inner * cols; i++) {
            Bb[i] = 2.0f;
        }

        for (i = 0; i < rows * cols; i++) {
            Cc[i] = 0.0f;
        }
    }


    int main(void)
    {

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("matmul.cl", "r");
    if (!fp) {
    fprintf(stderr, "Failed to load kernel.\n");
    exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // matrix declaration
    float *A;
    float *B;
    float *C;

    // set dimesions
    int Arow,AcolBrow,Bcol;

    Arow=AcolBrow=Bcol=MATSIZE;

    // no. of elements in matrix
    int sizea, sizeb, sizec;

    // Error code from opencl

    int err;

    // Setting up matrices
    sizea= Arow*AcolBrow;
    sizeb= AcolBrow*Bcol;
    sizec= Arow*Bcol;

    A = (float *) malloc(sizeof(float)*sizea);
    B = (float *) malloc(sizeof(float)*sizeb);
    C = (float *) malloc(sizeof(float)*sizec);

    initmat(A,B,C,Arow,AcolBrow,Bcol);

    // get platform id & device id

    cl_uint numplatform;
    cl_platform_id platformid=NULL;
    cl_device_id deviceid=NULL;

    err= clGetPlatformIDs(1,&platformid,&numplatform);
    err=clGetDeviceIDs(platformid,CL_DEVICE_TYPE_GPU,1 ,&deviceid,NULL);

    cl_context_properties properties[]=
    {
    CL_CONTEXT_PLATFORM, (cl_context_properties)platformid,0
    };

    // create context
    cl_context context= clCreateContext(properties,1,&deviceid,NULL,NULL,& err);

    /* when more than one gpu is installed on the system than we make use of the approach as we stated in the notes !! */

    // create command queue

    cl_command_queue queue = clCreateCommandQueue(context,deviceid,0,&err); // I have disabled profiling option


    // Allocate buffer object for Ad,Bd,Cd

    cl_mem Ad = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(cl_ float)*sizea,NULL,NULL);
    cl_mem Bd = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(cl_ float)*sizeb,NULL,NULL);
    cl_mem Cd = clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(cl _float)*sizec,NULL,NULL);

    // We are not explicitely making kernel. We are putting the kernel code here itself (see notes)


    cl_program program= clCreateProgramWithSource(context,1,(const char **)&source_str, (const size_t *)&source_size,&err);

    // Build program using program object just created

    err = clBuildProgram(program,0,NULL,NULL,NULL,NULL);

    if(err !=CL_SUCCESS)
    {

    size_t len;
    char buffer[2048];
    printf("ERROR: Failed to build executable \n ");
    clGetProgramBuildInfo(program,deviceid,CL_PROGRAM_ BUILD_LOG, sizeof(buffer), buffer , &len);
    printf("%s \n",buffer);
    //return FAILURE;

    }

    // Create kernel object

    cl_kernel kernel = clCreateKernel(program,"matmul",NULL);

    // set kernel argument values

    err=0;
    err= clSetKernelArg(kernel,0,sizeof(int),&Arow);
    err|= clSetKernelArg(kernel,1,sizeof(int),&AcolBrow);
    err|= clSetKernelArg(kernel,2,sizeof(int),&Bcol);
    err|= clSetKernelArg(kernel,3,sizeof(cl_mem),&Ad);
    err|= clSetKernelArg(kernel,4,sizeof(cl_mem),&Bd);
    err|= clSetKernelArg(kernel,5,sizeof(cl_mem),&Cd);

    // Write to device buffers. Ad=A and Bd=B : Equivalent to CUDAmemcpy

    err=clEnqueueWriteBuffer(queue,Ad,CL_TRUE,0,sizeof (cl_float)*sizea,A,0,NULL,NULL);
    err=clEnqueueWriteBuffer(queue,Bd,CL_TRUE,0,sizeof (cl_float)*sizeb,B,0,NULL,NULL);

    // since we have set the copy as synchronous we will be creating event
    cl_event event;

    // Execute the kernel over entire range of C matrix

    size_t global[2];
    size_t local[2];
    cl_uint * ndim; // no. of dimension in ND range. 3rd parameter in kernel call signifies the dimension.

    global[0]=(size_t)Arow;
    global[1]=(size_t)Bcol;

    * ndim=2; // because we want 2-D multiplication. Gives n

    /* no local size declaration cause we are not making work groups ie blocks.We are just make making oneblock where everythread takes one element of A,B and computes C */

    err = clEnqueueNDRangeKernel(queue,kernel,*ndim,NULL,glo bal,NULL,0,NULL,&event); // the NULL position after global is for passing local dimension. In this case we don't have one.
    clFinish(queue); // wait for kernel to finish before we begin copying the result back on host

    //read back the result

    err=clEnqueueReadBuffer(queue,Cd,CL_TRUE,0,sizeof( cl_float)*sizec,C,0,NULL,NULL);

    // free all memory

    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseMemObject(Ad);
    clReleaseMemObject(Bd);
    clReleaseMemObject(Cd);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return 0;
    }][/code]







    My kernel code is as follows,
    [
    /*
     * Computes C = A * B for row-major matrices, one work-item per element
     * of C.
     *
     *   A is Mdim x Ndim, B is Ndim x Pdim, C is Mdim x Pdim.
     *
     * Global id 0 selects the row of C and global id 1 selects the column.
     * Work-items that fall outside C do nothing, so the global size may be
     * rounded up past the matrix dimensions.
     */
    __kernel void matmul(const int Mdim, const int Ndim,const int Pdim,__global float* A,__global float* B,__global float* C)
    {
        const int i = get_global_id(0); /* row of C */
        const int j = get_global_id(1); /* column of C */

        /* Logical &&, and each index is checked against the extent it indexes. */
        if (i < Mdim && j < Pdim) {
            float tmp = 0.0f;
            int k;

            /* Dot product of row i of A with column j of B. */
            for (k = 0; k < Ndim; k++) {
                tmp += A[i * Ndim + k] * B[k * Pdim + j];
            }
            C[i * Pdim + j] = tmp;
        }
    }
    ][/code]


    I could compile it successfully, and it created a binary in the ../bin/x86/ folder. When I try to run it using ./matmul, it throws the following error:

    93 > Sun Mar 17 : 04:22 PM : samkit@samkit:~/AMD/AMD-APP-SDK-v2.8-RC-lnx32/samples/opencl/bin/x86$ ./matmul
    Segmentation fault (core dumped)

    I know the kernel's functionality is nowhere near that of a matrix multiplication kernel, but I did this just to check whether there is some error in my kernel. Please give suggestions or advice that can help me make my code run.

    Thanks in advance.

  2. #2
    Junior Member
    Join Date
    Mar 2013
    Posts
    2

    Re: Segmentation fault (core dumped )

    I have debugged it.

Similar Threads

  1. Replies: 3
    Last Post: 04-05-2013, 02:17 AM
  2. Segmentation fault while creating command queue
    By whitepearl in forum OpenCL
    Replies: 2
    Last Post: 03-23-2012, 01:13 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •