We are trying to process images from a frame grabber. The image data arrives in an mmap()ed buffer.

Every time we use the mmap()ed buffer with OpenCL, an error message appears in syslog and the Slab figure in /proc/meminfo increases. On our system we are leaking around 1 MB per second!

syslog error: [fglrx:MCIL_LockMemory] *ERROR* Could not lock memory into GART space

To demonstrate this error we have written a simple program with a loop that maps and unmaps an mmap()ed buffer. While the program is running, please run dmesg and examine /proc/meminfo. We are using the AMD implementation of OpenCL: Driver linux_x64 13.4.

Is there anything special we should do when handling mmaped memory?



This is the demo program:

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <CL/opencl.h>

/* Size in bytes of the mmap()ed region and of each OpenCL buffer. */
enum { BUF_SIZE = 4096 };

/*
 * Abort the program when an OpenCL call reported a failure.
 *
 * errnum    - status code compared against CL_SUCCESS; evaluated exactly once.
 * errstring - text naming the failed call; printed with %s.
 *
 * On failure, prints "<errstring> failed on line <line>: <errnum>" to stderr,
 * where <line> is the line of the macro invocation, then calls exit(1).
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' forms exactly one statement and stays valid inside an unbraced if/else.
 */
#define cl_err_exit(errnum, errstring) \
    do { \
        const int cl_err_exit_status_ = (errnum); \
        if (cl_err_exit_status_ != CL_SUCCESS) { \
            fprintf(stderr, "%s failed on line %d: %d\n", (errstring), __LINE__, cl_err_exit_status_); \
            exit(1); \
        } \
    } while (0)


int main ()
{
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue gpu_queue = NULL;
cl_int err;

cl_mem pinned_buffer = NULL;
cl_mem device_buffer = NULL;

void *pinned_mem;
int fd;

fd = open("/dev/mem", O_RDONLY);
if (fd == -1) {
perror("open");
exit(1);
}

pinned_mem = mmap(NULL, BUF_SIZE, PROT_READ, MAP_SHARED, fd, 0);
if (pinned_mem == MAP_FAILED) {
perror("mmap");
exit(1);
}

err = clGetPlatformIDs(1, &platform_id, NULL);
cl_err_exit(err, "clGetPlatformIDs");

err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
cl_err_exit(err, "clGetDeviceIDs");

context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
cl_err_exit(err, "clCreateContext");
gpu_queue = clCreateCommandQueue(context, device_id, 0, &err);
cl_err_exit(err, "clCreateCommandQueue");
while (1) {


pinned_buffer = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, BUF_SIZE, pinned_mem, &err);
cl_err_exit(err, "clCreateBuffer");

device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, BUF_SIZE, NULL, NULL);
cl_err_exit(err, "clCreateBuffer");

pinned_mem = (float *) clEnqueueMapBuffer(gpu_queue, pinned_buffer, CL_TRUE, CL_MAP_WRITE, 0, BUF_SIZE, 0, NULL, NULL, &err);
cl_err_exit(err, "clEnqueueMapBuffer");

err = clEnqueueWriteBuffer(gpu_queue, device_buffer, CL_FALSE, 0, BUF_SIZE, pinned_mem, 0, NULL, NULL);
cl_err_exit(err, "clEnqueueWriteBuffer");

err = clEnqueueUnmapMemObject(gpu_queue, pinned_buffer, pinned_mem, 0, NULL, NULL);
cl_err_exit(err, "clEnqueueUnmapMemObject");

clFinish(gpu_queue);
clReleaseMemObject(pinned_buffer);
clReleaseMemObject(device_buffer);
}
clReleaseCommandQueue(gpu_queue);
clReleaseContext(context);
munmap(pinned_mem, BUF_SIZE);
close(fd);

return 0;
}