Results 1 to 2 of 2

Thread: Parallel breadth-first search

  1. #1
    Junior Member
    Join Date
    Sep 2012
    Posts
    1

    Parallel breadth-first search

    hi, i want to do a program that visit a graph with a parallel BFS in OpenCL but it is my first time i used this library. I write a code for the host and a code for the kernel but, when i run the program, the error code for the function clEnqueueReadBuffer return sometimes CL_OUT_OF_RESOURCES and sometimes CL_INVALID_COMMAND_QUEUE. this is the code for the host

    Code :
    #include <stdio.h>
    #include <stdlib.h>
    #include <CL/cl.h>
    #include "head.h"
     
     
    int main(void) {
    	printf("\n\n\n");
    	//calculating the size of the matrix
    	const int MATR=grand_matr("matrice2.txt"); 
     
    	//create an array of lists that represent my array
    	int *visitati=(int*)malloc(sizeof(int)*MATR);
    	int *grafo=(int*)malloc(sizeof(int)*MATR*MATR);
    	int *vicini=(int*)malloc(sizeof(int)*MATR*MATR);
    	int *nodi=(int*)malloc(sizeof(int)*MATR);
    	int *stack;
    	FILE *matrice;
    	int i=0,j=0,giro=0;
    	int *sem_nodi=(int*)malloc(sizeof(int));
    	sem_nodi[0]=0;
    	int *sem_vis=(int*)malloc(sizeof(int));
    	sem_vis[0]=0;
    	int *lato=(int*)malloc(sizeof(int));
    	lato[0]=MATR;
    	int x,start,due;         //x rappresenta il carrattere che leggo dal file
    	for(i=0;i<MATR;i++){
    		nodi[i]=0;
    		visitati[i]=MATR+1;
    	}
    	for(i=0;i<MATR*MATR;i++)
    		grafo[i]=0;
    	i=0;
     
    		matrice = fopen("matrice2.txt", "r");
    	if (!matrice) {
    		fprintf(stderr, "Failed to load matrice2.txt\n");
    		exit(1);
    	}
     
     
    	else{
    		while (!feof (matrice))
    		{
    			fscanf(matrice,"%d", &x);
    			grafo[i]=x;
    			i++;
    			/*if (i!=j){
    				grafo[j][i]=x;
    			}
    			i=(i+1)%MATR;
    			if ((i%MATR)==0)
    				j++;*/
    		}
    	}
    	fclose(matrice);
    	// Load the kernel source code into the array source_str
    	FILE *fp;
    	char *source_str;
    	size_t source_size;
     
    	fp = fopen("kernel.cl", "r");
    	if (!fp) {
    		fprintf(stderr, "Failed to load kernel.\n");
    		exit(1);
    	}
    	source_str = (char*)malloc(MAX_SOURCE_SIZE);
    	source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    	fclose( fp );
     
    	// Get platform and device information
    	cl_platform_id platform_id = NULL;
     
    	cl_device_id device_id = NULL;   
     
    	cl_uint ret_num_devices;
     
    	cl_uint ret_num_platforms;
     
    	cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    	printf("clGetPlatformIDs %d\n",ret);
    	if (ret!=0) exit(0);
     
    	ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, 
    		&device_id, &ret_num_devices);
    	printf("clGetDeviceIDs %d\n",ret); 
    	if (ret!=0) exit(0);
     
    	// Create an OpenCL context
    	cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    	printf("clCreateContext %d\n",ret);
    	if (ret!=0) exit(0);
     
            // Create a command queue
            cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    	printf("clCreateCommandQueue %d\n",ret);
    	if (ret!=0) exit(0);
    	// Insert into stack the start node
    	start=rand()%MATR;
    	nodi[start]=2;
     
    	// Create memory buffers on the device for each vector 
     
    	cl_mem visitati_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer visitati %d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem nodi_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer nodi%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem stack_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer stack%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem grafo_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
    		MATR * MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer grafo%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem vicini_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer vicini%d\n",ret);
    	if (ret!=0) exit(0);
     
    	// Create a program from the kernel source
    	cl_program program = clCreateProgramWithSource(context, 1, 
    	(const char **)&source_str, (const size_t *)&source_size, &ret);
    	printf("clCreateProgramWithSource %d\n",ret);
    	if (ret!=0) exit(0);	
    	// Build the program
    	if((ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL))!=CL_SUCCESS){
    		printf("clBuildProgram %d\n",ret);
    		size_t log_size;
    		clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,0,NULL,&log_size);
    		char build_log [log_size+1];
    		clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,log_size,build_log,NULL);
    		build_log[log_size]='\0';
    		printf("ale %s \n",build_log);
    		exit(0);
    	}
    	// Create the OpenCL kernel
    	cl_kernel kernel = clCreateKernel(program, "bfs", &ret);
    	printf("clCreateKernel %d\n",ret);
    	if (ret!=0) exit(0);
     
    	while((due=howtwo(nodi,MATR))!=0){
    		giro++;
    		printf("\n\nGiro num %d\n",giro);
    		// Create the stack
    		stack=(int*)malloc(sizeof(int)*MATR);
    		j=0;
    		for(i=0;i<MATR;i++){
    			if(nodi[i]==2){
    				stack[j]=i;
    				j++;
    			}
    		}
    		// Copy the arrays to their respective memory buffers
    		ret = clEnqueueWriteBuffer(command_queue, visitati_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), visitati, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer visitati%d\n",ret);
    		if (ret!=0) exit(0);
     
    		ret = clEnqueueWriteBuffer(command_queue, nodi_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), nodi, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer visitati%d\n",ret);
    		if (ret!=0) exit(0);
     
    		ret = clEnqueueWriteBuffer(command_queue, stack_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), stack, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer stack %d\n",ret);
    		if (ret!=0) exit(0);	
     
    		ret = clEnqueueWriteBuffer(command_queue, grafo_mem_obj, CL_TRUE, 0, 
    			MATR * MATR * sizeof(int), grafo, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer grafo %d\n",ret);
    		if (ret!=0) exit(0);		
     
    		ret = clEnqueueWriteBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0, 
    			MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer vicini %d\n",ret);
    		if (ret!=0) exit(0);		
     
    		// Set the arguments of the kernel
    		ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&visitati_mem_obj);
    		printf("clSetKernelArg visitati%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&nodi_mem_obj);
    		printf("clSetKernelArg nodi%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&stack_mem_obj);
    		printf("clSetKernelArg stack%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&grafo_mem_obj);
    		printf("clSetKernelArg grafo%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&vicini_mem_obj);
    		printf("clSetKernelArg vicini_vis%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 5, sizeof(int),&lato);
    		printf("clSetKernelArg lato%d\n",ret);
    		if (ret!=0) exit(0);
     
    		// Execute the OpenCL kernel on the list
    		size_t global_item_size = due;
    		size_t local_item_size = due;
    		ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
    			&global_item_size, &local_item_size, 0, NULL, NULL);
    		printf("clEnqueueNDRangeKernel %d\n",ret);
    		if (ret!=0) exit(0);
     
    		// Read the memory buffer vicini_mem_obj on the device to the local variable vicini
     
     
    		ret = clEnqueueReadBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0, 
    		MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
    		printf("clEnqueueReadBuffer vicini%d\n",ret);
    		if (ret!=0) exit(0);  //HERE IS WHERE THE ERROR IS
    		for(i=0;i<MATR*due;i++){
    			if (vicini[i]==2){
    				nodi[i%MATR]=2;
    			}
    		}
    	}
    	// Display the result to the screen
    	printf("\nOrdine di visita ");	
    	for(i=0;i<MATR;i++){
    		printf(" %d ",visitati[i]);
    	}
    	printf("\nVerifico che ogni nodo sia stato visitato");
    	for(i=0;i<MATR;i++){
    		printf("i %d ",nodi[i]);
    	}
    	printf("\n");	
    	ret = clFlush(command_queue);
    	ret = clFinish(command_queue);
    	ret = clReleaseKernel(kernel);
    	ret = clReleaseProgram(program);
    	ret = clReleaseMemObject(visitati_mem_obj);
    	ret = clReleaseMemObject(nodi_mem_obj);
    	ret = clReleaseMemObject(stack_mem_obj);
    	ret = clReleaseMemObject(grafo_mem_obj);
    	ret = clReleaseCommandQueue(command_queue);
    	ret = clReleaseContext(context);
    	free(visitati);
    	free(nodi);
    	free(stack);
    	free(grafo);
     
     
    }

    and this is the code for the kernel

    Code :
    __kernel void bfs( 	__global int *visitati,
    				__global int *nodi,
    				__global int *stack,
    				__global int *grafo,
    				__global int *vicini,
    				int lato) {
     
    	// prendo l'indice dell'elemento da processare
    	int id = get_global_id(0); 
     
    	// prendo l'id del nodo che devo visitare
    	int exp=stack[id];
     
    	int i,j;
    	int sez=exp*lato;
     
    	//Ciclo la sezione di matrice che riguarda il work-item
    	for(i=sez;i<sez+lato;i++){
     
    		//Controllo che ci sia una vicinanza tra i due nodi 
    		if(grafo[i]==1){
    			// se il nodo vicino non  stato ancora aggiunto allo stack o visitato lo aggiungo a nodi
    			if (nodi[grafo[i]]==0){
    				//scrivo su nodi
    				vicini[(id*lato)+i]=2;
    			}
    		}
    	}
     
    }

    thanks for the help! Are 2 days that i'm completely blocked!!!! And sorry for my english!!!

  2. #2
    Senior Member
    Join Date
    Aug 2011
    Posts
    271

    Re: Parallel breadth-first search

    Those errors indicate that it's probably crashed - mostly likely from reading/writing beyond the memory bounds. Try using some printf stuff and/or using a gpu driver to debug your memory accesses.

    Given the code is so simple, you might want to try writing it in a cpu language and simulating the parallelism with loops, to get the algorithm correct.

Similar Threads

  1. how to do task parallel and data parallel on the same device
    By kang13 in forum OpenCL - parallel programming of heterogeneous systems
    Replies: 2
    Last Post: 04-29-2010, 04:14 PM
  2. egl search sample with context_share
    By mick22 in forum OpenGL ES general technical discussions
    Replies: 0
    Last Post: 02-27-2009, 02:45 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •