Results 1 to 2 of 2

Thread: Parallel breadth-first search

  1. #1
    Junior Member
    Join Date
    Sep 2012
    Posts
    1

    Parallel breadth-first search

    hi, i want to do a program that visit a graph with a parallel BFS in OpenCL but it is my first time i used this library. I write a code for the host and a code for the kernel but, when i run the program, the error code for the function clEnqueueReadBuffer return sometimes CL_OUT_OF_RESOURCES and sometimes CL_INVALID_COMMAND_QUEUE. this is the code for the host

    Code :
    #include <stdio.h>
    #include <stdlib.h>
    #include <CL/cl.h>
    #include "head.h"
     
     
    int main(void) {
    	printf("\n\n\n");
    	//calculating the size of the matrix
    	const int MATR=grand_matr("matrice2.txt"); 
     
    	//create an array of lists that represent my array
    	int *visitati=(int*)malloc(sizeof(int)*MATR);
    	int *grafo=(int*)malloc(sizeof(int)*MATR*MATR);
    	int *vicini=(int*)malloc(sizeof(int)*MATR*MATR);
    	int *nodi=(int*)malloc(sizeof(int)*MATR);
    	int *stack;
    	FILE *matrice;
    	int i=0,j=0,giro=0;
    	int *sem_nodi=(int*)malloc(sizeof(int));
    	sem_nodi[0]=0;
    	int *sem_vis=(int*)malloc(sizeof(int));
    	sem_vis[0]=0;
    	int *lato=(int*)malloc(sizeof(int));
    	lato[0]=MATR;
    	int x,start,due;         //x rappresenta il carrattere che leggo dal file
    	for(i=0;i<MATR;i++){
    		nodi[i]=0;
    		visitati[i]=MATR+1;
    	}
    	for(i=0;i<MATR*MATR;i++)
    		grafo[i]=0;
    	i=0;
     
    		matrice = fopen("matrice2.txt", "r");
    	if (!matrice) {
    		fprintf(stderr, "Failed to load matrice2.txt\n");
    		exit(1);
    	}
     
     
    	else{
    		while (!feof (matrice))
    		{
    			fscanf(matrice,"%d", &x);
    			grafo[i]=x;
    			i++;
    			/*if (i!=j){
    				grafo[j][i]=x;
    			}
    			i=(i+1)%MATR;
    			if ((i%MATR)==0)
    				j++;*/
    		}
    	}
    	fclose(matrice);
    	// Load the kernel source code into the array source_str
    	FILE *fp;
    	char *source_str;
    	size_t source_size;
     
    	fp = fopen("kernel.cl", "r");
    	if (!fp) {
    		fprintf(stderr, "Failed to load kernel.\n");
    		exit(1);
    	}
    	source_str = (char*)malloc(MAX_SOURCE_SIZE);
    	source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    	fclose( fp );
     
    	// Get platform and device information
    	cl_platform_id platform_id = NULL;
     
    	cl_device_id device_id = NULL;   
     
    	cl_uint ret_num_devices;
     
    	cl_uint ret_num_platforms;
     
    	cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    	printf("clGetPlatformIDs %d\n",ret);
    	if (ret!=0) exit(0);
     
    	ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, 
    		&device_id, &ret_num_devices);
    	printf("clGetDeviceIDs %d\n",ret); 
    	if (ret!=0) exit(0);
     
    	// Create an OpenCL context
    	cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    	printf("clCreateContext %d\n",ret);
    	if (ret!=0) exit(0);
     
            // Create a command queue
            cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    	printf("clCreateCommandQueue %d\n",ret);
    	if (ret!=0) exit(0);
    	// Insert into stack the start node
    	start=rand()%MATR;
    	nodi[start]=2;
     
    	// Create memory buffers on the device for each vector 
     
    	cl_mem visitati_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer visitati %d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem nodi_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer nodi%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem stack_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
    		MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer stack%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem grafo_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
    		MATR * MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer grafo%d\n",ret);
    	if (ret!=0) exit(0);
     
    	cl_mem vicini_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
    		MATR * MATR * sizeof(int), NULL, &ret);
    	printf("clCreateBuffer vicini%d\n",ret);
    	if (ret!=0) exit(0);
     
    	// Create a program from the kernel source
    	cl_program program = clCreateProgramWithSource(context, 1, 
    	(const char **)&source_str, (const size_t *)&source_size, &ret);
    	printf("clCreateProgramWithSource %d\n",ret);
    	if (ret!=0) exit(0);	
    	// Build the program
    	if((ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL))!=CL_SUCCESS){
    		printf("clBuildProgram %d\n",ret);
    		size_t log_size;
    		clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,0,NULL,&log_size);
    		char build_log [log_size+1];
    		clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,log_size,build_log,NULL);
    		build_log[log_size]='\0';
    		printf("ale %s \n",build_log);
    		exit(0);
    	}
    	// Create the OpenCL kernel
    	cl_kernel kernel = clCreateKernel(program, "bfs", &ret);
    	printf("clCreateKernel %d\n",ret);
    	if (ret!=0) exit(0);
     
    	while((due=howtwo(nodi,MATR))!=0){
    		giro++;
    		printf("\n\nGiro num %d\n",giro);
    		// Create the stack
    		stack=(int*)malloc(sizeof(int)*MATR);
    		j=0;
    		for(i=0;i<MATR;i++){
    			if(nodi[i]==2){
    				stack[j]=i;
    				j++;
    			}
    		}
    		// Copy the arrays to their respective memory buffers
    		ret = clEnqueueWriteBuffer(command_queue, visitati_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), visitati, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer visitati%d\n",ret);
    		if (ret!=0) exit(0);
     
    		ret = clEnqueueWriteBuffer(command_queue, nodi_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), nodi, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer visitati%d\n",ret);
    		if (ret!=0) exit(0);
     
    		ret = clEnqueueWriteBuffer(command_queue, stack_mem_obj, CL_TRUE, 0, 
    			MATR * sizeof(int), stack, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer stack %d\n",ret);
    		if (ret!=0) exit(0);	
     
    		ret = clEnqueueWriteBuffer(command_queue, grafo_mem_obj, CL_TRUE, 0, 
    			MATR * MATR * sizeof(int), grafo, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer grafo %d\n",ret);
    		if (ret!=0) exit(0);		
     
    		ret = clEnqueueWriteBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0, 
    			MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
    		printf("clEnqueueWriteBuffer vicini %d\n",ret);
    		if (ret!=0) exit(0);		
     
    		// Set the arguments of the kernel
    		ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&visitati_mem_obj);
    		printf("clSetKernelArg visitati%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&nodi_mem_obj);
    		printf("clSetKernelArg nodi%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&stack_mem_obj);
    		printf("clSetKernelArg stack%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&grafo_mem_obj);
    		printf("clSetKernelArg grafo%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&vicini_mem_obj);
    		printf("clSetKernelArg vicini_vis%d\n",ret);
    		if (ret!=0) exit(0);
    		ret = clSetKernelArg(kernel, 5, sizeof(int),&lato);
    		printf("clSetKernelArg lato%d\n",ret);
    		if (ret!=0) exit(0);
     
    		// Execute the OpenCL kernel on the list
    		size_t global_item_size = due;
    		size_t local_item_size = due;
    		ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
    			&global_item_size, &local_item_size, 0, NULL, NULL);
    		printf("clEnqueueNDRangeKernel %d\n",ret);
    		if (ret!=0) exit(0);
     
    		// Read the memory buffer vicini_mem_obj on the device to the local variable vicini
     
     
    		ret = clEnqueueReadBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0, 
    		MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
    		printf("clEnqueueReadBuffer vicini%d\n",ret);
    		if (ret!=0) exit(0);  //HERE IS WHERE THE ERROR IS
    		for(i=0;i<MATR*due;i++){
    			if (vicini[i]==2){
    				nodi[i%MATR]=2;
    			}
    		}
    	}
    	// Display the result to the screen
    	printf("\nOrdine di visita ");	
    	for(i=0;i<MATR;i++){
    		printf(" %d ",visitati[i]);
    	}
    	printf("\nVerifico che ogni nodo sia stato visitato");
    	for(i=0;i<MATR;i++){
    		printf("i %d ",nodi[i]);
    	}
    	printf("\n");	
    	ret = clFlush(command_queue);
    	ret = clFinish(command_queue);
    	ret = clReleaseKernel(kernel);
    	ret = clReleaseProgram(program);
    	ret = clReleaseMemObject(visitati_mem_obj);
    	ret = clReleaseMemObject(nodi_mem_obj);
    	ret = clReleaseMemObject(stack_mem_obj);
    	ret = clReleaseMemObject(grafo_mem_obj);
    	ret = clReleaseCommandQueue(command_queue);
    	ret = clReleaseContext(context);
    	free(visitati);
    	free(nodi);
    	free(stack);
    	free(grafo);
     
     
    }

    and this is the code for the kernel

    Code :
    __kernel void bfs( 	__global int *visitati,
    				__global int *nodi,
    				__global int *stack,
    				__global int *grafo,
    				__global int *vicini,
    				int lato) {
     
    	// prendo l'indice dell'elemento da processare
    	int id = get_global_id(0); 
     
    	// prendo l'id del nodo che devo visitare
    	int exp=stack[id];
     
    	int i,j;
    	int sez=exp*lato;
     
    	//Ciclo la sezione di matrice che riguarda il work-item
    	for(i=sez;i<sez+lato;i++){
     
    		//Controllo che ci sia una vicinanza tra i due nodi 
    		if(grafo[i]==1){
    			// se il nodo vicino non  stato ancora aggiunto allo stack o visitato lo aggiungo a nodi
    			if (nodi[grafo[i]]==0){
    				//scrivo su nodi
    				vicini[(id*lato)+i]=2;
    			}
    		}
    	}
     
    }

    thanks for the help! Are 2 days that i'm completely blocked!!!! And sorry for my english!!!

  2. #2
    Senior Member
    Join Date
    Aug 2011
    Posts
    271

    Re: Parallel breadth-first search

    Those errors indicate that it's probably crashed - mostly likely from reading/writing beyond the memory bounds. Try using some printf stuff and/or using a gpu driver to debug your memory accesses.

    Given the code is so simple, you might want to try writing it in a cpu language and simulating the parallelism with loops, to get the algorithm correct.

Similar Threads

  1. Replies: 2
    Last Post: 04-29-2010, 04:14 PM
  2. egl search sample with context_share
    By mick22 in forum OpenGL ES general technical discussions
    Replies: 0
    Last Post: 02-27-2009, 02:45 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •