PDA

View Full Version : Parallel breadth-first search



getnut
09-29-2012, 07:57 AM
hi, i want to do a program that visit a graph with a parallel BFS in OpenCL but it is my first time i used this library. I write a code for the host and a code for the kernel but, when i run the program, the error code for the function clEnqueueReadBuffer return sometimes CL_OUT_OF_RESOURCES and sometimes CL_INVALID_COMMAND_QUEUE. this is the code for the host


#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#include "head.h"


int main(void) {
printf("\n\n\n");
//calculating the size of the matrix
const int MATR=grand_matr("matrice2.txt");

//create an array of lists that represent my array
int *visitati=(int*)malloc(sizeof(int)*MATR);
int *grafo=(int*)malloc(sizeof(int)*MATR*MATR);
int *vicini=(int*)malloc(sizeof(int)*MATR*MATR);
int *nodi=(int*)malloc(sizeof(int)*MATR);
int *stack;
FILE *matrice;
int i=0,j=0,giro=0;
int *sem_nodi=(int*)malloc(sizeof(int));
sem_nodi[0]=0;
int *sem_vis=(int*)malloc(sizeof(int));
sem_vis[0]=0;
int *lato=(int*)malloc(sizeof(int));
lato[0]=MATR;
int x,start,due; //x rappresenta il carrattere che leggo dal file
for(i=0;i<MATR;i++){
nodi[i]=0;
visitati[i]=MATR+1;
}
for(i=0;i<MATR*MATR;i++)
grafo[i]=0;
i=0;

matrice = fopen("matrice2.txt", "r");
if (!matrice) {
fprintf(stderr, "Failed to load matrice2.txt\n");
exit(1);
}


else{
while (!feof (matrice))
{
fscanf(matrice,"%d", &x);
grafo[i]=x;
i++;
/*if (i!=j){
grafo[j][i]=x;
}
i=(i+1)%MATR;
if ((i%MATR)==0)
j++;*/
}
}
fclose(matrice);
// Load the kernel source code into the array source_str
FILE *fp;
char *source_str;
size_t source_size;

fp = fopen("kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );

// Get platform and device information
cl_platform_id platform_id = NULL;

cl_device_id device_id = NULL;

cl_uint ret_num_devices;

cl_uint ret_num_platforms;

cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
printf("clGetPlatformIDs %d\n",ret);
if (ret!=0) exit(0);

ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,
&device_id, &ret_num_devices);
printf("clGetDeviceIDs %d\n",ret);
if (ret!=0) exit(0);

// Create an OpenCL context
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
printf("clCreateContext %d\n",ret);
if (ret!=0) exit(0);

// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
printf("clCreateCommandQueue %d\n",ret);
if (ret!=0) exit(0);
// Insert into stack the start node
start=rand()%MATR;
nodi[start]=2;

// Create memory buffers on the device for each vector

cl_mem visitati_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
MATR * sizeof(int), NULL, &ret);
printf("clCreateBuffer visitati %d\n",ret);
if (ret!=0) exit(0);

cl_mem nodi_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
MATR * sizeof(int), NULL, &ret);
printf("clCreateBuffer nodi%d\n",ret);
if (ret!=0) exit(0);

cl_mem stack_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
MATR * sizeof(int), NULL, &ret);
printf("clCreateBuffer stack%d\n",ret);
if (ret!=0) exit(0);

cl_mem grafo_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
MATR * MATR * sizeof(int), NULL, &ret);
printf("clCreateBuffer grafo%d\n",ret);
if (ret!=0) exit(0);

cl_mem vicini_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
MATR * MATR * sizeof(int), NULL, &ret);
printf("clCreateBuffer vicini%d\n",ret);
if (ret!=0) exit(0);

// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);
printf("clCreateProgramWithSource %d\n",ret);
if (ret!=0) exit(0);
// Build the program
if((ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL))!=CL_SUCCESS){
printf("clBuildProgram %d\n",ret);
size_t log_size;
clGetProgramBuildInfo(program,device_id,CL_PROGRAM _BUILD_LOG,0,NULL,&log_size);
char build_log [log_size+1];
clGetProgramBuildInfo(program,device_id,CL_PROGRAM _BUILD_LOG,log_size,build_log,NULL);
build_log[log_size]='\0';
printf("ale %s \n",build_log);
exit(0);
}
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "bfs", &ret);
printf("clCreateKernel %d\n",ret);
if (ret!=0) exit(0);

while((due=howtwo(nodi,MATR))!=0){
giro++;
printf("\n\nGiro num %d\n",giro);
// Create the stack
stack=(int*)malloc(sizeof(int)*MATR);
j=0;
for(i=0;i<MATR;i++){
if(nodi[i]==2){
stack[j]=i;
j++;
}
}
// Copy the arrays to their respective memory buffers
ret = clEnqueueWriteBuffer(command_queue, visitati_mem_obj, CL_TRUE, 0,
MATR * sizeof(int), visitati, 0, NULL, NULL);
printf("clEnqueueWriteBuffer visitati%d\n",ret);
if (ret!=0) exit(0);

ret = clEnqueueWriteBuffer(command_queue, nodi_mem_obj, CL_TRUE, 0,
MATR * sizeof(int), nodi, 0, NULL, NULL);
printf("clEnqueueWriteBuffer visitati%d\n",ret);
if (ret!=0) exit(0);

ret = clEnqueueWriteBuffer(command_queue, stack_mem_obj, CL_TRUE, 0,
MATR * sizeof(int), stack, 0, NULL, NULL);
printf("clEnqueueWriteBuffer stack %d\n",ret);
if (ret!=0) exit(0);

ret = clEnqueueWriteBuffer(command_queue, grafo_mem_obj, CL_TRUE, 0,
MATR * MATR * sizeof(int), grafo, 0, NULL, NULL);
printf("clEnqueueWriteBuffer grafo %d\n",ret);
if (ret!=0) exit(0);

ret = clEnqueueWriteBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0,
MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
printf("clEnqueueWriteBuffer vicini %d\n",ret);
if (ret!=0) exit(0);

// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&visitati_mem_obj);
printf("clSetKernelArg visitati%d\n",ret);
if (ret!=0) exit(0);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&nodi_mem_obj);
printf("clSetKernelArg nodi%d\n",ret);
if (ret!=0) exit(0);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&stack_mem_obj);
printf("clSetKernelArg stack%d\n",ret);
if (ret!=0) exit(0);
ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&grafo_mem_obj);
printf("clSetKernelArg grafo%d\n",ret);
if (ret!=0) exit(0);
ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&vicini_mem_obj);
printf("clSetKernelArg vicini_vis%d\n",ret);
if (ret!=0) exit(0);
ret = clSetKernelArg(kernel, 5, sizeof(int),&lato);
printf("clSetKernelArg lato%d\n",ret);
if (ret!=0) exit(0);

// Execute the OpenCL kernel on the list
size_t global_item_size = due;
size_t local_item_size = due;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
printf("clEnqueueNDRangeKernel %d\n",ret);
if (ret!=0) exit(0);

// Read the memory buffer vicini_mem_obj on the device to the local variable vicini


ret = clEnqueueReadBuffer(command_queue, vicini_mem_obj, CL_TRUE, 0,
MATR * MATR * sizeof(int), vicini, 0, NULL, NULL);
printf("clEnqueueReadBuffer vicini%d\n",ret);
if (ret!=0) exit(0); //HERE IS WHERE THE ERROR IS
for(i=0;i<MATR*due;i++){
if (vicini[i]==2){
nodi[i%MATR]=2;
}
}
}
// Display the result to the screen
printf("\nOrdine di visita ");
for(i=0;i<MATR;i++){
printf(" %d ",visitati[i]);
}
printf("\nVerifico che ogni nodo sia stato visitato");
for(i=0;i<MATR;i++){
printf("i %d ",nodi[i]);
}
printf("\n");
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(visitati_mem_obj);
ret = clReleaseMemObject(nodi_mem_obj);
ret = clReleaseMemObject(stack_mem_obj);
ret = clReleaseMemObject(grafo_mem_obj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(visitati);
free(nodi);
free(stack);
free(grafo);


}

and this is the code for the kernel


__kernel void bfs( __global int *visitati,
__global int *nodi,
__global int *stack,
__global int *grafo,
__global int *vicini,
int lato) {

// prendo l'indice dell'elemento da processare
int id = get_global_id(0);

// prendo l'id del nodo che devo visitare
int exp=stack[id];

int i,j;
int sez=exp*lato;

//Ciclo la sezione di matrice che riguarda il work-item
for(i=sez;i<sez+lato;i++){

//Controllo che ci sia una vicinanza tra i due nodi
if(grafo[i]==1){
// se il nodo vicino non stato ancora aggiunto allo stack o visitato lo aggiungo a nodi
if (nodi[grafo[i]]==0){
//scrivo su nodi
vicini[(id*lato)+i]=2;
}
}
}

}

thanks for the help! Are 2 days that i'm completely blocked!!!! And sorry for my english!!!

notzed
10-08-2012, 07:48 PM
Those errors indicate that it's probably crashed - mostly likely from reading/writing beyond the memory bounds. Try using some printf stuff and/or using a gpu driver to debug your memory accesses.

Given the code is so simple, you might want to try writing it in a cpu language and simulating the parallelism with loops, to get the algorithm correct.