Results 1 to 2 of 2

Thread: Hello World not working in Linux

  1. #1
    Newbie
    Join Date
    Oct 2013
    Posts
    1

    Hello World not working in Linux

    After writing a large program that works in Windows but not in Linux, I decided to debug with a Hello World program. I realize there are better ways to write this program; I am just trying to debug my hardware / install.

    This runs correctly in Windows using the CUDA 5.5 and the Intel OpenCL libs. I have a Centos 6.4 server I would like to run on as well. I installed CUDA 5.5 and the Intel XE SDK. Neither will run Hello World. If I choose the NVIDIA GPU I get a -45 (CL_INVALID_PROGRAM_EXECUTABLE) error on clCreateKernel. If I select to run on my Intel Xeon Phi I actually get the message "1 warning generated." however I cannot figure out how to view the warning!!

    Can anyone provide insight or what to debug from here?

    Code :
    #include <CL/cl.h>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <fstream>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>
     
     
    // OpenCL C source for the "hello" kernel. Each work-item copies one byte of
    // the constant string hw into the output buffer at its own global index, so
    // the host must launch one work-item per byte it wants (including the
    // terminating NUL) and size the buffer accordingly.
    const char* kernel_text =
    	"#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
    	"__constant char hw[] = \"Hello World\\n\";\n"
    	"__kernel void hello(__global char * out){ \n"
    	"size_t tid = get_global_id(0);	\n"
    	// Store into the buffer element; assigning to `out` itself would try to
    	// overwrite the pointer argument with a char and fails to build.
    	"out[tid] = hw[tid];}\n";
     
    int	main(void)
    {
    	cl_int err;
    	cl_uint platformCount, devCount;
    	cl_int status;
    	char nameBuf[1000];
     
    	// Get list of OpenCL compatible platforms
    	err = clGetPlatformIDs(0, NULL, &platformCount);  
     
    	// Allocate memory, get list of platform handles
    	cl_platform_id *platforms =
    		(cl_platform_id *) malloc(platformCount*sizeof(cl_platform_id));
     
    	err = clGetPlatformIDs(platformCount, platforms, NULL);
     
    	// List platform(s) and vendor(s)
    	for(unsigned int i = 0; i < platformCount; i++)
    	{    
    		err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,sizeof(nameBuf), nameBuf, NULL);
     
    		std::cerr << "Platform " << i << " is by: " << nameBuf << std::endl;
    	}
     
    	std::string selection = "";
    	std::cout << "Please select a platform (0 to " << platformCount-1 << ")" << std::endl;
    	getline(std::cin, selection);
     
    	// Let user make selection
    	cl_platform_id selectedPlatform = platforms[stoi(selection)];
     
    	// allocate memory, get list of device handles in platform
    	err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, 0, NULL, &devCount);
    	cl_device_id *devices =
    		(cl_device_id *) malloc(devCount*sizeof(cl_device_id));
    	err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, devCount, devices, NULL);
    	cl_device_id device = devices[0];
     
    	// Create platform context and don't ask why the platform ID 
    	// and properties are the same value with different typecasts...
    	cl_context_properties cprops[3] = 
    	{CL_CONTEXT_PLATFORM, (cl_context_properties)selectedPlatform, 0};
    	cl_context context = clCreateContext(cprops, 1, &device, NULL, NULL, &status);
     
    	// create a command queue
    	cl_command_queue_properties queueProps = 0;
    	cl_command_queue queue = clCreateCommandQueue(context, device, queueProps, &status);
     
    	// Setup result buffer for OpenCL
    	char * outH = new char[hw.length()+1];
    	cl_mem outCL = clCreateBuffer(context,
    		CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    		hw.length()+1,
    		outH,
    		&err);
    	clFinish(queue);
     
     
    	//const char *srcStr = kernel_text;
    	const char **str = &kernel_text;//&srcStr;
    	// create an OpenCL program (may have multiple kernels)
    	size_t kernelSize[] = {strlen(kernel_text)};
    	cl_program program = clCreateProgramWithSource(context, 1, str, kernelSize, &status);
    	std::cout << "sts " << status << std::endl;
     
    	// build it
    	status = clBuildProgram(program, devCount, &devices[0], NULL, NULL, NULL);
    	if (status != 0) {
    		// Determine the size of the log
    		size_t log_size;
    		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
     
    		// Allocate memory for the log
    		char *log = (char *) malloc(log_size);
     
    		// Get the log
    		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
     
    		// Print the log
    		printf("%s\n", log);
    		free(log);
    	}
     
    	// fish the kernel out of the program
    	const char* kernelName = "hello";
    	cl_kernel kernel = clCreateKernel(program, kernelName, &status);
    	std::cout << "sts " << status << std::endl;
    	clFinish(queue);
     
    	// Device computations use a command queue. This is a 1-to-1 mapping
    	// with the device, associated with a context.
    	size_t wgSize[]= {1};
    	size_t block_size[] = {1};
    	clSetKernelArg(kernel, 0, hw.length(), outCL);
    	clEnqueueNDRangeKernel(queue,kernel,1,NULL,block_size,wgSize,0,NULL,NULL);
    	clFinish(queue);
     
    	clEnqueueReadBuffer(queue,outCL,CL_TRUE,0,hw.length()+1,outH,0,NULL,NULL);
    	std::cout << outH;
     
    	std::cout << "Press any key to exit..." << std::endl;
    	getline(std::cin, selection);
    	//free(kernel_text);
     
    	return 0;
    }

  2. #2
    Senior Member
    Join Date
    Oct 2012
    Posts
    107
    There are several problems:

    - The last line of your kernel should be: out[tid]=hw[tid]
    - arg_size in clSetKernelArg is the size of the argument's type, and arg_value must be a pointer to the memory object: clSetKernelArg(kernel, 0, sizeof(cl_mem), &outCL)
    - The global work-size block_size should contain hw.length()+1, not 1

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •