After writing a large program that works on Windows but not on Linux, I decided to debug with a Hello World program. I realize there are better ways to write this program; I am just trying to debug my hardware / install.

This runs correctly on Windows using CUDA 5.5 and the Intel OpenCL libs. I also have a CentOS 6.4 server I would like to run it on. I installed CUDA 5.5 and the Intel XE SDK there, but neither will run Hello World. If I choose the NVIDIA GPU, I get a -45 (CL_INVALID_PROGRAM_EXECUTABLE) error on clCreateKernel. If I choose to run on my Intel Xeon Phi, I get the message "1 warning generated."; however, I cannot figure out how to view the warning!

Can anyone provide some insight, or suggest what to debug from here?

Code :
#include <CL/cl.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
 
 
// OpenCL C source for the "hello" kernel.
//
// Each work-item copies one byte of the __constant string `hw` into the
// matching position of the output buffer. The previous version wrote
// `out = hw[tid];`, which assigns a char to the *pointer* parameter instead of
// storing through it, so nothing was ever written to the buffer and compilers
// reject or warn about the int-to-pointer conversion.
//
// The `tid < sizeof(hw)` guard keeps the kernel in bounds of `hw` when it is
// launched with more work-items than there are bytes in the string
// (sizeof(hw) includes the terminating NUL).
const char* kernel_text =
	"#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
	"__constant char hw[] = \"Hello World\\n\";\n"
	"__kernel void hello(__global char * out){ \n"
	"size_t tid = get_global_id(0);	\n"
	"if (tid < sizeof(hw)) {\n"
	"out[tid] = hw[tid];\n"
	"}\n"
	"}\n";
 
int	main(void)
{
	cl_int err;
	cl_uint platformCount, devCount;
	cl_int status;
	char nameBuf[1000];
 
	// Get list of OpenCL compatible platforms
	err = clGetPlatformIDs(0, NULL, &platformCount);  
 
	// Allocate memory, get list of platform handles
	cl_platform_id *platforms =
		(cl_platform_id *) malloc(platformCount*sizeof(cl_platform_id));
 
	err = clGetPlatformIDs(platformCount, platforms, NULL);
 
	// List platform(s) and vendor(s)
	for(unsigned int i = 0; i < platformCount; i++)
	{    
		err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,sizeof(nameBuf), nameBuf, NULL);
 
		std::cerr << "Platform " << i << " is by: " << nameBuf << std::endl;
	}
 
	std::string selection = "";
	std::cout << "Please select a platform (0 to " << platformCount-1 << ")" << std::endl;
	getline(std::cin, selection);
 
	// Let user make selection
	cl_platform_id selectedPlatform = platforms[stoi(selection)];
 
	// allocate memory, get list of device handles in platform
	err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, 0, NULL, &devCount);
	cl_device_id *devices =
		(cl_device_id *) malloc(devCount*sizeof(cl_device_id));
	err = clGetDeviceIDs(platforms[stoi(selection)], CL_DEVICE_TYPE_ALL, devCount, devices, NULL);
	cl_device_id device = devices[0];
 
	// Create platform context and don't ask why the platform ID 
	// and properties are the same value with different typecasts...
	cl_context_properties cprops[3] = 
	{CL_CONTEXT_PLATFORM, (cl_context_properties)selectedPlatform, 0};
	cl_context context = clCreateContext(cprops, 1, &device, NULL, NULL, &status);
 
	// create a command queue
	cl_command_queue_properties queueProps = 0;
	cl_command_queue queue = clCreateCommandQueue(context, device, queueProps, &status);
 
	// Setup result buffer for OpenCL
	char * outH = new char[hw.length()+1];
	cl_mem outCL = clCreateBuffer(context,
		CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
		hw.length()+1,
		outH,
		&err);
	clFinish(queue);
 
 
	//const char *srcStr = kernel_text;
	const char **str = &kernel_text;//&srcStr;
	// create an OpenCL program (may have multiple kernels)
	size_t kernelSize[] = {strlen(kernel_text)};
	cl_program program = clCreateProgramWithSource(context, 1, str, kernelSize, &status);
	std::cout << "sts " << status << std::endl;
 
	// build it
	status = clBuildProgram(program, devCount, &devices[0], NULL, NULL, NULL);
	if (status != 0) {
		// Determine the size of the log
		size_t log_size;
		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
 
		// Allocate memory for the log
		char *log = (char *) malloc(log_size);
 
		// Get the log
		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
 
		// Print the log
		printf("%s\n", log);
		free(log);
	}
 
	// fish the kernel out of the program
	const char* kernelName = "hello";
	cl_kernel kernel = clCreateKernel(program, kernelName, &status);
	std::cout << "sts " << status << std::endl;
	clFinish(queue);
 
	// Device computations use a command queue. This is a 1-to-1 mapping
	// with the device, associated with a context.
	size_t wgSize[]= {1};
	size_t block_size[] = {1};
	clSetKernelArg(kernel, 0, hw.length(), outCL);
	clEnqueueNDRangeKernel(queue,kernel,1,NULL,block_size,wgSize,0,NULL,NULL);
	clFinish(queue);
 
	clEnqueueReadBuffer(queue,outCL,CL_TRUE,0,hw.length()+1,outH,0,NULL,NULL);
	std::cout << outH;
 
	std::cout << "Press any key to exit..." << std::endl;
	getline(std::cin, selection);
	//free(kernel_text);
 
	return 0;
}