// Hello.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include "CL/cl.h"
#define DATA_SIZE 10
const char *KernelSource =
"kernel void hello(global float *input , global float *output)\n"\
"{\n"\
" size_t id =get_global_id(0);\n"\
"output[id] =input[id]*input[id];\n"\
"} \n"\
"\n";
//float start_time,end_time;

int main(void)
{
float start_time,end_time;
start_time=clock();
cl_context context;
cl_context_properties properties[3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms=0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices=0;
cl_mem input,output;
size_t global;

float inputData[DATA_SIZE]={0,1,2,3,4,5,6,7,8,9};
float results[DATA_SIZE]={0};

int i;

//retrieve a list of platform variable
if(clGetPlatformIDs(1,&platform_id,&num_of_platfor ms)!=CL_SUCCESS)
{
printf("Unable to get platform_id\n");
return 1;
}

//try to get supported GPU DEvice
if(clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_GPU,1 ,&device_id,
&num_of_devices)!=CL_SUCCESS)
{
printf("unable to get device_id\n");
return 1;
}

//context properties list -must be terminated with 0
properties[0]=CL_CONTEXT_PLATFORM;
properties[1]=(cl_context_properties) platform_id;
properties[2]=0;

//create a context with the GPU device
context=clCreateContext(properties,1,&device_id,NU LL,NULL,&err);

//create command queue using the context and device
command_queue=clCreateCommandQueue(context,device_ id,0,&err);

//create a program from the kernel source code
program=clCreateProgramWithSource(context,1,(const char**)
&KernelSource,NULL,&err);

//compile the program
err=clBuildProgram(program,0,NULL,NULL,NULL,NULL);
if((err!=CL_SUCCESS))
{
printf("build error \n",err);
size_t len;
char buffer[4096];
//get the build log
clGetProgramBuildInfo(program,device_id,CL_PROGRAM _BUILD_LOG,sizeof(buffer),buffer,&len);
printf("----build Log---\n%s\n",buffer);
exit(1);

// return 1;
}

//specify which kernel from the program to execute
kernel=clCreateKernel(program,"hello",&err);

//create buffers for the input and output
input=clCreateBuffer(context,CL_MEM_READ_ONLY,size of(float)*DATA_SIZE,NULL,NULL);

output=clCreateBuffer(context,CL_MEM_WRITE_ONLY,si zeof(float)*DATA_SIZE,NULL,NULL);

//load data into the input buffer

clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0 ,
sizeof(float)*DATA_SIZE,inputData,0,NULL,NULL);

//set the argument list for the kernel command
clSetKernelArg(kernel,0,sizeof(cl_mem),&input);
clSetKernelArg(kernel,1,sizeof(cl_mem),&output);
global=DATA_SIZE;

//enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue,kernel,1,NULL ,&global,NULL,0,NULL,NULL);
clFinish(command_queue);

//copy the results from out of the buffer
clEnqueueReadBuffer(command_queue,output,CL_TRUE,0 ,sizeof(float)*DATA_SIZE,results,0,
NULL,NULL);

//print the results
printf("output:");
for(i=0;i<DATA_SIZE;i++)
{
printf("%f",results[i]);
}

//cleanup-release OpenCL resources

clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
end_time=clock();
printf("execution time is%f",end_time-start_time);
return 0;

}