// File listing metadata (extraction residue): 116 lines, 2.5 KiB, C++
///original author: Erwin Coumans
|
|
#include "btOpenCLUtils.h"
|
|
#include "../parallel_primitives/host/btOpenCLArray.h"
|
|
#include "../parallel_primitives/host/btLauncherCL.h"
|
|
#include <stdio.h>
|
|
|
|
|
|
// Turns its argument into a string literal, so the OpenCL C kernel below can
// be written inline as plain tokens and handed to the OpenCL compiler as text.
#define MSTRINGIFY(A) #A

/// OpenCL C source of the ReduceGlobal kernel.
///
/// Each work-group sums its slice of d_in in place by repeatedly folding the
/// upper half of the group onto the lower half; work-item 0 of the group then
/// writes the group total to d_out[group id].
///
/// Kernel arguments:
///   d_in        - input values; overwritten with partial sums.
///   d_out       - receives one sum per work-group, indexed by group id.
///   numElements - number of valid entries in d_in.
///
/// The fold only adds d_in[myId + s] when that index is below numElements, so
/// a partially filled last work-group never reads past the end of the input.
/// The halving loop covers every element of a group only when the local size
/// is a power of two.
const char* kernelString = MSTRINGIFY(

__kernel void ReduceGlobal(__global int* d_in, __global int* d_out, int numElements)
{
	int myId = get_global_id(0);
	int tid = get_local_id(0);
	int ls = get_local_size(0);

	for (int s = ls / 2; s > 0; s >>= 1)
	{
		if (tid < s && myId + s < numElements)
		{
			d_in[myId] += d_in[myId + s];
		}
		barrier(CLK_GLOBAL_MEM_FENCE);
	}

	if (tid == 0 && myId < numElements)
	{
		d_out[get_group_id(0)] = d_in[myId];
	}
}

);
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
int ciErrNum = 0;
|
|
int preferred_device = -1;
|
|
int preferred_platform = -1;
|
|
cl_platform_id platformId;
|
|
cl_context ctx;
|
|
cl_command_queue queue;
|
|
cl_device_id device;
|
|
cl_kernel addKernel;
|
|
ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
|
|
btOpenCLUtils::printPlatformInfo(platformId);
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
if (!ctx) {
|
|
printf("No OpenCL capable GPU found!");
|
|
return 0;
|
|
}
|
|
|
|
device = btOpenCLUtils::getDevice(ctx,0);
|
|
queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
|
|
addKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
|
|
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
|
int numElements = 1024*1024;
|
|
btOpenCLArray<int> a(ctx,queue);
|
|
btOpenCLArray<int> b(ctx,queue);
|
|
btAlignedObjectArray<int> hostA;
|
|
btAlignedObjectArray<int> hostB;
|
|
|
|
for (int i=0;i<numElements;i++)
|
|
{
|
|
hostA.push_back(1);
|
|
hostB.push_back(0.f);
|
|
}
|
|
a.copyFromHost(hostA);
|
|
b.copyFromHost(hostB);
|
|
|
|
int hostSum= 0;
|
|
for (int i=0;i<numElements;i++)
|
|
{
|
|
hostSum += hostA.at(i);
|
|
}
|
|
b.resize(numElements);
|
|
|
|
{
|
|
btLauncherCL launcher( queue, addKernel);
|
|
launcher.setBuffer( a.getBufferCL());
|
|
launcher.setBuffer( b.getBufferCL());
|
|
launcher.setConst( numElements );
|
|
launcher.launch1D( numElements,1024);
|
|
}
|
|
clFinish(queue);
|
|
{
|
|
btLauncherCL launcher( queue, addKernel);
|
|
launcher.setBuffer( b.getBufferCL());
|
|
launcher.setBuffer( a.getBufferCL());
|
|
launcher.setConst( 1024 );
|
|
launcher.launch1D( 1024,1024);
|
|
}
|
|
clFinish(queue);
|
|
|
|
printf("hostSum = %d\n", hostSum);
|
|
|
|
int clSum = a.at(0);
|
|
printf("clSum = %d\n", clSum );
|
|
if (hostSum != clSum)
|
|
{
|
|
printf("Incorrect result\n");
|
|
} else
|
|
{
|
|
printf("Correct result\n");
|
|
}
|
|
|
|
|
|
clReleaseCommandQueue(queue);
|
|
clReleaseContext(ctx);
|
|
printf("press key\n");
|
|
getchar();
|
|
return 0;
|
|
} |