|
|
|
|
@@ -1,9 +1,9 @@
|
|
|
|
|
|
|
|
|
|
#include "btGpuSapBroadphase.h"
|
|
|
|
|
#include "b3GpuSapBroadphase.h"
|
|
|
|
|
#include "BulletCommon/btVector3.h"
|
|
|
|
|
#include "parallel_primitives/host/btLauncherCL.h"
|
|
|
|
|
#include "BulletCommon/btQuickprof.h"
|
|
|
|
|
#include "basic_initialize/btOpenCLUtils.h"
|
|
|
|
|
#include "basic_initialize/b3OpenCLUtils.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "../kernels/sapKernels.h"
|
|
|
|
|
@@ -11,7 +11,7 @@
|
|
|
|
|
#include "BulletCommon/btMinMax.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
btGpuSapBroadphase::btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
|
|
|
|
|
b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
|
|
|
|
|
:m_context(ctx),
|
|
|
|
|
m_device(device),
|
|
|
|
|
m_queue(q),
|
|
|
|
|
@@ -28,44 +28,44 @@ m_currentBuffer(-1)
|
|
|
|
|
|
|
|
|
|
cl_int errNum=0;
|
|
|
|
|
|
|
|
|
|
cl_program sapProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
|
|
|
|
|
cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
cl_program sapFastProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
|
|
|
|
|
cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
|
|
|
|
//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
|
|
|
|
|
//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
|
|
|
|
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
|
|
|
|
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
|
|
|
|
|
//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
m_sap2Kernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
|
|
|
|
|
m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
|
|
|
|
|
m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
|
|
|
|
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
#else
|
|
|
|
|
#ifndef __APPLE__
|
|
|
|
|
m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
|
|
|
|
|
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
#else
|
|
|
|
|
m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
|
|
|
|
m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
|
|
|
|
|
btAssert(errNum==CL_SUCCESS);
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
m_flipFloatKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
|
|
|
|
|
m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
|
|
|
|
|
|
|
|
|
|
m_copyAabbsKernel= btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
|
|
|
|
|
m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
|
|
|
|
|
|
|
|
|
|
m_scatterKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
|
|
|
|
|
m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
|
|
|
|
|
|
|
|
|
|
m_sorter = new btRadixSort32CL(m_context,m_device,m_queue);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
btGpuSapBroadphase::~btGpuSapBroadphase()
|
|
|
|
|
b3GpuSapBroadphase::~b3GpuSapBroadphase()
|
|
|
|
|
{
|
|
|
|
|
delete m_sorter;
|
|
|
|
|
clReleaseKernel(m_scatterKernel);
|
|
|
|
|
@@ -97,7 +97,7 @@ static unsigned int FloatFlip(float fl)
|
|
|
|
|
return f ^ mask;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::init3dSap()
|
|
|
|
|
void b3GpuSapBroadphase::init3dSap()
|
|
|
|
|
{
|
|
|
|
|
if (m_currentBuffer<0)
|
|
|
|
|
{
|
|
|
|
|
@@ -123,7 +123,7 @@ void btGpuSapBroadphase::init3dSap()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
void btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
|
|
|
|
void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
|
|
|
|
{
|
|
|
|
|
btAssert(m_currentBuffer>=0);
|
|
|
|
|
if (m_currentBuffer<0)
|
|
|
|
|
@@ -155,7 +155,7 @@ void btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::calculateOverlappingPairsHost()
|
|
|
|
|
void b3GpuSapBroadphase::calculateOverlappingPairsHost()
|
|
|
|
|
{
|
|
|
|
|
//test
|
|
|
|
|
//if (m_currentBuffer>=0)
|
|
|
|
|
@@ -249,7 +249,7 @@ void btGpuSapBroadphase::calculateOverlappingPairsHost()
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::calculateOverlappingPairs()
|
|
|
|
|
void b3GpuSapBroadphase::calculateOverlappingPairs()
|
|
|
|
|
{
|
|
|
|
|
int axis = 0;//todo on GPU for now hardcode
|
|
|
|
|
|
|
|
|
|
@@ -512,7 +512,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs()
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::writeAabbsToGpu()
|
|
|
|
|
void b3GpuSapBroadphase::writeAabbsToGpu()
|
|
|
|
|
{
|
|
|
|
|
m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
|
|
|
|
|
m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
|
|
|
|
|
@@ -520,10 +520,10 @@ void btGpuSapBroadphase::writeAabbsToGpu()
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
|
|
|
|
|
void b3GpuSapBroadphase::createLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
|
|
|
|
|
{
|
|
|
|
|
int index = userPtr;
|
|
|
|
|
btSapAabb aabb;
|
|
|
|
|
b3SapAabb aabb;
|
|
|
|
|
for (int i=0;i<4;i++)
|
|
|
|
|
{
|
|
|
|
|
aabb.m_min[i] = aabbMin[i];
|
|
|
|
|
@@ -535,10 +535,10 @@ void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin, const btVec
|
|
|
|
|
m_allAabbsCPU.push_back(aabb);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void btGpuSapBroadphase::createProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
|
|
|
|
|
void b3GpuSapBroadphase::createProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
|
|
|
|
|
{
|
|
|
|
|
int index = userPtr;
|
|
|
|
|
btSapAabb aabb;
|
|
|
|
|
b3SapAabb aabb;
|
|
|
|
|
for (int i=0;i<4;i++)
|
|
|
|
|
{
|
|
|
|
|
aabb.m_min[i] = aabbMin[i];
|
|
|
|
|
@@ -550,16 +550,16 @@ void btGpuSapBroadphase::createProxy(const btVector3& aabbMin, const btVector3&
|
|
|
|
|
m_allAabbsCPU.push_back(aabb);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Accessor shown in this diff listing under both its bt- and b3-prefixed class name.
// Returns the cl_mem obtained from m_allAabbsGPU.getBufferCL(); it takes no
// arguments and does nothing else.
// NOTE(review): "WS" presumably stands for world space -- confirm against the header.
cl_mem btGpuSapBroadphase::getAabbBufferWS()
|
|
|
|
|
cl_mem b3GpuSapBroadphase::getAabbBufferWS()
|
|
|
|
|
{
|
|
|
|
|
return m_allAabbsGPU.getBufferCL();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Accessor shown in this diff listing under both its bt- and b3-prefixed class name.
// Returns m_overlappingPairs.size() converted to int, i.e. the current element
// count of the overlapping-pair container.
// NOTE(review): when that container gets filled is not visible in this excerpt.
int btGpuSapBroadphase::getNumOverlap()
|
|
|
|
|
int b3GpuSapBroadphase::getNumOverlap()
|
|
|
|
|
{
|
|
|
|
|
return m_overlappingPairs.size();
|
|
|
|
|
}
|
|
|
|
|
// Accessor shown in this diff listing under both its bt- and b3-prefixed class name.
// Returns the cl_mem obtained from m_overlappingPairs.getBufferCL(); the number
// of entries in the same container is reported by getNumOverlap().
cl_mem btGpuSapBroadphase::getOverlappingPairBuffer()
|
|
|
|
|
cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer()
|
|
|
|
|
{
|
|
|
|
|
return m_overlappingPairs.getBufferCL();
|
|
|
|
|
}
|