This commit is contained in:
erwincoumans
2013-06-17 13:47:41 -07:00
8 changed files with 112 additions and 55 deletions

View File

@@ -397,7 +397,7 @@ void ParticleDemo::clientMoveAndDisplay()
cl_mem pairsGPU = 0;
{
m_data->m_broadphaseGPU->calculateOverlappingPairs();
m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numParticles);
pairsGPU = m_data->m_broadphaseGPU->getOverlappingPairBuffer();
numPairsGPU = m_data->m_broadphaseGPU->getNumOverlap();
}

View File

@@ -308,7 +308,7 @@ void PairBench::clientMoveAndDisplay()
}
{
B3_PROFILE("calculateOverlappingPairs");
m_data->m_broadphaseGPU->calculateOverlappingPairs();
m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numObjects);
//int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
//printf("numPairs = %d\n", numPairs);
}

View File

@@ -155,7 +155,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
}
void b3GpuSapBroadphase::calculateOverlappingPairsHost()
void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
{
//test
//if (m_currentBuffer>=0)
@@ -236,6 +236,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost()
}
}
if (hostPairs.size() > maxPairs)
{
hostPairs.resize(maxPairs);
}
if (hostPairs.size())
{
@@ -262,7 +266,7 @@ void b3GpuSapBroadphase::reset()
}
void b3GpuSapBroadphase::calculateOverlappingPairs()
void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{
int axis = 0;//todo on GPU for now hardcode
@@ -398,8 +402,6 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
}
int maxPairsPerBody = 64;
int maxPairs = maxPairsPerBody * numSmallAabbs;//todo
m_overlappingPairs.resize(maxPairs);
b3OpenCLArray<int> pairCount(m_context, m_queue);
@@ -423,8 +425,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
numPairs = pairCount.at(0);
if (numPairs >maxPairs)
{
b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
numPairs =maxPairs;
}
}
}
if (m_gpuSmallSortedAabbs.size())
@@ -467,7 +471,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs()
numPairs = pairCount.at(0);
if (numPairs>maxPairs)
{
b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
numPairs = maxPairs;
}
}
#else

View File

@@ -49,8 +49,8 @@ class b3GpuSapBroadphase
b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
virtual ~b3GpuSapBroadphase();
void calculateOverlappingPairs();
void calculateOverlappingPairsHost();
void calculateOverlappingPairs(int maxPairs);
void calculateOverlappingPairsHost(int maxPairs);
void reset();

View File

@@ -7,8 +7,8 @@
template <typename T>
class b3OpenCLArray
{
int m_size;
int m_capacity;
size_t m_size;
size_t m_capacity;
cl_mem m_clBuffer;
cl_context m_clContext;
@@ -30,14 +30,14 @@ class b3OpenCLArray
b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);
/// Growth policy used when the array has to enlarge itself.
/// Returns twice `size`, or 1 when `size` is zero.
B3_FORCE_INLINE int allocSize(int size)
B3_FORCE_INLINE size_t allocSize(size_t size)
{
return (size ? size*2 : 1);
}
public:
b3OpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true)
b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true)
:m_size(0), m_capacity(0),m_clBuffer(0),
m_clContext(ctx),m_commandQueue(queue),
m_ownsMemory(true),m_allowGrowingCapacity(true)
@@ -50,7 +50,7 @@ public:
}
///this is an error-prone method with no error checking, be careful!
void setFromOpenCLBuffer(cl_mem buffer, int sizeInElements)
void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements)
{
deallocate();
m_ownsMemory = false;
@@ -81,18 +81,20 @@ public:
m_capacity=0;
}
/// Appends a copy of _Val behind the current last element.
/// When size() equals capacity() the storage is first grown to
/// allocSize(size()) through reserve(). The value is then written at index
/// size() with copyFromHostPointer (forwarding waitForCompletion) and
/// m_size is incremented.
/// The bool-returning variant reports the outcome of reserve(); it stays
/// true when no growth was needed.
B3_FORCE_INLINE void push_back(const T& _Val,bool waitForCompletion=true)
B3_FORCE_INLINE bool push_back(const T& _Val,bool waitForCompletion=true)
{
int sz = size();
bool result = true;
size_t sz = size();
// array is full: grow before writing the new element
if( sz == capacity() )
{
reserve( allocSize(size()) );
result = reserve( allocSize(size()) );
}
// NOTE(review): the copy and the size increment below run even when
// reserve() reported failure - confirm whether callers rely on this.
copyFromHostPointer(&_Val, 1, sz, waitForCompletion);
m_size++;
return result;
}
B3_FORCE_INLINE T forcedAt(int n) const
B3_FORCE_INLINE T forcedAt(size_t n) const
{
b3Assert(n>=0);
b3Assert(n<capacity());
@@ -101,7 +103,7 @@ public:
return elem;
}
B3_FORCE_INLINE T at(int n) const
B3_FORCE_INLINE T at(size_t n) const
{
b3Assert(n>=0);
b3Assert(n<size());
@@ -110,9 +112,10 @@ public:
return elem;
}
B3_FORCE_INLINE void resize(int newsize, bool copyOldContents=true)
B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents=true)
{
int curSize = size();
bool result = true;
size_t curSize = size();
if (newsize < curSize)
{
@@ -121,28 +124,31 @@ public:
{
if (newsize > size())
{
reserve(newsize,copyOldContents);
result = reserve(newsize,copyOldContents);
}
//leave new data uninitialized (init in debug mode?)
//for (int i=curSize;i<newsize;i++) ...
//for (size_t i=curSize;i<newsize;i++) ...
}
m_size = newsize;
return result;
}
/// Returns m_size, the number of elements currently in use.
B3_FORCE_INLINE int size() const
B3_FORCE_INLINE size_t size() const
{
return m_size;
}
/// Returns m_capacity, the number of elements the current allocation can hold.
B3_FORCE_INLINE int capacity() const
B3_FORCE_INLINE size_t capacity() const
{
return m_capacity;
}
B3_FORCE_INLINE void reserve(int _Count, bool copyOldContents=true)
{ // determine new minimum length of allocated storage
B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents=true)
{
bool result=true;
// determine new minimum length of allocated storage
if (capacity() < _Count)
{ // not enough room, reallocate
@@ -150,14 +156,17 @@ public:
{
cl_int ciErrNum;
//create a new OpenCL buffer
int memSizeInBytes = sizeof(T)*_Count;
size_t memSizeInBytes = sizeof(T)*_Count;
cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
b3Assert(ciErrNum==CL_SUCCESS);
if (ciErrNum!=CL_SUCCESS)
{
result = false;
}
//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
for (int i=0;i<memSizeInBytes;i++)
for (size_t i=0;i<memSizeInBytes;i++)
src[i] = 0xbb;
ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 );
b3Assert(ciErrNum==CL_SUCCESS);
@@ -165,8 +174,11 @@ public:
free(src);
#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
if (copyOldContents)
copyToCL(buf, size());
if (result)
{
if (copyOldContents)
copyToCL(buf, size());
}
//deallocate the old buffer
deallocate();
@@ -179,12 +191,14 @@ public:
//fail: assert and
b3Assert(0);
deallocate();
result=false;
}
}
return result;
}
void copyToCL(cl_mem destination, int numElements, int firstElem=0, int dstOffsetInElems=0) const
void copyToCL(cl_mem destination, size_t numElements, size_t firstElem=0, size_t dstOffsetInElems=0) const
{
if (numElements<=0)
return;
@@ -203,8 +217,8 @@ public:
b3Assert(numElements>0);
b3Assert(numElements<=m_size);
int srcOffsetBytes = sizeof(T)*firstElem;
int dstOffsetInBytes = sizeof(T)*dstOffsetInElems;
size_t srcOffsetBytes = sizeof(T)*firstElem;
size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems;
status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination,
srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 );
@@ -214,7 +228,7 @@ public:
void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true)
{
int newSize = srcArray.size();
size_t newSize = srcArray.size();
bool copyOldContents = false;
resize (newSize,copyOldContents);
@@ -223,12 +237,12 @@ public:
}
void copyFromHostPointer(const T* src, int numElems, int destFirstElem= 0, bool waitForCompletion=true)
void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true)
{
b3Assert(numElems+destFirstElem <= capacity());
cl_int status = 0;
int sizeInBytes=sizeof(T)*numElems;
size_t sizeInBytes=sizeof(T)*numElems;
status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes,
src, 0,0,0 );
b3Assert(status == CL_SUCCESS );
@@ -245,7 +259,7 @@ public:
copyToHostPointer(&destArray[0], size(),0,waitForCompletion);
}
void copyToHostPointer(T* destPtr, int numElem, int srcFirstElem=0, bool waitForCompletion=true) const
void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const
{
b3Assert(numElem+srcFirstElem <= capacity());
@@ -260,7 +274,7 @@ public:
void copyFromOpenCLArray(const b3OpenCLArray& src)
{
int newSize = src.size();
size_t newSize = src.size();
resize(newSize);
if (size())
{

View File

@@ -19,22 +19,17 @@ struct b3Config
int m_maxTriConvexPairCapacity;
/// Fills in the default capacity limits.
/// The initializer list sets the independent limits; the constructor body
/// then derives the remaining ones from m_maxConvexBodies.
b3Config()
#ifdef __APPLE__
:m_maxConvexBodies(32*1024),
#else
:m_maxConvexBodies(32*1024),
#endif
m_maxConvexShapes(81920),
:m_maxConvexBodies(128*1024),
m_maxVerticesPerFace(64),
m_maxFacesPerShape(64),
m_maxConvexVertices(8192000),
m_maxConvexIndices(8192000),
m_maxConvexUniqueEdges(819200),
m_maxCompoundChildShapes(81920),
m_maxTriConvexPairCapacity(512*1024)
//m_maxTriConvexPairCapacity(256*1024)
m_maxFacesPerShape(12),
m_maxConvexVertices(8192),
m_maxConvexIndices(81920),
m_maxConvexUniqueEdges(8192),
m_maxCompoundChildShapes(8192),
m_maxTriConvexPairCapacity(256*1024)
{
// limits below scale with the number of convex bodies
m_maxBroadphasePairs = 16*m_maxConvexBodies;
m_maxConvexShapes = m_maxConvexBodies;
m_maxBroadphasePairs = 8*m_maxConvexBodies;
// contact capacity is kept equal to the broadphase pair capacity
m_maxContactCapacity = m_maxBroadphasePairs;
}
};

View File

@@ -155,7 +155,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
} else
{
m_data->m_broadphaseSap->calculateOverlappingPairs();
m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
numPairs = m_data->m_broadphaseSap->getNumOverlap();
}
}

View File

@@ -16,6 +16,8 @@ subject to the following restrictions:
///original author: Erwin Coumans
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include <stdio.h>
cl_context g_cxMainContext;
@@ -71,6 +73,27 @@ int main(int argc, char* argv[])
b3OpenCLDeviceInfo devInfo;
b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
b3OpenCLUtils::printDeviceInfo(dev);
// Probe how large a single OpenCL buffer can get on this device: grow
// memTester one megabyte per iteration until resize() reports failure.
// The element type is char so that the element count passed to resize()
// equals the byte count; b3OpenCLArray allocates sizeof(T) bytes per
// element, so char* elements requested sizeof(char*) times more memory
// than the printed figure.
b3OpenCLArray<char> memTester(g_cxMainContext,g_cqCommandQue,0,true);
bool result=true;
for (size_t i=1;result;i++)
{
	size_t numBytes = i*1024*1024;
	// second argument is copyOldContents: old data is irrelevant for the probe
	result = memTester.resize(numBytes,false);
	// i is a size_t; convert explicitly so the argument matches %lu
	// (passing a size_t for "%d" is undefined where size_t is wider than int)
	const unsigned long megaBytes = static_cast<unsigned long>(i);
	if (result)
	{
		printf("allocated %lu MB successfully\n",megaBytes);
	} else
	{
		printf("allocated %lu MB failed\n", megaBytes);
	}
}
}
clReleaseContext(context);
@@ -101,6 +124,24 @@ int main(int argc, char* argv[])
oclCHECKERROR(ciErrNum, CL_SUCCESS);
//normally you would create and execute kernels using this command queue
// Probe how large a single OpenCL buffer can get with this command queue:
// grow memTester one megabyte per iteration until resize() reports failure.
// The element type is char so that the element count passed to resize()
// equals the byte count; b3OpenCLArray allocates sizeof(T) bytes per
// element, so char* elements requested sizeof(char*) times more memory
// than the printed figure.
b3OpenCLArray<char> memTester(g_cxMainContext,g_cqCommandQue,0,true);
bool result=true;
for (size_t i=1;result;i++)
{
	size_t numBytes = i*1024*1024;
	// second argument is copyOldContents: old data is irrelevant for the probe
	result = memTester.resize(numBytes,false);
	// i is a size_t; convert explicitly so the argument matches %lu
	// (passing a size_t for "%d" is undefined where size_t is wider than int)
	const unsigned long megaBytes = static_cast<unsigned long>(i);
	if (result)
	{
		printf("allocated %lu MB successfully\n",megaBytes);
	} else
	{
		printf("allocated %lu MB failed\n", megaBytes);
	}
}
clReleaseCommandQueue(g_cqCommandQue);
}