From f73d11d896985c88e54058296ebdfd5b8f739cf6 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Fri, 14 Jun 2013 12:10:16 -0700 Subject: [PATCH 1/2] tweak default values of b3Config.h so some laptop GPUs can run 128k scenes return the result of resize use size_t instead of int for btOpenCLArray BasicInitialize demo will test how much memory can be allocated, using the btOpenCLArray --- .../ParallelPrimitives/b3OpenCLArray.h | 76 +++++++++++-------- src/Bullet3OpenCL/RigidBody/b3Config.h | 23 +++--- test/OpenCL/BasicInitialize/main.cpp | 41 ++++++++++ 3 files changed, 95 insertions(+), 45 deletions(-) diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h b/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h index 36ecd6126..7497ba70d 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h @@ -7,8 +7,8 @@ template class b3OpenCLArray { - int m_size; - int m_capacity; + size_t m_size; + size_t m_capacity; cl_mem m_clBuffer; cl_context m_clContext; @@ -30,14 +30,14 @@ class b3OpenCLArray b3OpenCLArray& operator=(const b3OpenCLArray& src); - B3_FORCE_INLINE int allocSize(int size) + B3_FORCE_INLINE size_t allocSize(size_t size) { return (size ? size*2 : 1); } public: - b3OpenCLArray(cl_context ctx, cl_command_queue queue, int initialCapacity=0, bool allowGrowingCapacity=true) + b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true) :m_size(0), m_capacity(0),m_clBuffer(0), m_clContext(ctx),m_commandQueue(queue), m_ownsMemory(true),m_allowGrowingCapacity(true) @@ -50,7 +50,7 @@ public: } ///this is an error-prone method with no error checking, be careful! - void setFromOpenCLBuffer(cl_mem buffer, int sizeInElements) + void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) { deallocate(); m_ownsMemory = false; @@ -81,18 +81,20 @@ public: m_capacity=0; } - B3_FORCE_INLINE void push_back(const T& _Val,bool waitForCompletion=true) + B3_FORCE_INLINE bool push_back(const T& _Val,bool waitForCompletion=true) { - int sz = size(); + bool result = true; + size_t sz = size(); if( sz == capacity() ) { - reserve( allocSize(size()) ); + result = reserve( allocSize(size()) ); } copyFromHostPointer(&_Val, 1, sz, waitForCompletion); m_size++; + return result; } - B3_FORCE_INLINE T forcedAt(int n) const + B3_FORCE_INLINE T forcedAt(size_t n) const { b3Assert(n>=0); b3Assert(n=0); b3Assert(n size()) { - reserve(newsize,copyOldContents); + result = reserve(newsize,copyOldContents); } //leave new data uninitialized (init in debug mode?) - //for (int i=curSize;i0); b3Assert(numElements<=m_size); - int srcOffsetBytes = sizeof(T)*firstElem; - int dstOffsetInBytes = sizeof(T)*dstOffsetInElems; + size_t srcOffsetBytes = sizeof(T)*firstElem; + size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems; status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination, srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 ); @@ -214,7 +228,7 @@ public: void copyFromHost(const b3AlignedObjectArray& srcArray, bool waitForCompletion=true) { - int newSize = srcArray.size(); + size_t newSize = srcArray.size(); bool copyOldContents = false; resize (newSize,copyOldContents); @@ -223,12 +237,12 @@ public: } - void copyFromHostPointer(const T* src, int numElems, int destFirstElem= 0, bool waitForCompletion=true) + void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true) { b3Assert(numElems+destFirstElem <= capacity()); cl_int status = 0; - int sizeInBytes=sizeof(T)*numElems; + size_t sizeInBytes=sizeof(T)*numElems; status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes, src, 0,0,0 ); b3Assert(status == CL_SUCCESS ); @@ -245,7 +259,7 @@ public: copyToHostPointer(&destArray[0], size(),0,waitForCompletion); } - void copyToHostPointer(T* destPtr, int numElem, int srcFirstElem=0, bool waitForCompletion=true) const + void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const { b3Assert(numElem+srcFirstElem <= capacity()); @@ -260,7 +274,7 @@ public: void copyFromOpenCLArray(const b3OpenCLArray& src) { - int newSize = src.size(); + size_t newSize = src.size(); resize(newSize); if (size()) { diff --git a/src/Bullet3OpenCL/RigidBody/b3Config.h b/src/Bullet3OpenCL/RigidBody/b3Config.h index 2886f6a7c..1d46f8f17 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Config.h +++ b/src/Bullet3OpenCL/RigidBody/b3Config.h @@ -19,22 +19,17 @@ struct b3Config int m_maxTriConvexPairCapacity; b3Config() -#ifdef __APPLE__ - :m_maxConvexBodies(32*1024), -#else - :m_maxConvexBodies(32*1024), -#endif - m_maxConvexShapes(81920), + :m_maxConvexBodies(128*1024), m_maxVerticesPerFace(64), - m_maxFacesPerShape(64), - m_maxConvexVertices(8192000), - m_maxConvexIndices(8192000), - m_maxConvexUniqueEdges(819200), - m_maxCompoundChildShapes(81920), - m_maxTriConvexPairCapacity(512*1024) - //m_maxTriConvexPairCapacity(256*1024) + m_maxFacesPerShape(12), + m_maxConvexVertices(8192), + m_maxConvexIndices(81920), + m_maxConvexUniqueEdges(8192), + m_maxCompoundChildShapes(8192), + m_maxTriConvexPairCapacity(256*1024) { - m_maxBroadphasePairs = 16*m_maxConvexBodies; + m_maxConvexShapes = m_maxConvexBodies; + m_maxBroadphasePairs = 8*m_maxConvexBodies; m_maxContactCapacity = m_maxBroadphasePairs; } }; diff --git a/test/OpenCL/BasicInitialize/main.cpp b/test/OpenCL/BasicInitialize/main.cpp index c85234125..5ead2bb9c 100644 --- a/test/OpenCL/BasicInitialize/main.cpp +++ b/test/OpenCL/BasicInitialize/main.cpp @@ -16,6 +16,8 @@ subject to the following restrictions: ///original author: Erwin Coumans #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" + #include cl_context g_cxMainContext; @@ -71,6 +73,27 @@ int main(int argc, char* argv[]) b3OpenCLDeviceInfo devInfo; b3OpenCLUtils::getDeviceInfo(dev,&devInfo); b3OpenCLUtils::printDeviceInfo(dev); + + + b3OpenCLArray memTester(g_cxMainContext,g_cqCommandQue,0,true); + int maxMem = 8192; + bool result=true; + for (size_t i=1;result;i++) + { + size_t numBytes = i*1024*1024; + result = memTester.resize(numBytes,false); + + if (result) + { + printf("allocated %d MB successfully\n",i); + } else + { + printf("allocated %d MB failed\n", i); + } + } + + + } clReleaseContext(context); @@ -101,6 +124,24 @@ int main(int argc, char* argv[]) oclCHECKERROR(ciErrNum, CL_SUCCESS); //normally you would create and execute kernels using this command queue + b3OpenCLArray memTester(g_cxMainContext,g_cqCommandQue,0,true); + int maxMem = 8192; + bool result=true; + for (size_t i=1;result;i++) + { + size_t numBytes = i*1024*1024; + result = memTester.resize(numBytes,false); + + if (result) + { + printf("allocated %d MB successfully\n",i); + } else + { + printf("allocated %d MB failed\n", i); + } + } + + clReleaseCommandQueue(g_cqCommandQue); } From 31282ab85ecc1a9e23384a3984f627fffefad89b Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Mon, 17 Jun 2013 13:23:41 -0700 Subject: [PATCH 2/2] don't crash if the maximum number of pairs is exceeded, but report an error using b3Error --- Demos3/GpuDemos/ParticleDemo.cpp | 2 +- Demos3/GpuDemos/broadphase/PairBench.cpp | 2 +- .../BroadphaseCollision/b3GpuSapBroadphase.cpp | 17 ++++++++++++----- .../BroadphaseCollision/b3GpuSapBroadphase.h | 4 ++-- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 2 +- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Demos3/GpuDemos/ParticleDemo.cpp b/Demos3/GpuDemos/ParticleDemo.cpp index e226b8778..cf8d95908 100644 --- a/Demos3/GpuDemos/ParticleDemo.cpp +++ b/Demos3/GpuDemos/ParticleDemo.cpp @@ -397,7 +397,7 @@ void ParticleDemo::clientMoveAndDisplay() cl_mem pairsGPU = 0; { - m_data->m_broadphaseGPU->calculateOverlappingPairs(); + m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numParticles); pairsGPU = m_data->m_broadphaseGPU->getOverlappingPairBuffer(); numPairsGPU = m_data->m_broadphaseGPU->getNumOverlap(); } diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index ed1d32ce4..f3d2077e6 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -308,7 +308,7 @@ void PairBench::clientMoveAndDisplay() } { B3_PROFILE("calculateOverlappingPairs"); - m_data->m_broadphaseGPU->calculateOverlappingPairs(); + m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numObjects); //int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); //printf("numPairs = %d\n", numPairs); } diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index a38c12ee1..d6cb05890 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -155,7 +155,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() } -void b3GpuSapBroadphase::calculateOverlappingPairsHost() +void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) { //test //if (m_currentBuffer>=0) @@ -236,6 +236,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost() } } + if (hostPairs.size() > maxPairs) + { + hostPairs.resize(maxPairs); + } if (hostPairs.size()) { @@ -262,7 +266,7 @@ void b3GpuSapBroadphase::reset() } -void b3GpuSapBroadphase::calculateOverlappingPairs() +void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { int axis = 0;//todo on GPU for now hardcode @@ -398,8 +402,6 @@ void b3GpuSapBroadphase::calculateOverlappingPairs() } - int maxPairsPerBody = 64; - int maxPairs = maxPairsPerBody * numSmallAabbs;//todo m_overlappingPairs.resize(maxPairs); b3OpenCLArray pairCount(m_context, m_queue); @@ -423,8 +425,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs() numPairs = pairCount.at(0); if (numPairs >maxPairs) + { + b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); numPairs =maxPairs; - + } } } if (m_gpuSmallSortedAabbs.size()) @@ -467,7 +471,10 @@ void b3GpuSapBroadphase::calculateOverlappingPairs() numPairs = pairCount.at(0); if (numPairs>maxPairs) + { + b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); numPairs = maxPairs; + } } #else diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h index b53d4b4b1..cf5709435 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h @@ -49,8 +49,8 @@ class b3GpuSapBroadphase b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q ); virtual ~b3GpuSapBroadphase(); - void calculateOverlappingPairs(); - void calculateOverlappingPairsHost(); + void calculateOverlappingPairs(int maxPairs); + void calculateOverlappingPairsHost(int maxPairs); void reset(); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 3e4a86a6b..29d2a8bee 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -144,7 +144,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); } else { - m_data->m_broadphaseSap->calculateOverlappingPairs(); + m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); numPairs = m_data->m_broadphaseSap->getNumOverlap(); } }