From a95b2a33bd8caa4deb35d68507eec90eb090b102 Mon Sep 17 00:00:00 2001 From: rponom Date: Thu, 14 May 2009 20:20:56 +0000 Subject: [PATCH] Added a possibility to build appGpu2dDemo with CUDA solver. It is disabled by default, see Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.h for instructions how to enable it Cleaned up Extras/CUDA folder and libbulletcuda project --- Demos/ConstraintDemo/ConstraintDemo.cpp | 69 +- Demos/Gpu2dDemo/BasicDemo.cpp | 39 +- Demos/Gpu2dDemo/BasicDemo.h | 6 +- Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h | 10 +- Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.cpp | 4 +- Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.h | 22 +- Extras/CUDA/README.txt | 13 - Extras/CUDA/bt3DGridBroadphase.cpp | 604 ------------ Extras/CUDA/bt3DGridBroadphaseFunc.h | 444 --------- Extras/CUDA/btCudaBroadphase.cpp | 58 +- Extras/CUDA/btCudaBroadphase.cu | 176 +--- Extras/CUDA/btCudaBroadphase.h | 123 +-- Extras/CUDA/btCudaBroadphaseKernel.h | 105 --- Extras/CUDA/btCudaDefines.h | 138 +++ Extras/CUDA/btCudaUtils.cu | 91 ++ .../CUDA}/btGpuDemo2dCudaFunc.cu | 10 +- Extras/CUDA/libbulletcuda.vcproj | 58 +- Extras/CUDA/param.cpp | 3 - Extras/CUDA/param.h | 226 ----- Extras/CUDA/paramgl.cpp | 209 ----- Extras/CUDA/paramgl.h | 54 -- Extras/CUDA/particleSystem.cpp | 806 ---------------- Extras/CUDA/particleSystem.cu | 336 ------- Extras/CUDA/particleSystem.cuh | 57 -- Extras/CUDA/particleSystem.h | 193 ---- Extras/CUDA/particles.cpp | 571 ------------ Extras/CUDA/particles.sln | 228 ----- Extras/CUDA/particles.vcproj | 880 ------------------ Extras/CUDA/particles_kernel.cu | 381 -------- Extras/CUDA/particles_kernel.cuh | 46 - Extras/CUDA/render_particles.cpp | 260 ------ Extras/CUDA/render_particles.h | 82 -- Extras/CUDA/shaders.cpp | 41 - Extras/CUDA/shaders.h | 2 - src/BulletMultiThreaded/btGpuUtils.cpp | 28 + 35 files changed, 528 insertions(+), 5845 deletions(-) delete mode 100644 Extras/CUDA/README.txt delete mode 100644 Extras/CUDA/bt3DGridBroadphase.cpp delete mode 100644 Extras/CUDA/bt3DGridBroadphaseFunc.h delete mode 100644 Extras/CUDA/btCudaBroadphaseKernel.h create mode 100644 Extras/CUDA/btCudaDefines.h create mode 100644 Extras/CUDA/btCudaUtils.cu rename {Demos/Gpu2dDemo => Extras/CUDA}/btGpuDemo2dCudaFunc.cu (87%) delete mode 100644 Extras/CUDA/param.cpp delete mode 100644 Extras/CUDA/param.h delete mode 100644 Extras/CUDA/paramgl.cpp delete mode 100644 Extras/CUDA/paramgl.h delete mode 100644 Extras/CUDA/particleSystem.cpp delete mode 100644 Extras/CUDA/particleSystem.cu delete mode 100644 Extras/CUDA/particleSystem.cuh delete mode 100644 Extras/CUDA/particleSystem.h delete mode 100644 Extras/CUDA/particles.cpp delete mode 100644 Extras/CUDA/particles.sln delete mode 100644 Extras/CUDA/particles.vcproj delete mode 100644 Extras/CUDA/particles_kernel.cu delete mode 100644 Extras/CUDA/particles_kernel.cuh delete mode 100644 Extras/CUDA/render_particles.cpp delete mode 100644 Extras/CUDA/render_particles.h delete mode 100644 Extras/CUDA/shaders.cpp delete mode 100644 Extras/CUDA/shaders.h create mode 100644 src/BulletMultiThreaded/btGpuUtils.cpp diff --git a/Demos/ConstraintDemo/ConstraintDemo.cpp b/Demos/ConstraintDemo/ConstraintDemo.cpp index f1436e27d..7372a973b 100644 --- a/Demos/ConstraintDemo/ConstraintDemo.cpp +++ b/Demos/ConstraintDemo/ConstraintDemo.cpp @@ -97,7 +97,7 @@ void ConstraintDemo::initPhysics() trans.setOrigin(btVector3(0,20,0)); float mass = 1.f; -#if 1 +#if 0 //point to point constraint (ball socket) { btRigidBody* body0 = localCreateRigidBody( mass,trans,shape); @@ -135,7 +135,7 @@ void ConstraintDemo::initPhysics() } #endif -#if 1 +#if 0 //create a slider, using the generic D6 constraint { mass = 1.f; @@ -179,7 +179,7 @@ void ConstraintDemo::initPhysics() } #endif -#if 1 +#if 0 { // create a door using hinge constraint attached to the world btCollisionShape* pDoorShape = new btBoxShape(btVector3(2.0f, 5.0f, 0.2f)); m_collisionShapes.push_back(pDoorShape); @@ -201,7 +201,7 @@ void ConstraintDemo::initPhysics() //btRigidBody* pDropBody = localCreateRigidBody( 10.0, doorTrans, shape); } #endif -#if 1 +#if 0 { // create a generic 6DOF constraint btTransform tr; @@ -262,7 +262,7 @@ void ConstraintDemo::initPhysics() pGen6DOF->setDbgDrawSize(btScalar(5.f)); } #endif -#if 1 +#if 0 { // create a ConeTwist constraint btTransform tr; @@ -294,7 +294,7 @@ void ConstraintDemo::initPhysics() pCT->setDbgDrawSize(btScalar(5.f)); } #endif -#if 1 +#if 0 { // Hinge connected to the world, with motor (to hinge motor with new and old constraint solver) btTransform tr; tr.setIdentity(); @@ -311,6 +311,63 @@ void ConstraintDemo::initPhysics() pHinge->setDbgDrawSize(btScalar(5.f)); } #endif + +#if 1 + { + // create a universal joint using generic 6DOF constraint + // create two rigid bodies + // static bodyA (parent) on top: + btTransform tr; + tr.setIdentity(); + tr.setOrigin(btVector3(btScalar(0.), btScalar(4.), btScalar(0.))); + btRigidBody* pBodyA = localCreateRigidBody( 0.0, tr, shape); + pBodyA->setActivationState(DISABLE_DEACTIVATION); + // dynamic bodyB (child) below it : + tr.setIdentity(); + tr.setOrigin(btVector3(btScalar(0.), btScalar(0.), btScalar(0.))); + btRigidBody* pBodyB = localCreateRigidBody(1.0, tr, shape); + pBodyB->setActivationState(DISABLE_DEACTIVATION); + // add some (arbitrary) data to build constraint frames + btVector3 parentAxis(1.f, 0.f, 0.f); + btVector3 childAxis(0.f, 0.f, 1.f); + btVector3 anchor(0.f, 2.f, 0.f); + // build frame basis + // 6DOF constraint uses Euler angles and to define limits + // it is assumed that rotational order is : + // Z - first, allowed limits are (-PI,PI); + // new position of Y - second (allowed limits are (-PI/2 + epsilon, PI/2 - epsilon), where epsilon is a small positive number + // used to prevent constraint from instability on poles; + // new position of X, allowed limits are (-PI,PI); + // So to simulate ODE Universal joint we should use parent axis as Z, child axis as Y and limit all other DOFs + // Build the frame in world coordinate system first + btVector3 zAxis = parentAxis.normalize(); + btVector3 yAxis = childAxis.normalize(); + btVector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system + btTransform frameInW; + frameInW.setIdentity(); + frameInW.getBasis().setValue( xAxis[0], yAxis[0], zAxis[0], + xAxis[1], yAxis[1], zAxis[1], + xAxis[2], yAxis[2], zAxis[2]); + frameInW.setOrigin(anchor); + // now get constraint frame in local coordinate systems + btTransform frameInA = pBodyA->getCenterOfMassTransform().inverse() * frameInW; + btTransform frameInB = pBodyB->getCenterOfMassTransform().inverse() * frameInW; + // now create the constraint + btGeneric6DofConstraint* pGen6DOF = new btGeneric6DofConstraint(*pBodyA, *pBodyB, frameInA, frameInB, true); + // linear limits in our case are allowed offset of origin of frameInB in frameInA, so set them to zero + pGen6DOF->setLinearLowerLimit(btVector3(0., 0., 0.)); + pGen6DOF->setLinearUpperLimit(btVector3(0., 0., 0.)); + // set limits for parent (axis z) and child (axis Y) + pGen6DOF->setAngularLowerLimit(btVector3(0.f, -SIMD_HALF_PI * 0.5f, -SIMD_HALF_PI * 0.5f)); + pGen6DOF->setAngularUpperLimit(btVector3(0.f, SIMD_HALF_PI * 0.5f, SIMD_HALF_PI * 0.5f)); + // add constraint to world + m_dynamicsWorld->addConstraint(pGen6DOF, true); + // draw constraint frames and limits for debugging + pGen6DOF->setDbgDrawSize(btScalar(10.f)); + } +#endif + + } ConstraintDemo::~ConstraintDemo() diff --git a/Demos/Gpu2dDemo/BasicDemo.cpp b/Demos/Gpu2dDemo/BasicDemo.cpp index ba82cd3fd..8a1a65ea0 100644 --- a/Demos/Gpu2dDemo/BasicDemo.cpp +++ b/Demos/Gpu2dDemo/BasicDemo.cpp @@ -13,9 +13,9 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/btGpuDefines.h" -#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" -#include "BulletMultiThreaded/btGpuUtilsSharedCode.h" +//#include "BulletMultiThreaded/btGpuDefines.h" +//#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" +//#include "BulletMultiThreaded/btGpuUtilsSharedCode.h" //---------------------------------------------------------------------------------------- @@ -144,6 +144,7 @@ public: } btMultiSphereShape* multiSphere = new btMultiSphereShape(inertiaHalfExtents,positions,radii,numSpheres); + m_demo->addCollisionShape(multiSphere); btVector3 localInertia(0,0,0); if (mass) @@ -241,6 +242,13 @@ void BasicDemo::displayCallback(void) { btOverlappingPairCache* gPairCache; +static btScalar fRandMinMax(btScalar fMin, btScalar fMax) +{ + btScalar fr = btScalar(rand()) / btScalar(RAND_MAX); + return fMax - (fMax - fMin) * fr; +} + + void BasicDemo::initPhysics() { setTexturing(false); @@ -363,6 +371,31 @@ void BasicDemo::initPhysics() { loader.processFile("../../test1.oec"); } +#if 0 // perfomance test : work-in-progress + { // add more object, but share their shapes + int numNewObjects = 500; + mass = 1.f; + for(int n_obj = 0; n_obj < numNewObjects; n_obj++) + { + btDefaultMotionState* myMotionState= 0; + btVector3 localInertia(0,0,0); + btTransform worldTransform; + worldTransform.setIdentity(); + btScalar fx = fRandMinMax(-30., 30.); + btScalar fy = fRandMinMax(5., 30.); + worldTransform.setOrigin(btVector3(fx, fy, 0.f)); + int sz = m_collisionShapes.size(); + btMultiSphereShape* multiSphere = (btMultiSphereShape*)m_collisionShapes[1]; + myMotionState = new btDefaultMotionState(worldTransform); + multiSphere->calculateLocalInertia(mass, localInertia); + btRigidBody* body = new btRigidBody(mass,myMotionState,multiSphere,localInertia); + body->setLinearFactor(btVector3(1,1,0)); + body->setAngularFactor(btVector3(0,0,1)); + body->setWorldTransform(worldTransform); + getDynamicsWorld()->addRigidBody(body); + } + } +#endif #else #if (!SPEC_TEST) diff --git a/Demos/Gpu2dDemo/BasicDemo.h b/Demos/Gpu2dDemo/BasicDemo.h index 64b9387c0..642a36e05 100644 --- a/Demos/Gpu2dDemo/BasicDemo.h +++ b/Demos/Gpu2dDemo/BasicDemo.h @@ -21,8 +21,8 @@ subject to the following restrictions: #ifdef BT_USE_CUDA -#include "btCudaDemoPairCache.h" -#include +//#include "btCudaDemoPairCache.h" +//#include #endif //BT_USE_CUDA class btBroadphaseInterface; @@ -88,7 +88,7 @@ class BasicDemo : public GlutDemoApplication void DrawConstraintInfo(); void outputDebugInfo(int & xOffset,int & yStart, int yIncr); virtual void renderme(); - + void addCollisionShape(btCollisionShape* pShape) { m_collisionShapes.push_back(pShape); } }; diff --git a/Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h b/Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h index bcfece161..2e3636640 100644 --- a/Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h +++ b/Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h @@ -19,7 +19,7 @@ subject to the following restrictions: #define FRICTION_BOX_GROUND_FACT 0.05f #define FRICTION_BOX_BOX_FACT 0.05f #define USE_CENTERS 1 -#include "LinearMath/btMinMax.h" +//#include "LinearMath/btMinMax.h" //------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------ @@ -187,7 +187,8 @@ BT_GPU___device__ float computeImpulse1(float3 rVel, if(positionConstraint > 0) return lambdaDt; - positionConstraint = btMin(0.0f,positionConstraint+penetrationError); +// positionConstraint = btMin(0.0f,positionConstraint+penetrationError); + positionConstraint = (positionConstraint+penetrationError) < 0.f ? (positionConstraint+penetrationError) : 0.0f; lambdaDt = -(BT_GPU_dot(cNormal,rVel)*(1+collisionConstant)); lambdaDt -= (baumgarteConstant/dt*positionConstraint); @@ -328,8 +329,6 @@ BT_GPU___device__ void collisionResolutionBox( int constrId, int bId=constraints[constrId].y; float3 aPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,aId)); float3 bPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,bId)); - float aRot=rotation[aId]; - float bRot=rotation[bId]; float3 aVel=BT_GPU_make_float34(vel[aId]); float3 bVel=BT_GPU_make_float34(vel[bId]); float aAngVel=angularVel[aId]; @@ -354,7 +353,8 @@ BT_GPU___device__ void collisionResolutionBox( int constrId, { float rLambdaDt=lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx]; float pLambdaDt=rLambdaDt; - rLambdaDt=btMax(pLambdaDt+lambdaDt,0.0f); +// rLambdaDt=btMax(pLambdaDt+lambdaDt,0.0f); + rLambdaDt=(pLambdaDt+lambdaDt) > 0.0f ? (pLambdaDt+lambdaDt) : 0.0f; lambdaDt=rLambdaDt-pLambdaDt; lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx]=rLambdaDt; } diff --git a/Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.cpp b/Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.cpp index 555ea247a..ecc5535a2 100644 --- a/Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.cpp +++ b/Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.cpp @@ -413,7 +413,7 @@ void btGpuDemoDynamicsWorld::solveConstraints2(btContactSolverInfo& solverInfo) for(int i=0;i -#define BT_GPU_PREF(func) btCuda_##func -#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" +// #include "btCudaDemoPairCache.h" +// #include + #include "BulletMultiThreaded/btGpuDefines.h" + #undef BT_GPU_PREF + #define BT_GPU_PREF(func) btCuda_##func + #include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" #else -#include "BulletMultiThreaded/btGpuDefines.h" -#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" + #include "BulletMultiThreaded/btGpuDefines.h" + #include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" #endif #undef BT_GPU_PREF + + //---------------------------------------------------------------------------------------- diff --git a/Extras/CUDA/README.txt b/Extras/CUDA/README.txt deleted file mode 100644 index 9eb1b409e..000000000 --- a/Extras/CUDA/README.txt +++ /dev/null @@ -1,13 +0,0 @@ -btCudaBroadphase is some R&D work, a first attempt to use CUDA in Bullet. -It uses the NVidia CUDA particle demo grid broadphase as Bullet broadphase. - -Press 's' to toggle between the original CUDA particle demo and using Bullet+btCudaBroadphase. -Press '1', '2','3','4' for different start configurations. -Right click for options menu. - -See some related discussion here: -http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=500&start=105 - - -Get NVidia CUDA from here: -http://www.nvidia.com/object/cuda_get.html \ No newline at end of file diff --git a/Extras/CUDA/bt3DGridBroadphase.cpp b/Extras/CUDA/bt3DGridBroadphase.cpp deleted file mode 100644 index b46170e88..000000000 --- a/Extras/CUDA/bt3DGridBroadphase.cpp +++ /dev/null @@ -1,604 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//-------------------------------------------------------------------------- - -#include "LinearMath/btAlignedAllocator.h" -#include "LinearMath/btQuickprof.h" -#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" -#include "btCudaBroadphaseKernel.h" -#include "btCudaBroadphase.h" - -//-------------------------------------------------------------------------- - -#include - -//-------------------------------------------------------------------------- - -static btCudaBroadphaseParams s3DGridBroadphaseParams; - -//-------------------------------------------------------------------------- - -bt3DGridBroadphase::bt3DGridBroadphase( const btVector3& worldAabbMin,const btVector3& worldAabbMax, - int gridSizeX, int gridSizeY, int gridSizeZ, - int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, - int maxBodiesPerCell, - btScalar cellFactorAABB) : - btSimpleBroadphase(maxSmallProxies, -// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache), - new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache), - m_bInitialized(false), - m_numBodies(0) -{ - m_ownsPairCache = true; - m_params.m_gridSizeX = gridSizeX; - m_params.m_gridSizeY = gridSizeY; - m_params.m_gridSizeZ = gridSizeZ; - m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ; - btVector3 w_org = worldAabbMin; - m_params.m_worldOriginX = w_org.getX(); - m_params.m_worldOriginY = w_org.getY(); - m_params.m_worldOriginZ = w_org.getZ(); - btVector3 w_size = worldAabbMax - worldAabbMin; - m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX; - m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY; - m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ; - m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ); - m_maxRadius *= btScalar(0.5f); - m_params.m_numBodies = m_numBodies; - m_params.m_maxBodiesPerCell = maxBodiesPerCell; - - m_numLargeHandles = 0; - m_maxLargeHandles = maxLargeProxies; - - m_maxPairsPerBody = maxPairsPerBody; - - m_cellFactorAABB = cellFactorAABB; - - m_LastLargeHandleIndex = -1; - - _initialize(); -} // bt3DGridBroadphase::bt3DGridBroadphase() - -//-------------------------------------------------------------------------- - -bt3DGridBroadphase::~bt3DGridBroadphase() -{ - //btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache - assert(m_bInitialized); - _finalize(); -} // bt3DGridBroadphase::~bt3DGridBroadphase() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::_initialize() -{ - assert(!m_bInitialized); - // allocate host storage - m_hBodiesHash = new unsigned int[m_maxHandles * 2]; - memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int)); - - m_hCellStart = new unsigned int[m_params.m_numCells]; - memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int)); - - m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2]; - // --------------- for now, init with m_maxPairsPerBody for each body - m_hPairBuffStartCurr[0] = 0; - m_hPairBuffStartCurr[1] = 0; - for(int i = 1; i <= m_maxHandles; i++) - { - m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; - m_hPairBuffStartCurr[i * 2 + 1] = 0; - } - //---------------- - unsigned int numAABB = m_maxHandles + m_maxLargeHandles; - m_hAABB = new btCuda3F1U[numAABB * 2]; // AABB Min & Max - - m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody]; - memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed? - - m_hPairScan = new unsigned int[m_maxHandles + 1]; - - m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody]; - -// large proxies - - // allocate handles buffer and put all handles on free list - m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16); - m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles]; - m_firstFreeLargeHandle = 0; - { - for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++) - { - m_pLargeHandles[i].SetNextFree(i + 1); - m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i; - } - m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0); - } - -// debug data - m_numPairsAdded = 0; - m_numOverflows = 0; - - m_bInitialized = true; -} // bt3DGridBroadphase::_initialize() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::_finalize() -{ - assert(m_bInitialized); - delete [] m_hBodiesHash; - delete [] m_hCellStart; - delete [] m_hPairBuffStartCurr; - delete [] m_hAABB; - delete [] m_hPairBuff; - delete [] m_hPairScan; - delete [] m_hPairOut; - btAlignedFree(m_pLargeHandlesRawPtr); - m_bInitialized = false; -} // bt3DGridBroadphase::_finalize() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher) -{ - if(m_numHandles <= 0) - { - BT_PROFILE("addLarge2LargePairsToCache"); - addLarge2LargePairsToCache(dispatcher); - return; - } - // update constants - setParameters(&m_params); - // prepare AABB array - prepareAABB(); - // calculate hash - calcHashAABB(); - // sort bodies based on hash - sortHash(); - // find start of each cell - findCellStart(); - // findOverlappingPairs (small/small) - findOverlappingPairs(); - // findOverlappingPairs (small/large) - findPairsLarge(); - // add pairs to CPU cache - computePairCacheChanges(); - scanOverlappingPairBuff(); - squeezeOverlappingPairBuff(); - addPairsToCache(dispatcher); - // find and add large/large pairs to CPU cache - addLarge2LargePairsToCache(dispatcher); - return; -} // bt3DGridBroadphase::calculateOverlappingPairs() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher) -{ - m_numPairsAdded = 0; - m_numPairsRemoved = 0; - for(int i = 0; i < m_numHandles; i++) - { - unsigned int num = m_hPairScan[i+1] - m_hPairScan[i]; - if(!num) - { - continue; - } - unsigned int* pInp = m_hPairOut + m_hPairScan[i]; - unsigned int index0 = m_hAABB[i * 2].uw; - btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0]; - for(unsigned int j = 0; j < num; j++) - { - unsigned int indx1_s = pInp[j]; - unsigned int index1 = indx1_s & (~BT_CUDA_PAIR_ANY_FLG); - btSimpleBroadphaseProxy* proxy1; - if(index1 < (unsigned int)m_maxHandles) - { - proxy1 = &m_pHandles[index1]; - } - else - { - index1 -= m_maxHandles; - btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles)); - proxy1 = &m_pLargeHandles[index1]; - } - if(indx1_s & BT_CUDA_PAIR_NEW_FLG) - { - m_pairCache->addOverlappingPair(proxy0,proxy1); - m_numPairsAdded++; - } - else - { - m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); - m_numPairsRemoved++; - } - } - } -} // bt3DGridBroadphase::addPairsToCache() - -//-------------------------------------------------------------------------- - -btBroadphaseProxy* bt3DGridBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) -{ - btBroadphaseProxy* proxy; - bool bIsLarge = isLargeProxy(aabbMin, aabbMax); - if(bIsLarge) - { - if (m_numLargeHandles >= m_maxLargeHandles) - { - btAssert(0); - return 0; //should never happen, but don't let the game crash ;-) - } - btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2])); - int newHandleIndex = allocLargeHandle(); - proxy = new (&m_pLargeHandles[newHandleIndex])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy); - } - else - { - proxy = btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy); - } - return proxy; -} // bt3DGridBroadphase::createProxy() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher) -{ - bool bIsLarge = isLargeProxy(proxy); - if(bIsLarge) - { - - btSimpleBroadphaseProxy* proxy0 = static_cast(proxy); - freeLargeHandle(proxy0); - m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher); - } - else - { - btSimpleBroadphase::destroyProxy(proxy, dispatcher); - } - return; -} // bt3DGridBroadphase::destroyProxy() - -//-------------------------------------------------------------------------- - -bool bt3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax) -{ - btVector3 diag = aabbMax - aabbMin; - btScalar radius = diag.length() * btScalar(0.5f); - - radius *= m_cellFactorAABB; // user-defined factor - - return (radius > m_maxRadius); -} // bt3DGridBroadphase::isLargeProxy() - -//-------------------------------------------------------------------------- - -bool bt3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy) -{ - return (proxy->getUid() >= (m_maxHandles+2)); -} // bt3DGridBroadphase::isLargeProxy() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher) -{ - int i,j; - if (m_numLargeHandles <= 0) - { - return; - } - int new_largest_index = -1; - for(i = 0; i <= m_LastLargeHandleIndex; i++) - { - btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; - if(!proxy0->m_clientObject) - { - continue; - } - new_largest_index = i; - for(j = i + 1; j <= m_LastLargeHandleIndex; j++) - { - btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[j]; - if(!proxy1->m_clientObject) - { - continue; - } - btAssert(proxy0 != proxy1); - btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0); - btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1); - if(aabbOverlap(p0,p1)) - { - if (!m_pairCache->findPair(proxy0,proxy1)) - { - m_pairCache->addOverlappingPair(proxy0,proxy1); - } - } - else - { - if(m_pairCache->findPair(proxy0,proxy1)) - { - m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); - } - } - } - } - m_LastLargeHandleIndex = new_largest_index; - return; -} // bt3DGridBroadphase::addLarge2LargePairsToCache() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback) -{ - btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback); - for (int i=0; i <= m_LastLargeHandleIndex; i++) - { - btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i]; - if(!proxy->m_clientObject) - { - continue; - } - rayCallback.process(proxy); - } -} // bt3DGridBroadphase::rayTest() - -//-------------------------------------------------------------------------- -//-------------------------------------------------------------------------- -// -// overrides for CPU version -// -//-------------------------------------------------------------------------- -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::prepareAABB() -{ - BT_PROFILE("prepareAABB"); - btCuda3F1U* pBB = m_hAABB; - int i; - int new_largest_index = -1; - unsigned int num_small = 0; - for(i = 0; i <= m_LastHandleIndex; i++) - { - btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i]; - if(!proxy0->m_clientObject) - { - continue; - } - new_largest_index = i; - pBB->fx = proxy0->m_aabbMin.getX(); - pBB->fy = proxy0->m_aabbMin.getY(); - pBB->fz = proxy0->m_aabbMin.getZ(); - pBB->uw = i; - pBB++; - pBB->fx = proxy0->m_aabbMax.getX(); - pBB->fy = proxy0->m_aabbMax.getY(); - pBB->fz = proxy0->m_aabbMax.getZ(); - pBB->uw = num_small; - pBB++; - num_small++; - } - m_LastHandleIndex = new_largest_index; - new_largest_index = -1; - unsigned int num_large = 0; - for(i = 0; i <= m_LastLargeHandleIndex; i++) - { - btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; - if(!proxy0->m_clientObject) - { - continue; - } - new_largest_index = i; - pBB->fx = proxy0->m_aabbMin.getX(); - pBB->fy = proxy0->m_aabbMin.getY(); - pBB->fz = proxy0->m_aabbMin.getZ(); - pBB->uw = i + m_maxHandles; - pBB++; - pBB->fx = proxy0->m_aabbMax.getX(); - pBB->fy = proxy0->m_aabbMax.getY(); - pBB->fz = proxy0->m_aabbMax.getZ(); - pBB->uw = num_large + m_maxHandles; - pBB++; - num_large++; - } - m_LastLargeHandleIndex = new_largest_index; - // paranoid checks - btAssert(num_small == m_numHandles); - btAssert(num_large == m_numLargeHandles); - return; -} // bt3DGridBroadphase::prepareAABB() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::setParameters(btCudaBroadphaseParams* hostParams) -{ - s3DGridBroadphaseParams = *hostParams; - return; -} // bt3DGridBroadphase::setParameters() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::calcHashAABB() -{ - BT_PROFILE("bt3DGrid_calcHashAABB"); - bt3DGrid_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles); - return; -} // bt3DGridBroadphase::calcHashAABB() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::sortHash() -{ - class bt3DGridHashKey - { - public: - unsigned int hash; - unsigned int index; - void quickSort(bt3DGridHashKey* pData, int lo, int hi) - { - int i=lo, j=hi; - bt3DGridHashKey x = pData[(lo+hi)/2]; - do - { - while(pData[i].hash > x.hash) i++; - while(x.hash > pData[j].hash) j--; - if(i <= j) - { - bt3DGridHashKey t = pData[i]; - pData[i] = pData[j]; - pData[j] = t; - i++; j--; - } - } while(i <= j); - if(lo < j) pData->quickSort(pData, lo, j); - if(i < hi) pData->quickSort(pData, i, hi); - } - }; - BT_PROFILE("bt3DGrid_sortHash"); - bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash; - pHash->quickSort(pHash, 0, m_numHandles - 1); - return; -} // bt3DGridBroadphase::sortHash() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::findCellStart() -{ - BT_PROFILE("bt3DGrid_findCellStart"); - bt3DGrid_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells); - return; -} // bt3DGridBroadphase::findCellStart() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::findOverlappingPairs() -{ - BT_PROFILE("bt3DGrid_findOverlappingPairs"); - bt3DGrid_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles); - return; -} // bt3DGridBroadphase::findOverlappingPairs() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::findPairsLarge() -{ - BT_PROFILE("bt3DGrid_findPairsLarge"); - bt3DGrid_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles); - return; -} // bt3DGridBroadphase::findPairsLarge() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::computePairCacheChanges() -{ - BT_PROFILE("bt3DGrid_computePairCacheChanges"); - bt3DGrid_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles); - return; -} // bt3DGridBroadphase::computePairCacheChanges() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::scanOverlappingPairBuff() -{ - BT_PROFILE("bt3DGrid_scanOverlappingPairBuff"); - m_hPairScan[0] = 0; - for(int i = 1; i <= m_numHandles; i++) - { - unsigned int delta = m_hPairScan[i]; - m_hPairScan[i] = m_hPairScan[i-1] + delta; - } - return; -} // bt3DGridBroadphase::scanOverlappingPairBuff() - -//-------------------------------------------------------------------------- - -void bt3DGridBroadphase::squeezeOverlappingPairBuff() -{ - BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff"); - bt3DGrid_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles); - return; -} // bt3DGridBroadphase::squeezeOverlappingPairBuff() - -//-------------------------------------------------------------------------- - -typedef unsigned int uint; - -struct uint2 -{ - unsigned int x, y; -}; - -struct int3 -{ - int x, y, z; -}; - -struct uint3 -{ - unsigned int x, y, z; -}; - -struct float4 -{ - float x, y, z, w; -}; - - -#define BT3DGRID__device__ inline -#define BT3DGRIDmax(a, b) ((a) > (b) ? (a) : (b)) -#define BT3DGRIDmin(a, b) ((a) < (b) ? (a) : (b)) -#define BT3DGRIDparams s3DGridBroadphaseParams -#define BT3DGRID__mul24(a, b) ((a)*(b)) -#define BT3DGRID__global__ inline -#define BT3DGRID__shared__ static -#define BT3DGRID__syncthreads() - - -static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y) -{ - uint2 t; t.x = x; t.y = y; return t; -} -#define BT3DGRIDmake_uint2(x, y) bt3dGrid_make_uint2(x, y) - -static inline int3 bt3dGrid_make_int3(int x, int y, int z) -{ - int3 t; t.x = x; t.y = y; t.z = z; return t; -} -inline int3 operator+(int3 a, int3 b) -{ - return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -} -#define BT3DGRIDmake_int3(x, y, z) bt3dGrid_make_int3(x, y, z) - -#define BT3DGRIDFETCH(a, b) a[b] -#define BT3DGRIDPREF(func) bt3DGrid_##func -#define MY_CUDA_SAFE_CALL(func) func -#define BT3DGPRDMemset memset - -static uint2 s_blockIdx, s_blockDim, s_threadIdx; -#define BT3DGRIDblockIdx s_blockIdx -#define BT3DGRIDblockDim s_blockDim -#define BT3DGRIDthreadIdx s_threadIdx -#define BT3DGRIDEXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb= (int)numBodies) - { - return; - } - btCuda3F1U bbMin = pAABB[index*2]; - btCuda3F1U bbMax = pAABB[index*2 + 1]; - float4 pos; - pos.x = (bbMin.fx + bbMax.fx) * 0.5f; - pos.y = (bbMin.fy + bbMax.fy) * 0.5f; - pos.z = (bbMin.fz + bbMax.fz) * 0.5f; - // get address in grid - int3 gridPos = btCuda_calcGridPos(pos); - uint gridHash = btCuda_calcGridHash(gridPos); - // store grid hash and body index - pHash[index] = BT3DGRIDmake_uint2(gridHash, index); -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies) -{ - int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x; - if(index >= (int)numBodies) - { - return; - } - uint2 sortedData = pHash[index]; - // Load hash data into shared memory so that we can look - // at neighboring body's hash value without loading - // two hash values per thread - BT3DGRID__shared__ uint sharedHash[257]; - sharedHash[BT3DGRIDthreadIdx.x+1] = sortedData.x; - if((index > 0) && (BT3DGRIDthreadIdx.x == 0)) - { - // first thread in block must load neighbor body hash - volatile uint2 prevData = pHash[index-1]; - sharedHash[0] = prevData.x; - } - BT3DGRID__syncthreads(); - if((index == 0) || (sortedData.x != sharedHash[BT3DGRIDthreadIdx.x])) - { - cellStart[sortedData.x] = index; - } -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__device__ uint cudaTestAABBOverlap(btCuda3F1U min0, btCuda3F1U max0, btCuda3F1U min1, btCuda3F1U max1) -{ - return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && - (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && - (min0.fz <= max1.fz)&& (min1.fz <= max0.fz); -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__device__ void findPairsInCell(int3 gridPos, - uint index, - uint2* pHash, - uint* pCellStart, - btCuda3F1U* pAABB, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numBodies) -{ - if ( (gridPos.x < 0) || (gridPos.x > (int)BT3DGRIDparams.m_gridSizeX - 1) - || (gridPos.y < 0) || (gridPos.y > (int)BT3DGRIDparams.m_gridSizeY - 1) - || (gridPos.z < 0) || (gridPos.z > (int)BT3DGRIDparams.m_gridSizeZ - 1)) - { - return; - } - uint gridHash = btCuda_calcGridHash(gridPos); - // get start of bucket for this cell - uint bucketStart = pCellStart[gridHash]; - if (bucketStart == 0xffffffff) - { - return; // cell empty - } - // iterate over bodies in this cell - uint2 sortedData = pHash[index]; - uint unsorted_indx = sortedData.y; - btCuda3F1U min0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2); - btCuda3F1U max0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1); - uint handleIndex = min0.uw; - uint2 start_curr = pPairBuffStartCurr[handleIndex]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; - uint curr_max = start_curr_next.x - start - 1; - uint bucketEnd = bucketStart + BT3DGRIDparams.m_maxBodiesPerCell; - bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd; - for(uint index2 = bucketStart; index2 < bucketEnd; index2++) - { - uint2 cellData = pHash[index2]; - if (cellData.x != gridHash) - { - break; // no longer in same bucket - } - uint unsorted_indx2 = cellData.y; - if (unsorted_indx2 < unsorted_indx) // check not colliding with self - { - btCuda3F1U min1 = BT3DGRIDFETCH(pAABB, unsorted_indx2*2); - btCuda3F1U max1 = BT3DGRIDFETCH(pAABB, unsorted_indx2*2 + 1); - if(cudaTestAABBOverlap(min0, max0, min1, max1)) - { - uint handleIndex2 = min1.uw; - uint k; - for(k = 0; k < curr; k++) - { - uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); - if(old_pair == handleIndex2) - { - pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; - break; - } - } - if(k == curr) - { - pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG; - if(curr >= curr_max) - { // not a good solution, but let's avoid crash - break; - } - curr++; - } - } - } - } - pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, curr); - return; -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__global__ void -findOverlappingPairsD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, - uint2* pPairBuffStartCurr, uint numBodies) -{ - int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x; - if(index >= (int)numBodies) - { - return; - } - uint2 sortedData = pHash[index]; - uint unsorted_indx = sortedData.y; - btCuda3F1U bbMin = BT3DGRIDFETCH(pAABB, unsorted_indx*2); - btCuda3F1U bbMax = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1); - float4 pos; - pos.x = (bbMin.fx + bbMax.fx) * 0.5f; - pos.y = (bbMin.fy + bbMax.fy) * 0.5f; - pos.z = (bbMin.fz + bbMax.fz) * 0.5f; - // get address in grid - int3 gridPos = btCuda_calcGridPos(pos); - // examine only neighbouring cells - for(int z=-1; z<=1; z++) { - for(int y=-1; y<=1; y++) { - for(int x=-1; x<=1; x++) { - findPairsInCell(gridPos + BT3DGRIDmake_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies); - } - } - } -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__global__ void -findPairsLargeD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, - uint2* pPairBuffStartCurr, uint numBodies, uint numLarge) -{ - int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x; - if(index >= (int)numBodies) - { - return; - } - uint2 sortedData = pHash[index]; - uint unsorted_indx = sortedData.y; - btCuda3F1U min0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2); - btCuda3F1U max0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1); - uint handleIndex = min0.uw; - uint2 start_curr = pPairBuffStartCurr[handleIndex]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; - uint curr_max = start_curr_next.x - start - 1; - for(uint i = 0; i < numLarge; i++) - { - uint indx2 = numBodies + i; - btCuda3F1U min1 = BT3DGRIDFETCH(pAABB, indx2*2); - btCuda3F1U max1 = BT3DGRIDFETCH(pAABB, indx2*2 + 1); - if(cudaTestAABBOverlap(min0, max0, min1, max1)) - { - uint k; - uint handleIndex2 = min1.uw; - for(k = 0; k < curr; k++) - { - uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); - if(old_pair == handleIndex2) - { - pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; - break; - } - } - if(k == curr) - { - pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG; - if(curr >= curr_max) - { // not a good solution, but let's avoid crash - break; - } - curr++; - } - } - } - pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, curr); - return; -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, btCuda3F1U* pAABB, uint numBodies) -{ - int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x; - if(index >= (int)numBodies) - { - return; - } - btCuda3F1U bbMin = pAABB[index * 2]; - uint handleIndex = bbMin.uw; - uint2 start_curr = pPairBuffStartCurr[handleIndex]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint *pInp = pPairBuff + start; - uint num_changes = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - num_changes++; - } - } - pPairScan[index+1] = num_changes; -} - -//---------------------------------------------------------------------------------------- - -BT3DGRID__global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, btCuda3F1U* pAABB, uint numBodies) -{ - int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x; - if(index >= (int)numBodies) - { - return; - } - btCuda3F1U bbMin = pAABB[index * 2]; - uint handleIndex = bbMin.uw; - uint2 start_curr = pPairBuffStartCurr[handleIndex]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint* pInp = pPairBuff + start; - uint* pOut = pPairOut + pPairScan[index]; - uint* pOut2 = pInp; - uint num = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - *pOut = *pInp; - pOut++; - } - if((*pInp) & BT_CUDA_PAIR_ANY_FLG) - { - *pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG); - pOut2++; - num++; - } - } - pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, num); -} // squeezeOverlappingPairBuffD() - - -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -// E N D O F K E R N E L F U N C T I O N S -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- - -extern "C" -{ - -//Round a / b to nearest higher integer value -int BT3DGRIDPREF(iDivUp)(int a, int b) -{ - return (a % b != 0) ? (a / b + 1) : (a / b); -} - -// compute grid and thread block size for a given number of elements -void BT3DGRIDPREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads) -{ - numThreads = BT3DGRIDmin(blockSize, n); - numBlocks = BT3DGRIDPREF(iDivUp)(n, numThreads); -} - -void BT3DGRIDPREF(calcHashAABB)(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies) -{ - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); - // execute the kernel - BT3DGRIDEXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies)); - // check if kernel invocation generated an error - CUT_CHECK_ERROR("calcHashAABBD kernel execution failed"); -} - -void BT3DGRIDPREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)) -{ - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); - MY_CUDA_SAFE_CALL(BT3DGPRDMemset(cellStart, 0xffffffff, numCells*sizeof(uint))); - BT3DGRIDEXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies)); - CUT_CHECK_ERROR("Kernel execution failed: findCellStartD"); -} - -void BT3DGRIDPREF(findOverlappingPairs(btCuda3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies)) -{ -#if B_CUDA_USE_TEX - MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(btCuda3F1U))); -#endif - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); - BT3DGRIDEXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies)); - CUT_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD"); -#if B_CUDA_USE_TEX - MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex)); -#endif - } - - - -void BT3DGRIDPREF(findPairsLarge(btCuda3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)) -{ -#if B_CUDA_USE_TEX - MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(btCuda3F1U))); -#endif - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 64, numBlocks, numThreads); - BT3DGRIDEXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge)); - CUT_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD"); -#if B_CUDA_USE_TEX - MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex)); -#endif - } - - -void BT3DGRIDPREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies)) -{ - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); - BT3DGRIDEXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies)); - CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); - } - - -void BT3DGRIDPREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies)) -{ - int numThreads, numBlocks; - BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads); - BT3DGRIDEXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies)); - CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); -} // btCuda_squeezeOverlappingPairBuff() - - -} // extern "C" diff --git a/Extras/CUDA/btCudaBroadphase.cpp b/Extras/CUDA/btCudaBroadphase.cpp index 44e22fa87..d69c63809 100644 --- a/Extras/CUDA/btCudaBroadphase.cpp +++ b/Extras/CUDA/btCudaBroadphase.cpp @@ -1,6 +1,6 @@ /* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. @@ -18,23 +18,32 @@ subject to the following restrictions: #include "LinearMath/btAlignedAllocator.h" #include "LinearMath/btQuickprof.h" #include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" -#include "btCudaBroadphaseKernel.h" + #include "btCudaBroadphase.h" #include "radixsort.cuh" -//#include "vector_functions.h" + +//-------------------------------------------------------------------------- + +#define BT_GPU_PREF(func) btCuda_##func +#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" +#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h" +#undef BT_GPU_PREF + +extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams); //-------------------------------------------------------------------------- #include + //-------------------------------------------------------------------------- -btCudaBroadphase::btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, - int gridSizeX, int gridSizeY, int gridSizeZ, - int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, - int maxBodiesPerCell, - btScalar cellFactorAABB) : - bt3DGridBroadphase(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerBody, maxBodiesPerCell,cellFactorAABB) +btCudaBroadphase::btCudaBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, + int maxSmallProxiesPerCell) : + btGpu3DGridBroadphase(overlappingPairCache, worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxiesPerCell) { _initialize(); } // btCudaBroadphase::btCudaBroadphase() @@ -65,7 +74,7 @@ void btCudaBroadphase::_initialize() btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); unsigned int numAABB = m_maxHandles + m_maxLargeHandles; - btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(btCuda3F1U) * 2); + btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2); btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int)); @@ -77,13 +86,6 @@ void btCudaBroadphase::_initialize() void btCudaBroadphase::_finalize() { assert(m_bInitialized); - delete [] m_hBodiesHash; - delete [] m_hCellStart; - delete [] m_hPairBuffStartCurr; - delete [] m_hAABB; - delete [] m_hPairBuff; - delete [] m_hPairScan; - delete [] m_hPairOut; btCuda_freeArray(m_dBodiesHash[0]); btCuda_freeArray(m_dBodiesHash[1]); btCuda_freeArray(m_dCellStart); @@ -92,10 +94,6 @@ void btCudaBroadphase::_finalize() btCuda_freeArray(m_dPairBuff); btCuda_freeArray(m_dPairScan); btCuda_freeArray(m_dPairOut); - - btAlignedFree(m_pLargeHandlesRawPtr); - - m_bInitialized = false; } // btCudaBroadphase::_finalize() //-------------------------------------------------------------------------- @@ -108,14 +106,14 @@ void btCudaBroadphase::_finalize() void btCudaBroadphase::prepareAABB() { - bt3DGridBroadphase::prepareAABB(); - btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(btCuda3F1U) * 2 * (m_numHandles + m_numLargeHandles)); + btGpu3DGridBroadphase::prepareAABB(); + btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles)); return; } // btCudaBroadphase::prepareAABB() //-------------------------------------------------------------------------- -void btCudaBroadphase::setParameters(btCudaBroadphaseParams* hostParams) +void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams) { btCuda_setParameters(hostParams); return; @@ -183,7 +181,7 @@ void btCudaBroadphase::computePairCacheChanges() void btCudaBroadphase::scanOverlappingPairBuff() { btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1)); - bt3DGridBroadphase::scanOverlappingPairBuff(); + btGpu3DGridBroadphase::scanOverlappingPairBuff(); btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1)); return; } // btCudaBroadphase::scanOverlappingPairBuff() @@ -199,3 +197,11 @@ void btCudaBroadphase::squeezeOverlappingPairBuff() } // btCudaBroadphase::squeezeOverlappingPairBuff() //-------------------------------------------------------------------------- + +void btCudaBroadphase::resetPool(btDispatcher* dispatcher) +{ + btGpu3DGridBroadphase::resetPool(dispatcher); + btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); +} // btCudaBroadphase::resetPool() + +//-------------------------------------------------------------------------- diff --git a/Extras/CUDA/btCudaBroadphase.cu b/Extras/CUDA/btCudaBroadphase.cu index 225a5495c..7d73dfcde 100644 --- a/Extras/CUDA/btCudaBroadphase.cu +++ b/Extras/CUDA/btCudaBroadphase.cu @@ -1,31 +1,17 @@ /* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ #include #include @@ -34,41 +20,27 @@ #include "cutil_math.h" #include "math_constants.h" -#if defined(__APPLE__) || defined(MACOSX) -#include -#else -#include -#endif - -#include - -#include "btCudaBroadphaseKernel.h" -//#include "radixsort.cu" +#include //---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -// K E R N E L F U N C T I O N S -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- -//---------------------------------------------------------------------------------------- + +#include "btCudaDefines.h" + //---------------------------------------------------------------------------------------- -#ifdef __DEVICE_EMULATION__ - #define B_CUDA_USE_TEX 0 -#else - #define B_CUDA_USE_TEX 1 -#endif +#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" +#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h" -__device__ inline btCuda3F1U tex_fetch3F1U(float4 a) { return *((btCuda3F1U*)(&a)); } +//---------------------------------------------------------------------------------------- -#if B_CUDA_USE_TEX - #define BT3DGRIDFETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i)) -#else - #define BT3DGRIDFETCH(t, i) t[i] -#endif +__device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a) { return *((bt3DGrid3F1U*)(&a)); } + +//---------------------------------------------------------------------------------------- + +void btCuda_exit(int val); + +//---------------------------------------------------------------------------------------- texture particleHashTex; texture cellStartTex; @@ -76,98 +48,30 @@ texture pAABBTex; //---------------------------------------------------------------------------------------- -__constant__ btCudaBroadphaseParams params; +__constant__ bt3DGridBroadphaseParams params; //---------------------------------------------------------------------------------------- -#define BT3DGRID__device__ __device__ -#define BT3DGRIDmax(a, b) max(a, b) -#define BT3DGRIDmin(a, b) min(a, b) -#define BT3DGRIDparams params -#define BT3DGRID__mul24(a, b) __mul24(a, b) -#define BT3DGRID__global__ __global__ -#define BT3DGRID__shared__ __shared__ -#define BT3DGRID__syncthreads() __syncthreads() -#define BT3DGRIDmake_uint2(x, y) make_uint2(x, y) -#define BT3DGRIDmake_int3(x, y, z) make_int3(x, y, z) -#define BT3DGRIDPREF(func) btCuda_##func -#define BT3DGPRDMemset cudaMemset -#define BT3DGRIDblockIdx blockIdx -#define BT3DGRIDblockDim blockDim -#define BT3DGRIDthreadIdx threadIdx -#define BT3DGRIDEXECKERNEL(numb, numt, kfunc, args) kfunc<<>>args +extern "C" +{ //---------------------------------------------------------------------------------------- -//! Check for CUDA error -# define CUT_CHECK_ERROR(errorMessage) do { \ - cudaError_t err = cudaGetLastError(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ - btCuda_exit(EXIT_FAILURE); \ - } \ - err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ - btCuda_exit(EXIT_FAILURE); \ - } } while (0) - - -# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \ - cudaError err = call; \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - btCuda_exit(EXIT_FAILURE); \ - } } while (0) - -# define MY_CUDA_SAFE_CALL( call) do { \ - MY_CUDA_SAFE_CALL_NO_SYNC(call); \ - cudaError err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - btCuda_exit(EXIT_FAILURE); \ - } } while (0) - -//---------------------------------------------------------------------------------------- - -void btCuda_exit(int val) -{ - exit(val); -} - -void btCuda_allocateArray(void** devPtr, unsigned int size) -{ - MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size)); -} - -void btCuda_freeArray(void* devPtr) -{ - MY_CUDA_SAFE_CALL(cudaFree(devPtr)); -} - -void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size) -{ - MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost)); -} - -void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size) -{ - MY_CUDA_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice)); -} - -void btCuda_setParameters(btCudaBroadphaseParams* hostParams) +void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams) { // copy parameters to constant memory - MY_CUDA_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(btCudaBroadphaseParams))); -} + BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(bt3DGridBroadphaseParams))); +} // btCuda_setParameters() //---------------------------------------------------------------------------------------- -#include "bt3DGridBroadphaseFunc.h" +} // extern "C" //---------------------------------------------------------------------------------------- +#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h" + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + diff --git a/Extras/CUDA/btCudaBroadphase.h b/Extras/CUDA/btCudaBroadphase.h index 89ca382b8..1cdc6284d 100644 --- a/Extras/CUDA/btCudaBroadphase.h +++ b/Extras/CUDA/btCudaBroadphase.h @@ -1,6 +1,6 @@ /* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. @@ -13,108 +13,23 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ +//---------------------------------------------------------------------------------------- + #ifndef CUDA_BROADPHASE_H #define CUDA_BROADPHASE_H +//---------------------------------------------------------------------------------------- + #include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" -#include "btCudaBroadphaseKernel.h" +#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h" +#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h" +//---------------------------------------------------------------------------------------- -///The bt3DGridBroadphase uses CUDA to compute overlapping pairs using a GPU. -class bt3DGridBroadphase : public btSimpleBroadphase -{ -protected: - bool m_bInitialized; - unsigned int m_numBodies; - unsigned int m_numCells; - unsigned int m_maxPairsPerBody; - btScalar m_cellFactorAABB; - unsigned int m_maxBodiesPerCell; - btCudaBroadphaseParams m_params; - btScalar m_maxRadius; - // CPU data - unsigned int* m_hBodiesHash; - unsigned int* m_hCellStart; - unsigned int* m_hPairBuffStartCurr; - btCuda3F1U* m_hAABB; - unsigned int* m_hPairBuff; - unsigned int* m_hPairScan; - unsigned int* m_hPairOut; -// large proxies - int m_numLargeHandles; - int m_maxLargeHandles; - int m_LastLargeHandleIndex; - btSimpleBroadphaseProxy* m_pLargeHandles; - void* m_pLargeHandlesRawPtr; - int m_firstFreeLargeHandle; - int allocLargeHandle() - { - btAssert(m_numLargeHandles < m_maxLargeHandles); - int freeLargeHandle = m_firstFreeLargeHandle; - m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree(); - m_numLargeHandles++; - if(freeLargeHandle > m_LastLargeHandleIndex) - { - m_LastLargeHandleIndex = freeLargeHandle; - } - return freeLargeHandle; - } - void freeLargeHandle(btSimpleBroadphaseProxy* proxy) - { - int handle = int(proxy - m_pLargeHandles); - btAssert((handle >= 0) && (handle < m_maxHandles)); - if(handle == m_LastLargeHandleIndex) - { - m_LastLargeHandleIndex--; - } - proxy->SetNextFree(m_firstFreeLargeHandle); - m_firstFreeLargeHandle = handle; - proxy->m_clientObject = 0; - m_numLargeHandles--; - } - bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax); - bool isLargeProxy(btBroadphaseProxy* proxy); -// debug - unsigned int m_numPairsAdded; - unsigned int m_numPairsRemoved; - unsigned int m_numOverflows; -// -public: - bt3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, - int gridSizeX, int gridSizeY, int gridSizeZ, - int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, - int maxBodiesPerCell = 8, - btScalar cellFactorAABB = btScalar(1.0f)); - virtual ~bt3DGridBroadphase(); - virtual void calculateOverlappingPairs(btDispatcher* dispatcher); +///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs - virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy); - virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher); - virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback); -protected: - void _initialize(); - void _finalize(); - void addPairsToCache(btDispatcher* dispatcher); - void addLarge2LargePairsToCache(btDispatcher* dispatcher); - -// overrides for CPU version - virtual void setParameters(btCudaBroadphaseParams* hostParams); - virtual void prepareAABB(); - virtual void calcHashAABB(); - virtual void sortHash(); - virtual void findCellStart(); - virtual void findOverlappingPairs(); - virtual void findPairsLarge(); - virtual void computePairCacheChanges(); - virtual void scanOverlappingPairBuff(); - virtual void squeezeOverlappingPairBuff(); -}; - - - -///The btCudaBroadphase uses CUDA to compute overlapping pairs using a GPU. -class btCudaBroadphase : public bt3DGridBroadphase +class btCudaBroadphase : public btGpu3DGridBroadphase { protected: // GPU data @@ -122,15 +37,15 @@ protected: unsigned int* m_dCellStart; unsigned int* m_dPairBuff; unsigned int* m_dPairBuffStartCurr; - btCuda3F1U* m_dAABB; + bt3DGrid3F1U* m_dAABB; unsigned int* m_dPairScan; unsigned int* m_dPairOut; public: - btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, - int gridSizeX, int gridSizeY, int gridSizeZ, - int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, - int maxBodiesPerCell = 8, - btScalar cellFactorAABB = btScalar(1.0f)); + btCudaBroadphase( btOverlappingPairCache* overlappingPairCache, + const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies, + int maxSmallProxiesPerCell = 8); virtual ~btCudaBroadphase(); protected: void _initialize(); @@ -138,7 +53,7 @@ protected: void allocateArray(void** devPtr, unsigned int size); void freeArray(void* devPtr); // overrides for CUDA version - virtual void setParameters(btCudaBroadphaseParams* hostParams); + virtual void setParameters(bt3DGridBroadphaseParams* hostParams); virtual void prepareAABB(); virtual void calcHashAABB(); virtual void sortHash(); @@ -148,5 +63,7 @@ protected: virtual void computePairCacheChanges(); virtual void scanOverlappingPairBuff(); virtual void squeezeOverlappingPairBuff(); + virtual void resetPool(btDispatcher* dispatcher); }; + #endif //CUDA_BROADPHASE_H \ No newline at end of file diff --git a/Extras/CUDA/btCudaBroadphaseKernel.h b/Extras/CUDA/btCudaBroadphaseKernel.h deleted file mode 100644 index a54370097..000000000 --- a/Extras/CUDA/btCudaBroadphaseKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -// Keep this file free from Bullet headers -// it is included into CUDA program -//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -#ifndef CUDA_BROADPHASE_KERNEL_H -#define CUDA_BROADPHASE_KERNEL_H - -#define CUDA_BROADPHASE_USE_CUDA 1 - -#define BT_CUDA_PAIR_FOUND_FLG (0x40000000) -#define BT_CUDA_PAIR_NEW_FLG (0x20000000) -#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG) - -struct btCudaBroadphaseParams -{ - unsigned int m_gridSizeX; - unsigned int m_gridSizeY; - unsigned int m_gridSizeZ; - unsigned int m_numCells; - float m_worldOriginX; - float m_worldOriginY; - float m_worldOriginZ; - float m_cellSizeX; - float m_cellSizeY; - float m_cellSizeZ; - unsigned int m_numBodies; - unsigned int m_maxBodiesPerCell; -}; - -struct btCuda3F1U -{ - float fx; - float fy; - float fz; - unsigned int uw; -}; - - -extern "C" -{ -// CPU functions - void bt3DGrid_setParameters(btCudaBroadphaseParams* hostParams); - void bt3DGrid_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies); - void bt3DGrid_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells); - void bt3DGrid_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash, - unsigned int* pCellStart, - unsigned int* pPairBuff, - unsigned int* pPairBuffStartCurr, - unsigned int numBodies); - void bt3DGrid_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash, - unsigned int* pCellStart, - unsigned int* pPairBuff, - unsigned int* pPairBuffStartCurr, - unsigned int numBodies, - unsigned int numLarge); - - void bt3DGrid_computePairCacheChanges( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, - unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies); - void bt3DGrid_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, - unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies); - -// CUDA functions - void btCuda_allocateArray(void** devPtr, unsigned int size); - void btCuda_freeArray(void* devPtr); - void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size); - void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size); - void btCuda_setParameters(btCudaBroadphaseParams* hostParams); - void btCuda_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies); - void btCuda_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells); - void btCuda_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash, - unsigned int* pCellStart, - unsigned int* pPairBuff, - unsigned int* pPairBuffStartCurr, - unsigned int numBodies); - void btCuda_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash, - unsigned int* pCellStart, - unsigned int* pPairBuff, - unsigned int* pPairBuffStartCurr, - unsigned int numBodies, - unsigned int numLarge); - - void btCuda_computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, - unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies); - void btCuda_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, - unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies); -} - - -#endif // CUDA_BROADPHASE_KERNEL_H \ No newline at end of file diff --git a/Extras/CUDA/btCudaDefines.h b/Extras/CUDA/btCudaDefines.h new file mode 100644 index 000000000..e0e09714e --- /dev/null +++ b/Extras/CUDA/btCudaDefines.h @@ -0,0 +1,138 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +// Common preprocessor definitions for CUDA compiler + +//---------------------------------------------------------------------------------------- + +#ifndef BTCUDADEFINES_H +#define BTCUDADEFINES_H + +//---------------------------------------------------------------------------------------- + +#ifdef __DEVICE_EMULATION__ + #define B_CUDA_USE_TEX 0 +#else + #define B_CUDA_USE_TEX 1 +#endif + + +#if B_CUDA_USE_TEX + #define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i)) + #define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i) +#else + #define BT_GPU_FETCH(t, i) t[i] + #define BT_GPU_FETCH4(t, i) t[i] +#endif + +//---------------------------------------------------------------------------------------- + +#define BT_GPU___device__ __device__ +#define BT_GPU___devdata__ __device__ +#define BT_GPU___constant__ __constant__ +#define BT_GPU_max(a, b) max(a, b) +#define BT_GPU_min(a, b) min(a, b) +#define BT_GPU_params params +#define BT_GPU___mul24(a, b) __mul24(a, b) +#define BT_GPU___global__ __global__ +#define BT_GPU___shared__ __shared__ +#define BT_GPU___syncthreads() __syncthreads() +#define BT_GPU_make_uint2(x, y) make_uint2(x, y) +#define BT_GPU_make_int3(x, y, z) make_int3(x, y, z) +#define BT_GPU_make_float3(x, y, z) make_float3(x, y, z) +#define BT_GPU_make_float34(x) make_float3(x) +#define BT_GPU_make_float31(x) make_float3(x) +#define BT_GPU_make_float42(a, b) make_float4(a, b) +#define BT_GPU_make_float44(a, b, c, d) make_float4(a, b, c, d) +#define BT_GPU_PREF(func) btCuda_##func +#define BT_GPU_Memset cudaMemset +#define BT_GPU_MemcpyToSymbol(a, b, c) cudaMemcpyToSymbol(a, b, c) +#define BT_GPU_blockIdx blockIdx +#define BT_GPU_blockDim blockDim +#define BT_GPU_threadIdx threadIdx +#define BT_GPU_dot(a, b) dot(a, b) +#define BT_GPU_dot4(a, b) dot(a, b) +#define BT_GPU_cross(a, b) cross(a, b) +#define BT_GPU_BindTexture(a, b, c, d) cudaBindTexture(a, b, c, d) +#define BT_GPU_UnbindTexture(a) cudaUnbindTexture(a) +#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) kfunc<<>>args + +//---------------------------------------------------------------------------------------- + +//! Check for CUDA error +#define BT_GPU_CHECK_ERROR(errorMessage) \ + do \ + { \ + cudaError_t err = cudaGetLastError(); \ + if(err != cudaSuccess) \ + { \ + fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\ + errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\ + btCuda_exit(EXIT_FAILURE); \ + } \ + err = cudaThreadSynchronize(); \ + if(err != cudaSuccess) \ + { \ + fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\ + errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\ + btCuda_exit(EXIT_FAILURE); \ + } \ + } \ + while(0) + +//---------------------------------------------------------------------------------------- + +#define BT_GPU_SAFE_CALL_NO_SYNC(call) \ + do \ + { \ + cudaError err = call; \ + if(err != cudaSuccess) \ + { \ + fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ + __FILE__, __LINE__, cudaGetErrorString( err) ); \ + btCuda_exit(EXIT_FAILURE); \ + } \ + } \ + while(0) + +//---------------------------------------------------------------------------------------- + +#define BT_GPU_SAFE_CALL(call) \ + do \ + { \ + BT_GPU_SAFE_CALL_NO_SYNC(call); \ + cudaError err = cudaThreadSynchronize(); \ + if(err != cudaSuccess) \ + { \ + fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",\ + __FILE__, __LINE__, cudaGetErrorString( err) ); \ + btCuda_exit(EXIT_FAILURE); \ + } \ + } while (0) + +//---------------------------------------------------------------------------------------- + +extern "C" void btCuda_exit(int val); + +//---------------------------------------------------------------------------------------- + +#endif // BTCUDADEFINES_H + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + + diff --git a/Extras/CUDA/btCudaUtils.cu b/Extras/CUDA/btCudaUtils.cu new file mode 100644 index 000000000..f1ff53757 --- /dev/null +++ b/Extras/CUDA/btCudaUtils.cu @@ -0,0 +1,91 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include +#include +#include + +#include +#include + +#include "cutil_math.h" +#include "math_constants.h" + +#include + +//---------------------------------------------------------------------------------------- + + +#include "btCudaDefines.h" +#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + + +//---------------------------------------------------------------------------------------- + +void btCuda_exit(int val) +{ + fprintf(stderr, "Press ENTER key to terminate the program\n"); + getchar(); + exit(val); +} + +void btCuda_allocateArray(void** devPtr, unsigned int size) +{ + BT_GPU_SAFE_CALL(cudaMalloc(devPtr, size)); +} + +void btCuda_freeArray(void* devPtr) +{ + BT_GPU_SAFE_CALL(cudaFree(devPtr)); +} + +void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size) +{ + BT_GPU_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost)); +} + +void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size) +{ + BT_GPU_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice)); +} + + +void btCuda_registerGLBufferObject(unsigned int vbo) +{ + BT_GPU_SAFE_CALL(cudaGLRegisterBufferObject(vbo)); +} + +void* btCuda_mapGLBufferObject(unsigned int vbo) +{ + void *ptr; + BT_GPU_SAFE_CALL(cudaGLMapBufferObject(&ptr, vbo)); + return ptr; +} + +void btCuda_unmapGLBufferObject(unsigned int vbo) +{ + BT_GPU_SAFE_CALL(cudaGLUnmapBufferObject(vbo)); +} + +//---------------------------------------------------------------------------------------- + +#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h" + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + diff --git a/Demos/Gpu2dDemo/btGpuDemo2dCudaFunc.cu b/Extras/CUDA/btGpuDemo2dCudaFunc.cu similarity index 87% rename from Demos/Gpu2dDemo/btGpuDemo2dCudaFunc.cu rename to Extras/CUDA/btGpuDemo2dCudaFunc.cu index ff4c9b538..f7232d959 100644 --- a/Demos/Gpu2dDemo/btGpuDemo2dCudaFunc.cu +++ b/Extras/CUDA/btGpuDemo2dCudaFunc.cu @@ -17,20 +17,20 @@ subject to the following restrictions: #include #include -#include "../../Extras/CUDA/cutil_math.h" +#include "cutil_math.h" #include "math_constants.h" #include //---------------------------------------------------------------------------------------- -#include "../../Extras/CUDA/btCudaDefines.h" +#include "btCudaDefines.h" //---------------------------------------------------------------------------------------- #include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" -#include "btGpuDemo2dSharedTypes.h" -#include "btGpuDemo2dSharedDefs.h" +#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h" +#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h" //---------------------------------------------------------------------------------------- @@ -38,7 +38,7 @@ texture posTex; //---------------------------------------------------------------------------------------- -#include "btGpuDemo2dSharedCode.h" +#include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h" //---------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------- diff --git a/Extras/CUDA/libbulletcuda.vcproj b/Extras/CUDA/libbulletcuda.vcproj index 3466f0f3c..82518d122 100644 --- a/Extras/CUDA/libbulletcuda.vcproj +++ b/Extras/CUDA/libbulletcuda.vcproj @@ -506,14 +506,6 @@ - - - - @@ -545,8 +537,52 @@ + + + + + + + + + + + + + + + + diff --git a/Extras/CUDA/param.cpp b/Extras/CUDA/param.cpp deleted file mode 100644 index bad5fecc4..000000000 --- a/Extras/CUDA/param.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include - -const Param dummy("error"); diff --git a/Extras/CUDA/param.h b/Extras/CUDA/param.h deleted file mode 100644 index 0570af1dd..000000000 --- a/Extras/CUDA/param.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - Simple parameter system - sgreen@nvidia.com 4/2001 -*/ - -#ifndef PARAM_H -#define PARAM_H - -#ifdef _WIN32 -# pragma warning(disable:4786) // No stupid debug warnings -#endif - -#include -#include -#include -#include -#include -#include - -// base class for named parameter -class ParamBase { -public: - ParamBase(char *name) { m_name = name; } - - virtual ~ParamBase() { } - - std::string *GetName() { return &m_name; } - - virtual float GetFloatValue() = 0; - virtual int GetIntValue() = 0; - - virtual std::string GetValueString() = 0; - - virtual void Reset() = 0; - virtual void Increment() = 0; - virtual void Decrement() = 0; - - virtual float GetPercentage() = 0; - virtual void SetPercentage(float p) = 0; - - virtual void Write(std::ostream &stream) = 0; - virtual void Read(std::istream &stream) = 0; - - virtual bool IsList() = 0; - -protected: - std::string m_name; -}; - -// derived class for single-valued parameter -template class Param : public ParamBase { -public: - Param(char *name, T value = 0, T min = 0, T max = 10000, T step = 1, T* ptr = 0) - : ParamBase(name) - { - if (ptr) { - m_ptr = ptr; - } else { - m_ptr = &m_value; - } - *m_ptr = value; - m_default = value; - - m_min = min; - m_max = max; - m_step = step; - } - - virtual ~Param() { } - - virtual float GetFloatValue() { return (float) *m_ptr; } - virtual int GetIntValue() { return (int) *m_ptr; } - - T GetValue() const { return *m_ptr; } - T SetValue(const T value) { *m_ptr = value; } - - // inherited functions - std::string GetValueString() - { - std::ostringstream ost; - ost< m_max) - *m_ptr = m_max; - } - - void Decrement() - { - *m_ptr -= m_step; - if (*m_ptr < m_min) - *m_ptr = m_min; - } - - void Write(std::ostream &stream) { stream << m_name << " " << *m_ptr << '\n'; } - void Read(std::istream &stream) { stream >> m_name >> *m_ptr; } - - bool IsList() { return false; } - -private: - T m_value; - T *m_ptr; // pointer to value declared elsewhere - T m_default, m_min, m_max; - T m_step; -}; - - -extern const Param dummy; - -// list of parameters -class ParamList : public ParamBase { -public: - ParamList(char *name = "") - : ParamBase(name) - { - active = true; - } - - virtual ~ParamList() { } - - virtual float GetFloatValue() { return 0.0f; } - virtual int GetIntValue() { return 0; } - - void AddParam(ParamBase *param) - { - m_params.push_back(param); - m_map[*param->GetName()] = param; - m_current = m_params.begin(); - } - - // look-up parameter based on name - ParamBase *GetParam(char *name) - { - ParamBase *p = m_map[name]; - - if (p) - return p; - else - return (ParamBase *) &dummy; - } - - ParamBase *GetParam(int i) - { - return m_params[i]; - } - - ParamBase *GetCurrent() - { - return *m_current; - } - - int GetSize() { return (int)m_params.size(); } - - // inherited functions - std::string GetValueString() - { -// return m_name; - return "list"; - } - - void Reset() - { - m_current = m_params.begin(); - } - - void Increment() - { - m_current++; - if (m_current == m_params.end()) - m_current = m_params.begin(); - } - - void Decrement() - { - if (m_current == m_params.begin()) - m_current = m_params.end()-1; - else - m_current--; - - } - - float GetPercentage() { return 0.0f; } - void SetPercentage(float /*p*/) { ; } - - void Write(std::ostream &stream) - { - stream << m_name << '\n'; - for(std::vector::const_iterator p = m_params.begin(); p != m_params.end(); ++p) { - (*p)->Write(stream); - } - } - - void Read(std::istream &stream) - { - stream >> m_name; - for(std::vector::const_iterator p = m_params.begin(); p != m_params.end(); ++p) { - (*p)->Read(stream); - } - } - - bool IsList() { return true; } - -protected: - bool active; - std::vector m_params; - std::map m_map; - std::vector::const_iterator m_current; -}; - -#endif diff --git a/Extras/CUDA/paramgl.cpp b/Extras/CUDA/paramgl.cpp deleted file mode 100644 index 39e74e5d6..000000000 --- a/Extras/CUDA/paramgl.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - ParamListGL - - class derived from ParamList to do simple OpenGL rendering of a parameter list - sgg 8/2001 -*/ - -#include -#include - -void beginWinCoords(void) -{ - glMatrixMode(GL_MODELVIEW); - glPushMatrix(); - glLoadIdentity(); - glTranslatef(0.0, glutGet(GLUT_WINDOW_HEIGHT) - 1, 0.0); - glScalef(1.0, -1.0, 1.0); - - glMatrixMode(GL_PROJECTION); - glPushMatrix(); - glLoadIdentity(); - glOrtho(0, glutGet(GLUT_WINDOW_WIDTH), 0, glutGet(GLUT_WINDOW_HEIGHT), -1, 1); - - glMatrixMode(GL_MODELVIEW); -} - -void endWinCoords(void) -{ - glMatrixMode(GL_PROJECTION); - glPopMatrix(); - - glMatrixMode(GL_MODELVIEW); - glPopMatrix(); -} - -void glPrint(int x, int y, const char *s, void *font) -{ - int i, len; - - glRasterPos2f(x, y); - len = (int) strlen(s); - for (i = 0; i < len; i++) { - glutBitmapCharacter(font, s[i]); - } -} - -void glPrintShadowed(int x, int y, const char *s, void *font, float *color) -{ - glColor3f(0.0, 0.0, 0.0); - glPrint(x-1, y-1, s, font); - - glColor3fv((GLfloat *) color); - glPrint(x, y, s, font); -} - - -ParamListGL::ParamListGL(char *name) : ParamList(name) -{ - font = (void *) GLUT_BITMAP_9_BY_15; -// font = (void *) GLUT_BITMAP_8_BY_13; - - bar_x = 250; - bar_w = 250; - bar_h = 10; - bar_offset = 5; - text_x = 5; - separation = 15; - value_x = 200; - font_h = 15; - start_x = 0; - start_y = 0; - - text_col_selected[0] = 1.0; - text_col_selected[1] = 1.0; - text_col_selected[2] = 1.0; - - text_col_unselected[0] = 0.75; - text_col_unselected[1] = 0.75; - text_col_unselected[2] = 0.75; - - bar_col_outer[0] = 0.0; - bar_col_outer[1] = 0.0; - bar_col_outer[2] = 0.0; - - bar_col_inner[0] = 0.0; - bar_col_inner[1] = 0.0; - bar_col_inner[2] = 0.0; - - text_col_shadow[0] = 0.0; - text_col_shadow[1] = 0.0; - text_col_shadow[2] = 0.0; -} - -void -ParamListGL::Render(int x, int y, bool shadow) -{ - beginWinCoords(); - - start_x = x; start_y = y; - - for(std::vector::const_iterator p = m_params.begin(); p != m_params.end(); ++p) { - if ((*p)->IsList()) { - ParamListGL *list = (ParamListGL *) (*p); - list->Render(x+10, y); - y += separation*list->GetSize(); - - } else { - if (p == m_current) - glColor3fv(text_col_selected); - else - glColor3fv(text_col_unselected); - - if (shadow) { - glPrintShadowed(x + text_x, y + font_h, (*p)->GetName()->c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected); - glPrintShadowed(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected); - } - else { - glPrint(x + text_x, y + font_h, (*p)->GetName()->c_str(), font); - glPrint(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font); - } - -// glColor3fv((GLfloat *) &bar_col_outer); - glBegin(GL_LINE_LOOP); - glVertex2f(x + bar_x, y + bar_offset); - glVertex2f(x + bar_x + bar_w, y + bar_offset); - glVertex2f(x + bar_x + bar_w, y + bar_offset + bar_h); - glVertex2f(x + bar_x, y + bar_offset + bar_h); - glEnd(); - -// glColor3fv((GLfloat *) &bar_col_inner); - glRectf(x + bar_x, y + bar_offset + bar_h, x + bar_x + (bar_w*(*p)->GetPercentage()), y + bar_offset); - - y += separation; - } - - } - - endWinCoords(); -} - - -bool -ParamListGL::Mouse(int x, int y, int button, int state) -{ - if ((y < start_y) || (y > (int)(start_y + (separation * m_params.size()) - 1))) - return false; - - int i = (y - start_y) / separation; - - if ((button==GLUT_LEFT_BUTTON) && (state==GLUT_DOWN)) { -#if defined(__GNUC__) && (__GNUC__ < 3) - m_current = &m_params[i]; -#else - // MJH: workaround since the version of vector::at used here is non-standard - for (m_current = m_params.begin(); m_current != m_params.end() && i > 0; m_current++, i--); - //m_current = (std::vector::const_iterator)&m_params.at(i); -#endif - - if ((x > bar_x) && (x < bar_x + bar_w)) { - Motion(x, y); - } - } - return true; -} - -bool -ParamListGL::Motion(int x, int y) -{ - if ((y < start_y) || (y > start_y + (separation * (int)m_params.size()) - 1) ) - return false; - - if (x < bar_x) { - (*m_current)->SetPercentage(0.0); - return true; - } - - if (x > bar_x + bar_w) { - (*m_current)->SetPercentage(1.0); - return true; - } - - (*m_current)->SetPercentage((x-bar_x) / (float) bar_w); - return true; -} - -void -ParamListGL::Special(int key, int /*x*/, int /*y*/) -{ - switch(key) { - case GLUT_KEY_DOWN: - Increment(); - break; - case GLUT_KEY_UP: - Decrement(); - break; - case GLUT_KEY_RIGHT: - GetCurrent()->Increment(); - break; - case GLUT_KEY_LEFT: - GetCurrent()->Decrement(); - break; - case GLUT_KEY_HOME: - GetCurrent()->Reset(); - break; - case GLUT_KEY_END: - GetCurrent()->SetPercentage(1.0); - break; - } - glutPostRedisplay(); -} diff --git a/Extras/CUDA/paramgl.h b/Extras/CUDA/paramgl.h deleted file mode 100644 index 31aa00a54..000000000 --- a/Extras/CUDA/paramgl.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - ParamListGL - - class derived from ParamList to do simple OpenGL rendering of a parameter list - sgg 8/2001 -*/ - -#ifndef PARAMGL_H -#define PARAMGL_H - -#if defined(__APPLE__) || defined(MACOSX) -#include -#else -#include -#endif - -#include - -void beginWinCoords(); -void endWinCoords(); -void glPrint(int x, int y, const char *s, void *font); -void glPrintShadowed(int x, int y, const char *s, void *font, float *color); - -class ParamListGL : public ParamList { -public: - ParamListGL(char *name = ""); - - void Render(int x, int y, bool shadow = false); - bool Mouse(int x, int y, int button=GLUT_LEFT_BUTTON, int state=GLUT_DOWN); - bool Motion(int x, int y); - void Special(int key, int x, int y); - - void SetSelectedColor(float r, float g, float b) { text_col_selected[0] = r; text_col_selected[1] = g; text_col_selected[2] = b; } - void SetUnSelectedColor(float r, float g, float b) { text_col_unselected[0] = r; text_col_unselected[1] = g; text_col_unselected[2] = b; } - - int bar_x; - int bar_w; - int bar_h; - int text_x; - int separation; - int value_x; - int font_h; - int start_x, start_y; - int bar_offset; - - float text_col_selected[3]; - float text_col_unselected[3]; - float text_col_shadow[3]; - float bar_col_outer[3]; - float bar_col_inner[3]; - - void *font; -}; - -#endif diff --git a/Extras/CUDA/particleSystem.cpp b/Extras/CUDA/particleSystem.cpp deleted file mode 100644 index c75bfd7dd..000000000 --- a/Extras/CUDA/particleSystem.cpp +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -#include "particleSystem.h" -#include "particleSystem.cuh" -#include "radixsort.cuh" -#include "particles_kernel.cuh" - -//#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include "../../Demos/OpenGL/GLDebugDrawer.h" - -#include "btCudaBroadphase.h" - - -#ifndef CUDART_PI_F -#define CUDART_PI_F 3.141592654f -#endif - -#define USE_BULLET 1 - -#define VEL_DIR_FACT (30.0F) -#define ACC_DIR_FACT (VEL_DIR_FACT*VEL_DIR_FACT) -#define VEL_INV_FACT (1.0F/VEL_DIR_FACT) -#define ACC_INV_FACT (1.0F/ACC_DIR_FACT) - -GLDebugDrawer debugDrawer; - - -ParticleSystem::ParticleSystem(uint numParticles, uint3 gridSize) : - m_bInitialized(false), - m_numParticles(numParticles), - m_hPos(0), - m_hVel(0), - m_currentPosRead(0), - m_currentVelRead(0), - m_currentPosWrite(1), - m_currentVelWrite(1), - m_gridSize(gridSize), - m_maxParticlesPerCell(4), - m_timer(0), - m_solverIterations(1), -// m_simulationMode(SIMULATION_CUDA) - m_simulationMode(SIMULATION_BULLET_CPU) -{ - m_dPos[0] = m_dPos[1] = 0; - m_dVel[0] = m_dVel[1] = 0; - - m_numGridCells = m_gridSize.x*m_gridSize.y*m_gridSize.z; - float3 worldSize = make_float3(2.0f, 2.0f, 2.0f); - - // set simulation parameters - m_params.gridSize = m_gridSize; - m_params.numCells = m_numGridCells; - m_params.numBodies = m_numParticles; - m_params.maxParticlesPerCell = m_maxParticlesPerCell; - - m_params.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f); - m_params.cellSize = make_float3(worldSize.x / m_gridSize.x, worldSize.y / m_gridSize.y, worldSize.z / m_gridSize.z); - - m_params.particleRadius = m_params.cellSize.x * 0.5f; - m_params.colliderPos = make_float4(0.0f, -0.7f, 0.0f, 1.0f); - m_params.colliderRadius = 0.2f; - - m_params.spring = 0.5f; - m_params.damping = 0.02f; - m_params.shear = 0.1f; - m_params.attraction = 0.0f; - m_params.boundaryDamping = -0.5f; - - m_params.gravity = make_float3(0.0f, -0.0003f, 0.0f); - m_params.globalDamping = 1.0f; - - _initialize(numParticles); - -#if USE_BULLET - initializeBullet(); -#endif -} - -ParticleSystem::~ParticleSystem() -{ -#if USE_BULLET - finalizeBullet(); -#endif - _finalize(); - m_numParticles = 0; -} - -uint -ParticleSystem::createVBO(uint size) -{ - GLuint vbo; - glGenBuffers(1, &vbo); - glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - registerGLBufferObject(vbo); - return vbo; -} - -inline float lerp(float a, float b, float t) -{ - return a + t*(b-a); -} - -void colorRamp(float t, float *r) -{ - const int ncolors = 7; - float c[ncolors][3] = { - { 1.0, 0.0, 0.0, }, - { 1.0, 0.5, 0.0, }, - { 1.0, 1.0, 0.0, }, - { 0.0, 1.0, 0.0, }, - { 0.0, 1.0, 1.0, }, - { 0.0, 0.0, 1.0, }, - { 1.0, 0.0, 1.0, }, - }; - t = t * (ncolors-1); - int i = (int) t; - float u = t - floor(t); - r[0] = lerp(c[i][0], c[i+1][0], u); - r[1] = lerp(c[i][1], c[i+1][1], u); - r[2] = lerp(c[i][2], c[i+1][2], u); -} - -void -ParticleSystem::_initialize(int numParticles) -{ - assert(!m_bInitialized); - - m_numParticles = numParticles; - - // allocate host storage - m_hPos = new float[m_numParticles*4]; - m_hVel = new float[m_numParticles*4]; - memset(m_hPos, 0, m_numParticles*4*sizeof(float)); - memset(m_hVel, 0, m_numParticles*4*sizeof(float)); - - m_hGridCounters = new uint[m_numGridCells]; - m_hGridCells = new uint[m_numGridCells*m_maxParticlesPerCell]; - memset(m_hGridCounters, 0, m_numGridCells*sizeof(uint)); - memset(m_hGridCells, 0, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); - - m_hParticleHash = new uint[m_numParticles*2]; - memset(m_hParticleHash, 0, m_numParticles*2*sizeof(uint)); - - m_hCellStart = new uint[m_numGridCells]; - memset(m_hCellStart, 0, m_numGridCells*sizeof(uint)); - - // allocate GPU data - unsigned int memSize = sizeof(float) * 4 * m_numParticles; - - m_posVbo[0] = createVBO(memSize); - m_posVbo[1] = createVBO(memSize); - - allocateArray((void**)&m_dVel[0], memSize); - allocateArray((void**)&m_dVel[1], memSize); - - allocateArray((void**)&m_dSortedPos, memSize); - allocateArray((void**)&m_dSortedVel, memSize); - -#if USE_SORT - allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint)); - allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint)); - allocateArray((void**)&m_dCellStart, m_numGridCells*sizeof(uint)); -#else - allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint)); - allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); -#endif - - m_colorVBO = createVBO(m_numParticles*4*sizeof(float)); - -#if 1 - // fill color buffer - glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO); - float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY); - float *ptr = data; - for(uint i=0; i 1.0f - m_params.particleRadius) { pos.x = 1.0f - m_params.particleRadius; vel.x *= m_params.boundaryDamping; } -// if (pos.x < -1.0f + m_params.particleRadius) { pos.x = -1.0f + m_params.particleRadius; vel.x *= m_params.boundaryDamping;} -// if (pos.y > 1.0f - m_params.particleRadius) { pos.y = 1.0f - m_params.particleRadius; vel.y *= m_params.boundaryDamping; } -// if (pos.y < -1.0f + m_params.particleRadius) { pos.y = -1.0f + m_params.particleRadius; vel.y *= m_params.boundaryDamping;} -// if (pos.z > 1.0f - m_params.particleRadius) { pos.z = 1.0f - m_params.particleRadius; vel.z *= m_params.boundaryDamping; } -// if (pos.z < -1.0f + m_params.particleRadius) { pos.z = -1.0f + m_params.particleRadius; vel.z *= m_params.boundaryDamping;} - btTransform& trans = m_bulletParticles[i]->getWorldTransform(); - trans.setOrigin(btVector3(pos.x, pos.y, pos.z)); - m_bulletParticles[i]->setLinearVelocity(btVector3(vel.x, vel.y, vel.z)*btScalar(VEL_DIR_FACT)); - m_bulletParticles[i]->setAngularVelocity(btVector3(0,0,0)); - } - - glUnmapBufferARB(GL_ARRAY_BUFFER); - - std::swap(m_currentPosRead, m_currentPosWrite); - std::swap(m_currentVelRead, m_currentVelWrite); - - btTransform& collTrans = m_bulletCollider->getWorldTransform(); - collTrans.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z)); - - m_dynamicsWorld->stepSimulation(deltaTime); - - glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]); - hPos = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY); - - //sync transform and velocity from Bullet to particle system - for (uint i=0;igetWorldTransform(); - hPos[i*4] = trans.getOrigin().getX(); - hPos[i*4+1] = trans.getOrigin().getY(); - hPos[i*4+2] = trans.getOrigin().getZ(); - hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX() * VEL_INV_FACT; - hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY() * VEL_INV_FACT; - hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ() * VEL_INV_FACT; - } - copyBuffersFromHostToDevice(); - - collTrans = m_bulletCollider->getWorldTransform(); - m_params.colliderPos.x = collTrans.getOrigin().getX(); - m_params.colliderPos.y = collTrans.getOrigin().getY(); - m_params.colliderPos.z = collTrans.getOrigin().getZ(); - -} - - - -void -ParticleSystem::updateCuda(float deltaTime) -{ -#ifndef BT_NO_PROFILE - CProfileManager::Reset(); -#endif //BT_NO_PROFILE - BT_PROFILE("update CUDA"); - // update constants - setParameters(&m_params); - - // integrate - { - BT_PROFILE("integrate"); - integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite], - m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite], - deltaTime, - m_numParticles); - } - std::swap(m_currentPosRead, m_currentPosWrite); - std::swap(m_currentVelRead, m_currentVelWrite); - -#if USE_SORT - // sort and search method - - // calculate hash - { - BT_PROFILE("calcHash"); - calcHash(m_posVbo[m_currentPosRead], - m_dParticleHash[0], - m_numParticles); - } - -#if DEBUG_GRID - copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); - printf("particle hash:\n"); - for(uint i=0; i maxPerCell) - maxPerCell = m_hGridCounters[i]; - if (m_hGridCounters[i] > 0) { - printf("%d (%d): ", i, m_hGridCounters[i]); - for(uint j=0; jsetDebugDrawer(&debugDrawer); - //debugDrawer.setDebugMode(btIDebugDraw::DBG_DrawPairs); - - -// m_dynamicsWorld->setGravity(100*btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z)); - m_dynamicsWorld->setGravity(btScalar(ACC_DIR_FACT) * btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z)); - m_dynamicsWorld->getSolverInfo().m_numIterations=1; - - btRigidBody* body; - - btCollisionShape* boxShape = new btBoxShape(btVector3(btScalar(1.2),btScalar(0.05),btScalar(1.2))); -// boxShape->setMargin(0.03f); - - btScalar mass(0.); - btVector3 localInertia(0,0,0); - btRigidBody::btRigidBodyConstructionInfo boxRbcInfo(mass, 0, boxShape, localInertia); - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, -1.05f,0)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 1.05f,0)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(-1.05f, 0, 0)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(1.05f, 0, 0)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, -1.05f)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - boxRbcInfo.m_startWorldTransform.setIdentity(); - boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0); - boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, 1.05f)); - boxRbcInfo.m_friction = 0.0f; - body = new btRigidBody(boxRbcInfo); - m_dynamicsWorld->addRigidBody(body); - - - - unsigned int i; - - btSphereShape* particleSphere = new btSphereShape(m_params.particleRadius); - particleSphere->setMargin(0.0); - particleSphere->calculateLocalInertia(1,localInertia); - - reset(CONFIG_GRID); - - for (i=0;isetActivationState(DISABLE_DEACTIVATION); - m_bulletParticles.push_back(body); - m_dynamicsWorld->addRigidBody(body); - } - - btSphereShape* colliderSphere = new btSphereShape(m_params.colliderRadius); - colliderSphere->setMargin(0.0); - colliderSphere->calculateLocalInertia(10., localInertia); - btRigidBody::btRigidBodyConstructionInfo rbci(5., 0, colliderSphere,localInertia); - rbci.m_startWorldTransform.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z)); - body = new btRigidBody(rbci); - body->setActivationState(DISABLE_DEACTIVATION); - m_bulletCollider = body; - m_dynamicsWorld->addRigidBody(body); - -/* for (i=0;i<6;i++) - { - btVector4 planeEq; - worldBox->getPlaneEquation(planeEq,i); - - planeShape = new btStaticPlaneShape(-planeEq,planeEq.getW()); - planeShape->setMargin(0.f); - btRigidBody::btRigidBodyConstructionInfo rbci(0.f,0,planeShape); - body = new btRigidBody(rbci); - m_dynamicsWorld->addRigidBody(body); - } -*/ -} - -void ParticleSystem::finalizeBullet() -{ - delete m_dynamicsWorld; - delete m_constraintSolver; - delete m_broadphase; - delete m_dispatcher ; - delete m_collisionConfiguration; -} - -float* ParticleSystem::copyBuffersFromDeviceToHost() -{ - copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*m_numParticles); - // fill color buffer - glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]); - float* hPosData = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY); - return hPosData; -} - -void ParticleSystem::copyBuffersFromHostToDevice() -{ - glUnmapBufferARB(GL_ARRAY_BUFFER); - copyArrayToDevice(m_dVel[m_currentVelRead],m_hVel, 0, sizeof(float)*4*m_numParticles); -} - - -void ParticleSystem::debugDraw() -{ -#if USE_BULLET - glDisable(GL_DEPTH_TEST); - m_dynamicsWorld->debugDrawWorld(); - glEnable(GL_DEPTH_TEST); -#endif -} - - diff --git a/Extras/CUDA/particleSystem.cu b/Extras/CUDA/particleSystem.cu deleted file mode 100644 index e6430cfd2..000000000 --- a/Extras/CUDA/particleSystem.cu +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -//#include -#include -#include -#include - -#if defined(__APPLE__) || defined(MACOSX) -#include -#else -#include -#endif - -#include - -#include "particles_kernel.cu" -//#include "radixsort.cu" - - //! Check for CUDA error -# define CUT_CHECK_ERROR(errorMessage) do { \ - cudaError_t err = cudaGetLastError(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ - mm_exit(EXIT_FAILURE); \ - } \ - err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ - mm_exit(EXIT_FAILURE); \ - } } while (0) - - -# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \ - cudaError err = call; \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - mm_exit(EXIT_FAILURE); \ - } } while (0) - -# define MY_CUDA_SAFE_CALL( call) do { \ - MY_CUDA_SAFE_CALL_NO_SYNC(call); \ - cudaError err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - mm_exit(EXIT_FAILURE); \ - } } while (0) - - - -extern "C" -{ -void mm_exit(int val) -{ - exit(val); -} - -void cudaInit(int argc, char **argv) -{ -// CUT_DEVICE_INIT(argc, argv); -} - -void allocateArray(void **devPtr, size_t size) -{ - MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size)); -} - -void freeArray(void *devPtr) -{ - MY_CUDA_SAFE_CALL(cudaFree(devPtr)); -} - -void threadSync() -{ - MY_CUDA_SAFE_CALL(cudaThreadSynchronize()); -} - -void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size) -{ - if (vbo) - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&device, vbo)); - MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost)); - if (vbo) - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vbo)); -} - -void copyArrayToDevice(void* device, const void* host, int offset, int size) -{ - MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice)); -} - -void registerGLBufferObject(uint vbo) -{ - MY_CUDA_SAFE_CALL(cudaGLRegisterBufferObject(vbo)); -} - -void unregisterGLBufferObject(uint vbo) -{ - MY_CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(vbo)); -} - -void setParameters(SimParams *hostParams) -{ - // copy parameters to constant memory - MY_CUDA_SAFE_CALL( cudaMemcpyToSymbol(params, hostParams, sizeof(SimParams)) ); -} - -//Round a / b to nearest higher integer value -int iDivUp(int a, int b){ - return (a % b != 0) ? (a / b + 1) : (a / b); -} - -// compute grid and thread block size for a given number of elements -void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads) -{ - numThreads = min(blockSize, n); - numBlocks = iDivUp(n, numThreads); -} - -void -integrateSystem(uint vboOldPos, uint vboNewPos, - float* oldVel, float* newVel, - float deltaTime, - int numBodies) -{ - int numThreads, numBlocks; - computeGridSize(numBodies, 256, numBlocks, numThreads); - - float *oldPos, *newPos; - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos)); - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos)); - - // execute the kernel - integrate<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel, - (float4*)oldPos, (float4*)oldVel, - deltaTime); - - // check if kernel invocation generated an error - CUT_CHECK_ERROR("integrate kernel execution failed"); - - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos)); - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos)); -} - -void -updateGrid(uint vboPos, - uint* gridCounters, - uint* gridCells, - uint numBodies, - uint numCells) -{ - int numThreads, numBlocks; - computeGridSize(numBodies, 256, numBlocks, numThreads); - - float *pos; - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos)); - - MY_CUDA_SAFE_CALL(cudaMemset(gridCounters, 0, numCells*sizeof(uint))); - - // execute the kernel - updateGridD<<< numBlocks, numThreads >>>((float4 *) pos, - gridCounters, - gridCells); - - // check if kernel invocation generated an error - CUT_CHECK_ERROR("Kernel execution failed"); - - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos)); -} - - -void -calcHash(uint vboPos, - uint* particleHash, - int numBodies) -{ - int numThreads, numBlocks; - computeGridSize(numBodies, 256, numBlocks, numThreads); - - float *pos; - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos)); - - // execute the kernel - calcHashD<<< numBlocks, numThreads >>>((float4 *) pos, - (uint2 *) particleHash); - - // check if kernel invocation generated an error - CUT_CHECK_ERROR("Kernel execution failed"); - - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos)); -} - -void -reorderDataAndFindCellStart(uint* particleHash, - uint vboOldPos, - float* oldVel, - float* sortedPos, - float* sortedVel, - uint* cellStart, - uint numBodies, - uint numCells) -{ - int numThreads, numBlocks; - computeGridSize(numBodies, 256, numBlocks, numThreads); - - MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint))); - - float *oldPos; - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos)); - -#if USE_TEX - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4))); - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4))); -#endif - - reorderDataAndFindCellStartD<<< numBlocks, numThreads >>>( - (uint2 *) particleHash, - (float4 *) oldPos, - (float4 *) oldVel, - (float4 *) sortedPos, - (float4 *) sortedVel, - (uint *) cellStart); - CUT_CHECK_ERROR("Kernel execution failed: reorderDataAndFindCellStartD"); - -#if USE_TEX - MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex)); - MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex)); -#endif - - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos)); -} - -void -collide(uint vboOldPos, uint vboNewPos, - float* sortedPos, float* sortedVel, - float* oldVel, float* newVel, - uint* gridCounters, - uint* gridCells, - uint* particleHash, - uint* cellStart, - uint numBodies, - uint numCells, - uint maxParticlesPerCell) -{ - float4 *oldPos, *newPos; - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos)); - MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos)); - -#if USE_TEX - -#if USE_SORT - // use sorted arrays - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, sortedPos, numBodies*sizeof(float4))); - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, sortedVel, numBodies*sizeof(float4))); - - MY_CUDA_SAFE_CALL(cudaBindTexture(0, particleHashTex, particleHash, numBodies*sizeof(uint2))); - MY_CUDA_SAFE_CALL(cudaBindTexture(0, cellStartTex, cellStart, numCells*sizeof(uint))); -#else - - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4))); - MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4))); - - MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCountersTex, gridCounters,numCells*sizeof(uint))); - MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCellsTex, gridCells, numCells*maxParticlesPerCell*sizeof(uint))); -#endif - -#endif - - // thread per particle - int numThreads, numBlocks; - computeGridSize(numBodies, BLOCKDIM, numBlocks, numThreads); - - // execute the kernel - collideD<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel, -#if USE_SORT - (float4*)sortedPos, (float4*)sortedVel, - (uint2 *) particleHash, - cellStart -#else - (float4*)oldPos, (float4*)oldVel, - gridCounters, - gridCells -#endif - ); - - // check if kernel invocation generated an error - CUT_CHECK_ERROR("Kernel execution failed"); - - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos)); - MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos)); - -#if USE_TEX - MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex)); - MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex)); - -#if USE_SORT - MY_CUDA_SAFE_CALL(cudaUnbindTexture(particleHashTex)); - MY_CUDA_SAFE_CALL(cudaUnbindTexture(cellStartTex)); -#else - MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCountersTex)); - MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCellsTex)); -#endif -#endif -} - -} // extern "C" diff --git a/Extras/CUDA/particleSystem.cuh b/Extras/CUDA/particleSystem.cuh deleted file mode 100644 index e2b297fc6..000000000 --- a/Extras/CUDA/particleSystem.cuh +++ /dev/null @@ -1,57 +0,0 @@ -extern "C" -{ -void cudaInit(int argc, char **argv); - -void allocateArray(void **devPtr, int size); -void freeArray(void *devPtr); - -void threadSync(); - -void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size); -void copyArrayToDevice(void* device, const void* host, int offset, int size); -void registerGLBufferObject(unsigned int vbo); -void unregisterGLBufferObject(unsigned int vbo); - -void setParameters(SimParams *hostParams); - -void -integrateSystem(uint vboOldPos, uint vboNewPos, - float* oldVel, float* newVel, - float deltaTime, - int numBodies); - -void -updateGrid(uint vboPos, - uint* gridCounters, - uint* gridCells, - uint numBodies, - uint numCells); - -void -calcHash(uint vboPos, - uint* particleHash, - int numBodies); - -void -reorderDataAndFindCellStart(uint* particleHash, - uint vboOldPos, - float* oldVel, - float* sortedPos, - float* sortedVel, - uint* cellStart, - uint numBodies, - uint numCells); - -void -collide(uint vboOldPos, uint vboNewPos, - float* sortedPos, float* sortedVel, - float* oldVel, float* newVel, - uint* gridCounters, - uint* gridCells, - uint* particleHash, - uint* cellStart, - uint numBodies, - uint numCells, - uint maxParticlesPerCell); - -} diff --git a/Extras/CUDA/particleSystem.h b/Extras/CUDA/particleSystem.h deleted file mode 100644 index cebf01b7d..000000000 --- a/Extras/CUDA/particleSystem.h +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -#ifndef __BODYSYSTEMCUDA_H__ -#define __BODYSYSTEMCUDA_H__ - -#define DEBUG_GRID 0 -#define DO_TIMING 0 - -#include "particles_kernel.cuh" -#include "vector_functions.h" - -#include "LinearMath/btAlignedObjectArray.h" - -// CUDA BodySystem: runs on the GPU -class ParticleSystem -{ -public: - ParticleSystem(uint numParticles, uint3 gridSize); - ~ParticleSystem(); - - enum ParticleConfig - { - CONFIG_RANDOM, - CONFIG_GRID, - _NUM_CONFIGS - }; - - enum ParticleArray - { - POSITION, - VELOCITY, - }; - - enum SimulationMode - { - SIMULATION_CUDA, - SIMULATION_BULLET_CPU, - SIMULATION_NUM_MODES - }; - - - void update(float deltaTime); - void updateCuda(float deltaTime); - void updateBullet(float deltaTime); - void reset(ParticleConfig config); - - float* getArray(ParticleArray array); - void setArray(ParticleArray array, const float* data, int start, int count); - - int getNumParticles() const { return m_numParticles; } - - unsigned int getCurrentReadBuffer() const { return m_posVbo[m_currentPosRead]; } - unsigned int getColorBuffer() const { return m_colorVBO; } - - void dumpGrid(); - void dumpParticles(uint start, uint count); - - void setIterations(int i) { m_solverIterations = i; } - - void setDamping(float x) { m_params.globalDamping = x; } - void setGravity(float x) { m_params.gravity = make_float3(0.0f, x, 0.0f); } - - void setCollideSpring(float x) { m_params.spring = x; } - void setCollideDamping(float x) { m_params.damping = x; } - void setCollideShear(float x) { m_params.shear = x; } - void setCollideAttraction(float x) { m_params.attraction = x; } - - void setColliderPos(float4 x) { m_params.colliderPos = x; } - - float getParticleRadius() { return m_params.particleRadius; } - float4 getColliderPos() { return m_params.colliderPos; } - float getColliderRadius() { return m_params.colliderRadius; } - uint3 getGridSize() { return m_params.gridSize; } - float3 getWorldOrigin() { return m_params.worldOrigin; } - float3 getCellSize() { return m_params.cellSize; } - - void addSphere(int index, float *pos, float *vel, int r, float spacing); - - SimulationMode getSimulationMode() const - { - return m_simulationMode; - } - - void setSimulationMode(SimulationMode mode) - { - m_simulationMode=mode; - } - - void debugDraw(); - -protected: // methods - ParticleSystem() {} - uint createVBO(uint size); - - void _initialize(int numParticles); - void _finalize(); - - void initGrid(uint *size, float spacing, float jitter, uint numParticles); - - -protected: - // Bullet data - void initializeBullet(); - void finalizeBullet(); - class btDiscreteDynamicsWorld* m_dynamicsWorld; - class btDefaultCollisionConfiguration* m_collisionConfiguration; - class btCollisionDispatcher* m_dispatcher; -// class btCudaBroadphase* m_broadphase; - class btBroadphaseInterface* m_broadphase; - class btSequentialImpulseConstraintSolver* m_constraintSolver; - btAlignedObjectArray m_bulletParticles; - btRigidBody* m_bulletCollider; - - - float* copyBuffersFromDeviceToHost(); - void copyBuffersFromHostToDevice(); - -protected: // data - bool m_bInitialized; - uint m_numParticles; - - // CPU data - float* m_hPos; - float* m_hVel; - - uint* m_hGridCounters; - uint* m_hGridCells; - - uint* m_hParticleHash; - uint* m_hCellStart; - - // GPU data - float* m_dPos[2]; - float* m_dVel[2]; - - float* m_dSortedPos; - float* m_dSortedVel; - - // uniform grid data - uint* m_dGridCounters; // counts number of entries per grid cell - uint* m_dGridCells; // contains indices of up to "m_maxParticlesPerCell" particles per cell - - uint* m_dParticleHash[2]; - uint* m_dCellStart; - - uint m_posVbo[2]; - uint m_colorVBO; - - uint m_currentPosRead, m_currentVelRead; - uint m_currentPosWrite, m_currentVelWrite; - - // params - SimParams m_params; - uint3 m_gridSize; - uint m_numGridCells; - uint m_maxParticlesPerCell; - - uint m_timer; - - uint m_solverIterations; - - SimulationMode m_simulationMode; - -}; - -#endif // __BODYSYSTEMCUDA_H__ diff --git a/Extras/CUDA/particles.cpp b/Extras/CUDA/particles.cpp deleted file mode 100644 index 09f1bf9f0..000000000 --- a/Extras/CUDA/particles.cpp +++ /dev/null @@ -1,571 +0,0 @@ -/* - * Copyright 1993-2007 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -/* - Particle system example with collisions using uniform grid -*/ - -#include -#include -#include -#include -//#include - -#include - -#if defined(__APPLE__) || defined(MACOSX) -#include -#else -#include -#endif - -#include "LinearMath/btQuickprof.h" - -#include "particleSystem.h" -#include "render_particles.h" -#include "paramgl.h" - -// view params -int ox, oy; -int buttonState = 0; -float camera_trans[] = {0, 0, -3}; -float camera_rot[] = {0, 0, 0}; -float camera_trans_lag[] = {0, 0, -3}; -float camera_rot_lag[] = {0, 0, 0}; -const float inertia = 0.1; -ParticleRenderer::DisplayMode displayMode = ParticleRenderer::PARTICLE_SPHERES; - -int mode = 0; -bool displayEnabled = true; -bool bPause = false; -bool displaySliders = false; -bool wireframe = false; - -enum { M_VIEW = 0, M_MOVE }; - -uint numParticles = 0; -uint3 gridSize; -int numIterations = 0; // run until exit - -// simulation parameters -float timestep = 0.5f; -float damping = 1.0f; -float gravity = 0.0003f; -int iterations = 1; -int ballr = 10; - -float collideSpring = 0.5f;; -float collideDamping = 0.02f;; -float collideShear = 0.1f; -float collideAttraction = 0.0f; - -ParticleSystem *psystem = 0; - -// fps -static int fpsCount = 0; -static int fpsLimit = 1; -unsigned int timer; - -ParticleRenderer *renderer = 0; - -float modelView[16]; - -ParamListGL *params; - -extern "C" void cudaInit(int argc, char **argv); - -void init(int numParticles, uint3 gridSize) -{ - psystem = new ParticleSystem(numParticles, gridSize); - psystem->reset(ParticleSystem::CONFIG_GRID); - - renderer = new ParticleRenderer; - renderer->setParticleRadius(psystem->getParticleRadius()); - renderer->setColorBuffer(psystem->getColorBuffer()); - -// CUT_SAFE_CALL(cutCreateTimer(&timer)); -} - -void initGL() -{ - glewInit(); - if (!glewIsSupported("GL_VERSION_2_0 GL_VERSION_1_5 GL_ARB_multitexture GL_ARB_vertex_buffer_object")) { - fprintf(stderr, "Required OpenGL extensions missing."); - exit(-1); - } - - glEnable(GL_DEPTH_TEST); - glClearColor(0.25, 0.25, 0.25, 1.0); - - glutReportErrors(); -} - - -void display() -{ - // update the simulation - if (!bPause) - { - psystem->setIterations(iterations); - psystem->setDamping(damping); - psystem->setGravity(-gravity); - psystem->setCollideSpring(collideSpring); - psystem->setCollideDamping(collideDamping); - psystem->setCollideShear(collideShear); - psystem->setCollideAttraction(collideAttraction); - - psystem->update(timestep); - renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles()); - } - - // render - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - // view transform - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - for (int c = 0; c < 3; ++c) - { - camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia; - camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia; - } - glTranslatef(camera_trans_lag[0], camera_trans_lag[1], camera_trans_lag[2]); - glRotatef(camera_rot_lag[0], 1.0, 0.0, 0.0); - glRotatef(camera_rot_lag[1], 0.0, 1.0, 0.0); - - glGetFloatv(GL_MODELVIEW_MATRIX, modelView); - - // cube - glColor3f(1.0, 1.0, 1.0); - glutWireCube(2.0); - - // collider - glPushMatrix(); - float4 p = psystem->getColliderPos(); - glTranslatef(p.x, p.y, p.z); - glColor3f(1.0, 0.0, 0.0); - glutSolidSphere(psystem->getColliderRadius(), 20, 10); - glPopMatrix(); - - if (displayEnabled) - { - renderer->display(displayMode); - } - - if (displaySliders) { - glDisable(GL_DEPTH_TEST); - glBlendFunc(GL_ONE_MINUS_DST_COLOR, GL_ZERO); // invert color - glEnable(GL_BLEND); - params->Render(0, 0); - glDisable(GL_BLEND); - glEnable(GL_DEPTH_TEST); - } - - psystem->debugDraw(); - - glDisable(GL_DEPTH_TEST); - float offsX = 10.f; - float offsY = 10.f; - renderer->showProfileInfo(offsX, offsY, 20.f); - glEnable(GL_DEPTH_TEST); - - - glutSwapBuffers(); - - { - char fps[256]; - //float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f); - switch (psystem->getSimulationMode()) - { - case ParticleSystem::SIMULATION_CUDA: - { - sprintf(fps, "CUDA particles (%d particles)", numParticles); - break; - } - case ParticleSystem::SIMULATION_BULLET_CPU: - { - sprintf(fps, "Bullet btCudaBroadphase (%d btSphereShapes)", numParticles); - break; - } - default: - { - sprintf(fps, "Unknown simulation mode"); - } - } - glutSetWindowTitle(fps); - } - - glutReportErrors(); - -} - -void reshape(int w, int h) -{ - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - gluPerspective(60.0, (float) w / (float) h, 0.1, 10.0); - - glMatrixMode(GL_MODELVIEW); - glViewport(0, 0, w, h); - - renderer->setWindowSize(w, h); - renderer->setFOV(60.0); -} - -void mouse(int button, int state, int x, int y) -{ - int mods; - - if (state == GLUT_DOWN) - buttonState |= 1<Mouse(x, y, button, state)) { - glutPostRedisplay(); - return; - } - } - - glutPostRedisplay(); -} - -// transfrom vector by matrix -void xform(float *v, float *r, GLfloat *m) -{ - r[0] = v[0]*m[0] + v[1]*m[4] + v[2]*m[8] + m[12]; - r[1] = v[0]*m[1] + v[1]*m[5] + v[2]*m[9] + m[13]; - r[2] = v[0]*m[2] + v[1]*m[6] + v[2]*m[10] + m[14]; -} - -// transform vector by transpose of matrix -void ixform(float *v, float *r, GLfloat *m) -{ - r[0] = v[0]*m[0] + v[1]*m[1] + v[2]*m[2]; - r[1] = v[0]*m[4] + v[1]*m[5] + v[2]*m[6]; - r[2] = v[0]*m[8] + v[1]*m[9] + v[2]*m[10]; -} - -void ixformPoint(float *v, float *r, GLfloat *m) -{ - float x[4]; - x[0] = v[0] - m[12]; - x[1] = v[1] - m[13]; - x[2] = v[2] - m[14]; - x[3] = 1.0f; - ixform(x, r, m); -} - -void motion(int x, int y) -{ - float dx, dy; - dx = x - ox; - dy = y - oy; - - if (displaySliders) { - if (params->Motion(x, y)) { - ox = x; oy = y; - glutPostRedisplay(); - return; - } - } - - switch(mode) - { - case M_VIEW: - if (buttonState == 3) { - // left+middle = zoom - camera_trans[2] += (dy / 100.0) * 0.5 * fabs(camera_trans[2]); - } - else if (buttonState & 2) { - // middle = translate - camera_trans[0] += dx / 100.0; - camera_trans[1] -= dy / 100.0; - } - else if (buttonState & 1) { - // left = rotate - camera_rot[0] += dy / 5.0; - camera_rot[1] += dx / 5.0; - } - break; - - case M_MOVE: - { - float translateSpeed = 0.003f; - float4 p = psystem->getColliderPos(); - if (buttonState==1) { - float v[3], r[3]; - v[0] = dx*translateSpeed; - v[1] = -dy*translateSpeed; - v[2] = 0.0f; - ixform(v, r, modelView); - p.x += r[0]; - p.y += r[1]; - p.z += r[2]; - } else if (buttonState==2) { - float v[3], r[3]; - v[0] = 0.0f; - v[1] = 0.0f; - v[2] = dy*translateSpeed; - ixform(v, r, modelView); - p.x += r[0]; - p.y += r[1]; - p.z += r[2]; - } - psystem->setColliderPos(p); - } - break; - } - - ox = x; oy = y; - glutPostRedisplay(); -} - -inline float frand() -{ - return rand() / (float) RAND_MAX; -} - -// commented out to remove unused parameter warnings in Linux -void key(unsigned char key, int /*x*/, int /*y*/) -{ -#ifndef BT_NO_PROFILE - if (key >= 0x31 && key < 0x37) - { - int child = key-0x31; - renderer->m_profileIterator->Enter_Child(child); - } - if (key==0x30) - { - renderer->m_profileIterator->Enter_Parent(); - } -#endif //BT_NO_PROFILE - - switch (key) - { - case ' ': - bPause = !bPause; - break; - case 13: - psystem->update(timestep); - renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles()); - break; - case '\033': - case 'q': - exit(0); - break; - case 'v': - mode = M_VIEW; - break; - case 'm': - mode = M_MOVE; - break; - case 'p': - displayMode = (ParticleRenderer::DisplayMode) - ((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES); - break; - case 'd': - psystem->dumpGrid(); - break; - case 'u': - psystem->dumpParticles(0, 1); - break; - - case 'r': - displayEnabled = !displayEnabled; - break; - - case 'g': - psystem->reset(ParticleSystem::CONFIG_GRID); - break; - case 'a': - psystem->reset(ParticleSystem::CONFIG_RANDOM); - break; - case 'e': - { - // inject a sphere of particles - float pr = psystem->getParticleRadius(); - float tr = pr+(pr*2.0f)*ballr; - float pos[4], vel[4]; - pos[0] = -1.0 + tr + frand()*(2.0f - tr*2.0f); - pos[1] = 1.0f - tr; - pos[2] = -1.0 + tr + frand()*(2.0f - tr*2.0f); - pos[3] = 0.0f; - vel[0] = vel[1] = vel[2] = vel[3] = 0.0f; - psystem->addSphere(0, pos, vel, ballr, pr*2.0f); - } - break; - case 'b': - { - // shoot ball from camera - float pr = psystem->getParticleRadius(); - float vel[4], velw[4], pos[4], posw[4]; - vel[0] = 0.0f; - vel[1] = 0.0f; - vel[2] = -0.05f; - vel[3] = 0.0f; - ixform(vel, velw, modelView); - - pos[0] = 0.0f; - pos[1] = 0.0f; - pos[2] = -2.5f; - pos[3] = 1.0; - ixformPoint(pos, posw, modelView); - posw[3] = 0.0f; - - psystem->addSphere(0, posw, velw, ballr, pr*2.0f); - } - break; - - case 'w': - wireframe = !wireframe; - break; - - case 'h': - displaySliders = !displaySliders; - break; - case 's': - psystem->setSimulationMode((ParticleSystem::SimulationMode) ((psystem->getSimulationMode() + 1) % ParticleSystem::SIMULATION_NUM_MODES)); - CProfileManager::CleanupMemory(); - break; - } - - glutPostRedisplay(); -} - -void special(int k, int x, int y) -{ - if (displaySliders) { - params->Special(k, x, y); - } -} - -void idle(void) -{ - glutPostRedisplay(); -} - -void initParams() -{ - // create a new parameter list - params = new ParamListGL("misc"); - params->AddParam(new Param("time step", timestep, 0.0, 1.0, 0.01, ×tep)); - params->AddParam(new Param("iterations", iterations, 0, 10, 1, &iterations)); - params->AddParam(new Param("damping", damping, 0.0, 1.0, 0.001, &damping)); - params->AddParam(new Param("gravity", gravity, 0.0, 0.001, 0.0001, &gravity)); - params->AddParam(new Param("ball r", ballr, 1, 20, 1, &ballr)); - - params->AddParam(new Param("collide spring", collideSpring, 0.0, 1.0, 0.001, &collideSpring)); - params->AddParam(new Param("collide damping", collideDamping, 0.0, 0.1, 0.001, &collideDamping)); - params->AddParam(new Param("collide shear", collideShear, 0.0, 0.1, 0.001, &collideShear)); - params->AddParam(new Param("collide attract", collideAttraction, 0.0, 0.1, 0.001, &collideAttraction)); -} - -void mainMenu(int i) -{ - key((unsigned char) i, 0, 0); -} - -void initMenus() -{ - glutCreateMenu(mainMenu); - glutAddMenuEntry("Reset block [g]", 'g'); - glutAddMenuEntry("Reset random [a]", 'a'); - glutAddMenuEntry("Add sphere [e]", 'e'); - glutAddMenuEntry("Shoot ball [b]", 'b'); - glutAddMenuEntry("View mode [v]", 'v'); - glutAddMenuEntry("Move cursor mode [m]", 'm'); - glutAddMenuEntry("Toggle point rendering [p]", 'p'); - glutAddMenuEntry("Toggle Bullet simulation[s]", 's'); - glutAddMenuEntry("Toggle animation [ ]", ' '); - glutAddMenuEntry("Step animation [ret]", 13); - glutAddMenuEntry("Toggle sliders [h]", 'h'); - glutAddMenuEntry("Quit (esc)", '\033'); - glutAttachMenu(GLUT_RIGHT_BUTTON); -} - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int -main(int argc, char** argv) -{ -// numParticles = 65536*2; -// numParticles = 65536; -// numParticles = 32768; -// numParticles = 8192; -// numParticles = 4096; - numParticles = 2048; -// numParticles = 1024; -// numParticles = 256; -// numParticles = 32; -// numParticles = 2; - uint gridDim = 64; - numIterations = 0; - - gridSize.x = gridSize.y = gridSize.z = gridDim; - printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z); - - cudaInit(argc, argv); - - glutInit(&argc, argv); - glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE); - glutInitWindowSize(640, 480); - glutCreateWindow("CUDA particles"); - - initGL(); - init(numParticles, gridSize); - initParams(); - initMenus(); - - glutDisplayFunc(display); - glutReshapeFunc(reshape); - glutMouseFunc(mouse); - glutMotionFunc(motion); - glutKeyboardFunc(key); - glutSpecialFunc(special); - glutIdleFunc(idle); - - glutMainLoop(); - - if (psystem) - delete psystem; - - return 0; -} diff --git a/Extras/CUDA/particles.sln b/Extras/CUDA/particles.sln deleted file mode 100644 index 82c494c6d..000000000 --- a/Extras/CUDA/particles.sln +++ /dev/null @@ -1,228 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 9.00 -# Visual Studio 2005 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "particles", "particles.vcproj", "{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}" - ProjectSection(ProjectDependencies) = postProject - {F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165} - {7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3} - {61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319} - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcollision", "..\..\msvc\8\libbulletcollision.vcproj", "{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletmath", "..\..\msvc\8\libbulletmath.vcproj", "{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletdynamics", "..\..\msvc\8\libbulletdynamics.vcproj", "{61BD1097-CF2E-B296-DAA9-73A6FE135319}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcuda", "libbulletcuda.vcproj", "{F74E8E02-0B47-4816-BD0B-FAEAE3343165}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletopenglsupport", "..\..\msvc\8\libbulletopenglsupport.vcproj", "{7C428E76-9271-6284-20F0-9B38ED6931E3}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "appBasicDemo", "..\..\msvc\8\appBasicDemo.vcproj", "{3578834A-4B06-DE6F-78AC-FE11F7226D35}" - ProjectSection(ProjectDependencies) = postProject - {F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165} - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} - {61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319} - {7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - DebugDll|Win32 = DebugDll|Win32 - DebugDll|x64 = DebugDll|x64 - DebugDoublePrecision|Win32 = DebugDoublePrecision|Win32 - DebugDoublePrecision|x64 = DebugDoublePrecision|x64 - EmuDebug|Win32 = EmuDebug|Win32 - EmuDebug|x64 = EmuDebug|x64 - EmuRelease|Win32 = EmuRelease|Win32 - EmuRelease|x64 = EmuRelease|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - ReleaseDll|Win32 = ReleaseDll|Win32 - ReleaseDll|x64 = ReleaseDll|x64 - ReleaseDoublePrecision|Win32 = ReleaseDoublePrecision|Win32 - ReleaseDoublePrecision|x64 = ReleaseDoublePrecision|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.ActiveCfg = Debug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.Build.0 = Debug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.Build.0 = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|Win32.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.Build.0 = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|Win32.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.Build.0 = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.ActiveCfg = EmuDebug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.Build.0 = EmuDebug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.ActiveCfg = EmuDebug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.Build.0 = EmuDebug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.ActiveCfg = EmuRelease|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.Build.0 = EmuRelease|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.ActiveCfg = EmuRelease|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.Build.0 = EmuRelease|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.ActiveCfg = Release|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.Build.0 = Release|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.Build.0 = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|Win32.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.Build.0 = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|Win32.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.Build.0 = Release|x64 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.Build.0 = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|x64.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.Build.0 = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.Build.0 = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|x64.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.Build.0 = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|x64.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.Build.0 = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|x64.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.Build.0 = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.Build.0 = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|x64.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.Build.0 = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|x64.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.Build.0 = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|x64.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.Build.0 = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.Build.0 = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|x64.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.Build.0 = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|x64.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.ActiveCfg = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.Build.0 = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|x64.ActiveCfg = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.Build.0 = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.Build.0 = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|x64.ActiveCfg = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.ActiveCfg = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.Build.0 = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|x64.ActiveCfg = Release|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.ActiveCfg = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.Build.0 = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|x64.ActiveCfg = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.Build.0 = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.Build.0 = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|x64.ActiveCfg = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.ActiveCfg = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.Build.0 = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|x64.ActiveCfg = Release|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.ActiveCfg = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.Build.0 = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|x64.ActiveCfg = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.Build.0 = DebugDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|x64.ActiveCfg = DebugDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.Build.0 = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.Build.0 = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|x64.ActiveCfg = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.ActiveCfg = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.Build.0 = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|x64.ActiveCfg = Release|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/Extras/CUDA/particles.vcproj b/Extras/CUDA/particles.vcproj deleted file mode 100644 index 5dc6ab5e6..000000000 --- a/Extras/CUDA/particles.vcproj +++ /dev/null @@ -1,880 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Extras/CUDA/particles_kernel.cu b/Extras/CUDA/particles_kernel.cu deleted file mode 100644 index 9817cb384..000000000 --- a/Extras/CUDA/particles_kernel.cu +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -/* - * Device code. - */ - -#ifndef _PARTICLES_KERNEL_H_ -#define _PARTICLES_KERNEL_H_ - -#include -#include -#include "cutil_math.h" -#include "math_constants.h" -#include "particles_kernel.cuh" - -#if USE_TEX -// textures for particle position and velocity -texture oldPosTex; -texture oldVelTex; - -texture particleHashTex; -texture cellStartTex; - -texture gridCountersTex; -texture gridCellsTex; -#endif - -__constant__ SimParams params; - -// integrate particle attributes -__global__ void -integrate(float4* newPos, float4* newVel, - float4* oldPos, float4* oldVel, - float deltaTime) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - - float4 pos4 = oldPos[index]; - float4 vel4 = oldVel[index]; - float3 pos = make_float3(pos4); - float3 vel = make_float3(vel4); - - vel += params.gravity * deltaTime; - vel *= params.globalDamping; - - // new position = old position + velocity * deltaTime - pos += vel * deltaTime; - - // bounce off cube sides - if (pos.x > 1.0f - params.particleRadius) { pos.x = 1.0f - params.particleRadius; vel.x *= params.boundaryDamping; } - if (pos.x < -1.0f + params.particleRadius) { pos.x = -1.0f + params.particleRadius; vel.x *= params.boundaryDamping;} - if (pos.y > 1.0f - params.particleRadius) { pos.y = 1.0f - params.particleRadius; vel.y *= params.boundaryDamping; } - if (pos.y < -1.0f + params.particleRadius) { pos.y = -1.0f + params.particleRadius; vel.y *= params.boundaryDamping;} - if (pos.z > 1.0f - params.particleRadius) { pos.z = 1.0f - params.particleRadius; vel.z *= params.boundaryDamping; } - if (pos.z < -1.0f + params.particleRadius) { pos.z = -1.0f + params.particleRadius; vel.z *= params.boundaryDamping;} - - // store new position and velocity - newPos[index] = make_float4(pos, pos4.w); - newVel[index] = make_float4(vel, vel4.w); -} - -// calculate position in uniform grid -__device__ int3 calcGridPos(float4 p) -{ - int3 gridPos; - gridPos.x = floor((p.x - params.worldOrigin.x) / params.cellSize.x); - gridPos.y = floor((p.y - params.worldOrigin.y) / params.cellSize.y); - gridPos.z = floor((p.z - params.worldOrigin.z) / params.cellSize.z); - return gridPos; -} - -// calculate address in grid from position (clamping to edges) -__device__ uint calcGridHash(int3 gridPos) -{ - gridPos.x = max(0, min(gridPos.x, params.gridSize.x-1)); - gridPos.y = max(0, min(gridPos.y, params.gridSize.y-1)); - gridPos.z = max(0, min(gridPos.z, params.gridSize.z-1)); - return __mul24(__mul24(gridPos.z, params.gridSize.y), params.gridSize.x) + __mul24(gridPos.y, params.gridSize.x) + gridPos.x; -} - -// add particle to cell using atomics -__device__ void addParticleToCell(int3 gridPos, - uint index, - uint* gridCounters, - uint* gridCells) -{ - // calculate grid hash - uint gridHash = calcGridHash(gridPos); - - // increment cell counter using atomics -#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS - int counter = 0; -#else - int counter = atomicAdd(&gridCounters[gridHash], 1); // returns previous value - counter = min(counter, params.maxParticlesPerCell-1); -#endif - - // write particle index into this cell (very uncoalesced!) - gridCells[gridHash*params.maxParticlesPerCell + counter] = index; -} - - -// update uniform grid -__global__ void -updateGridD(float4* pos, - uint* gridCounters, - uint* gridCells) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - float4 p = pos[index]; - - // get address in grid - int3 gridPos = calcGridPos(p); - - addParticleToCell(gridPos, index, gridCounters, gridCells); -} - -// calculate grid hash value for each particle -__global__ void -calcHashD(float4* pos, - uint2* particleHash) -{ - int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; - float4 p = pos[index]; - - // get address in grid - int3 gridPos = calcGridPos(p); - uint gridHash = calcGridHash(gridPos); - - // store grid hash and particle index - particleHash[index] = make_uint2(gridHash, index); -} - -// rearrange particle data into sorted order, and find the start of each cell in the -// sorted hash array -__global__ void -reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by hash - float4* oldPos, - float4* oldVel, - float4* sortedPos, - float4* sortedVel, - uint* cellStart) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - - uint2 sortedData = particleHash[index]; - - // Load hash data into shared memory so that we can look - // at neighboring particle's hash value without loading - // two hash values per thread - __shared__ uint sharedHash[257]; - sharedHash[threadIdx.x+1] = sortedData.x; - if (index > 0 && threadIdx.x == 0) - { - // first thread in block must load neighbor particle hash - volatile uint2 prevData = particleHash[index-1]; - sharedHash[0] = prevData.x; - } - - __syncthreads(); - if (index == 0 || sortedData.x != sharedHash[threadIdx.x]) - { - cellStart[sortedData.x] = index; - } - - // Now use the sorted index to reorder the pos and vel data - float4 pos = FETCH(oldPos, sortedData.y); // macro does either global read or texture fetch - float4 vel = FETCH(oldVel, sortedData.y); // see particles_kernel.cuh - - sortedPos[index] = pos; - sortedVel[index] = vel; - -} - -// collide two spheres using DEM method -__device__ float3 collideSpheres(float4 posA, float4 posB, - float4 velA, float4 velB, - float radiusA, float radiusB, - float attraction) -{ - // calculate relative position - float3 relPos; - relPos.x = posB.x - posA.x; - relPos.y = posB.y - posA.y; - relPos.z = posB.z - posA.z; - - float dist = length(relPos); - float collideDist = radiusA + radiusB; - - float3 force = make_float3(0.0f); - if (dist < collideDist) { - float3 norm = relPos / dist; - - // relative velocity - float3 relVel; - relVel.x = velB.x - velA.x; - relVel.y = velB.y - velA.y; - relVel.z = velB.z - velA.z; - - // relative tangential velocity - float3 tanVel = relVel - (dot(relVel, norm) * norm); - - // spring force - force = -params.spring*(collideDist - dist) * norm; - // dashpot (damping) force - force += params.damping*relVel; - // tangential shear force - force += params.shear*tanVel; - // attraction - force += attraction*relPos; - } - - return force; -} - - -// collide particle with all particles in a given cell -// version using grid built with atomics -__device__ -float3 collideCell(int3 gridPos, - uint index, - float4 pos, - float4 vel, - float4* oldPos, - float4* oldVel, - uint* gridCounters, - uint* gridCells) -{ - float3 force = make_float3(0.0f); - - if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) { - return force; - } - - uint gridHash = calcGridHash(gridPos); - - // iterate over particles in this cell - uint particlesInCell = FETCH(gridCounters, gridHash); - particlesInCell = min(particlesInCell, params.maxParticlesPerCell-1); - - for(uint i=0; i params.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) { - return force; - } - - uint gridHash = calcGridHash(gridPos); - - // get start of bucket for this cell - uint bucketStart = FETCH(cellStart, gridHash); - if (bucketStart == 0xffffffff) - return force; // cell empty - - // iterate over particles in this cell - for(uint i=0; i - -#include -#include -#include - -#include -#include "BMF_Api.h" - -#include "LinearMath/btQuickprof.h" - -#include "render_particles.h" -#include "shaders.h" - -#ifndef M_PI -#define M_PI 3.1415926535897932384626433832795 -#endif - -ParticleRenderer::ParticleRenderer() -: m_pos(0), - m_numParticles(0), - m_pointSize(1.0f), - m_particleRadius(0.125f * 0.5f), - m_program(0), - m_vbo(0), - m_colorVBO(0) -{ - _initGL(); -#ifndef BT_NO_PROFILE - m_profileIterator = CProfileManager::Get_Iterator(); -#endif //BT_NO_PROFILE -} - -ParticleRenderer::~ParticleRenderer() -{ - m_pos = 0; -} - -void ParticleRenderer::setPositions(float *pos, int numParticles) -{ - m_pos = pos; - m_numParticles = numParticles; -} - -void ParticleRenderer::setVertexBuffer(unsigned int vbo, int numParticles) -{ - m_vbo = vbo; - m_numParticles = numParticles; -} - -void ParticleRenderer::_drawPoints() -{ - if (!m_vbo) - { - glBegin(GL_POINTS); - { - int k = 0; - for (int i = 0; i < m_numParticles; ++i) - { - glVertex3fv(&m_pos[k]); - k += 4; - } - } - glEnd(); - } - else - { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_vbo); - glVertexPointer(4, GL_FLOAT, 0, 0); - glEnableClientState(GL_VERTEX_ARRAY); - - if (m_colorVBO) { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_colorVBO); - glColorPointer(4, GL_FLOAT, 0, 0); - glEnableClientState(GL_COLOR_ARRAY); - } - - glDrawArrays(GL_POINTS, 0, m_numParticles); - - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - glDisableClientState(GL_VERTEX_ARRAY); - glDisableClientState(GL_COLOR_ARRAY); - } -} - -void ParticleRenderer::display(DisplayMode mode /* = PARTICLE_POINTS */) -{ - switch (mode) - { - case PARTICLE_POINTS: - glColor3f(1, 1, 1); - glPointSize(m_pointSize); - _drawPoints(); - break; - - default: - case PARTICLE_SPHERES: - glEnable(GL_POINT_SPRITE_ARB); - glTexEnvi(GL_POINT_SPRITE_ARB, GL_COORD_REPLACE_ARB, GL_TRUE); - glEnable(GL_VERTEX_PROGRAM_POINT_SIZE_NV); - glDepthMask(GL_TRUE); - glEnable(GL_DEPTH_TEST); - - glUseProgram(m_program); - glUniform1f( glGetUniformLocation(m_program, "pointScale"), m_window_h / tanf(m_fov*0.5f*(float)M_PI/180.0f) ); - glUniform1f( glGetUniformLocation(m_program, "pointRadius"), m_particleRadius ); - - glColor3f(1, 1, 1); - _drawPoints(); - - glUseProgram(0); - glDisable(GL_POINT_SPRITE_ARB); - break; - } -} - -GLuint -ParticleRenderer::_compileProgram(const char *vsource, const char *fsource) -{ - GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER); - GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER); - - glShaderSource(vertexShader, 1, &vsource, 0); - glShaderSource(fragmentShader, 1, &fsource, 0); - - glCompileShader(vertexShader); - glCompileShader(fragmentShader); - - GLuint program = glCreateProgram(); - - glAttachShader(program, vertexShader); - glAttachShader(program, fragmentShader); - - glLinkProgram(program); - - // check if program linked - GLint success = 0; - glGetProgramiv(program, GL_LINK_STATUS, &success); - - if (!success) { - char temp[256]; - glGetProgramInfoLog(program, 256, 0, temp); - printf("Failed to link program:\n%s\n", temp); - glDeleteProgram(program); - program = 0; - } - - return program; -} - -void ParticleRenderer::_initGL() -{ - m_program = _compileProgram(vertexShader, spherePixelShader); - -#if !defined(__APPLE__) && !defined(MACOSX) - glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FALSE); - glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE); -#endif -} - - -void ParticleRenderer::showProfileInfo(float& xOffset,float& yStart, float yIncr) -{ -#ifndef BT_NO_PROFILE - - static double time_since_reset = 0.f; -// if (!m_idle) - { - time_since_reset = CProfileManager::Get_Time_Since_Reset(); - } - beginWinCoords(); - - { - //recompute profiling data, and store profile strings - - char blockTime[128]; - - double totalTime = 0; - - int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset(); - - m_profileIterator->First(); - - double parent_time = m_profileIterator->Is_Root() ? time_since_reset : m_profileIterator->Get_Current_Parent_Total_Time(); - - { - sprintf(blockTime,"--- Profiling: %s (total running time: %.3f ms) ---", m_profileIterator->Get_Current_Parent_Name(), parent_time ); - displayProfileString(xOffset,yStart,blockTime); - yStart += yIncr; - sprintf(blockTime,"press number (1,2...) to display child timings, or 0 to go up to parent" ); - displayProfileString(xOffset,yStart,blockTime); - yStart += yIncr; - - } - double accumulated_time = 0.f; - - for (int i = 0; !m_profileIterator->Is_Done(); m_profileIterator->Next()) - { - double current_total_time = m_profileIterator->Get_Current_Total_Time(); - accumulated_time += current_total_time; - double fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; - - sprintf(blockTime,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)", - ++i, m_profileIterator->Get_Current_Name(), fraction, - (current_total_time / (double)frames_since_reset),m_profileIterator->Get_Current_Total_Calls()); - displayProfileString(xOffset,yStart,blockTime); - yStart += yIncr; - totalTime += current_total_time; - } - - sprintf(blockTime,"%s (%.3f %%) :: %.3f ms", "Unaccounted", - // (min(0, time_since_reset - totalTime) / time_since_reset) * 100); - parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); - - displayProfileString(xOffset,yStart,blockTime); - yStart += yIncr; - sprintf(blockTime,"-------------------------------------------------"); - displayProfileString(xOffset,yStart,blockTime); - yStart += yIncr; - - } - endWinCoords(); -#endif//BT_NO_PROFILE -} - - -void ParticleRenderer::displayProfileString(int xOffset,int yStart,char* message) -{ - glRasterPos3f(xOffset,yStart,0); - BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),message); -} diff --git a/Extras/CUDA/render_particles.h b/Extras/CUDA/render_particles.h deleted file mode 100644 index 6511478b1..000000000 --- a/Extras/CUDA/render_particles.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -#ifndef __RENDER_PARTICLES__ -#define __RENDER_PARTICLES__ - -class ParticleRenderer -{ -public: - ParticleRenderer(); - ~ParticleRenderer(); - - void setPositions(float *pos, int numParticles); - void setVertexBuffer(unsigned int vbo, int numParticles); - void setColorBuffer(unsigned int vbo) { m_colorVBO = vbo; } - - enum DisplayMode - { - PARTICLE_POINTS, - PARTICLE_SPHERES, - PARTICLE_NUM_MODES - }; - - void display(DisplayMode mode = PARTICLE_POINTS); - void displayGrid(); - - void setPointSize(float size) { m_pointSize = size; } - void setParticleRadius(float r) { m_particleRadius = r; } - void setFOV(float fov) { m_fov = fov; } - void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; } - - void showProfileInfo(float& xOffset,float& yStart, float yIncr); - void displayProfileString(int xOffset,int yStart,char* message); - class CProfileIterator* m_profileIterator; - -protected: // methods - void _initGL(); - void _drawPoints(); - GLuint _compileProgram(const char *vsource, const char *fsource); - -protected: // data - float *m_pos; - int m_numParticles; - - float m_pointSize; - float m_particleRadius; - float m_fov; - int m_window_w, m_window_h; - - GLuint m_program; - - GLuint m_vbo; - GLuint m_colorVBO; -}; - -#endif //__ RENDER_PARTICLES__ diff --git a/Extras/CUDA/shaders.cpp b/Extras/CUDA/shaders.cpp deleted file mode 100644 index 57a97572f..000000000 --- a/Extras/CUDA/shaders.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#define STRINGIFY(A) #A - -// vertex shader -const char *vertexShader = STRINGIFY( -uniform float pointRadius; // point size in world space -uniform float pointScale; // scale to calculate size in pixels -uniform float densityScale; -uniform float densityOffset; -void main() -{ - // calculate window-space point size - vec3 posEye = vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0)); - float dist = length(posEye); - gl_PointSize = pointRadius * (pointScale / dist); - - gl_TexCoord[0] = gl_MultiTexCoord0; - gl_Position = gl_ModelViewProjectionMatrix * vec4(gl_Vertex.xyz, 1.0); - - gl_FrontColor = gl_Color; -} -); - -// pixel shader for rendering points as shaded spheres -const char *spherePixelShader = STRINGIFY( -void main() -{ - const vec3 lightDir = vec3(0.577, 0.577, 0.577); - - // calculate normal from texture coordinates - vec3 N; - N.xy = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0); - float mag = dot(N.xy, N.xy); - if (mag > 1.0) discard; // kill pixels outside circle - N.z = sqrt(1.0-mag); - - // calculate lighting - float diffuse = max(0.0, dot(lightDir, N)); - - gl_FragColor = gl_Color * diffuse; -} -); diff --git a/Extras/CUDA/shaders.h b/Extras/CUDA/shaders.h deleted file mode 100644 index d90b40cef..000000000 --- a/Extras/CUDA/shaders.h +++ /dev/null @@ -1,2 +0,0 @@ -extern const char *vertexShader; -extern const char *spherePixelShader; diff --git a/src/BulletMultiThreaded/btGpuUtils.cpp b/src/BulletMultiThreaded/btGpuUtils.cpp new file mode 100644 index 000000000..4852ff811 --- /dev/null +++ b/src/BulletMultiThreaded/btGpuUtils.cpp @@ -0,0 +1,28 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//---------------------------------------------------------------------------------------- + +#include "btGpuDefines.h" +#include "btGpuUtilsSharedDefs.h" + +//---------------------------------------------------------------------------------------- + +#include "btGpuUtilsSharedCode.h" + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +