Added a possibility to build appGpu2dDemo with CUDA solver.

It is disabled by default, see Demos/Gpu2dDemo/btGpuDemoDynamicsWorld.h for instructions how to enable it

Cleaned up Extras/CUDA folder and libbulletcuda project
This commit is contained in:
rponom
2009-05-14 20:20:56 +00:00
parent 42ece54556
commit a95b2a33bd
35 changed files with 528 additions and 5845 deletions

View File

@@ -97,7 +97,7 @@ void ConstraintDemo::initPhysics()
trans.setOrigin(btVector3(0,20,0)); trans.setOrigin(btVector3(0,20,0));
float mass = 1.f; float mass = 1.f;
#if 1 #if 0
//point to point constraint (ball socket) //point to point constraint (ball socket)
{ {
btRigidBody* body0 = localCreateRigidBody( mass,trans,shape); btRigidBody* body0 = localCreateRigidBody( mass,trans,shape);
@@ -135,7 +135,7 @@ void ConstraintDemo::initPhysics()
} }
#endif #endif
#if 1 #if 0
//create a slider, using the generic D6 constraint //create a slider, using the generic D6 constraint
{ {
mass = 1.f; mass = 1.f;
@@ -179,7 +179,7 @@ void ConstraintDemo::initPhysics()
} }
#endif #endif
#if 1 #if 0
{ // create a door using hinge constraint attached to the world { // create a door using hinge constraint attached to the world
btCollisionShape* pDoorShape = new btBoxShape(btVector3(2.0f, 5.0f, 0.2f)); btCollisionShape* pDoorShape = new btBoxShape(btVector3(2.0f, 5.0f, 0.2f));
m_collisionShapes.push_back(pDoorShape); m_collisionShapes.push_back(pDoorShape);
@@ -201,7 +201,7 @@ void ConstraintDemo::initPhysics()
//btRigidBody* pDropBody = localCreateRigidBody( 10.0, doorTrans, shape); //btRigidBody* pDropBody = localCreateRigidBody( 10.0, doorTrans, shape);
} }
#endif #endif
#if 1 #if 0
{ // create a generic 6DOF constraint { // create a generic 6DOF constraint
btTransform tr; btTransform tr;
@@ -262,7 +262,7 @@ void ConstraintDemo::initPhysics()
pGen6DOF->setDbgDrawSize(btScalar(5.f)); pGen6DOF->setDbgDrawSize(btScalar(5.f));
} }
#endif #endif
#if 1 #if 0
{ // create a ConeTwist constraint { // create a ConeTwist constraint
btTransform tr; btTransform tr;
@@ -294,7 +294,7 @@ void ConstraintDemo::initPhysics()
pCT->setDbgDrawSize(btScalar(5.f)); pCT->setDbgDrawSize(btScalar(5.f));
} }
#endif #endif
#if 1 #if 0
{ // Hinge connected to the world, with motor (to hinge motor with new and old constraint solver) { // Hinge connected to the world, with motor (to hinge motor with new and old constraint solver)
btTransform tr; btTransform tr;
tr.setIdentity(); tr.setIdentity();
@@ -311,6 +311,63 @@ void ConstraintDemo::initPhysics()
pHinge->setDbgDrawSize(btScalar(5.f)); pHinge->setDbgDrawSize(btScalar(5.f));
} }
#endif #endif
#if 1
{
// create a universal joint using generic 6DOF constraint
// create two rigid bodies
// static bodyA (parent) on top:
btTransform tr;
tr.setIdentity();
tr.setOrigin(btVector3(btScalar(0.), btScalar(4.), btScalar(0.)));
btRigidBody* pBodyA = localCreateRigidBody( 0.0, tr, shape);
pBodyA->setActivationState(DISABLE_DEACTIVATION);
// dynamic bodyB (child) below it :
tr.setIdentity();
tr.setOrigin(btVector3(btScalar(0.), btScalar(0.), btScalar(0.)));
btRigidBody* pBodyB = localCreateRigidBody(1.0, tr, shape);
pBodyB->setActivationState(DISABLE_DEACTIVATION);
// add some (arbitrary) data to build constraint frames
btVector3 parentAxis(1.f, 0.f, 0.f);
btVector3 childAxis(0.f, 0.f, 1.f);
btVector3 anchor(0.f, 2.f, 0.f);
// build frame basis
// 6DOF constraint uses Euler angles and to define limits
// it is assumed that rotational order is :
// Z - first, allowed limits are (-PI,PI);
// new position of Y - second (allowed limits are (-PI/2 + epsilon, PI/2 - epsilon), where epsilon is a small positive number
// used to prevent constraint from instability on poles;
// new position of X, allowed limits are (-PI,PI);
// So to simulate ODE Universal joint we should use parent axis as Z, child axis as Y and limit all other DOFs
// Build the frame in world coordinate system first
btVector3 zAxis = parentAxis.normalize();
btVector3 yAxis = childAxis.normalize();
btVector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system
btTransform frameInW;
frameInW.setIdentity();
frameInW.getBasis().setValue( xAxis[0], yAxis[0], zAxis[0],
xAxis[1], yAxis[1], zAxis[1],
xAxis[2], yAxis[2], zAxis[2]);
frameInW.setOrigin(anchor);
// now get constraint frame in local coordinate systems
btTransform frameInA = pBodyA->getCenterOfMassTransform().inverse() * frameInW;
btTransform frameInB = pBodyB->getCenterOfMassTransform().inverse() * frameInW;
// now create the constraint
btGeneric6DofConstraint* pGen6DOF = new btGeneric6DofConstraint(*pBodyA, *pBodyB, frameInA, frameInB, true);
// linear limits in our case are allowed offset of origin of frameInB in frameInA, so set them to zero
pGen6DOF->setLinearLowerLimit(btVector3(0., 0., 0.));
pGen6DOF->setLinearUpperLimit(btVector3(0., 0., 0.));
// set limits for parent (axis z) and child (axis Y)
pGen6DOF->setAngularLowerLimit(btVector3(0.f, -SIMD_HALF_PI * 0.5f, -SIMD_HALF_PI * 0.5f));
pGen6DOF->setAngularUpperLimit(btVector3(0.f, SIMD_HALF_PI * 0.5f, SIMD_HALF_PI * 0.5f));
// add constraint to world
m_dynamicsWorld->addConstraint(pGen6DOF, true);
// draw constraint frames and limits for debugging
pGen6DOF->setDbgDrawSize(btScalar(10.f));
}
#endif
} }
ConstraintDemo::~ConstraintDemo() ConstraintDemo::~ConstraintDemo()

View File

@@ -13,9 +13,9 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
#include "BulletMultiThreaded/btGpuDefines.h" //#include "BulletMultiThreaded/btGpuDefines.h"
#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" //#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "BulletMultiThreaded/btGpuUtilsSharedCode.h" //#include "BulletMultiThreaded/btGpuUtilsSharedCode.h"
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
@@ -144,6 +144,7 @@ public:
} }
btMultiSphereShape* multiSphere = new btMultiSphereShape(inertiaHalfExtents,positions,radii,numSpheres); btMultiSphereShape* multiSphere = new btMultiSphereShape(inertiaHalfExtents,positions,radii,numSpheres);
m_demo->addCollisionShape(multiSphere);
btVector3 localInertia(0,0,0); btVector3 localInertia(0,0,0);
if (mass) if (mass)
@@ -241,6 +242,13 @@ void BasicDemo::displayCallback(void) {
btOverlappingPairCache* gPairCache; btOverlappingPairCache* gPairCache;
static btScalar fRandMinMax(btScalar fMin, btScalar fMax)
{
btScalar fr = btScalar(rand()) / btScalar(RAND_MAX);
return fMax - (fMax - fMin) * fr;
}
void BasicDemo::initPhysics() void BasicDemo::initPhysics()
{ {
setTexturing(false); setTexturing(false);
@@ -363,6 +371,31 @@ void BasicDemo::initPhysics()
{ {
loader.processFile("../../test1.oec"); loader.processFile("../../test1.oec");
} }
#if 0 // perfomance test : work-in-progress
{ // add more object, but share their shapes
int numNewObjects = 500;
mass = 1.f;
for(int n_obj = 0; n_obj < numNewObjects; n_obj++)
{
btDefaultMotionState* myMotionState= 0;
btVector3 localInertia(0,0,0);
btTransform worldTransform;
worldTransform.setIdentity();
btScalar fx = fRandMinMax(-30., 30.);
btScalar fy = fRandMinMax(5., 30.);
worldTransform.setOrigin(btVector3(fx, fy, 0.f));
int sz = m_collisionShapes.size();
btMultiSphereShape* multiSphere = (btMultiSphereShape*)m_collisionShapes[1];
myMotionState = new btDefaultMotionState(worldTransform);
multiSphere->calculateLocalInertia(mass, localInertia);
btRigidBody* body = new btRigidBody(mass,myMotionState,multiSphere,localInertia);
body->setLinearFactor(btVector3(1,1,0));
body->setAngularFactor(btVector3(0,0,1));
body->setWorldTransform(worldTransform);
getDynamicsWorld()->addRigidBody(body);
}
}
#endif
#else #else
#if (!SPEC_TEST) #if (!SPEC_TEST)

View File

@@ -21,8 +21,8 @@ subject to the following restrictions:
#ifdef BT_USE_CUDA #ifdef BT_USE_CUDA
#include "btCudaDemoPairCache.h" //#include "btCudaDemoPairCache.h"
#include <vector_types.h> //#include <vector_types.h>
#endif //BT_USE_CUDA #endif //BT_USE_CUDA
class btBroadphaseInterface; class btBroadphaseInterface;
@@ -88,7 +88,7 @@ class BasicDemo : public GlutDemoApplication
void DrawConstraintInfo(); void DrawConstraintInfo();
void outputDebugInfo(int & xOffset,int & yStart, int yIncr); void outputDebugInfo(int & xOffset,int & yStart, int yIncr);
virtual void renderme(); virtual void renderme();
void addCollisionShape(btCollisionShape* pShape) { m_collisionShapes.push_back(pShape); }
}; };

View File

@@ -19,7 +19,7 @@ subject to the following restrictions:
#define FRICTION_BOX_GROUND_FACT 0.05f #define FRICTION_BOX_GROUND_FACT 0.05f
#define FRICTION_BOX_BOX_FACT 0.05f #define FRICTION_BOX_BOX_FACT 0.05f
#define USE_CENTERS 1 #define USE_CENTERS 1
#include "LinearMath/btMinMax.h" //#include "LinearMath/btMinMax.h"
//------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------
@@ -187,7 +187,8 @@ BT_GPU___device__ float computeImpulse1(float3 rVel,
if(positionConstraint > 0) if(positionConstraint > 0)
return lambdaDt; return lambdaDt;
positionConstraint = btMin(0.0f,positionConstraint+penetrationError); // positionConstraint = btMin(0.0f,positionConstraint+penetrationError);
positionConstraint = (positionConstraint+penetrationError) < 0.f ? (positionConstraint+penetrationError) : 0.0f;
lambdaDt = -(BT_GPU_dot(cNormal,rVel)*(1+collisionConstant)); lambdaDt = -(BT_GPU_dot(cNormal,rVel)*(1+collisionConstant));
lambdaDt -= (baumgarteConstant/dt*positionConstraint); lambdaDt -= (baumgarteConstant/dt*positionConstraint);
@@ -328,8 +329,6 @@ BT_GPU___device__ void collisionResolutionBox( int constrId,
int bId=constraints[constrId].y; int bId=constraints[constrId].y;
float3 aPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,aId)); float3 aPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,aId));
float3 bPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,bId)); float3 bPos=BT_GPU_make_float34(BT_GPU_FETCH4(pos,bId));
float aRot=rotation[aId];
float bRot=rotation[bId];
float3 aVel=BT_GPU_make_float34(vel[aId]); float3 aVel=BT_GPU_make_float34(vel[aId]);
float3 bVel=BT_GPU_make_float34(vel[bId]); float3 bVel=BT_GPU_make_float34(vel[bId]);
float aAngVel=angularVel[aId]; float aAngVel=angularVel[aId];
@@ -354,7 +353,8 @@ BT_GPU___device__ void collisionResolutionBox( int constrId,
{ {
float rLambdaDt=lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx]; float rLambdaDt=lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx];
float pLambdaDt=rLambdaDt; float pLambdaDt=rLambdaDt;
rLambdaDt=btMax(pLambdaDt+lambdaDt,0.0f); // rLambdaDt=btMax(pLambdaDt+lambdaDt,0.0f);
rLambdaDt=(pLambdaDt+lambdaDt) > 0.0f ? (pLambdaDt+lambdaDt) : 0.0f;
lambdaDt=rLambdaDt-pLambdaDt; lambdaDt=rLambdaDt-pLambdaDt;
lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx]=rLambdaDt; lambdaDtBox[(MAX_VTX_PER_OBJ)*(2*constrId)+iVtx]=rLambdaDt;
} }

View File

@@ -413,7 +413,7 @@ void btGpuDemoDynamicsWorld::solveConstraints2(btContactSolverInfo& solverInfo)
for(int i=0;i<nIter;i++) for(int i=0;i<nIter;i++)
{ {
btCuda_collisionWithWallBox(m_dcPos, m_dcVel, m_dcRot, m_dcAngVel,m_dShapeBuffer, m_dShapeIds, btCuda_collisionWithWallBox(m_dcPos, m_dcVel, m_dcRot, m_dcAngVel,m_dShapeBuffer, m_dShapeIds, m_dInvMass,
partProps, boxProps, m_numObj + 1, timeStep); partProps, boxProps, m_numObj + 1, timeStep);
int* pBatchIds = m_dBatchIds; int* pBatchIds = m_dBatchIds;
for(int iBatch=0;iBatch < m_maxBatches;iBatch++) for(int iBatch=0;iBatch < m_maxBatches;iBatch++)
@@ -423,7 +423,7 @@ void btGpuDemoDynamicsWorld::solveConstraints2(btContactSolverInfo& solverInfo)
m_dcPos, m_dcVel, m_dcPos, m_dcVel,
m_dcRot, m_dcAngVel, m_dcRot, m_dcAngVel,
m_dLambdaDtBox, m_dLambdaDtBox,
m_dContact, m_dContact, m_dInvMass,
partProps, iBatch, timeStep); partProps, iBatch, timeStep);
pBatchIds += numConstraints; pBatchIds += numConstraints;
} }

View File

@@ -25,17 +25,27 @@ subject to the following restrictions:
#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h" #include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h"
//#define BT_USE_CUDA 1
// To enable CUDA :
// 1. Uncomment //#define BT_USE_CUDA 1
// 2. Build and add libbulletcuda (Extras/CUDA) to project
// 3. Add $(CUDA_LIB_PATH) and cudart.lib to linker properties
#ifdef BT_USE_CUDA #ifdef BT_USE_CUDA
#include "btCudaDemoPairCache.h" // #include "btCudaDemoPairCache.h"
#include <vector_types.h> // #include <vector_types.h>
#define BT_GPU_PREF(func) btCuda_##func #include "BulletMultiThreaded/btGpuDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" #undef BT_GPU_PREF
#define BT_GPU_PREF(func) btCuda_##func
#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#else #else
#include "BulletMultiThreaded/btGpuDefines.h" #include "BulletMultiThreaded/btGpuDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" #include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#endif #endif
#undef BT_GPU_PREF #undef BT_GPU_PREF
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------

View File

@@ -1,13 +0,0 @@
btCudaBroadphase is some R&D work, a first attempt to use CUDA in Bullet.
It uses the NVidia CUDA particle demo grid broadphase as Bullet broadphase.
Press 's' to toggle between the original CUDA particle demo and using Bullet+btCudaBroadphase.
Press '1', '2','3','4' for different start configurations.
Right click for options menu.
See some related discussion here:
http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=500&start=105
Get NVidia CUDA from here:
http://www.nvidia.com/object/cuda_get.html

View File

@@ -1,604 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//--------------------------------------------------------------------------
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btCudaBroadphaseKernel.h"
#include "btCudaBroadphase.h"
//--------------------------------------------------------------------------
#include <stdio.h>
//--------------------------------------------------------------------------
static btCudaBroadphaseParams s3DGridBroadphaseParams;
//--------------------------------------------------------------------------
bt3DGridBroadphase::bt3DGridBroadphase( const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
int maxBodiesPerCell,
btScalar cellFactorAABB) :
btSimpleBroadphase(maxSmallProxies,
// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
m_bInitialized(false),
m_numBodies(0)
{
m_ownsPairCache = true;
m_params.m_gridSizeX = gridSizeX;
m_params.m_gridSizeY = gridSizeY;
m_params.m_gridSizeZ = gridSizeZ;
m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
btVector3 w_org = worldAabbMin;
m_params.m_worldOriginX = w_org.getX();
m_params.m_worldOriginY = w_org.getY();
m_params.m_worldOriginZ = w_org.getZ();
btVector3 w_size = worldAabbMax - worldAabbMin;
m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX;
m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY;
m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ;
m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ);
m_maxRadius *= btScalar(0.5f);
m_params.m_numBodies = m_numBodies;
m_params.m_maxBodiesPerCell = maxBodiesPerCell;
m_numLargeHandles = 0;
m_maxLargeHandles = maxLargeProxies;
m_maxPairsPerBody = maxPairsPerBody;
m_cellFactorAABB = cellFactorAABB;
m_LastLargeHandleIndex = -1;
_initialize();
} // bt3DGridBroadphase::bt3DGridBroadphase()
//--------------------------------------------------------------------------
bt3DGridBroadphase::~bt3DGridBroadphase()
{
//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
assert(m_bInitialized);
_finalize();
} // bt3DGridBroadphase::~bt3DGridBroadphase()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::_initialize()
{
assert(!m_bInitialized);
// allocate host storage
m_hBodiesHash = new unsigned int[m_maxHandles * 2];
memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
m_hCellStart = new unsigned int[m_params.m_numCells];
memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
// --------------- for now, init with m_maxPairsPerBody for each body
m_hPairBuffStartCurr[0] = 0;
m_hPairBuffStartCurr[1] = 0;
for(int i = 1; i <= m_maxHandles; i++)
{
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
m_hPairBuffStartCurr[i * 2 + 1] = 0;
}
//----------------
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
m_hAABB = new btCuda3F1U[numAABB * 2]; // AABB Min & Max
m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
m_hPairScan = new unsigned int[m_maxHandles + 1];
m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody];
// large proxies
// allocate handles buffer and put all handles on free list
m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
m_firstFreeLargeHandle = 0;
{
for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
{
m_pLargeHandles[i].SetNextFree(i + 1);
m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
}
m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
}
// debug data
m_numPairsAdded = 0;
m_numOverflows = 0;
m_bInitialized = true;
} // bt3DGridBroadphase::_initialize()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::_finalize()
{
assert(m_bInitialized);
delete [] m_hBodiesHash;
delete [] m_hCellStart;
delete [] m_hPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScan;
delete [] m_hPairOut;
btAlignedFree(m_pLargeHandlesRawPtr);
m_bInitialized = false;
} // bt3DGridBroadphase::_finalize()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
if(m_numHandles <= 0)
{
BT_PROFILE("addLarge2LargePairsToCache");
addLarge2LargePairsToCache(dispatcher);
return;
}
// update constants
setParameters(&m_params);
// prepare AABB array
prepareAABB();
// calculate hash
calcHashAABB();
// sort bodies based on hash
sortHash();
// find start of each cell
findCellStart();
// findOverlappingPairs (small/small)
findOverlappingPairs();
// findOverlappingPairs (small/large)
findPairsLarge();
// add pairs to CPU cache
computePairCacheChanges();
scanOverlappingPairBuff();
squeezeOverlappingPairBuff();
addPairsToCache(dispatcher);
// find and add large/large pairs to CPU cache
addLarge2LargePairsToCache(dispatcher);
return;
} // bt3DGridBroadphase::calculateOverlappingPairs()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
{
m_numPairsAdded = 0;
m_numPairsRemoved = 0;
for(int i = 0; i < m_numHandles; i++)
{
unsigned int num = m_hPairScan[i+1] - m_hPairScan[i];
if(!num)
{
continue;
}
unsigned int* pInp = m_hPairOut + m_hPairScan[i];
unsigned int index0 = m_hAABB[i * 2].uw;
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0];
for(unsigned int j = 0; j < num; j++)
{
unsigned int indx1_s = pInp[j];
unsigned int index1 = indx1_s & (~BT_CUDA_PAIR_ANY_FLG);
btSimpleBroadphaseProxy* proxy1;
if(index1 < (unsigned int)m_maxHandles)
{
proxy1 = &m_pHandles[index1];
}
else
{
index1 -= m_maxHandles;
btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles));
proxy1 = &m_pLargeHandles[index1];
}
if(indx1_s & BT_CUDA_PAIR_NEW_FLG)
{
m_pairCache->addOverlappingPair(proxy0,proxy1);
m_numPairsAdded++;
}
else
{
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
m_numPairsRemoved++;
}
}
}
} // bt3DGridBroadphase::addPairsToCache()
//--------------------------------------------------------------------------
btBroadphaseProxy* bt3DGridBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
{
btBroadphaseProxy* proxy;
bool bIsLarge = isLargeProxy(aabbMin, aabbMax);
if(bIsLarge)
{
if (m_numLargeHandles >= m_maxLargeHandles)
{
btAssert(0);
return 0; //should never happen, but don't let the game crash ;-)
}
btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));
int newHandleIndex = allocLargeHandle();
proxy = new (&m_pLargeHandles[newHandleIndex])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
}
else
{
proxy = btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
}
return proxy;
} // bt3DGridBroadphase::createProxy()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
{
bool bIsLarge = isLargeProxy(proxy);
if(bIsLarge)
{
btSimpleBroadphaseProxy* proxy0 = static_cast<btSimpleBroadphaseProxy*>(proxy);
freeLargeHandle(proxy0);
m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher);
}
else
{
btSimpleBroadphase::destroyProxy(proxy, dispatcher);
}
return;
} // bt3DGridBroadphase::destroyProxy()
//--------------------------------------------------------------------------
bool bt3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
{
btVector3 diag = aabbMax - aabbMin;
btScalar radius = diag.length() * btScalar(0.5f);
radius *= m_cellFactorAABB; // user-defined factor
return (radius > m_maxRadius);
} // bt3DGridBroadphase::isLargeProxy()
//--------------------------------------------------------------------------
bool bt3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
{
return (proxy->getUid() >= (m_maxHandles+2));
} // bt3DGridBroadphase::isLargeProxy()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
{
int i,j;
if (m_numLargeHandles <= 0)
{
return;
}
int new_largest_index = -1;
for(i = 0; i <= m_LastLargeHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
if(!proxy0->m_clientObject)
{
continue;
}
new_largest_index = i;
for(j = i + 1; j <= m_LastLargeHandleIndex; j++)
{
btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[j];
if(!proxy1->m_clientObject)
{
continue;
}
btAssert(proxy0 != proxy1);
btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
if(aabbOverlap(p0,p1))
{
if (!m_pairCache->findPair(proxy0,proxy1))
{
m_pairCache->addOverlappingPair(proxy0,proxy1);
}
}
else
{
if(m_pairCache->findPair(proxy0,proxy1))
{
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
}
}
}
}
m_LastLargeHandleIndex = new_largest_index;
return;
} // bt3DGridBroadphase::addLarge2LargePairsToCache()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
{
btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
for (int i=0; i <= m_LastLargeHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i];
if(!proxy->m_clientObject)
{
continue;
}
rayCallback.process(proxy);
}
} // bt3DGridBroadphase::rayTest()
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//
// overrides for CPU version
//
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
void bt3DGridBroadphase::prepareAABB()
{
BT_PROFILE("prepareAABB");
btCuda3F1U* pBB = m_hAABB;
int i;
int new_largest_index = -1;
unsigned int num_small = 0;
for(i = 0; i <= m_LastHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
if(!proxy0->m_clientObject)
{
continue;
}
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_small;
pBB++;
num_small++;
}
m_LastHandleIndex = new_largest_index;
new_largest_index = -1;
unsigned int num_large = 0;
for(i = 0; i <= m_LastLargeHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
if(!proxy0->m_clientObject)
{
continue;
}
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i + m_maxHandles;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_large + m_maxHandles;
pBB++;
num_large++;
}
m_LastLargeHandleIndex = new_largest_index;
// paranoid checks
btAssert(num_small == m_numHandles);
btAssert(num_large == m_numLargeHandles);
return;
} // bt3DGridBroadphase::prepareAABB()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::setParameters(btCudaBroadphaseParams* hostParams)
{
s3DGridBroadphaseParams = *hostParams;
return;
} // bt3DGridBroadphase::setParameters()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::calcHashAABB()
{
BT_PROFILE("bt3DGrid_calcHashAABB");
bt3DGrid_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
return;
} // bt3DGridBroadphase::calcHashAABB()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::sortHash()
{
class bt3DGridHashKey
{
public:
unsigned int hash;
unsigned int index;
void quickSort(bt3DGridHashKey* pData, int lo, int hi)
{
int i=lo, j=hi;
bt3DGridHashKey x = pData[(lo+hi)/2];
do
{
while(pData[i].hash > x.hash) i++;
while(x.hash > pData[j].hash) j--;
if(i <= j)
{
bt3DGridHashKey t = pData[i];
pData[i] = pData[j];
pData[j] = t;
i++; j--;
}
} while(i <= j);
if(lo < j) pData->quickSort(pData, lo, j);
if(i < hi) pData->quickSort(pData, i, hi);
}
};
BT_PROFILE("bt3DGrid_sortHash");
bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
pHash->quickSort(pHash, 0, m_numHandles - 1);
return;
} // bt3DGridBroadphase::sortHash()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::findCellStart()
{
BT_PROFILE("bt3DGrid_findCellStart");
bt3DGrid_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
return;
} // bt3DGridBroadphase::findCellStart()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::findOverlappingPairs()
{
BT_PROFILE("bt3DGrid_findOverlappingPairs");
bt3DGrid_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
return;
} // bt3DGridBroadphase::findOverlappingPairs()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::findPairsLarge()
{
BT_PROFILE("bt3DGrid_findPairsLarge");
bt3DGrid_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles);
return;
} // bt3DGridBroadphase::findPairsLarge()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::computePairCacheChanges()
{
BT_PROFILE("bt3DGrid_computePairCacheChanges");
bt3DGrid_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles);
return;
} // bt3DGridBroadphase::computePairCacheChanges()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::scanOverlappingPairBuff()
{
BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
m_hPairScan[0] = 0;
for(int i = 1; i <= m_numHandles; i++)
{
unsigned int delta = m_hPairScan[i];
m_hPairScan[i] = m_hPairScan[i-1] + delta;
}
return;
} // bt3DGridBroadphase::scanOverlappingPairBuff()
//--------------------------------------------------------------------------
void bt3DGridBroadphase::squeezeOverlappingPairBuff()
{
BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
bt3DGrid_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles);
return;
} // bt3DGridBroadphase::squeezeOverlappingPairBuff()
//--------------------------------------------------------------------------
typedef unsigned int uint;
struct uint2
{
unsigned int x, y;
};
struct int3
{
int x, y, z;
};
struct uint3
{
unsigned int x, y, z;
};
struct float4
{
float x, y, z, w;
};
#define BT3DGRID__device__ inline
#define BT3DGRIDmax(a, b) ((a) > (b) ? (a) : (b))
#define BT3DGRIDmin(a, b) ((a) < (b) ? (a) : (b))
#define BT3DGRIDparams s3DGridBroadphaseParams
#define BT3DGRID__mul24(a, b) ((a)*(b))
#define BT3DGRID__global__ inline
#define BT3DGRID__shared__ static
#define BT3DGRID__syncthreads()
static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
{
uint2 t; t.x = x; t.y = y; return t;
}
#define BT3DGRIDmake_uint2(x, y) bt3dGrid_make_uint2(x, y)
static inline int3 bt3dGrid_make_int3(int x, int y, int z)
{
int3 t; t.x = x; t.y = y; t.z = z; return t;
}
inline int3 operator+(int3 a, int3 b)
{
return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}
#define BT3DGRIDmake_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
#define BT3DGRIDFETCH(a, b) a[b]
#define BT3DGRIDPREF(func) bt3DGrid_##func
#define MY_CUDA_SAFE_CALL(func) func
#define BT3DGPRDMemset memset
static uint2 s_blockIdx, s_blockDim, s_threadIdx;
#define BT3DGRIDblockIdx s_blockIdx
#define BT3DGRIDblockDim s_blockDim
#define BT3DGRIDthreadIdx s_threadIdx
#define BT3DGRIDEXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}
#define CUT_CHECK_ERROR(s)
//--------------------------------------------------------------------------
#include "bt3DGridBroadphaseFunc.h"
//--------------------------------------------------------------------------

View File

@@ -1,444 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#include "btCudaBroadphaseKernel.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// calculate position in uniform grid
BT3DGRID__device__ int3 btCuda_calcGridPos(float4 p)
{
int3 gridPos;
gridPos.x = (int)floor((p.x - BT3DGRIDparams.m_worldOriginX) / BT3DGRIDparams.m_cellSizeX);
gridPos.y = (int)floor((p.y - BT3DGRIDparams.m_worldOriginY) / BT3DGRIDparams.m_cellSizeY);
gridPos.z = (int)floor((p.z - BT3DGRIDparams.m_worldOriginZ) / BT3DGRIDparams.m_cellSizeZ);
return gridPos;
}
//----------------------------------------------------------------------------------------
// calculate address in grid from position (clamping to edges)
BT3DGRID__device__ uint btCuda_calcGridHash(int3 gridPos)
{
gridPos.x = BT3DGRIDmax(0, BT3DGRIDmin(gridPos.x, (int)BT3DGRIDparams.m_gridSizeX - 1));
gridPos.y = BT3DGRIDmax(0, BT3DGRIDmin(gridPos.y, (int)BT3DGRIDparams.m_gridSizeY - 1));
gridPos.z = BT3DGRIDmax(0, BT3DGRIDmin(gridPos.z, (int)BT3DGRIDparams.m_gridSizeZ - 1));
return BT3DGRID__mul24(BT3DGRID__mul24(gridPos.z, BT3DGRIDparams.m_gridSizeY), BT3DGRIDparams.m_gridSizeX) + BT3DGRID__mul24(gridPos.y, BT3DGRIDparams.m_gridSizeX) + gridPos.x;
}
//----------------------------------------------------------------------------------------
// calculate grid hash value for each body using its AABB
BT3DGRID__global__ void calcHashAABBD(btCuda3F1U* pAABB, uint2* pHash, uint numBodies)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
btCuda3F1U bbMin = pAABB[index*2];
btCuda3F1U bbMax = pAABB[index*2 + 1];
float4 pos;
pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
// get address in grid
int3 gridPos = btCuda_calcGridPos(pos);
uint gridHash = btCuda_calcGridHash(gridPos);
// store grid hash and body index
pHash[index] = BT3DGRIDmake_uint2(gridHash, index);
}
//----------------------------------------------------------------------------------------
BT3DGRID__global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
uint2 sortedData = pHash[index];
// Load hash data into shared memory so that we can look
// at neighboring body's hash value without loading
// two hash values per thread
BT3DGRID__shared__ uint sharedHash[257];
sharedHash[BT3DGRIDthreadIdx.x+1] = sortedData.x;
if((index > 0) && (BT3DGRIDthreadIdx.x == 0))
{
// first thread in block must load neighbor body hash
volatile uint2 prevData = pHash[index-1];
sharedHash[0] = prevData.x;
}
BT3DGRID__syncthreads();
if((index == 0) || (sortedData.x != sharedHash[BT3DGRIDthreadIdx.x]))
{
cellStart[sortedData.x] = index;
}
}
//----------------------------------------------------------------------------------------
BT3DGRID__device__ uint cudaTestAABBOverlap(btCuda3F1U min0, btCuda3F1U max0, btCuda3F1U min1, btCuda3F1U max1)
{
return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) &&
(min0.fy <= max1.fy)&& (min1.fy <= max0.fy) &&
(min0.fz <= max1.fz)&& (min1.fz <= max0.fz);
}
//----------------------------------------------------------------------------------------
BT3DGRID__device__ void findPairsInCell(int3 gridPos,
uint index,
uint2* pHash,
uint* pCellStart,
btCuda3F1U* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numBodies)
{
if ( (gridPos.x < 0) || (gridPos.x > (int)BT3DGRIDparams.m_gridSizeX - 1)
|| (gridPos.y < 0) || (gridPos.y > (int)BT3DGRIDparams.m_gridSizeY - 1)
|| (gridPos.z < 0) || (gridPos.z > (int)BT3DGRIDparams.m_gridSizeZ - 1))
{
return;
}
uint gridHash = btCuda_calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
{
return; // cell empty
}
// iterate over bodies in this cell
uint2 sortedData = pHash[index];
uint unsorted_indx = sortedData.y;
btCuda3F1U min0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2);
btCuda3F1U max0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1);
uint handleIndex = min0.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
uint curr_max = start_curr_next.x - start - 1;
uint bucketEnd = bucketStart + BT3DGRIDparams.m_maxBodiesPerCell;
bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
for(uint index2 = bucketStart; index2 < bucketEnd; index2++)
{
uint2 cellData = pHash[index2];
if (cellData.x != gridHash)
{
break; // no longer in same bucket
}
uint unsorted_indx2 = cellData.y;
if (unsorted_indx2 < unsorted_indx) // check not colliding with self
{
btCuda3F1U min1 = BT3DGRIDFETCH(pAABB, unsorted_indx2*2);
btCuda3F1U max1 = BT3DGRIDFETCH(pAABB, unsorted_indx2*2 + 1);
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
uint handleIndex2 = min1.uw;
uint k;
for(k = 0; k < curr; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
if(old_pair == handleIndex2)
{
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
break;
}
}
if(k == curr)
{
pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG;
if(curr >= curr_max)
{ // not a good solution, but let's avoid crash
break;
}
curr++;
}
}
}
}
pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, curr);
return;
}
//----------------------------------------------------------------------------------------
BT3DGRID__global__ void
findOverlappingPairsD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
uint2* pPairBuffStartCurr, uint numBodies)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
uint2 sortedData = pHash[index];
uint unsorted_indx = sortedData.y;
btCuda3F1U bbMin = BT3DGRIDFETCH(pAABB, unsorted_indx*2);
btCuda3F1U bbMax = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1);
float4 pos;
pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
// get address in grid
int3 gridPos = btCuda_calcGridPos(pos);
// examine only neighbouring cells
for(int z=-1; z<=1; z++) {
for(int y=-1; y<=1; y++) {
for(int x=-1; x<=1; x++) {
findPairsInCell(gridPos + BT3DGRIDmake_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
}
}
}
}
//----------------------------------------------------------------------------------------
BT3DGRID__global__ void
findPairsLargeD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
uint2 sortedData = pHash[index];
uint unsorted_indx = sortedData.y;
btCuda3F1U min0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2);
btCuda3F1U max0 = BT3DGRIDFETCH(pAABB, unsorted_indx*2 + 1);
uint handleIndex = min0.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
uint curr_max = start_curr_next.x - start - 1;
for(uint i = 0; i < numLarge; i++)
{
uint indx2 = numBodies + i;
btCuda3F1U min1 = BT3DGRIDFETCH(pAABB, indx2*2);
btCuda3F1U max1 = BT3DGRIDFETCH(pAABB, indx2*2 + 1);
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
uint k;
uint handleIndex2 = min1.uw;
for(k = 0; k < curr; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
if(old_pair == handleIndex2)
{
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
break;
}
}
if(k == curr)
{
pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG;
if(curr >= curr_max)
{ // not a good solution, but let's avoid crash
break;
}
curr++;
}
}
}
pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, curr);
return;
}
//----------------------------------------------------------------------------------------
BT3DGRID__global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, btCuda3F1U* pAABB, uint numBodies)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
btCuda3F1U bbMin = pAABB[index * 2];
uint handleIndex = bbMin.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
uint *pInp = pPairBuff + start;
uint num_changes = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
num_changes++;
}
}
pPairScan[index+1] = num_changes;
}
//----------------------------------------------------------------------------------------
BT3DGRID__global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, btCuda3F1U* pAABB, uint numBodies)
{
int index = BT3DGRID__mul24(BT3DGRIDblockIdx.x, BT3DGRIDblockDim.x) + BT3DGRIDthreadIdx.x;
if(index >= (int)numBodies)
{
return;
}
btCuda3F1U bbMin = pAABB[index * 2];
uint handleIndex = bbMin.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
uint* pInp = pPairBuff + start;
uint* pOut = pPairOut + pPairScan[index];
uint* pOut2 = pInp;
uint num = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
*pOut = *pInp;
pOut++;
}
if((*pInp) & BT_CUDA_PAIR_ANY_FLG)
{
*pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG);
pOut2++;
num++;
}
}
pPairBuffStartCurr[handleIndex] = BT3DGRIDmake_uint2(start, num);
} // squeezeOverlappingPairBuffD()
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// E N D O F K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
extern "C"
{
//Round a / b to nearest higher integer value
int BT3DGRIDPREF(iDivUp)(int a, int b)
{
return (a % b != 0) ? (a / b + 1) : (a / b);
}
// compute grid and thread block size for a given number of elements
void BT3DGRIDPREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
{
numThreads = BT3DGRIDmin(blockSize, n);
numBlocks = BT3DGRIDPREF(iDivUp)(n, numThreads);
}
void BT3DGRIDPREF(calcHashAABB)(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
// execute the kernel
BT3DGRIDEXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
// check if kernel invocation generated an error
CUT_CHECK_ERROR("calcHashAABBD kernel execution failed");
}
void BT3DGRIDPREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
{
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
MY_CUDA_SAFE_CALL(BT3DGPRDMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
BT3DGRIDEXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
}
void BT3DGRIDPREF(findOverlappingPairs(btCuda3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies))
{
#if B_CUDA_USE_TEX
MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(btCuda3F1U)));
#endif
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
BT3DGRIDEXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
CUT_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
}
void BT3DGRIDPREF(findPairsLarge(btCuda3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
{
#if B_CUDA_USE_TEX
MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(btCuda3F1U)));
#endif
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
BT3DGRIDEXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
CUT_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
}
void BT3DGRIDPREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies))
{
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
BT3DGRIDEXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
}
void BT3DGRIDPREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies))
{
int numThreads, numBlocks;
BT3DGRIDPREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
BT3DGRIDEXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // btCuda_squeezeOverlappingPairBuff()
} // extern "C"

View File

@@ -1,6 +1,6 @@
/* /*
Bullet Continuous Collision Detection and Physics Library Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty. This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software. In no event will the authors be held liable for any damages arising from the use of this software.
@@ -18,23 +18,32 @@ subject to the following restrictions:
#include "LinearMath/btAlignedAllocator.h" #include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h" #include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" #include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btCudaBroadphaseKernel.h"
#include "btCudaBroadphase.h" #include "btCudaBroadphase.h"
#include "radixsort.cuh" #include "radixsort.cuh"
//#include "vector_functions.h"
//--------------------------------------------------------------------------
#define BT_GPU_PREF(func) btCuda_##func
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
#undef BT_GPU_PREF
extern "C" void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams);
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
#include <stdio.h> #include <stdio.h>
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
btCudaBroadphase::btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, btCudaBroadphase::btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
int gridSizeX, int gridSizeY, int gridSizeZ, const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, int gridSizeX, int gridSizeY, int gridSizeZ,
int maxBodiesPerCell, int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
btScalar cellFactorAABB) : int maxSmallProxiesPerCell) :
bt3DGridBroadphase(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerBody, maxBodiesPerCell,cellFactorAABB) btGpu3DGridBroadphase(overlappingPairCache, worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxiesPerCell)
{ {
_initialize(); _initialize();
} // btCudaBroadphase::btCudaBroadphase() } // btCudaBroadphase::btCudaBroadphase()
@@ -65,7 +74,7 @@ void btCudaBroadphase::_initialize()
btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
unsigned int numAABB = m_maxHandles + m_maxLargeHandles; unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(btCuda3F1U) * 2); btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(bt3DGrid3F1U) * 2);
btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int)); btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int));
@@ -77,13 +86,6 @@ void btCudaBroadphase::_initialize()
void btCudaBroadphase::_finalize() void btCudaBroadphase::_finalize()
{ {
assert(m_bInitialized); assert(m_bInitialized);
delete [] m_hBodiesHash;
delete [] m_hCellStart;
delete [] m_hPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScan;
delete [] m_hPairOut;
btCuda_freeArray(m_dBodiesHash[0]); btCuda_freeArray(m_dBodiesHash[0]);
btCuda_freeArray(m_dBodiesHash[1]); btCuda_freeArray(m_dBodiesHash[1]);
btCuda_freeArray(m_dCellStart); btCuda_freeArray(m_dCellStart);
@@ -92,10 +94,6 @@ void btCudaBroadphase::_finalize()
btCuda_freeArray(m_dPairBuff); btCuda_freeArray(m_dPairBuff);
btCuda_freeArray(m_dPairScan); btCuda_freeArray(m_dPairScan);
btCuda_freeArray(m_dPairOut); btCuda_freeArray(m_dPairOut);
btAlignedFree(m_pLargeHandlesRawPtr);
m_bInitialized = false;
} // btCudaBroadphase::_finalize() } // btCudaBroadphase::_finalize()
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
@@ -108,14 +106,14 @@ void btCudaBroadphase::_finalize()
void btCudaBroadphase::prepareAABB() void btCudaBroadphase::prepareAABB()
{ {
bt3DGridBroadphase::prepareAABB(); btGpu3DGridBroadphase::prepareAABB();
btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(btCuda3F1U) * 2 * (m_numHandles + m_numLargeHandles)); btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles));
return; return;
} // btCudaBroadphase::prepareAABB() } // btCudaBroadphase::prepareAABB()
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
void btCudaBroadphase::setParameters(btCudaBroadphaseParams* hostParams) void btCudaBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{ {
btCuda_setParameters(hostParams); btCuda_setParameters(hostParams);
return; return;
@@ -183,7 +181,7 @@ void btCudaBroadphase::computePairCacheChanges()
void btCudaBroadphase::scanOverlappingPairBuff() void btCudaBroadphase::scanOverlappingPairBuff()
{ {
btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1)); btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1));
bt3DGridBroadphase::scanOverlappingPairBuff(); btGpu3DGridBroadphase::scanOverlappingPairBuff();
btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1)); btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1));
return; return;
} // btCudaBroadphase::scanOverlappingPairBuff() } // btCudaBroadphase::scanOverlappingPairBuff()
@@ -199,3 +197,11 @@ void btCudaBroadphase::squeezeOverlappingPairBuff()
} // btCudaBroadphase::squeezeOverlappingPairBuff() } // btCudaBroadphase::squeezeOverlappingPairBuff()
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
void btCudaBroadphase::resetPool(btDispatcher* dispatcher)
{
btGpu3DGridBroadphase::resetPool(dispatcher);
btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
} // btCudaBroadphase::resetPool()
//--------------------------------------------------------------------------

View File

@@ -1,31 +1,17 @@
/* /*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved. Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
* Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
* NOTICE TO USER:
* This software is provided 'as-is', without any express or implied warranty.
* This source code is subject to NVIDIA ownership rights under U.S. and In no event will the authors be held liable for any damages arising from the use of this software.
* international Copyright laws. Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it freely,
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE subject to the following restrictions:
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 3. This notice may not be removed or altered from any source distribution.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, */
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#include <cstdlib> #include <cstdlib>
#include <cstdio> #include <cstdio>
@@ -34,41 +20,27 @@
#include "cutil_math.h" #include "cutil_math.h"
#include "math_constants.h" #include "math_constants.h"
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "btCudaBroadphaseKernel.h"
//#include "radixsort.cu"
#include <vector_types.h>
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------- #include "btCudaDefines.h"
//----------------------------------------------------------------------------------------
// K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#ifdef __DEVICE_EMULATION__ #include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#define B_CUDA_USE_TEX 0 #include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h"
#else
#define B_CUDA_USE_TEX 1
#endif
__device__ inline btCuda3F1U tex_fetch3F1U(float4 a) { return *((btCuda3F1U*)(&a)); } //----------------------------------------------------------------------------------------
#if B_CUDA_USE_TEX __device__ inline bt3DGrid3F1U tex_fetch3F1U(float4 a) { return *((bt3DGrid3F1U*)(&a)); }
#define BT3DGRIDFETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#else //----------------------------------------------------------------------------------------
#define BT3DGRIDFETCH(t, i) t[i]
#endif void btCuda_exit(int val);
//----------------------------------------------------------------------------------------
texture<uint2, 1, cudaReadModeElementType> particleHashTex; texture<uint2, 1, cudaReadModeElementType> particleHashTex;
texture<uint, 1, cudaReadModeElementType> cellStartTex; texture<uint, 1, cudaReadModeElementType> cellStartTex;
@@ -76,98 +48,30 @@ texture<float4, 1, cudaReadModeElementType> pAABBTex;
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
__constant__ btCudaBroadphaseParams params; __constant__ bt3DGridBroadphaseParams params;
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#define BT3DGRID__device__ __device__ extern "C"
#define BT3DGRIDmax(a, b) max(a, b) {
#define BT3DGRIDmin(a, b) min(a, b)
#define BT3DGRIDparams params
#define BT3DGRID__mul24(a, b) __mul24(a, b)
#define BT3DGRID__global__ __global__
#define BT3DGRID__shared__ __shared__
#define BT3DGRID__syncthreads() __syncthreads()
#define BT3DGRIDmake_uint2(x, y) make_uint2(x, y)
#define BT3DGRIDmake_int3(x, y, z) make_int3(x, y, z)
#define BT3DGRIDPREF(func) btCuda_##func
#define BT3DGPRDMemset cudaMemset
#define BT3DGRIDblockIdx blockIdx
#define BT3DGRIDblockDim blockDim
#define BT3DGRIDthreadIdx threadIdx
#define BT3DGRIDEXECKERNEL(numb, numt, kfunc, args) kfunc<<<numb, numt>>>args
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
//! Check for CUDA error void btCuda_setParameters(bt3DGridBroadphaseParams* hostParams)
# define CUT_CHECK_ERROR(errorMessage) do { \
cudaError_t err = cudaGetLastError(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
btCuda_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
btCuda_exit(EXIT_FAILURE); \
} } while (0)
# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \
cudaError err = call; \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} } while (0)
# define MY_CUDA_SAFE_CALL( call) do { \
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} } while (0)
//----------------------------------------------------------------------------------------
void btCuda_exit(int val)
{
exit(val);
}
void btCuda_allocateArray(void** devPtr, unsigned int size)
{
MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size));
}
void btCuda_freeArray(void* devPtr)
{
MY_CUDA_SAFE_CALL(cudaFree(devPtr));
}
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size)
{
MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
}
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size)
{
MY_CUDA_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice));
}
void btCuda_setParameters(btCudaBroadphaseParams* hostParams)
{ {
// copy parameters to constant memory // copy parameters to constant memory
MY_CUDA_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(btCudaBroadphaseParams))); BT_GPU_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(bt3DGridBroadphaseParams)));
} } // btCuda_setParameters()
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#include "bt3DGridBroadphaseFunc.h" } // extern "C"
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -1,6 +1,6 @@
/* /*
Bullet Continuous Collision Detection and Physics Library Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty. This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software. In no event will the authors be held liable for any damages arising from the use of this software.
@@ -13,108 +13,23 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
//----------------------------------------------------------------------------------------
#ifndef CUDA_BROADPHASE_H #ifndef CUDA_BROADPHASE_H
#define CUDA_BROADPHASE_H #define CUDA_BROADPHASE_H
//----------------------------------------------------------------------------------------
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" #include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "btCudaBroadphaseKernel.h" #include "../../src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h"
#include "../../src/BulletMultiThreaded/btGpu3DGridBroadphase.h"
//----------------------------------------------------------------------------------------
///The bt3DGridBroadphase uses CUDA to compute overlapping pairs using a GPU. ///The btCudaBroadphase uses CUDA-capable GPU to compute overlapping pairs
class bt3DGridBroadphase : public btSimpleBroadphase
{
protected:
bool m_bInitialized;
unsigned int m_numBodies;
unsigned int m_numCells;
unsigned int m_maxPairsPerBody;
btScalar m_cellFactorAABB;
unsigned int m_maxBodiesPerCell;
btCudaBroadphaseParams m_params;
btScalar m_maxRadius;
// CPU data
unsigned int* m_hBodiesHash;
unsigned int* m_hCellStart;
unsigned int* m_hPairBuffStartCurr;
btCuda3F1U* m_hAABB;
unsigned int* m_hPairBuff;
unsigned int* m_hPairScan;
unsigned int* m_hPairOut;
// large proxies
int m_numLargeHandles;
int m_maxLargeHandles;
int m_LastLargeHandleIndex;
btSimpleBroadphaseProxy* m_pLargeHandles;
void* m_pLargeHandlesRawPtr;
int m_firstFreeLargeHandle;
int allocLargeHandle()
{
btAssert(m_numLargeHandles < m_maxLargeHandles);
int freeLargeHandle = m_firstFreeLargeHandle;
m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
m_numLargeHandles++;
if(freeLargeHandle > m_LastLargeHandleIndex)
{
m_LastLargeHandleIndex = freeLargeHandle;
}
return freeLargeHandle;
}
void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
{
int handle = int(proxy - m_pLargeHandles);
btAssert((handle >= 0) && (handle < m_maxHandles));
if(handle == m_LastLargeHandleIndex)
{
m_LastLargeHandleIndex--;
}
proxy->SetNextFree(m_firstFreeLargeHandle);
m_firstFreeLargeHandle = handle;
proxy->m_clientObject = 0;
m_numLargeHandles--;
}
bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax);
bool isLargeProxy(btBroadphaseProxy* proxy);
// debug
unsigned int m_numPairsAdded;
unsigned int m_numPairsRemoved;
unsigned int m_numOverflows;
//
public:
bt3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
int maxBodiesPerCell = 8,
btScalar cellFactorAABB = btScalar(1.0f));
virtual ~bt3DGridBroadphase();
virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy); class btCudaBroadphase : public btGpu3DGridBroadphase
virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
protected:
void _initialize();
void _finalize();
void addPairsToCache(btDispatcher* dispatcher);
void addLarge2LargePairsToCache(btDispatcher* dispatcher);
// overrides for CPU version
virtual void setParameters(btCudaBroadphaseParams* hostParams);
virtual void prepareAABB();
virtual void calcHashAABB();
virtual void sortHash();
virtual void findCellStart();
virtual void findOverlappingPairs();
virtual void findPairsLarge();
virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff();
virtual void squeezeOverlappingPairBuff();
};
///The btCudaBroadphase uses CUDA to compute overlapping pairs using a GPU.
class btCudaBroadphase : public bt3DGridBroadphase
{ {
protected: protected:
// GPU data // GPU data
@@ -122,15 +37,15 @@ protected:
unsigned int* m_dCellStart; unsigned int* m_dCellStart;
unsigned int* m_dPairBuff; unsigned int* m_dPairBuff;
unsigned int* m_dPairBuffStartCurr; unsigned int* m_dPairBuffStartCurr;
btCuda3F1U* m_dAABB; bt3DGrid3F1U* m_dAABB;
unsigned int* m_dPairScan; unsigned int* m_dPairScan;
unsigned int* m_dPairOut; unsigned int* m_dPairOut;
public: public:
btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, btCudaBroadphase( btOverlappingPairCache* overlappingPairCache,
int gridSizeX, int gridSizeY, int gridSizeZ, const btVector3& worldAabbMin,const btVector3& worldAabbMax,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, int gridSizeX, int gridSizeY, int gridSizeZ,
int maxBodiesPerCell = 8, int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxies,
btScalar cellFactorAABB = btScalar(1.0f)); int maxSmallProxiesPerCell = 8);
virtual ~btCudaBroadphase(); virtual ~btCudaBroadphase();
protected: protected:
void _initialize(); void _initialize();
@@ -138,7 +53,7 @@ protected:
void allocateArray(void** devPtr, unsigned int size); void allocateArray(void** devPtr, unsigned int size);
void freeArray(void* devPtr); void freeArray(void* devPtr);
// overrides for CUDA version // overrides for CUDA version
virtual void setParameters(btCudaBroadphaseParams* hostParams); virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
virtual void prepareAABB(); virtual void prepareAABB();
virtual void calcHashAABB(); virtual void calcHashAABB();
virtual void sortHash(); virtual void sortHash();
@@ -148,5 +63,7 @@ protected:
virtual void computePairCacheChanges(); virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff(); virtual void scanOverlappingPairBuff();
virtual void squeezeOverlappingPairBuff(); virtual void squeezeOverlappingPairBuff();
virtual void resetPool(btDispatcher* dispatcher);
}; };
#endif //CUDA_BROADPHASE_H #endif //CUDA_BROADPHASE_H

View File

@@ -1,105 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into CUDA program
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifndef CUDA_BROADPHASE_KERNEL_H
#define CUDA_BROADPHASE_KERNEL_H
#define CUDA_BROADPHASE_USE_CUDA 1
#define BT_CUDA_PAIR_FOUND_FLG (0x40000000)
#define BT_CUDA_PAIR_NEW_FLG (0x20000000)
#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG)
struct btCudaBroadphaseParams
{
unsigned int m_gridSizeX;
unsigned int m_gridSizeY;
unsigned int m_gridSizeZ;
unsigned int m_numCells;
float m_worldOriginX;
float m_worldOriginY;
float m_worldOriginZ;
float m_cellSizeX;
float m_cellSizeY;
float m_cellSizeZ;
unsigned int m_numBodies;
unsigned int m_maxBodiesPerCell;
};
struct btCuda3F1U
{
float fx;
float fy;
float fz;
unsigned int uw;
};
extern "C"
{
// CPU functions
void bt3DGrid_setParameters(btCudaBroadphaseParams* hostParams);
void bt3DGrid_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
void bt3DGrid_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
void bt3DGrid_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash,
unsigned int* pCellStart,
unsigned int* pPairBuff,
unsigned int* pPairBuffStartCurr,
unsigned int numBodies);
void bt3DGrid_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash,
unsigned int* pCellStart,
unsigned int* pPairBuff,
unsigned int* pPairBuffStartCurr,
unsigned int numBodies,
unsigned int numLarge);
void bt3DGrid_computePairCacheChanges( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr,
unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies);
void bt3DGrid_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan,
unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies);
// CUDA functions
void btCuda_allocateArray(void** devPtr, unsigned int size);
void btCuda_freeArray(void* devPtr);
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size);
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size);
void btCuda_setParameters(btCudaBroadphaseParams* hostParams);
void btCuda_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
void btCuda_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
void btCuda_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash,
unsigned int* pCellStart,
unsigned int* pPairBuff,
unsigned int* pPairBuffStartCurr,
unsigned int numBodies);
void btCuda_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash,
unsigned int* pCellStart,
unsigned int* pPairBuff,
unsigned int* pPairBuffStartCurr,
unsigned int numBodies,
unsigned int numLarge);
void btCuda_computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr,
unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies);
void btCuda_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan,
unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies);
}
#endif // CUDA_BROADPHASE_KERNEL_H

138
Extras/CUDA/btCudaDefines.h Normal file
View File

@@ -0,0 +1,138 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Common preprocessor definitions for CUDA compiler
//----------------------------------------------------------------------------------------
#ifndef BTCUDADEFINES_H
#define BTCUDADEFINES_H
//----------------------------------------------------------------------------------------
#ifdef __DEVICE_EMULATION__
#define B_CUDA_USE_TEX 0
#else
#define B_CUDA_USE_TEX 1
#endif
#if B_CUDA_USE_TEX
#define BT_GPU_FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i))
#define BT_GPU_FETCH4(t, i) tex1Dfetch(t##Tex, i)
#else
#define BT_GPU_FETCH(t, i) t[i]
#define BT_GPU_FETCH4(t, i) t[i]
#endif
//----------------------------------------------------------------------------------------
#define BT_GPU___device__ __device__
#define BT_GPU___devdata__ __device__
#define BT_GPU___constant__ __constant__
#define BT_GPU_max(a, b) max(a, b)
#define BT_GPU_min(a, b) min(a, b)
#define BT_GPU_params params
#define BT_GPU___mul24(a, b) __mul24(a, b)
#define BT_GPU___global__ __global__
#define BT_GPU___shared__ __shared__
#define BT_GPU___syncthreads() __syncthreads()
#define BT_GPU_make_uint2(x, y) make_uint2(x, y)
#define BT_GPU_make_int3(x, y, z) make_int3(x, y, z)
#define BT_GPU_make_float3(x, y, z) make_float3(x, y, z)
#define BT_GPU_make_float34(x) make_float3(x)
#define BT_GPU_make_float31(x) make_float3(x)
#define BT_GPU_make_float42(a, b) make_float4(a, b)
#define BT_GPU_make_float44(a, b, c, d) make_float4(a, b, c, d)
#define BT_GPU_PREF(func) btCuda_##func
#define BT_GPU_Memset cudaMemset
#define BT_GPU_MemcpyToSymbol(a, b, c) cudaMemcpyToSymbol(a, b, c)
#define BT_GPU_blockIdx blockIdx
#define BT_GPU_blockDim blockDim
#define BT_GPU_threadIdx threadIdx
#define BT_GPU_dot(a, b) dot(a, b)
#define BT_GPU_dot4(a, b) dot(a, b)
#define BT_GPU_cross(a, b) cross(a, b)
#define BT_GPU_BindTexture(a, b, c, d) cudaBindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a) cudaUnbindTexture(a)
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) kfunc<<<numb, numt>>>args
//----------------------------------------------------------------------------------------
//! Check for CUDA error
#define BT_GPU_CHECK_ERROR(errorMessage) \
do \
{ \
cudaError_t err = cudaGetLastError(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda error: %s in file '%s' in line %i : %s.\n",\
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err));\
btCuda_exit(EXIT_FAILURE); \
} \
} \
while(0)
//----------------------------------------------------------------------------------------
#define BT_GPU_SAFE_CALL_NO_SYNC(call) \
do \
{ \
cudaError err = call; \
if(err != cudaSuccess) \
{ \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} \
} \
while(0)
//----------------------------------------------------------------------------------------
#define BT_GPU_SAFE_CALL(call) \
do \
{ \
BT_GPU_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if(err != cudaSuccess) \
{ \
fprintf(stderr,"Cuda errorSync in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
btCuda_exit(EXIT_FAILURE); \
} \
} while (0)
//----------------------------------------------------------------------------------------
extern "C" void btCuda_exit(int val);
//----------------------------------------------------------------------------------------
#endif // BTCUDADEFINES_H
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -0,0 +1,91 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include "cutil_math.h"
#include "math_constants.h"
#include <vector_types.h>
//----------------------------------------------------------------------------------------
#include "btCudaDefines.h"
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
void btCuda_exit(int val)
{
fprintf(stderr, "Press ENTER key to terminate the program\n");
getchar();
exit(val);
}
void btCuda_allocateArray(void** devPtr, unsigned int size)
{
BT_GPU_SAFE_CALL(cudaMalloc(devPtr, size));
}
void btCuda_freeArray(void* devPtr)
{
BT_GPU_SAFE_CALL(cudaFree(devPtr));
}
void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size)
{
BT_GPU_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
}
void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size)
{
BT_GPU_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice));
}
void btCuda_registerGLBufferObject(unsigned int vbo)
{
BT_GPU_SAFE_CALL(cudaGLRegisterBufferObject(vbo));
}
void* btCuda_mapGLBufferObject(unsigned int vbo)
{
void *ptr;
BT_GPU_SAFE_CALL(cudaGLMapBufferObject(&ptr, vbo));
return ptr;
}
void btCuda_unmapGLBufferObject(unsigned int vbo)
{
BT_GPU_SAFE_CALL(cudaGLUnmapBufferObject(vbo));
}
//----------------------------------------------------------------------------------------
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedCode.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -17,20 +17,20 @@ subject to the following restrictions:
#include <cstdio> #include <cstdio>
#include <string.h> #include <string.h>
#include "../../Extras/CUDA/cutil_math.h" #include "cutil_math.h"
#include "math_constants.h" #include "math_constants.h"
#include <vector_types.h> #include <vector_types.h>
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#include "../../Extras/CUDA/btCudaDefines.h" #include "btCudaDefines.h"
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h" #include "../../src/BulletMultiThreaded/btGpuUtilsSharedDefs.h"
#include "btGpuDemo2dSharedTypes.h" #include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedTypes.h"
#include "btGpuDemo2dSharedDefs.h" #include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedDefs.h"
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
@@ -38,7 +38,7 @@ texture<float4, 1, cudaReadModeElementType> posTex;
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
#include "btGpuDemo2dSharedCode.h" #include "../../Demos/Gpu2dDemo/btGpuDemo2dSharedCode.h"
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------

View File

@@ -506,14 +506,6 @@
<Filter <Filter
Name="Source Files" Name="Source Files"
> >
<File
RelativePath=".\bt3DGridBroadphase.cpp"
>
</File>
<File
RelativePath=".\bt3DGridBroadphaseFunc.h"
>
</File>
<File <File
RelativePath=".\btCudaBroadphase.cpp" RelativePath=".\btCudaBroadphase.cpp"
> >
@@ -545,8 +537,52 @@
</FileConfiguration> </FileConfiguration>
</File> </File>
<File <File
RelativePath=".\btCudaBroadphaseKernel.h" RelativePath=".\btCudaUtils.cu"
> >
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
Description="CUDA compiling"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(IntDir)\btCudaUtils_cu.obj btCudaUtils.cu&#x0D;&#x0A;"
Outputs="$(IntDir)\btCudaUtils_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
Description="CUDA compiling"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(IntDir)/btCudaUtils_cu.obj btCudaUtils.cu&#x0D;&#x0A;"
Outputs="$(IntDir)/btCudaUtils_cu.obj"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\btGpuDemo2dCudaFunc.cu"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
Description="CUDA compiling"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(IntDir)\btGpuDemo2dCudaFunc_cu.obj btGpuDemo2dCudaFunc.cu&#x0D;&#x0A;"
Outputs="$(IntDir)\btGpuDemo2dCudaFunc_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
Description="CUDA compiling"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(IntDir)/btGpuDemo2dCudaFunc_cu.obj btGpuDemo2dCudaFunc.cu&#x0D;&#x0A;"
Outputs="$(IntDir)/btGpuDemo2dCudaFunc_cu.obj"
/>
</FileConfiguration>
</File> </File>
<File <File
RelativePath=".\cutil_math.h" RelativePath=".\cutil_math.h"
@@ -594,6 +630,10 @@
RelativePath=".\btCudaBroadphase.h" RelativePath=".\btCudaBroadphase.h"
> >
</File> </File>
<File
RelativePath=".\btCudaDefines.h"
>
</File>
</Filter> </Filter>
</Files> </Files>
<Globals> <Globals>

View File

@@ -1,3 +0,0 @@
#include <param.h>
const Param<int> dummy("error");

View File

@@ -1,226 +0,0 @@
/*
Simple parameter system
sgreen@nvidia.com 4/2001
*/
#ifndef PARAM_H
#define PARAM_H
#ifdef _WIN32
# pragma warning(disable:4786) // No stupid debug warnings
#endif
#include <string>
#include <vector>
#include <map>
#include <iostream>
#include <sstream>
#include <iomanip>
// base class for named parameter
class ParamBase {
public:
ParamBase(char *name) { m_name = name; }
virtual ~ParamBase() { }
std::string *GetName() { return &m_name; }
virtual float GetFloatValue() = 0;
virtual int GetIntValue() = 0;
virtual std::string GetValueString() = 0;
virtual void Reset() = 0;
virtual void Increment() = 0;
virtual void Decrement() = 0;
virtual float GetPercentage() = 0;
virtual void SetPercentage(float p) = 0;
virtual void Write(std::ostream &stream) = 0;
virtual void Read(std::istream &stream) = 0;
virtual bool IsList() = 0;
protected:
std::string m_name;
};
// derived class for single-valued parameter
template<class T> class Param : public ParamBase {
public:
Param(char *name, T value = 0, T min = 0, T max = 10000, T step = 1, T* ptr = 0)
: ParamBase(name)
{
if (ptr) {
m_ptr = ptr;
} else {
m_ptr = &m_value;
}
*m_ptr = value;
m_default = value;
m_min = min;
m_max = max;
m_step = step;
}
virtual ~Param() { }
virtual float GetFloatValue() { return (float) *m_ptr; }
virtual int GetIntValue() { return (int) *m_ptr; }
T GetValue() const { return *m_ptr; }
T SetValue(const T value) { *m_ptr = value; }
// inherited functions
std::string GetValueString()
{
std::ostringstream ost;
ost<<std::setprecision(3)<<*m_ptr;
return ost.str();
}
float GetPercentage()
{
return (*m_ptr - m_min) / (float) (m_max - m_min);
}
void SetPercentage(float p)
{
*m_ptr = (T)(m_min + p * (m_max - m_min));
}
void Reset() { *m_ptr = m_default; }
void Increment()
{
*m_ptr += m_step;
if (*m_ptr > m_max)
*m_ptr = m_max;
}
void Decrement()
{
*m_ptr -= m_step;
if (*m_ptr < m_min)
*m_ptr = m_min;
}
void Write(std::ostream &stream) { stream << m_name << " " << *m_ptr << '\n'; }
void Read(std::istream &stream) { stream >> m_name >> *m_ptr; }
bool IsList() { return false; }
private:
T m_value;
T *m_ptr; // pointer to value declared elsewhere
T m_default, m_min, m_max;
T m_step;
};
extern const Param<int> dummy;
// list of parameters
class ParamList : public ParamBase {
public:
ParamList(char *name = "")
: ParamBase(name)
{
active = true;
}
virtual ~ParamList() { }
virtual float GetFloatValue() { return 0.0f; }
virtual int GetIntValue() { return 0; }
void AddParam(ParamBase *param)
{
m_params.push_back(param);
m_map[*param->GetName()] = param;
m_current = m_params.begin();
}
// look-up parameter based on name
ParamBase *GetParam(char *name)
{
ParamBase *p = m_map[name];
if (p)
return p;
else
return (ParamBase *) &dummy;
}
ParamBase *GetParam(int i)
{
return m_params[i];
}
ParamBase *GetCurrent()
{
return *m_current;
}
int GetSize() { return (int)m_params.size(); }
// inherited functions
std::string GetValueString()
{
// return m_name;
return "list";
}
void Reset()
{
m_current = m_params.begin();
}
void Increment()
{
m_current++;
if (m_current == m_params.end())
m_current = m_params.begin();
}
void Decrement()
{
if (m_current == m_params.begin())
m_current = m_params.end()-1;
else
m_current--;
}
float GetPercentage() { return 0.0f; }
void SetPercentage(float /*p*/) { ; }
void Write(std::ostream &stream)
{
stream << m_name << '\n';
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
(*p)->Write(stream);
}
}
void Read(std::istream &stream)
{
stream >> m_name;
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
(*p)->Read(stream);
}
}
bool IsList() { return true; }
protected:
bool active;
std::vector<ParamBase *> m_params;
std::map<std::string, ParamBase *> m_map;
std::vector<ParamBase *>::const_iterator m_current;
};
#endif

View File

@@ -1,209 +0,0 @@
/*
ParamListGL
- class derived from ParamList to do simple OpenGL rendering of a parameter list
sgg 8/2001
*/
#include <param.h>
#include <paramgl.h>
void beginWinCoords(void)
{
glMatrixMode(GL_MODELVIEW);
glPushMatrix();
glLoadIdentity();
glTranslatef(0.0, glutGet(GLUT_WINDOW_HEIGHT) - 1, 0.0);
glScalef(1.0, -1.0, 1.0);
glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();
glOrtho(0, glutGet(GLUT_WINDOW_WIDTH), 0, glutGet(GLUT_WINDOW_HEIGHT), -1, 1);
glMatrixMode(GL_MODELVIEW);
}
void endWinCoords(void)
{
glMatrixMode(GL_PROJECTION);
glPopMatrix();
glMatrixMode(GL_MODELVIEW);
glPopMatrix();
}
void glPrint(int x, int y, const char *s, void *font)
{
int i, len;
glRasterPos2f(x, y);
len = (int) strlen(s);
for (i = 0; i < len; i++) {
glutBitmapCharacter(font, s[i]);
}
}
void glPrintShadowed(int x, int y, const char *s, void *font, float *color)
{
glColor3f(0.0, 0.0, 0.0);
glPrint(x-1, y-1, s, font);
glColor3fv((GLfloat *) color);
glPrint(x, y, s, font);
}
ParamListGL::ParamListGL(char *name) : ParamList(name)
{
font = (void *) GLUT_BITMAP_9_BY_15;
// font = (void *) GLUT_BITMAP_8_BY_13;
bar_x = 250;
bar_w = 250;
bar_h = 10;
bar_offset = 5;
text_x = 5;
separation = 15;
value_x = 200;
font_h = 15;
start_x = 0;
start_y = 0;
text_col_selected[0] = 1.0;
text_col_selected[1] = 1.0;
text_col_selected[2] = 1.0;
text_col_unselected[0] = 0.75;
text_col_unselected[1] = 0.75;
text_col_unselected[2] = 0.75;
bar_col_outer[0] = 0.0;
bar_col_outer[1] = 0.0;
bar_col_outer[2] = 0.0;
bar_col_inner[0] = 0.0;
bar_col_inner[1] = 0.0;
bar_col_inner[2] = 0.0;
text_col_shadow[0] = 0.0;
text_col_shadow[1] = 0.0;
text_col_shadow[2] = 0.0;
}
void
ParamListGL::Render(int x, int y, bool shadow)
{
beginWinCoords();
start_x = x; start_y = y;
for(std::vector<ParamBase *>::const_iterator p = m_params.begin(); p != m_params.end(); ++p) {
if ((*p)->IsList()) {
ParamListGL *list = (ParamListGL *) (*p);
list->Render(x+10, y);
y += separation*list->GetSize();
} else {
if (p == m_current)
glColor3fv(text_col_selected);
else
glColor3fv(text_col_unselected);
if (shadow) {
glPrintShadowed(x + text_x, y + font_h, (*p)->GetName()->c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected);
glPrintShadowed(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font, (p == m_current) ? text_col_selected : text_col_unselected);
}
else {
glPrint(x + text_x, y + font_h, (*p)->GetName()->c_str(), font);
glPrint(x + value_x, y + font_h, (*p)->GetValueString().c_str(), font);
}
// glColor3fv((GLfloat *) &bar_col_outer);
glBegin(GL_LINE_LOOP);
glVertex2f(x + bar_x, y + bar_offset);
glVertex2f(x + bar_x + bar_w, y + bar_offset);
glVertex2f(x + bar_x + bar_w, y + bar_offset + bar_h);
glVertex2f(x + bar_x, y + bar_offset + bar_h);
glEnd();
// glColor3fv((GLfloat *) &bar_col_inner);
glRectf(x + bar_x, y + bar_offset + bar_h, x + bar_x + (bar_w*(*p)->GetPercentage()), y + bar_offset);
y += separation;
}
}
endWinCoords();
}
bool
ParamListGL::Mouse(int x, int y, int button, int state)
{
if ((y < start_y) || (y > (int)(start_y + (separation * m_params.size()) - 1)))
return false;
int i = (y - start_y) / separation;
if ((button==GLUT_LEFT_BUTTON) && (state==GLUT_DOWN)) {
#if defined(__GNUC__) && (__GNUC__ < 3)
m_current = &m_params[i];
#else
// MJH: workaround since the version of vector::at used here is non-standard
for (m_current = m_params.begin(); m_current != m_params.end() && i > 0; m_current++, i--);
//m_current = (std::vector<ParamBase *>::const_iterator)&m_params.at(i);
#endif
if ((x > bar_x) && (x < bar_x + bar_w)) {
Motion(x, y);
}
}
return true;
}
bool
ParamListGL::Motion(int x, int y)
{
if ((y < start_y) || (y > start_y + (separation * (int)m_params.size()) - 1) )
return false;
if (x < bar_x) {
(*m_current)->SetPercentage(0.0);
return true;
}
if (x > bar_x + bar_w) {
(*m_current)->SetPercentage(1.0);
return true;
}
(*m_current)->SetPercentage((x-bar_x) / (float) bar_w);
return true;
}
void
ParamListGL::Special(int key, int /*x*/, int /*y*/)
{
switch(key) {
case GLUT_KEY_DOWN:
Increment();
break;
case GLUT_KEY_UP:
Decrement();
break;
case GLUT_KEY_RIGHT:
GetCurrent()->Increment();
break;
case GLUT_KEY_LEFT:
GetCurrent()->Decrement();
break;
case GLUT_KEY_HOME:
GetCurrent()->Reset();
break;
case GLUT_KEY_END:
GetCurrent()->SetPercentage(1.0);
break;
}
glutPostRedisplay();
}

View File

@@ -1,54 +0,0 @@
/*
ParamListGL
- class derived from ParamList to do simple OpenGL rendering of a parameter list
sgg 8/2001
*/
#ifndef PARAMGL_H
#define PARAMGL_H
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <param.h>
void beginWinCoords();
void endWinCoords();
void glPrint(int x, int y, const char *s, void *font);
void glPrintShadowed(int x, int y, const char *s, void *font, float *color);
class ParamListGL : public ParamList {
public:
ParamListGL(char *name = "");
void Render(int x, int y, bool shadow = false);
bool Mouse(int x, int y, int button=GLUT_LEFT_BUTTON, int state=GLUT_DOWN);
bool Motion(int x, int y);
void Special(int key, int x, int y);
void SetSelectedColor(float r, float g, float b) { text_col_selected[0] = r; text_col_selected[1] = g; text_col_selected[2] = b; }
void SetUnSelectedColor(float r, float g, float b) { text_col_unselected[0] = r; text_col_unselected[1] = g; text_col_unselected[2] = b; }
int bar_x;
int bar_w;
int bar_h;
int text_x;
int separation;
int value_x;
int font_h;
int start_x, start_y;
int bar_offset;
float text_col_selected[3];
float text_col_unselected[3];
float text_col_shadow[3];
float bar_col_outer[3];
float bar_col_inner[3];
void *font;
};
#endif

View File

@@ -1,806 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#include "particleSystem.h"
#include "particleSystem.cuh"
#include "radixsort.cuh"
#include "particles_kernel.cuh"
//#include <cutil.h>
#include <assert.h>
#include <math.h>
#include <memory.h>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <GL/glew.h>
#include <btBulletDynamicsCommon.h>
#include "../../Demos/OpenGL/GLDebugDrawer.h"
#include "btCudaBroadphase.h"
#ifndef CUDART_PI_F
#define CUDART_PI_F 3.141592654f
#endif
#define USE_BULLET 1
#define VEL_DIR_FACT (30.0F)
#define ACC_DIR_FACT (VEL_DIR_FACT*VEL_DIR_FACT)
#define VEL_INV_FACT (1.0F/VEL_DIR_FACT)
#define ACC_INV_FACT (1.0F/ACC_DIR_FACT)
GLDebugDrawer debugDrawer;
ParticleSystem::ParticleSystem(uint numParticles, uint3 gridSize) :
m_bInitialized(false),
m_numParticles(numParticles),
m_hPos(0),
m_hVel(0),
m_currentPosRead(0),
m_currentVelRead(0),
m_currentPosWrite(1),
m_currentVelWrite(1),
m_gridSize(gridSize),
m_maxParticlesPerCell(4),
m_timer(0),
m_solverIterations(1),
// m_simulationMode(SIMULATION_CUDA)
m_simulationMode(SIMULATION_BULLET_CPU)
{
m_dPos[0] = m_dPos[1] = 0;
m_dVel[0] = m_dVel[1] = 0;
m_numGridCells = m_gridSize.x*m_gridSize.y*m_gridSize.z;
float3 worldSize = make_float3(2.0f, 2.0f, 2.0f);
// set simulation parameters
m_params.gridSize = m_gridSize;
m_params.numCells = m_numGridCells;
m_params.numBodies = m_numParticles;
m_params.maxParticlesPerCell = m_maxParticlesPerCell;
m_params.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f);
m_params.cellSize = make_float3(worldSize.x / m_gridSize.x, worldSize.y / m_gridSize.y, worldSize.z / m_gridSize.z);
m_params.particleRadius = m_params.cellSize.x * 0.5f;
m_params.colliderPos = make_float4(0.0f, -0.7f, 0.0f, 1.0f);
m_params.colliderRadius = 0.2f;
m_params.spring = 0.5f;
m_params.damping = 0.02f;
m_params.shear = 0.1f;
m_params.attraction = 0.0f;
m_params.boundaryDamping = -0.5f;
m_params.gravity = make_float3(0.0f, -0.0003f, 0.0f);
m_params.globalDamping = 1.0f;
_initialize(numParticles);
#if USE_BULLET
initializeBullet();
#endif
}
ParticleSystem::~ParticleSystem()
{
#if USE_BULLET
finalizeBullet();
#endif
_finalize();
m_numParticles = 0;
}
uint
ParticleSystem::createVBO(uint size)
{
GLuint vbo;
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
registerGLBufferObject(vbo);
return vbo;
}
inline float lerp(float a, float b, float t)
{
return a + t*(b-a);
}
void colorRamp(float t, float *r)
{
const int ncolors = 7;
float c[ncolors][3] = {
{ 1.0, 0.0, 0.0, },
{ 1.0, 0.5, 0.0, },
{ 1.0, 1.0, 0.0, },
{ 0.0, 1.0, 0.0, },
{ 0.0, 1.0, 1.0, },
{ 0.0, 0.0, 1.0, },
{ 1.0, 0.0, 1.0, },
};
t = t * (ncolors-1);
int i = (int) t;
float u = t - floor(t);
r[0] = lerp(c[i][0], c[i+1][0], u);
r[1] = lerp(c[i][1], c[i+1][1], u);
r[2] = lerp(c[i][2], c[i+1][2], u);
}
void
ParticleSystem::_initialize(int numParticles)
{
assert(!m_bInitialized);
m_numParticles = numParticles;
// allocate host storage
m_hPos = new float[m_numParticles*4];
m_hVel = new float[m_numParticles*4];
memset(m_hPos, 0, m_numParticles*4*sizeof(float));
memset(m_hVel, 0, m_numParticles*4*sizeof(float));
m_hGridCounters = new uint[m_numGridCells];
m_hGridCells = new uint[m_numGridCells*m_maxParticlesPerCell];
memset(m_hGridCounters, 0, m_numGridCells*sizeof(uint));
memset(m_hGridCells, 0, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
m_hParticleHash = new uint[m_numParticles*2];
memset(m_hParticleHash, 0, m_numParticles*2*sizeof(uint));
m_hCellStart = new uint[m_numGridCells];
memset(m_hCellStart, 0, m_numGridCells*sizeof(uint));
// allocate GPU data
unsigned int memSize = sizeof(float) * 4 * m_numParticles;
m_posVbo[0] = createVBO(memSize);
m_posVbo[1] = createVBO(memSize);
allocateArray((void**)&m_dVel[0], memSize);
allocateArray((void**)&m_dVel[1], memSize);
allocateArray((void**)&m_dSortedPos, memSize);
allocateArray((void**)&m_dSortedVel, memSize);
#if USE_SORT
allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint));
allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint));
allocateArray((void**)&m_dCellStart, m_numGridCells*sizeof(uint));
#else
allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint));
allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
#endif
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
#if 1
// fill color buffer
glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO);
float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
float *ptr = data;
for(uint i=0; i<m_numParticles; i++) {
float t = i / (float) m_numParticles;
#if 0
*ptr++ = rand() / (float) RAND_MAX;
*ptr++ = rand() / (float) RAND_MAX;
*ptr++ = rand() / (float) RAND_MAX;
#else
colorRamp(t, ptr);
ptr+=3;
#endif
*ptr++ = 1.0f;
}
glUnmapBufferARB(GL_ARRAY_BUFFER);
#endif
// CUT_SAFE_CALL(cutCreateTimer(&m_timer));
setParameters(&m_params);
m_bInitialized = true;
}
void
ParticleSystem::_finalize()
{
assert(m_bInitialized);
delete [] m_hPos;
delete [] m_hVel;
delete [] m_hGridCounters;
delete [] m_hGridCells;
freeArray(m_dVel[0]);
freeArray(m_dVel[1]);
freeArray(m_dSortedPos);
freeArray(m_dSortedVel);
#if USE_SORT
freeArray(m_dParticleHash[0]);
freeArray(m_dParticleHash[1]);
freeArray(m_dCellStart);
#else
freeArray(m_dGridCounters);
freeArray(m_dGridCells);
#endif
unregisterGLBufferObject(m_posVbo[0]);
unregisterGLBufferObject(m_posVbo[1]);
glDeleteBuffers(2, (const GLuint*)m_posVbo);
glDeleteBuffers(1, (const GLuint*)&m_colorVBO);
}
void
ParticleSystem::update(float deltaTime)
{
assert(m_bInitialized);
#if USE_BULLET
switch (m_simulationMode)
{
case SIMULATION_CUDA:
{
updateCuda(deltaTime);
break;
}
case SIMULATION_BULLET_CPU:
{
updateBullet(deltaTime);
break;
}
default:
{
printf("unknown simulation method\n");
}
}
#else
updateCuda(deltaTime);
#endif
}
void
ParticleSystem::updateBullet(float deltaTime)
{
float* hPos = copyBuffersFromDeviceToHost();
float* hVel = m_hVel;
for (uint i=0;i<m_params.numBodies;i++)
{
float3 pos;
pos.x = hPos[i*4];
pos.y = hPos[i*4+1];
pos.z = hPos[i*4+2];
float3 vel;
vel.x = hVel[i*4];
vel.y = hVel[i*4+1];
vel.z = hVel[i*4+2];
// if (pos.x > 1.0f - m_params.particleRadius) { pos.x = 1.0f - m_params.particleRadius; vel.x *= m_params.boundaryDamping; }
// if (pos.x < -1.0f + m_params.particleRadius) { pos.x = -1.0f + m_params.particleRadius; vel.x *= m_params.boundaryDamping;}
// if (pos.y > 1.0f - m_params.particleRadius) { pos.y = 1.0f - m_params.particleRadius; vel.y *= m_params.boundaryDamping; }
// if (pos.y < -1.0f + m_params.particleRadius) { pos.y = -1.0f + m_params.particleRadius; vel.y *= m_params.boundaryDamping;}
// if (pos.z > 1.0f - m_params.particleRadius) { pos.z = 1.0f - m_params.particleRadius; vel.z *= m_params.boundaryDamping; }
// if (pos.z < -1.0f + m_params.particleRadius) { pos.z = -1.0f + m_params.particleRadius; vel.z *= m_params.boundaryDamping;}
btTransform& trans = m_bulletParticles[i]->getWorldTransform();
trans.setOrigin(btVector3(pos.x, pos.y, pos.z));
m_bulletParticles[i]->setLinearVelocity(btVector3(vel.x, vel.y, vel.z)*btScalar(VEL_DIR_FACT));
m_bulletParticles[i]->setAngularVelocity(btVector3(0,0,0));
}
glUnmapBufferARB(GL_ARRAY_BUFFER);
std::swap(m_currentPosRead, m_currentPosWrite);
std::swap(m_currentVelRead, m_currentVelWrite);
btTransform& collTrans = m_bulletCollider->getWorldTransform();
collTrans.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z));
m_dynamicsWorld->stepSimulation(deltaTime);
glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
hPos = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY);
//sync transform and velocity from Bullet to particle system
for (uint i=0;i<m_params.numBodies;i++)
{
btTransform& trans = m_bulletParticles[i]->getWorldTransform();
hPos[i*4] = trans.getOrigin().getX();
hPos[i*4+1] = trans.getOrigin().getY();
hPos[i*4+2] = trans.getOrigin().getZ();
hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX() * VEL_INV_FACT;
hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY() * VEL_INV_FACT;
hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ() * VEL_INV_FACT;
}
copyBuffersFromHostToDevice();
collTrans = m_bulletCollider->getWorldTransform();
m_params.colliderPos.x = collTrans.getOrigin().getX();
m_params.colliderPos.y = collTrans.getOrigin().getY();
m_params.colliderPos.z = collTrans.getOrigin().getZ();
}
void
ParticleSystem::updateCuda(float deltaTime)
{
#ifndef BT_NO_PROFILE
CProfileManager::Reset();
#endif //BT_NO_PROFILE
BT_PROFILE("update CUDA");
// update constants
setParameters(&m_params);
// integrate
{
BT_PROFILE("integrate");
integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
deltaTime,
m_numParticles);
}
std::swap(m_currentPosRead, m_currentPosWrite);
std::swap(m_currentVelRead, m_currentVelWrite);
#if USE_SORT
// sort and search method
// calculate hash
{
BT_PROFILE("calcHash");
calcHash(m_posVbo[m_currentPosRead],
m_dParticleHash[0],
m_numParticles);
}
#if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
printf("particle hash:\n");
for(uint i=0; i<m_numParticles; i++) {
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
}
#endif
// sort particles based on hash
{
BT_PROFILE("RadixSort");
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
}
#if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
printf("particle hash sorted:\n");
for(uint i=0; i<m_numParticles; i++) {
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
}
#endif
// reorder particle arrays into sorted order and
// find start of each cell
{
BT_PROFILE("reorder");
reorderDataAndFindCellStart(m_dParticleHash[0],
m_posVbo[m_currentPosRead],
m_dVel[m_currentVelRead],
m_dSortedPos,
m_dSortedVel,
m_dCellStart,
m_numParticles,
m_numGridCells);
}
#if DEBUG_GRID
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_numGridCells);
printf("cell start:\n");
for(uint i=0; i<m_numGridCells; i++) {
printf("%d: %d\n", i, m_hCellStart[i]);
}
#endif
#else
// update grid using atomics
updateGrid(m_posVbo[m_currentPosRead],
m_dGridCounters,
m_dGridCells,
m_numParticles,
m_numGridCells);
#endif
// process collisions
{
BT_PROFILE("collide");
for(uint i=0; i<m_solverIterations; i++) {
collide(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
m_dSortedPos, m_dSortedVel,
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
m_dGridCounters,
m_dGridCells,
m_dParticleHash[0],
m_dCellStart,
m_numParticles,
m_numGridCells,
m_maxParticlesPerCell
);
std::swap(m_currentVelRead, m_currentVelWrite);
}
}
#ifndef BT_NO_PROFILE
CProfileManager::Increment_Frame_Counter();
#endif //BT_NO_PROFILE
}
void
ParticleSystem::dumpGrid()
{
// debug
copyArrayFromDevice(m_hGridCounters, m_dGridCounters, 0, sizeof(uint)*m_numGridCells);
copyArrayFromDevice(m_hGridCells, m_dGridCells, 0, sizeof(uint)*m_numGridCells*m_maxParticlesPerCell);
uint total = 0;
uint maxPerCell = 0;
for(uint i=0; i<m_numGridCells; i++) {
if (m_hGridCounters[i] > maxPerCell)
maxPerCell = m_hGridCounters[i];
if (m_hGridCounters[i] > 0) {
printf("%d (%d): ", i, m_hGridCounters[i]);
for(uint j=0; j<m_hGridCounters[i]; j++) {
printf("%d ", m_hGridCells[i*m_maxParticlesPerCell + j]);
}
total += m_hGridCounters[i];
printf("\n");
}
}
printf("max per cell = %d\n", maxPerCell);
printf("total = %d\n", total);
}
void
ParticleSystem::dumpParticles(uint start, uint count)
{
// debug
copyArrayFromDevice(m_hPos, 0, m_posVbo[m_currentPosRead], sizeof(float)*4*count);
copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*count);
for(uint i=start; i<start+count; i++) {
// printf("%d: ", i);
printf("pos: (%.4f, %.4f, %.4f, %.4f)\n", m_hPos[i*4+0], m_hPos[i*4+1], m_hPos[i*4+2], m_hPos[i*4+3]);
printf("vel: (%.4f, %.4f, %.4f, %.4f)\n", m_hVel[i*4+0], m_hVel[i*4+1], m_hVel[i*4+2], m_hVel[i*4+3]);
}
}
float*
ParticleSystem::getArray(ParticleArray array)
{
assert(m_bInitialized);
float* hdata = 0;
float* ddata = 0;
unsigned int vbo = 0;
switch (array)
{
default:
case POSITION:
hdata = m_hPos;
ddata = m_dPos[m_currentPosRead];
vbo = m_posVbo[m_currentPosRead];
break;
case VELOCITY:
hdata = m_hVel;
ddata = m_dVel[m_currentVelRead];
break;
}
copyArrayFromDevice(hdata, ddata, vbo, m_numParticles*4*sizeof(float));
return hdata;
}
void
ParticleSystem::setArray(ParticleArray array, const float* data, int start, int count)
{
assert(m_bInitialized);
switch (array)
{
default:
case POSITION:
{
unregisterGLBufferObject(m_posVbo[m_currentPosRead]);
glBindBuffer(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
glBufferSubData(GL_ARRAY_BUFFER, start*4*sizeof(float), count*4*sizeof(float), data);
glBindBuffer(GL_ARRAY_BUFFER, 0);
registerGLBufferObject(m_posVbo[m_currentPosRead]);
}
break;
case VELOCITY:
copyArrayToDevice(m_dVel[m_currentVelRead], data, start*4*sizeof(float), count*4*sizeof(float));
break;
}
}
inline float frand()
{
return rand() / (float) RAND_MAX;
}
void
ParticleSystem::initGrid(uint *size, float spacing, float jitter, uint numParticles)
{
srand(1973);
for(uint z=0; z<size[2]; z++) {
for(uint y=0; y<size[1]; y++) {
for(uint x=0; x<size[0]; x++) {
uint i = (z*size[1]*size[0]) + (y*size[0]) + x;
if (i < numParticles) {
m_hPos[i*4] = (spacing * x) + m_params.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
m_hPos[i*4+1] = (spacing * y) + m_params.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
m_hPos[i*4+2] = (spacing * z) + m_params.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
m_hPos[i*4+3] = 1.0f;
m_hVel[i*4] = 0.0f;
m_hVel[i*4+1] = 0.0f;
m_hVel[i*4+2] = 0.0f;
m_hVel[i*4+3] = 0.0f;
}
}
}
}
}
void
ParticleSystem::reset(ParticleConfig config)
{
switch(config)
{
default:
case CONFIG_RANDOM:
{
int p = 0, v = 0;
for(uint i=0; i < m_numParticles; i++)
{
float point[3];
point[0] = frand();
point[1] = frand();
point[2] = frand();
m_hPos[p++] = 2 * (point[0] - 0.5f);
m_hPos[p++] = 2 * (point[1] - 0.5f);
m_hPos[p++] = 2 * (point[2] - 0.5f);
m_hPos[p++] = 1.0f; // radius
m_hVel[v++] = 0.0f;
m_hVel[v++] = 0.0f;
m_hVel[v++] = 0.0f;
m_hVel[v++] = 0.0f;
}
}
break;
case CONFIG_GRID:
{
float jitter = m_params.particleRadius*0.01f;
uint s = (int) ceilf(powf((float) m_numParticles, 1.0f / 3.0f));
uint gridSize[3];
gridSize[0] = gridSize[1] = gridSize[2] = s;
initGrid(gridSize, m_params.particleRadius*2.0f, jitter, m_numParticles);
}
break;
}
setArray(POSITION, m_hPos, 0, m_numParticles);
setArray(VELOCITY, m_hVel, 0, m_numParticles);
}
void
ParticleSystem::addSphere(int start, float *pos, float *vel, int r, float spacing)
{
uint index = start;
for(int z=-r; z<=r; z++) {
for(int y=-r; y<=r; y++) {
for(int x=-r; x<=r; x++) {
float dx = x*spacing;
float dy = y*spacing;
float dz = z*spacing;
float l = sqrtf(dx*dx + dy*dy + dz*dz);
if ((l <= m_params.particleRadius*2.0f*r) && (index < m_numParticles)) {
m_hPos[index*4] = pos[0] + dx;
m_hPos[index*4+1] = pos[1] + dy;
m_hPos[index*4+2] = pos[2] + dz;
m_hPos[index*4+3] = pos[3];
m_hVel[index*4] = vel[0];
m_hVel[index*4+1] = vel[1];
m_hVel[index*4+2] = vel[2];
m_hVel[index*4+3] = vel[3];
index++;
}
}
}
}
setArray(POSITION, m_hPos, start, index);
setArray(VELOCITY, m_hVel, start, index);
}
void ParticleSystem::initializeBullet()
{
m_collisionConfiguration = new btDefaultCollisionConfiguration();
m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration);
// m_broadphase = new btDbvtBroadphase();
// m_broadphase = new btAxisSweep3(btVector3(-3,-3,-3),btVector3(3,3,3));
m_broadphase = new btCudaBroadphase(btVector3(-1, -1, -1), btVector3(1, 1, 1), 64, 64, 64, m_params.numBodies, 16, 64, 8, btScalar(1.0f/1.733f));
// m_broadphase = new bt3DGridBroadphase(btVector3(-1, -1, -1), btVector3(1, 1, 1), 64, 64, 64, m_params.numBodies, 16, 64, 8, btScalar(1.0f/1.733f));
m_constraintSolver=new btSequentialImpulseConstraintSolver();
m_dynamicsWorld = new btDiscreteDynamicsWorld(m_dispatcher,m_broadphase,m_constraintSolver,m_collisionConfiguration);
m_dynamicsWorld->setDebugDrawer(&debugDrawer);
//debugDrawer.setDebugMode(btIDebugDraw::DBG_DrawPairs);
// m_dynamicsWorld->setGravity(100*btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z));
m_dynamicsWorld->setGravity(btScalar(ACC_DIR_FACT) * btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z));
m_dynamicsWorld->getSolverInfo().m_numIterations=1;
btRigidBody* body;
btCollisionShape* boxShape = new btBoxShape(btVector3(btScalar(1.2),btScalar(0.05),btScalar(1.2)));
// boxShape->setMargin(0.03f);
btScalar mass(0.);
btVector3 localInertia(0,0,0);
btRigidBody::btRigidBodyConstructionInfo boxRbcInfo(mass, 0, boxShape, localInertia);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, -1.05f,0));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 1.05f,0));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI);
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(-1.05f, 0, 0));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI);
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(1.05f, 0, 0));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0);
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, -1.05f));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
boxRbcInfo.m_startWorldTransform.setIdentity();
boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0);
boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, 1.05f));
boxRbcInfo.m_friction = 0.0f;
body = new btRigidBody(boxRbcInfo);
m_dynamicsWorld->addRigidBody(body);
unsigned int i;
btSphereShape* particleSphere = new btSphereShape(m_params.particleRadius);
particleSphere->setMargin(0.0);
particleSphere->calculateLocalInertia(1,localInertia);
reset(CONFIG_GRID);
for (i=0;i<m_params.numBodies;i++)
{
btRigidBody::btRigidBodyConstructionInfo rbci(1.,0,particleSphere,localInertia);
rbci.m_startWorldTransform.setOrigin(btVector3(m_hPos[i*4],m_hPos[i*4+1],m_hPos[i*4+2]));
body = new btRigidBody(rbci);
body->setActivationState(DISABLE_DEACTIVATION);
m_bulletParticles.push_back(body);
m_dynamicsWorld->addRigidBody(body);
}
btSphereShape* colliderSphere = new btSphereShape(m_params.colliderRadius);
colliderSphere->setMargin(0.0);
colliderSphere->calculateLocalInertia(10., localInertia);
btRigidBody::btRigidBodyConstructionInfo rbci(5., 0, colliderSphere,localInertia);
rbci.m_startWorldTransform.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z));
body = new btRigidBody(rbci);
body->setActivationState(DISABLE_DEACTIVATION);
m_bulletCollider = body;
m_dynamicsWorld->addRigidBody(body);
/* for (i=0;i<6;i++)
{
btVector4 planeEq;
worldBox->getPlaneEquation(planeEq,i);
planeShape = new btStaticPlaneShape(-planeEq,planeEq.getW());
planeShape->setMargin(0.f);
btRigidBody::btRigidBodyConstructionInfo rbci(0.f,0,planeShape);
body = new btRigidBody(rbci);
m_dynamicsWorld->addRigidBody(body);
}
*/
}
void ParticleSystem::finalizeBullet()
{
delete m_dynamicsWorld;
delete m_constraintSolver;
delete m_broadphase;
delete m_dispatcher ;
delete m_collisionConfiguration;
}
float* ParticleSystem::copyBuffersFromDeviceToHost()
{
copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*m_numParticles);
// fill color buffer
glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
float* hPosData = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY);
return hPosData;
}
void ParticleSystem::copyBuffersFromHostToDevice()
{
glUnmapBufferARB(GL_ARRAY_BUFFER);
copyArrayToDevice(m_dVel[m_currentVelRead],m_hVel, 0, sizeof(float)*4*m_numParticles);
}
void ParticleSystem::debugDraw()
{
#if USE_BULLET
glDisable(GL_DEPTH_TEST);
m_dynamicsWorld->debugDrawWorld();
glEnable(GL_DEPTH_TEST);
#endif
}

View File

@@ -1,336 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
//#include <cutil.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include <cuda_gl_interop.h>
#include "particles_kernel.cu"
//#include "radixsort.cu"
//! Check for CUDA error
# define CUT_CHECK_ERROR(errorMessage) do { \
cudaError_t err = cudaGetLastError(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
mm_exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
mm_exit(EXIT_FAILURE); \
} } while (0)
# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \
cudaError err = call; \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
mm_exit(EXIT_FAILURE); \
} } while (0)
# define MY_CUDA_SAFE_CALL( call) do { \
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
mm_exit(EXIT_FAILURE); \
} } while (0)
extern "C"
{
void mm_exit(int val)
{
exit(val);
}
void cudaInit(int argc, char **argv)
{
// CUT_DEVICE_INIT(argc, argv);
}
void allocateArray(void **devPtr, size_t size)
{
MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size));
}
void freeArray(void *devPtr)
{
MY_CUDA_SAFE_CALL(cudaFree(devPtr));
}
void threadSync()
{
MY_CUDA_SAFE_CALL(cudaThreadSynchronize());
}
void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size)
{
if (vbo)
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&device, vbo));
MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
if (vbo)
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vbo));
}
void copyArrayToDevice(void* device, const void* host, int offset, int size)
{
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
}
void registerGLBufferObject(uint vbo)
{
MY_CUDA_SAFE_CALL(cudaGLRegisterBufferObject(vbo));
}
void unregisterGLBufferObject(uint vbo)
{
MY_CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(vbo));
}
void setParameters(SimParams *hostParams)
{
// copy parameters to constant memory
MY_CUDA_SAFE_CALL( cudaMemcpyToSymbol(params, hostParams, sizeof(SimParams)) );
}
//Round a / b to nearest higher integer value
int iDivUp(int a, int b){
return (a % b != 0) ? (a / b + 1) : (a / b);
}
// compute grid and thread block size for a given number of elements
void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads)
{
numThreads = min(blockSize, n);
numBlocks = iDivUp(n, numThreads);
}
void
integrateSystem(uint vboOldPos, uint vboNewPos,
float* oldVel, float* newVel,
float deltaTime,
int numBodies)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
float *oldPos, *newPos;
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));
// execute the kernel
integrate<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
(float4*)oldPos, (float4*)oldVel,
deltaTime);
// check if kernel invocation generated an error
CUT_CHECK_ERROR("integrate kernel execution failed");
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
}
void
updateGrid(uint vboPos,
uint* gridCounters,
uint* gridCells,
uint numBodies,
uint numCells)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
float *pos;
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));
MY_CUDA_SAFE_CALL(cudaMemset(gridCounters, 0, numCells*sizeof(uint)));
// execute the kernel
updateGridD<<< numBlocks, numThreads >>>((float4 *) pos,
gridCounters,
gridCells);
// check if kernel invocation generated an error
CUT_CHECK_ERROR("Kernel execution failed");
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}
void
calcHash(uint vboPos,
uint* particleHash,
int numBodies)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
float *pos;
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&pos, vboPos));
// execute the kernel
calcHashD<<< numBlocks, numThreads >>>((float4 *) pos,
(uint2 *) particleHash);
// check if kernel invocation generated an error
CUT_CHECK_ERROR("Kernel execution failed");
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboPos));
}
void
reorderDataAndFindCellStart(uint* particleHash,
uint vboOldPos,
float* oldVel,
float* sortedPos,
float* sortedVel,
uint* cellStart,
uint numBodies,
uint numCells)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
float *oldPos;
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
#if USE_TEX
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
#endif
reorderDataAndFindCellStartD<<< numBlocks, numThreads >>>(
(uint2 *) particleHash,
(float4 *) oldPos,
(float4 *) oldVel,
(float4 *) sortedPos,
(float4 *) sortedVel,
(uint *) cellStart);
CUT_CHECK_ERROR("Kernel execution failed: reorderDataAndFindCellStartD");
#if USE_TEX
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
#endif
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
}
void
collide(uint vboOldPos, uint vboNewPos,
float* sortedPos, float* sortedVel,
float* oldVel, float* newVel,
uint* gridCounters,
uint* gridCells,
uint* particleHash,
uint* cellStart,
uint numBodies,
uint numCells,
uint maxParticlesPerCell)
{
float4 *oldPos, *newPos;
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&oldPos, vboOldPos));
MY_CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&newPos, vboNewPos));
#if USE_TEX
#if USE_SORT
// use sorted arrays
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, sortedPos, numBodies*sizeof(float4)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, sortedVel, numBodies*sizeof(float4)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, particleHashTex, particleHash, numBodies*sizeof(uint2)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, cellStartTex, cellStart, numCells*sizeof(uint)));
#else
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldPosTex, oldPos, numBodies*sizeof(float4)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, oldVelTex, oldVel, numBodies*sizeof(float4)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCountersTex, gridCounters,numCells*sizeof(uint)));
MY_CUDA_SAFE_CALL(cudaBindTexture(0, gridCellsTex, gridCells, numCells*maxParticlesPerCell*sizeof(uint)));
#endif
#endif
// thread per particle
int numThreads, numBlocks;
computeGridSize(numBodies, BLOCKDIM, numBlocks, numThreads);
// execute the kernel
collideD<<< numBlocks, numThreads >>>((float4*)newPos, (float4*)newVel,
#if USE_SORT
(float4*)sortedPos, (float4*)sortedVel,
(uint2 *) particleHash,
cellStart
#else
(float4*)oldPos, (float4*)oldVel,
gridCounters,
gridCells
#endif
);
// check if kernel invocation generated an error
CUT_CHECK_ERROR("Kernel execution failed");
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboNewPos));
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
#if USE_TEX
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldPosTex));
MY_CUDA_SAFE_CALL(cudaUnbindTexture(oldVelTex));
#if USE_SORT
MY_CUDA_SAFE_CALL(cudaUnbindTexture(particleHashTex));
MY_CUDA_SAFE_CALL(cudaUnbindTexture(cellStartTex));
#else
MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCountersTex));
MY_CUDA_SAFE_CALL(cudaUnbindTexture(gridCellsTex));
#endif
#endif
}
} // extern "C"

View File

@@ -1,57 +0,0 @@
extern "C"
{
void cudaInit(int argc, char **argv);
void allocateArray(void **devPtr, int size);
void freeArray(void *devPtr);
void threadSync();
void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int size);
void copyArrayToDevice(void* device, const void* host, int offset, int size);
void registerGLBufferObject(unsigned int vbo);
void unregisterGLBufferObject(unsigned int vbo);
void setParameters(SimParams *hostParams);
void
integrateSystem(uint vboOldPos, uint vboNewPos,
float* oldVel, float* newVel,
float deltaTime,
int numBodies);
void
updateGrid(uint vboPos,
uint* gridCounters,
uint* gridCells,
uint numBodies,
uint numCells);
void
calcHash(uint vboPos,
uint* particleHash,
int numBodies);
void
reorderDataAndFindCellStart(uint* particleHash,
uint vboOldPos,
float* oldVel,
float* sortedPos,
float* sortedVel,
uint* cellStart,
uint numBodies,
uint numCells);
void
collide(uint vboOldPos, uint vboNewPos,
float* sortedPos, float* sortedVel,
float* oldVel, float* newVel,
uint* gridCounters,
uint* gridCells,
uint* particleHash,
uint* cellStart,
uint numBodies,
uint numCells,
uint maxParticlesPerCell);
}

View File

@@ -1,193 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef __BODYSYSTEMCUDA_H__
#define __BODYSYSTEMCUDA_H__
#define DEBUG_GRID 0
#define DO_TIMING 0
#include "particles_kernel.cuh"
#include "vector_functions.h"
#include "LinearMath/btAlignedObjectArray.h"
// CUDA BodySystem: runs on the GPU
class ParticleSystem
{
public:
ParticleSystem(uint numParticles, uint3 gridSize);
~ParticleSystem();
enum ParticleConfig
{
CONFIG_RANDOM,
CONFIG_GRID,
_NUM_CONFIGS
};
enum ParticleArray
{
POSITION,
VELOCITY,
};
enum SimulationMode
{
SIMULATION_CUDA,
SIMULATION_BULLET_CPU,
SIMULATION_NUM_MODES
};
void update(float deltaTime);
void updateCuda(float deltaTime);
void updateBullet(float deltaTime);
void reset(ParticleConfig config);
float* getArray(ParticleArray array);
void setArray(ParticleArray array, const float* data, int start, int count);
int getNumParticles() const { return m_numParticles; }
unsigned int getCurrentReadBuffer() const { return m_posVbo[m_currentPosRead]; }
unsigned int getColorBuffer() const { return m_colorVBO; }
void dumpGrid();
void dumpParticles(uint start, uint count);
void setIterations(int i) { m_solverIterations = i; }
void setDamping(float x) { m_params.globalDamping = x; }
void setGravity(float x) { m_params.gravity = make_float3(0.0f, x, 0.0f); }
void setCollideSpring(float x) { m_params.spring = x; }
void setCollideDamping(float x) { m_params.damping = x; }
void setCollideShear(float x) { m_params.shear = x; }
void setCollideAttraction(float x) { m_params.attraction = x; }
void setColliderPos(float4 x) { m_params.colliderPos = x; }
float getParticleRadius() { return m_params.particleRadius; }
float4 getColliderPos() { return m_params.colliderPos; }
float getColliderRadius() { return m_params.colliderRadius; }
uint3 getGridSize() { return m_params.gridSize; }
float3 getWorldOrigin() { return m_params.worldOrigin; }
float3 getCellSize() { return m_params.cellSize; }
void addSphere(int index, float *pos, float *vel, int r, float spacing);
SimulationMode getSimulationMode() const
{
return m_simulationMode;
}
void setSimulationMode(SimulationMode mode)
{
m_simulationMode=mode;
}
void debugDraw();
protected: // methods
ParticleSystem() {}
uint createVBO(uint size);
void _initialize(int numParticles);
void _finalize();
void initGrid(uint *size, float spacing, float jitter, uint numParticles);
protected:
// Bullet data
void initializeBullet();
void finalizeBullet();
class btDiscreteDynamicsWorld* m_dynamicsWorld;
class btDefaultCollisionConfiguration* m_collisionConfiguration;
class btCollisionDispatcher* m_dispatcher;
// class btCudaBroadphase* m_broadphase;
class btBroadphaseInterface* m_broadphase;
class btSequentialImpulseConstraintSolver* m_constraintSolver;
btAlignedObjectArray<class btRigidBody*> m_bulletParticles;
btRigidBody* m_bulletCollider;
float* copyBuffersFromDeviceToHost();
void copyBuffersFromHostToDevice();
protected: // data
bool m_bInitialized;
uint m_numParticles;
// CPU data
float* m_hPos;
float* m_hVel;
uint* m_hGridCounters;
uint* m_hGridCells;
uint* m_hParticleHash;
uint* m_hCellStart;
// GPU data
float* m_dPos[2];
float* m_dVel[2];
float* m_dSortedPos;
float* m_dSortedVel;
// uniform grid data
uint* m_dGridCounters; // counts number of entries per grid cell
uint* m_dGridCells; // contains indices of up to "m_maxParticlesPerCell" particles per cell
uint* m_dParticleHash[2];
uint* m_dCellStart;
uint m_posVbo[2];
uint m_colorVBO;
uint m_currentPosRead, m_currentVelRead;
uint m_currentPosWrite, m_currentVelWrite;
// params
SimParams m_params;
uint3 m_gridSize;
uint m_numGridCells;
uint m_maxParticlesPerCell;
uint m_timer;
uint m_solverIterations;
SimulationMode m_simulationMode;
};
#endif // __BODYSYSTEMCUDA_H__

View File

@@ -1,571 +0,0 @@
/*
* Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/*
Particle system example with collisions using uniform grid
*/
#include <cstdlib>
#include <cstdio>
#include <algorithm>
#include <math.h>
//#include <cutil.h>
#include <GL/glew.h>
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#include "LinearMath/btQuickprof.h"
#include "particleSystem.h"
#include "render_particles.h"
#include "paramgl.h"
// view params
int ox, oy;
int buttonState = 0;
float camera_trans[] = {0, 0, -3};
float camera_rot[] = {0, 0, 0};
float camera_trans_lag[] = {0, 0, -3};
float camera_rot_lag[] = {0, 0, 0};
const float inertia = 0.1;
ParticleRenderer::DisplayMode displayMode = ParticleRenderer::PARTICLE_SPHERES;
int mode = 0;
bool displayEnabled = true;
bool bPause = false;
bool displaySliders = false;
bool wireframe = false;
enum { M_VIEW = 0, M_MOVE };
uint numParticles = 0;
uint3 gridSize;
int numIterations = 0; // run until exit
// simulation parameters
float timestep = 0.5f;
float damping = 1.0f;
float gravity = 0.0003f;
int iterations = 1;
int ballr = 10;
float collideSpring = 0.5f;;
float collideDamping = 0.02f;;
float collideShear = 0.1f;
float collideAttraction = 0.0f;
ParticleSystem *psystem = 0;
// fps
static int fpsCount = 0;
static int fpsLimit = 1;
unsigned int timer;
ParticleRenderer *renderer = 0;
float modelView[16];
ParamListGL *params;
extern "C" void cudaInit(int argc, char **argv);
void init(int numParticles, uint3 gridSize)
{
psystem = new ParticleSystem(numParticles, gridSize);
psystem->reset(ParticleSystem::CONFIG_GRID);
renderer = new ParticleRenderer;
renderer->setParticleRadius(psystem->getParticleRadius());
renderer->setColorBuffer(psystem->getColorBuffer());
// CUT_SAFE_CALL(cutCreateTimer(&timer));
}
void initGL()
{
glewInit();
if (!glewIsSupported("GL_VERSION_2_0 GL_VERSION_1_5 GL_ARB_multitexture GL_ARB_vertex_buffer_object")) {
fprintf(stderr, "Required OpenGL extensions missing.");
exit(-1);
}
glEnable(GL_DEPTH_TEST);
glClearColor(0.25, 0.25, 0.25, 1.0);
glutReportErrors();
}
void display()
{
// update the simulation
if (!bPause)
{
psystem->setIterations(iterations);
psystem->setDamping(damping);
psystem->setGravity(-gravity);
psystem->setCollideSpring(collideSpring);
psystem->setCollideDamping(collideDamping);
psystem->setCollideShear(collideShear);
psystem->setCollideAttraction(collideAttraction);
psystem->update(timestep);
renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles());
}
// render
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// view transform
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
for (int c = 0; c < 3; ++c)
{
camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia;
camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia;
}
glTranslatef(camera_trans_lag[0], camera_trans_lag[1], camera_trans_lag[2]);
glRotatef(camera_rot_lag[0], 1.0, 0.0, 0.0);
glRotatef(camera_rot_lag[1], 0.0, 1.0, 0.0);
glGetFloatv(GL_MODELVIEW_MATRIX, modelView);
// cube
glColor3f(1.0, 1.0, 1.0);
glutWireCube(2.0);
// collider
glPushMatrix();
float4 p = psystem->getColliderPos();
glTranslatef(p.x, p.y, p.z);
glColor3f(1.0, 0.0, 0.0);
glutSolidSphere(psystem->getColliderRadius(), 20, 10);
glPopMatrix();
if (displayEnabled)
{
renderer->display(displayMode);
}
if (displaySliders) {
glDisable(GL_DEPTH_TEST);
glBlendFunc(GL_ONE_MINUS_DST_COLOR, GL_ZERO); // invert color
glEnable(GL_BLEND);
params->Render(0, 0);
glDisable(GL_BLEND);
glEnable(GL_DEPTH_TEST);
}
psystem->debugDraw();
glDisable(GL_DEPTH_TEST);
float offsX = 10.f;
float offsY = 10.f;
renderer->showProfileInfo(offsX, offsY, 20.f);
glEnable(GL_DEPTH_TEST);
glutSwapBuffers();
{
char fps[256];
//float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f);
switch (psystem->getSimulationMode())
{
case ParticleSystem::SIMULATION_CUDA:
{
sprintf(fps, "CUDA particles (%d particles)", numParticles);
break;
}
case ParticleSystem::SIMULATION_BULLET_CPU:
{
sprintf(fps, "Bullet btCudaBroadphase (%d btSphereShapes)", numParticles);
break;
}
default:
{
sprintf(fps, "Unknown simulation mode");
}
}
glutSetWindowTitle(fps);
}
glutReportErrors();
}
void reshape(int w, int h)
{
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60.0, (float) w / (float) h, 0.1, 10.0);
glMatrixMode(GL_MODELVIEW);
glViewport(0, 0, w, h);
renderer->setWindowSize(w, h);
renderer->setFOV(60.0);
}
void mouse(int button, int state, int x, int y)
{
int mods;
if (state == GLUT_DOWN)
buttonState |= 1<<button;
else if (state == GLUT_UP)
buttonState = 0;
mods = glutGetModifiers();
if (mods & GLUT_ACTIVE_SHIFT) {
buttonState = 2;
} else if (mods & GLUT_ACTIVE_CTRL) {
buttonState = 3;
}
ox = x; oy = y;
if (displaySliders) {
if (params->Mouse(x, y, button, state)) {
glutPostRedisplay();
return;
}
}
glutPostRedisplay();
}
// transfrom vector by matrix
void xform(float *v, float *r, GLfloat *m)
{
r[0] = v[0]*m[0] + v[1]*m[4] + v[2]*m[8] + m[12];
r[1] = v[0]*m[1] + v[1]*m[5] + v[2]*m[9] + m[13];
r[2] = v[0]*m[2] + v[1]*m[6] + v[2]*m[10] + m[14];
}
// transform vector by transpose of matrix
void ixform(float *v, float *r, GLfloat *m)
{
r[0] = v[0]*m[0] + v[1]*m[1] + v[2]*m[2];
r[1] = v[0]*m[4] + v[1]*m[5] + v[2]*m[6];
r[2] = v[0]*m[8] + v[1]*m[9] + v[2]*m[10];
}
void ixformPoint(float *v, float *r, GLfloat *m)
{
float x[4];
x[0] = v[0] - m[12];
x[1] = v[1] - m[13];
x[2] = v[2] - m[14];
x[3] = 1.0f;
ixform(x, r, m);
}
void motion(int x, int y)
{
float dx, dy;
dx = x - ox;
dy = y - oy;
if (displaySliders) {
if (params->Motion(x, y)) {
ox = x; oy = y;
glutPostRedisplay();
return;
}
}
switch(mode)
{
case M_VIEW:
if (buttonState == 3) {
// left+middle = zoom
camera_trans[2] += (dy / 100.0) * 0.5 * fabs(camera_trans[2]);
}
else if (buttonState & 2) {
// middle = translate
camera_trans[0] += dx / 100.0;
camera_trans[1] -= dy / 100.0;
}
else if (buttonState & 1) {
// left = rotate
camera_rot[0] += dy / 5.0;
camera_rot[1] += dx / 5.0;
}
break;
case M_MOVE:
{
float translateSpeed = 0.003f;
float4 p = psystem->getColliderPos();
if (buttonState==1) {
float v[3], r[3];
v[0] = dx*translateSpeed;
v[1] = -dy*translateSpeed;
v[2] = 0.0f;
ixform(v, r, modelView);
p.x += r[0];
p.y += r[1];
p.z += r[2];
} else if (buttonState==2) {
float v[3], r[3];
v[0] = 0.0f;
v[1] = 0.0f;
v[2] = dy*translateSpeed;
ixform(v, r, modelView);
p.x += r[0];
p.y += r[1];
p.z += r[2];
}
psystem->setColliderPos(p);
}
break;
}
ox = x; oy = y;
glutPostRedisplay();
}
inline float frand()
{
return rand() / (float) RAND_MAX;
}
// commented out to remove unused parameter warnings in Linux
void key(unsigned char key, int /*x*/, int /*y*/)
{
#ifndef BT_NO_PROFILE
if (key >= 0x31 && key < 0x37)
{
int child = key-0x31;
renderer->m_profileIterator->Enter_Child(child);
}
if (key==0x30)
{
renderer->m_profileIterator->Enter_Parent();
}
#endif //BT_NO_PROFILE
switch (key)
{
case ' ':
bPause = !bPause;
break;
case 13:
psystem->update(timestep);
renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles());
break;
case '\033':
case 'q':
exit(0);
break;
case 'v':
mode = M_VIEW;
break;
case 'm':
mode = M_MOVE;
break;
case 'p':
displayMode = (ParticleRenderer::DisplayMode)
((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES);
break;
case 'd':
psystem->dumpGrid();
break;
case 'u':
psystem->dumpParticles(0, 1);
break;
case 'r':
displayEnabled = !displayEnabled;
break;
case 'g':
psystem->reset(ParticleSystem::CONFIG_GRID);
break;
case 'a':
psystem->reset(ParticleSystem::CONFIG_RANDOM);
break;
case 'e':
{
// inject a sphere of particles
float pr = psystem->getParticleRadius();
float tr = pr+(pr*2.0f)*ballr;
float pos[4], vel[4];
pos[0] = -1.0 + tr + frand()*(2.0f - tr*2.0f);
pos[1] = 1.0f - tr;
pos[2] = -1.0 + tr + frand()*(2.0f - tr*2.0f);
pos[3] = 0.0f;
vel[0] = vel[1] = vel[2] = vel[3] = 0.0f;
psystem->addSphere(0, pos, vel, ballr, pr*2.0f);
}
break;
case 'b':
{
// shoot ball from camera
float pr = psystem->getParticleRadius();
float vel[4], velw[4], pos[4], posw[4];
vel[0] = 0.0f;
vel[1] = 0.0f;
vel[2] = -0.05f;
vel[3] = 0.0f;
ixform(vel, velw, modelView);
pos[0] = 0.0f;
pos[1] = 0.0f;
pos[2] = -2.5f;
pos[3] = 1.0;
ixformPoint(pos, posw, modelView);
posw[3] = 0.0f;
psystem->addSphere(0, posw, velw, ballr, pr*2.0f);
}
break;
case 'w':
wireframe = !wireframe;
break;
case 'h':
displaySliders = !displaySliders;
break;
case 's':
psystem->setSimulationMode((ParticleSystem::SimulationMode) ((psystem->getSimulationMode() + 1) % ParticleSystem::SIMULATION_NUM_MODES));
CProfileManager::CleanupMemory();
break;
}
glutPostRedisplay();
}
void special(int k, int x, int y)
{
if (displaySliders) {
params->Special(k, x, y);
}
}
void idle(void)
{
glutPostRedisplay();
}
void initParams()
{
// create a new parameter list
params = new ParamListGL("misc");
params->AddParam(new Param<float>("time step", timestep, 0.0, 1.0, 0.01, &timestep));
params->AddParam(new Param<int>("iterations", iterations, 0, 10, 1, &iterations));
params->AddParam(new Param<float>("damping", damping, 0.0, 1.0, 0.001, &damping));
params->AddParam(new Param<float>("gravity", gravity, 0.0, 0.001, 0.0001, &gravity));
params->AddParam(new Param<int>("ball r", ballr, 1, 20, 1, &ballr));
params->AddParam(new Param<float>("collide spring", collideSpring, 0.0, 1.0, 0.001, &collideSpring));
params->AddParam(new Param<float>("collide damping", collideDamping, 0.0, 0.1, 0.001, &collideDamping));
params->AddParam(new Param<float>("collide shear", collideShear, 0.0, 0.1, 0.001, &collideShear));
params->AddParam(new Param<float>("collide attract", collideAttraction, 0.0, 0.1, 0.001, &collideAttraction));
}
void mainMenu(int i)
{
key((unsigned char) i, 0, 0);
}
void initMenus()
{
glutCreateMenu(mainMenu);
glutAddMenuEntry("Reset block [g]", 'g');
glutAddMenuEntry("Reset random [a]", 'a');
glutAddMenuEntry("Add sphere [e]", 'e');
glutAddMenuEntry("Shoot ball [b]", 'b');
glutAddMenuEntry("View mode [v]", 'v');
glutAddMenuEntry("Move cursor mode [m]", 'm');
glutAddMenuEntry("Toggle point rendering [p]", 'p');
glutAddMenuEntry("Toggle Bullet simulation[s]", 's');
glutAddMenuEntry("Toggle animation [ ]", ' ');
glutAddMenuEntry("Step animation [ret]", 13);
glutAddMenuEntry("Toggle sliders [h]", 'h');
glutAddMenuEntry("Quit (esc)", '\033');
glutAttachMenu(GLUT_RIGHT_BUTTON);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char** argv)
{
// numParticles = 65536*2;
// numParticles = 65536;
// numParticles = 32768;
// numParticles = 8192;
// numParticles = 4096;
numParticles = 2048;
// numParticles = 1024;
// numParticles = 256;
// numParticles = 32;
// numParticles = 2;
uint gridDim = 64;
numIterations = 0;
gridSize.x = gridSize.y = gridSize.z = gridDim;
printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z);
cudaInit(argc, argv);
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
glutInitWindowSize(640, 480);
glutCreateWindow("CUDA particles");
initGL();
init(numParticles, gridSize);
initParams();
initMenus();
glutDisplayFunc(display);
glutReshapeFunc(reshape);
glutMouseFunc(mouse);
glutMotionFunc(motion);
glutKeyboardFunc(key);
glutSpecialFunc(special);
glutIdleFunc(idle);
glutMainLoop();
if (psystem)
delete psystem;
return 0;
}

View File

@@ -1,228 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "particles", "particles.vcproj", "{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}"
ProjectSection(ProjectDependencies) = postProject
{F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165}
{7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3}
{61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319}
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcollision", "..\..\msvc\8\libbulletcollision.vcproj", "{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletmath", "..\..\msvc\8\libbulletmath.vcproj", "{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletdynamics", "..\..\msvc\8\libbulletdynamics.vcproj", "{61BD1097-CF2E-B296-DAA9-73A6FE135319}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcuda", "libbulletcuda.vcproj", "{F74E8E02-0B47-4816-BD0B-FAEAE3343165}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletopenglsupport", "..\..\msvc\8\libbulletopenglsupport.vcproj", "{7C428E76-9271-6284-20F0-9B38ED6931E3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "appBasicDemo", "..\..\msvc\8\appBasicDemo.vcproj", "{3578834A-4B06-DE6F-78AC-FE11F7226D35}"
ProjectSection(ProjectDependencies) = postProject
{F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165}
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}
{61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319}
{7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
DebugDll|Win32 = DebugDll|Win32
DebugDll|x64 = DebugDll|x64
DebugDoublePrecision|Win32 = DebugDoublePrecision|Win32
DebugDoublePrecision|x64 = DebugDoublePrecision|x64
EmuDebug|Win32 = EmuDebug|Win32
EmuDebug|x64 = EmuDebug|x64
EmuRelease|Win32 = EmuRelease|Win32
EmuRelease|x64 = EmuRelease|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
ReleaseDll|Win32 = ReleaseDll|Win32
ReleaseDll|x64 = ReleaseDll|x64
ReleaseDoublePrecision|Win32 = ReleaseDoublePrecision|Win32
ReleaseDoublePrecision|x64 = ReleaseDoublePrecision|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.ActiveCfg = Debug|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.Build.0 = Debug|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.ActiveCfg = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.Build.0 = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|Win32.ActiveCfg = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.ActiveCfg = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.Build.0 = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|Win32.ActiveCfg = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.ActiveCfg = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.Build.0 = Debug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.ActiveCfg = EmuDebug|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.Build.0 = EmuDebug|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.ActiveCfg = EmuDebug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.Build.0 = EmuDebug|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.ActiveCfg = EmuRelease|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.Build.0 = EmuRelease|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.ActiveCfg = EmuRelease|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.Build.0 = EmuRelease|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.ActiveCfg = Release|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.Build.0 = Release|Win32
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.ActiveCfg = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.Build.0 = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|Win32.ActiveCfg = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.ActiveCfg = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.Build.0 = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|Win32.ActiveCfg = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.ActiveCfg = Release|x64
{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.Build.0 = Release|x64
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.ActiveCfg = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.Build.0 = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|x64.ActiveCfg = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.Build.0 = DebugDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.Build.0 = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|x64.ActiveCfg = Debug|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.ActiveCfg = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.Build.0 = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|x64.ActiveCfg = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.ActiveCfg = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.Build.0 = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|x64.ActiveCfg = Release|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.ActiveCfg = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.Build.0 = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|x64.ActiveCfg = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.Build.0 = DebugDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.Build.0 = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|x64.ActiveCfg = Debug|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.ActiveCfg = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.Build.0 = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|x64.ActiveCfg = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.ActiveCfg = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.Build.0 = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|x64.ActiveCfg = Release|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.ActiveCfg = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.Build.0 = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|x64.ActiveCfg = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.Build.0 = DebugDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.Build.0 = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|x64.ActiveCfg = Debug|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.ActiveCfg = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.Build.0 = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|x64.ActiveCfg = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.ActiveCfg = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.Build.0 = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|x64.ActiveCfg = Release|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.ActiveCfg = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.Build.0 = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|x64.ActiveCfg = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.Build.0 = DebugDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.Build.0 = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|x64.ActiveCfg = Debug|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.ActiveCfg = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.Build.0 = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|x64.ActiveCfg = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.ActiveCfg = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.Build.0 = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|x64.ActiveCfg = Release|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.ActiveCfg = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.Build.0 = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|x64.ActiveCfg = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.Build.0 = DebugDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.Build.0 = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|x64.ActiveCfg = Debug|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.ActiveCfg = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.Build.0 = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|x64.ActiveCfg = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.ActiveCfg = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.Build.0 = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|x64.ActiveCfg = Release|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.ActiveCfg = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.Build.0 = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|x64.ActiveCfg = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.ActiveCfg = DebugDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.Build.0 = DebugDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|x64.ActiveCfg = DebugDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.ActiveCfg = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.Build.0 = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|x64.ActiveCfg = Debug|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.ActiveCfg = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.Build.0 = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|x64.ActiveCfg = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.ActiveCfg = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.Build.0 = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|x64.ActiveCfg = Release|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32
{3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -1,880 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="particles"
ProjectGUID="{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}"
RootNamespace="particles"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../DebugCudaParticles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/particles.pdb"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(CUDA_INC_PATH);./;../../common/inc"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil64D.lib glew64.lib glut64.lib"
OutputFile="../../bin/win64/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/particles.pdb"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_CONSOLE;_CRT_SECURE_NO_WARNINGS"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../ReleaseCudaParticles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="$(CUDA_INC_PATH);./;../../common/inc"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil64.lib glew64.lib glut64.lib"
OutputFile="../../bin/win64/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuDebug|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../../src;../../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil32D.lib glew32.lib"
OutputFile="../../bin/win32/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/particles.pdb"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuDebug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(CUDA_INC_PATH);./;../../common/inc"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil64D.lib glew64.lib glut64.lib"
OutputFile="../../bin/win64/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/particles.pdb"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuRelease|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../../src;../../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil32.lib glew32.lib"
OutputFile="../../bin/win32/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuRelease|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(CUDA_INC_PATH);./;../../common/inc"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib cutil64.lib glew64.lib glut64.lib"
OutputFile="../../bin/win64/$(ConfigurationName)/particles.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../common/lib"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="src"
Filter="cu;cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\paramgl.cpp"
>
</File>
<File
RelativePath=".\particles.cpp"
>
</File>
<File
RelativePath=".\particles_kernel.cu"
>
<FileConfiguration
Name="Debug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\particles_kernel.cuh"
>
</File>
<File
RelativePath=".\particleSystem.cpp"
>
</File>
<File
RelativePath=".\particleSystem.cu"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\particleSystem.cuh"
>
</File>
<File
RelativePath=".\particleSystem.h"
>
</File>
<File
RelativePath=".\render_particles.cpp"
>
</File>
<File
RelativePath=".\render_particles.h"
>
</File>
<File
RelativePath=".\shaders.cpp"
>
</File>
<File
RelativePath=".\shaders.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -1,381 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
/*
* Device code.
*/
#ifndef _PARTICLES_KERNEL_H_
#define _PARTICLES_KERNEL_H_
#include <stdio.h>
#include <math.h>
#include "cutil_math.h"
#include "math_constants.h"
#include "particles_kernel.cuh"
#if USE_TEX
// textures for particle position and velocity
texture<float4, 1, cudaReadModeElementType> oldPosTex;
texture<float4, 1, cudaReadModeElementType> oldVelTex;
texture<uint2, 1, cudaReadModeElementType> particleHashTex;
texture<uint, 1, cudaReadModeElementType> cellStartTex;
texture<uint, 1, cudaReadModeElementType> gridCountersTex;
texture<uint, 1, cudaReadModeElementType> gridCellsTex;
#endif
__constant__ SimParams params;
// integrate particle attributes
__global__ void
integrate(float4* newPos, float4* newVel,
float4* oldPos, float4* oldVel,
float deltaTime)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4 pos4 = oldPos[index];
float4 vel4 = oldVel[index];
float3 pos = make_float3(pos4);
float3 vel = make_float3(vel4);
vel += params.gravity * deltaTime;
vel *= params.globalDamping;
// new position = old position + velocity * deltaTime
pos += vel * deltaTime;
// bounce off cube sides
if (pos.x > 1.0f - params.particleRadius) { pos.x = 1.0f - params.particleRadius; vel.x *= params.boundaryDamping; }
if (pos.x < -1.0f + params.particleRadius) { pos.x = -1.0f + params.particleRadius; vel.x *= params.boundaryDamping;}
if (pos.y > 1.0f - params.particleRadius) { pos.y = 1.0f - params.particleRadius; vel.y *= params.boundaryDamping; }
if (pos.y < -1.0f + params.particleRadius) { pos.y = -1.0f + params.particleRadius; vel.y *= params.boundaryDamping;}
if (pos.z > 1.0f - params.particleRadius) { pos.z = 1.0f - params.particleRadius; vel.z *= params.boundaryDamping; }
if (pos.z < -1.0f + params.particleRadius) { pos.z = -1.0f + params.particleRadius; vel.z *= params.boundaryDamping;}
// store new position and velocity
newPos[index] = make_float4(pos, pos4.w);
newVel[index] = make_float4(vel, vel4.w);
}
// calculate position in uniform grid
__device__ int3 calcGridPos(float4 p)
{
int3 gridPos;
gridPos.x = floor((p.x - params.worldOrigin.x) / params.cellSize.x);
gridPos.y = floor((p.y - params.worldOrigin.y) / params.cellSize.y);
gridPos.z = floor((p.z - params.worldOrigin.z) / params.cellSize.z);
return gridPos;
}
// calculate address in grid from position (clamping to edges)
__device__ uint calcGridHash(int3 gridPos)
{
gridPos.x = max(0, min(gridPos.x, params.gridSize.x-1));
gridPos.y = max(0, min(gridPos.y, params.gridSize.y-1));
gridPos.z = max(0, min(gridPos.z, params.gridSize.z-1));
return __mul24(__mul24(gridPos.z, params.gridSize.y), params.gridSize.x) + __mul24(gridPos.y, params.gridSize.x) + gridPos.x;
}
// add particle to cell using atomics
__device__ void addParticleToCell(int3 gridPos,
uint index,
uint* gridCounters,
uint* gridCells)
{
// calculate grid hash
uint gridHash = calcGridHash(gridPos);
// increment cell counter using atomics
#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS
int counter = 0;
#else
int counter = atomicAdd(&gridCounters[gridHash], 1); // returns previous value
counter = min(counter, params.maxParticlesPerCell-1);
#endif
// write particle index into this cell (very uncoalesced!)
gridCells[gridHash*params.maxParticlesPerCell + counter] = index;
}
// update uniform grid
__global__ void
updateGridD(float4* pos,
uint* gridCounters,
uint* gridCells)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4 p = pos[index];
// get address in grid
int3 gridPos = calcGridPos(p);
addParticleToCell(gridPos, index, gridCounters, gridCells);
}
// calculate grid hash value for each particle
__global__ void
calcHashD(float4* pos,
uint2* particleHash)
{
int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
float4 p = pos[index];
// get address in grid
int3 gridPos = calcGridPos(p);
uint gridHash = calcGridHash(gridPos);
// store grid hash and particle index
particleHash[index] = make_uint2(gridHash, index);
}
// rearrange particle data into sorted order, and find the start of each cell in the
// sorted hash array
__global__ void
reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by hash
float4* oldPos,
float4* oldVel,
float4* sortedPos,
float4* sortedVel,
uint* cellStart)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
uint2 sortedData = particleHash[index];
// Load hash data into shared memory so that we can look
// at neighboring particle's hash value without loading
// two hash values per thread
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = sortedData.x;
if (index > 0 && threadIdx.x == 0)
{
// first thread in block must load neighbor particle hash
volatile uint2 prevData = particleHash[index-1];
sharedHash[0] = prevData.x;
}
__syncthreads();
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
{
cellStart[sortedData.x] = index;
}
// Now use the sorted index to reorder the pos and vel data
float4 pos = FETCH(oldPos, sortedData.y); // macro does either global read or texture fetch
float4 vel = FETCH(oldVel, sortedData.y); // see particles_kernel.cuh
sortedPos[index] = pos;
sortedVel[index] = vel;
}
// collide two spheres using DEM method
__device__ float3 collideSpheres(float4 posA, float4 posB,
float4 velA, float4 velB,
float radiusA, float radiusB,
float attraction)
{
// calculate relative position
float3 relPos;
relPos.x = posB.x - posA.x;
relPos.y = posB.y - posA.y;
relPos.z = posB.z - posA.z;
float dist = length(relPos);
float collideDist = radiusA + radiusB;
float3 force = make_float3(0.0f);
if (dist < collideDist) {
float3 norm = relPos / dist;
// relative velocity
float3 relVel;
relVel.x = velB.x - velA.x;
relVel.y = velB.y - velA.y;
relVel.z = velB.z - velA.z;
// relative tangential velocity
float3 tanVel = relVel - (dot(relVel, norm) * norm);
// spring force
force = -params.spring*(collideDist - dist) * norm;
// dashpot (damping) force
force += params.damping*relVel;
// tangential shear force
force += params.shear*tanVel;
// attraction
force += attraction*relPos;
}
return force;
}
// collide particle with all particles in a given cell
// version using grid built with atomics
__device__
float3 collideCell(int3 gridPos,
uint index,
float4 pos,
float4 vel,
float4* oldPos,
float4* oldVel,
uint* gridCounters,
uint* gridCells)
{
float3 force = make_float3(0.0f);
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return force;
}
uint gridHash = calcGridHash(gridPos);
// iterate over particles in this cell
uint particlesInCell = FETCH(gridCounters, gridHash);
particlesInCell = min(particlesInCell, params.maxParticlesPerCell-1);
for(uint i=0; i<particlesInCell; i++) {
uint index2 = FETCH(gridCells, gridHash*params.maxParticlesPerCell + i);
if (index2 != index) { // check not colliding with self
float4 pos2 = FETCH(oldPos, index2);
float4 vel2 = FETCH(oldVel, index2);
// collide two spheres
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
force += projVec;
}
}
return force;
}
// version using sorted grid
__device__
float3 collideCell2(int3 gridPos,
uint index,
float4 pos,
float4 vel,
float4* oldPos,
float4* oldVel,
uint2* particleHash,
uint* cellStart)
{
float3 force = make_float3(0.0f);
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return force;
}
uint gridHash = calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = FETCH(cellStart, gridHash);
if (bucketStart == 0xffffffff)
return force; // cell empty
// iterate over particles in this cell
for(uint i=0; i<params.maxParticlesPerCell; i++) {
uint index2 = bucketStart + i;
uint2 cellData = FETCH(particleHash, index2);
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 != index) { // check not colliding with self
float4 pos2 = FETCH(oldPos, index2);
float4 vel2 = FETCH(oldVel, index2);
// collide two spheres
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
force += projVec;
}
}
return force;
}
__global__ void
collideD(float4* newPos, float4* newVel,
float4* oldPos, float4* oldVel,
#if USE_SORT
uint2* particleHash,
uint* cellStart
#else
uint* gridCounters,
uint* gridCells
#endif
)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
// read particle data from sorted arrays
float4 pos = FETCH(oldPos, index);
float4 vel = FETCH(oldVel, index);
// get address in grid
int3 gridPos = calcGridPos(pos);
float3 force = make_float3(0.0f);
// examine only neighbouring cells
for(int z=-1; z<=1; z++) {
for(int y=-1; y<=1; y++) {
for(int x=-1; x<=1; x++) {
#if USE_SORT
force += collideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart);
#else
force += collideCell(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, gridCounters, gridCells);
#endif
}
}
}
float3 projVec = collideSpheres(pos, params.colliderPos, vel, make_float4(0.0f, 0.0f, 0.0f, 0.0f), params.particleRadius, params.colliderRadius, 0.0f);
force += projVec;
#if USE_SORT
// write new velocity back to original unsorted location
volatile uint2 sortedData = particleHash[index];
newVel[sortedData.y] = vel + make_float4(force, 0.0f);
#else
newVel[index] = vel + make_float4(force, 0.0f);
#endif
}
#endif

View File

@@ -1,46 +0,0 @@
#ifndef PARTICLES_KERNEL_H
#define PARTICLES_KERNEL_H
#define BLOCKDIM 64
#define USE_SORT 1
#ifndef __DEVICE_EMULATION__
#define USE_TEX 1
#endif
#ifdef USE_TEX
#define FETCH(t, i) tex1Dfetch(t##Tex, i)
#else
#define FETCH(t, i) t[i]
#endif
#include "vector_types.h"
typedef unsigned int uint;
struct SimParams {
float4 colliderPos;
float colliderRadius;
float3 gravity;
float globalDamping;
float particleRadius;
uint3 gridSize;
uint numCells;
float3 worldOrigin;
float3 cellSize;
float3 worldSize;
uint3 m_gridSize;
uint numBodies;
uint maxParticlesPerCell;
float spring;
float damping;
float shear;
float attraction;
float boundaryDamping;
};
#endif

View File

@@ -1,260 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#include <GL/glew.h>
#include <math.h>
#include <assert.h>
#include <stdio.h>
#include <paramgl.h>
#include "BMF_Api.h"
#include "LinearMath/btQuickprof.h"
#include "render_particles.h"
#include "shaders.h"
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif
ParticleRenderer::ParticleRenderer()
: m_pos(0),
m_numParticles(0),
m_pointSize(1.0f),
m_particleRadius(0.125f * 0.5f),
m_program(0),
m_vbo(0),
m_colorVBO(0)
{
_initGL();
#ifndef BT_NO_PROFILE
m_profileIterator = CProfileManager::Get_Iterator();
#endif //BT_NO_PROFILE
}
ParticleRenderer::~ParticleRenderer()
{
m_pos = 0;
}
void ParticleRenderer::setPositions(float *pos, int numParticles)
{
m_pos = pos;
m_numParticles = numParticles;
}
void ParticleRenderer::setVertexBuffer(unsigned int vbo, int numParticles)
{
m_vbo = vbo;
m_numParticles = numParticles;
}
void ParticleRenderer::_drawPoints()
{
if (!m_vbo)
{
glBegin(GL_POINTS);
{
int k = 0;
for (int i = 0; i < m_numParticles; ++i)
{
glVertex3fv(&m_pos[k]);
k += 4;
}
}
glEnd();
}
else
{
glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_vbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
if (m_colorVBO) {
glBindBufferARB(GL_ARRAY_BUFFER_ARB, m_colorVBO);
glColorPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_COLOR_ARRAY);
}
glDrawArrays(GL_POINTS, 0, m_numParticles);
glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
}
}
void ParticleRenderer::display(DisplayMode mode /* = PARTICLE_POINTS */)
{
switch (mode)
{
case PARTICLE_POINTS:
glColor3f(1, 1, 1);
glPointSize(m_pointSize);
_drawPoints();
break;
default:
case PARTICLE_SPHERES:
glEnable(GL_POINT_SPRITE_ARB);
glTexEnvi(GL_POINT_SPRITE_ARB, GL_COORD_REPLACE_ARB, GL_TRUE);
glEnable(GL_VERTEX_PROGRAM_POINT_SIZE_NV);
glDepthMask(GL_TRUE);
glEnable(GL_DEPTH_TEST);
glUseProgram(m_program);
glUniform1f( glGetUniformLocation(m_program, "pointScale"), m_window_h / tanf(m_fov*0.5f*(float)M_PI/180.0f) );
glUniform1f( glGetUniformLocation(m_program, "pointRadius"), m_particleRadius );
glColor3f(1, 1, 1);
_drawPoints();
glUseProgram(0);
glDisable(GL_POINT_SPRITE_ARB);
break;
}
}
GLuint
ParticleRenderer::_compileProgram(const char *vsource, const char *fsource)
{
GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(vertexShader, 1, &vsource, 0);
glShaderSource(fragmentShader, 1, &fsource, 0);
glCompileShader(vertexShader);
glCompileShader(fragmentShader);
GLuint program = glCreateProgram();
glAttachShader(program, vertexShader);
glAttachShader(program, fragmentShader);
glLinkProgram(program);
// check if program linked
GLint success = 0;
glGetProgramiv(program, GL_LINK_STATUS, &success);
if (!success) {
char temp[256];
glGetProgramInfoLog(program, 256, 0, temp);
printf("Failed to link program:\n%s\n", temp);
glDeleteProgram(program);
program = 0;
}
return program;
}
void ParticleRenderer::_initGL()
{
m_program = _compileProgram(vertexShader, spherePixelShader);
#if !defined(__APPLE__) && !defined(MACOSX)
glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FALSE);
glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
#endif
}
void ParticleRenderer::showProfileInfo(float& xOffset,float& yStart, float yIncr)
{
#ifndef BT_NO_PROFILE
static double time_since_reset = 0.f;
// if (!m_idle)
{
time_since_reset = CProfileManager::Get_Time_Since_Reset();
}
beginWinCoords();
{
//recompute profiling data, and store profile strings
char blockTime[128];
double totalTime = 0;
int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
m_profileIterator->First();
double parent_time = m_profileIterator->Is_Root() ? time_since_reset : m_profileIterator->Get_Current_Parent_Total_Time();
{
sprintf(blockTime,"--- Profiling: %s (total running time: %.3f ms) ---", m_profileIterator->Get_Current_Parent_Name(), parent_time );
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
sprintf(blockTime,"press number (1,2...) to display child timings, or 0 to go up to parent" );
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
}
double accumulated_time = 0.f;
for (int i = 0; !m_profileIterator->Is_Done(); m_profileIterator->Next())
{
double current_total_time = m_profileIterator->Get_Current_Total_Time();
accumulated_time += current_total_time;
double fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
sprintf(blockTime,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)",
++i, m_profileIterator->Get_Current_Name(), fraction,
(current_total_time / (double)frames_since_reset),m_profileIterator->Get_Current_Total_Calls());
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
totalTime += current_total_time;
}
sprintf(blockTime,"%s (%.3f %%) :: %.3f ms", "Unaccounted",
// (min(0, time_since_reset - totalTime) / time_since_reset) * 100);
parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
sprintf(blockTime,"-------------------------------------------------");
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
}
endWinCoords();
#endif//BT_NO_PROFILE
}
void ParticleRenderer::displayProfileString(int xOffset,int yStart,char* message)
{
glRasterPos3f(xOffset,yStart,0);
BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),message);
}

View File

@@ -1,82 +0,0 @@
/*
* Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef __RENDER_PARTICLES__
#define __RENDER_PARTICLES__
class ParticleRenderer
{
public:
ParticleRenderer();
~ParticleRenderer();
void setPositions(float *pos, int numParticles);
void setVertexBuffer(unsigned int vbo, int numParticles);
void setColorBuffer(unsigned int vbo) { m_colorVBO = vbo; }
enum DisplayMode
{
PARTICLE_POINTS,
PARTICLE_SPHERES,
PARTICLE_NUM_MODES
};
void display(DisplayMode mode = PARTICLE_POINTS);
void displayGrid();
void setPointSize(float size) { m_pointSize = size; }
void setParticleRadius(float r) { m_particleRadius = r; }
void setFOV(float fov) { m_fov = fov; }
void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; }
void showProfileInfo(float& xOffset,float& yStart, float yIncr);
void displayProfileString(int xOffset,int yStart,char* message);
class CProfileIterator* m_profileIterator;
protected: // methods
void _initGL();
void _drawPoints();
GLuint _compileProgram(const char *vsource, const char *fsource);
protected: // data
float *m_pos;
int m_numParticles;
float m_pointSize;
float m_particleRadius;
float m_fov;
int m_window_w, m_window_h;
GLuint m_program;
GLuint m_vbo;
GLuint m_colorVBO;
};
#endif //__ RENDER_PARTICLES__

View File

@@ -1,41 +0,0 @@
#define STRINGIFY(A) #A
// vertex shader
const char *vertexShader = STRINGIFY(
uniform float pointRadius; // point size in world space
uniform float pointScale; // scale to calculate size in pixels
uniform float densityScale;
uniform float densityOffset;
void main()
{
// calculate window-space point size
vec3 posEye = vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0));
float dist = length(posEye);
gl_PointSize = pointRadius * (pointScale / dist);
gl_TexCoord[0] = gl_MultiTexCoord0;
gl_Position = gl_ModelViewProjectionMatrix * vec4(gl_Vertex.xyz, 1.0);
gl_FrontColor = gl_Color;
}
);
// pixel shader for rendering points as shaded spheres
const char *spherePixelShader = STRINGIFY(
void main()
{
const vec3 lightDir = vec3(0.577, 0.577, 0.577);
// calculate normal from texture coordinates
vec3 N;
N.xy = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0);
float mag = dot(N.xy, N.xy);
if (mag > 1.0) discard; // kill pixels outside circle
N.z = sqrt(1.0-mag);
// calculate lighting
float diffuse = max(0.0, dot(lightDir, N));
gl_FragColor = gl_Color * diffuse;
}
);

View File

@@ -1,2 +0,0 @@
extern const char *vertexShader;
extern const char *spherePixelShader;

View File

@@ -0,0 +1,28 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
#include "btGpuDefines.h"
#include "btGpuUtilsSharedDefs.h"
//----------------------------------------------------------------------------------------
#include "btGpuUtilsSharedCode.h"
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------