diff --git a/Extras/CUDA/btCudaBroadphase.cpp b/Extras/CUDA/btCudaBroadphase.cpp index 25a715083..4e30ea46d 100644 --- a/Extras/CUDA/btCudaBroadphase.cpp +++ b/Extras/CUDA/btCudaBroadphase.cpp @@ -1,4 +1,3 @@ - /* Bullet Continuous Collision Detection and Physics Library Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ @@ -14,1508 +13,316 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "particles_kernel.cuh" -#include "particleSystem.cuh" -#include "radixsort.cuh" -#include "vector_functions.h" -#include +//-------------------------------------------------------------------------- -#ifdef WIN32//for glut.h -#include -#endif - -#include -//think different -#if defined(__APPLE__) && !defined (VMDMESA) -#include -#include -#include -#else -#include -#endif - -#define MAX_COLL_PAIR_PER_PARTICLE 64 - -#define USE_SORT 1 -#define USE_OLD 0 -#define USE_CUDA 1 - -#include "btCudaBroadphase.h" #include "LinearMath/btAlignedAllocator.h" #include "LinearMath/btQuickprof.h" #include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" +#include "btCudaBroadphaseKernel.h" +#include "btCudaBroadphase.h" +#include "radixsort.cuh" +//#include "vector_functions.h" -btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) : -btSimpleBroadphase(maxProxies, +//-------------------------------------------------------------------------- + +#include + +//-------------------------------------------------------------------------- + +btCudaBroadphase::btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell, + btScalar cellFactorAABB) : + btSimpleBroadphase(maxSmallProxies, // new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache), new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache), - m_bInitialized(false), - m_numParticles(simParams.numBodies), - m_hPos(0), - m_hVel(0), - m_currentPosRead(0), - m_currentVelRead(0), - m_currentPosWrite(1), - m_currentVelWrite(1), - m_maxParticlesPerCell(4), - m_simParams(simParams) + m_bInitialized(false), + m_numBodies(0) { m_ownsPairCache = true; + m_params.m_gridSizeX = gridSizeX; + m_params.m_gridSizeY = gridSizeY; + m_params.m_gridSizeZ = gridSizeZ; + m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ; + btVector3 w_org = worldAabbMin; + m_params.m_worldOriginX = w_org.getX(); + m_params.m_worldOriginY = w_org.getY(); + m_params.m_worldOriginZ = w_org.getZ(); + btVector3 w_size = worldAabbMax - worldAabbMin; + m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX; + m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY; + m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ; + m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ); + m_maxRadius *= btScalar(0.5f); + m_params.m_numBodies = m_numBodies; + m_params.m_maxBodiesPerCell = maxBodiesPerCell; - m_dPos[0] = m_dPos[1] = 0; - m_dVel[0] = m_dVel[1] = 0; + m_numLargeHandles = 0; + m_maxLargeHandles = maxLargeProxies; - m_simParams.gridSize.x = 64; - m_simParams.gridSize.y = 64; - m_simParams.gridSize.z = 64; + m_maxPairsPerBody = maxPairsPerBody; + m_cellFactorAABB = cellFactorAABB; - m_simParams.numCells = m_simParams.gridSize.x*m_simParams.gridSize.y*m_simParams.gridSize.z; - m_simParams.worldSize = make_float3(2.0f, 2.0f, 2.0f); + _initialize(); +} // btCudaBroadphase::btCudaBroadphase() - // set simulation parameters - - m_simParams.numBodies = m_numParticles; - m_simParams.maxParticlesPerCell = m_maxParticlesPerCell; - - m_simParams.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f); - m_simParams.cellSize = make_float3(m_simParams.worldSize.x / m_simParams.gridSize.x, m_simParams.worldSize.y / m_simParams.gridSize.y, m_simParams.worldSize.z / m_simParams.gridSize.z); - - m_simParams.particleRadius = m_simParams.cellSize.x * 0.5f; - m_simParams.colliderPos = make_float4(-1.2f, -0.8f, 0.8f, 1.0f); - m_simParams.colliderRadius = 0.2f; - - m_simParams.spring = 0.5f; - m_simParams.damping = 0.02f; - m_simParams.shear = 0.1f; - m_simParams.attraction = 0.0f; - m_simParams.boundaryDamping = -0.5f; - - m_simParams.gravity = make_float3(0.0f, -0.0003f, 0.0f); - m_simParams.globalDamping = 1.0f; - - _initialize(m_numParticles); - -} - -static inline float lerp(float a, float b, float t) -{ - return a + t*(b-a); -} - -static void colorRamp(float t, float *r) -{ - const int ncolors = 7; - float c[ncolors][3] = { - { 1.0, 0.0, 0.0, }, - { 1.0, 0.5, 0.0, }, - { 1.0, 1.0, 0.0, }, - { 0.0, 1.0, 0.0, }, - { 0.0, 1.0, 1.0, }, - { 0.0, 0.0, 1.0, }, - { 1.0, 0.0, 1.0, }, - }; - t = t * (ncolors-1); - int i = (int) t; - float u = t - floor(t); - r[0] = lerp(c[i][0], c[i+1][0], u); - r[1] = lerp(c[i][1], c[i+1][1], u); - r[2] = lerp(c[i][2], c[i+1][2], u); -} - - -unsigned int btCudaBroadphase::createVBO(unsigned int size) -{ - GLuint vbo; - glGenBuffers(1, &vbo); - glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - registerGLBufferObject(vbo); - return vbo; -} - - -void btCudaBroadphase::_initialize(int numParticles) -{ - assert(!m_bInitialized); - - // allocate host storage - m_hPos = new float[numParticles*4]; - m_hVel = new float[numParticles*4]; - m_hSortedPos = new float[numParticles*4]; - memset(m_hPos, 0, numParticles*4*sizeof(float)); - memset(m_hVel, 0, numParticles*4*sizeof(float)); - memset(m_hSortedPos, 0, numParticles*4*sizeof(float)); - - m_hGridCounters = new uint[m_simParams.numCells]; - m_hGridCells = new uint[m_simParams.numCells*m_maxParticlesPerCell]; - memset(m_hGridCounters, 0, m_simParams.numCells*sizeof(uint)); - memset(m_hGridCells, 0, m_simParams.numCells*m_maxParticlesPerCell*sizeof(uint)); - - m_hParticleHash = new uint[numParticles*2]; - memset(m_hParticleHash, 0, numParticles*2*sizeof(uint)); - - m_hCellStart = new uint[m_simParams.numCells]; - memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint)); - - - m_hPairBuffStartCurr = new unsigned int[m_numParticles * 2 + 1]; - // --------------- for now, init with MAX_COLL_PAIR_PER_PARTICLE for each particle - m_hPairBuffStartCurr[0] = 0; - m_hPairBuffStartCurr[1] = 0; - for(uint i = 1; i <= m_numParticles; i++) - { - m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + MAX_COLL_PAIR_PER_PARTICLE; -// m_hPairBuffStartCurr[i * 2 + 1] = m_hPairBuffStartCurr[i * 2]; - m_hPairBuffStartCurr[i * 2 + 1] = 0; - } - //---------------- - m_hAABB = new float[numParticles*4*2]; // BB Min & Max - - m_hPairBuff = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE]; - memset(m_hPairBuff, 0x00, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*4); - - m_hPairScan = new unsigned int[m_numParticles + 1]; - m_hPairOut = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE]; - - // allocate GPU data - unsigned int memSize = sizeof(float) * 4 * m_numParticles; - - m_posVbo[0] = createVBO(memSize); - m_posVbo[1] = createVBO(memSize); - - allocateArray((void**)&m_dVel[0], memSize); - allocateArray((void**)&m_dVel[1], memSize); - - allocateArray((void**)&m_dSortedPos, memSize); - allocateArray((void**)&m_dSortedVel, memSize); - -#if USE_SORT - allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint)); - allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint)); - allocateArray((void**)&m_dCellStart, m_simParams.numCells*sizeof(uint)); -#else - allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint)); - allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); -#endif - - allocateArray((void**)&m_dPairBuff, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int)); - copyArrayToDevice(m_dPairBuff, m_hPairBuff, 0, sizeof(unsigned int)*m_numParticles*MAX_COLL_PAIR_PER_PARTICLE); - - allocateArray((void**)&m_dPairBuffStartCurr, (m_numParticles*2 + 1)*sizeof(unsigned int)); - allocateArray((void**)&m_dAABB, memSize*2); - - copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, 0, sizeof(unsigned int)*(m_numParticles*2 + 1)); - - allocateArray((void**)&m_dPairScan, (m_numParticles + 1)*sizeof(unsigned int)); - allocateArray((void**)&m_dPairOut, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int)); - - m_colorVBO = createVBO(m_numParticles*4*sizeof(float)); - -#if 1 - // fill color buffer - glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO); - float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY); - float *ptr = data; - for(uint i=0; i params.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) { - return force; - } + m_hCellStart = new unsigned int[m_params.m_numCells]; + memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int)); - uint gridHash = calcGridHash(gridPos); - - // get start of bucket for this cell - uint bucketStart = FETCH(cellStart, gridHash); - if (bucketStart == 0xffffffff) - return force; // cell empty - - // iterate over particles in this cell - for(uint i=0; i= 0) + m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2]; + // --------------- for now, init with m_maxPairsPerBody for each body + m_hPairBuffStartCurr[0] = 0; + m_hPairBuffStartCurr[1] = 0; + for(int i = 1; i <= m_maxHandles; i++) { + m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody; + m_hPairBuffStartCurr[i * 2 + 1] = 0; + } + //---------------- + unsigned int numAABB = m_maxHandles + m_maxLargeHandles; + m_hAABB = new btCuda3F1U[numAABB * 2]; // AABB Min & Max -//#define _USE_BRUTEFORCE_N 1 -#ifdef _USE_BRUTEFORCE_N + m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed? + m_hPairScan = new unsigned int[m_maxHandles + 1]; + + m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody]; + + // allocate GPU data + btCuda_allocateArray((void**)&m_dBodiesHash[0], m_maxHandles * 2 * sizeof(unsigned int)); + btCuda_allocateArray((void**)&m_dBodiesHash[1], m_maxHandles * 2 * sizeof(unsigned int)); + + btCuda_allocateArray((void**)&m_dCellStart, m_params.m_numCells * sizeof(unsigned int)); + + btCuda_allocateArray((void**)&m_dPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); + btCuda_copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed? + + btCuda_allocateArray((void**)&m_dPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); + btCuda_copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int)); + + btCuda_allocateArray((void**)&m_dAABB, numAABB * sizeof(btCuda3F1U) * 2); + + btCuda_allocateArray((void**)&m_dPairScan, (m_maxHandles + 1) * sizeof(unsigned int)); + + btCuda_allocateArray((void**)&m_dPairOut, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); + + btCuda_setParameters(&m_params); + +// large proxies + + // allocate handles buffer and put all handles on free list + m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16); + m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles]; + m_firstFreeLargeHandle = 0; + { + for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++) + { + m_pLargeHandles[i].SetNextFree(i + 1); + m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i; + } + m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0); + } + +// debug data + m_numPairsAdded = 0; + m_numOverflows = 0; + + m_bInitialized = true; +} // btCudaBroadphase::_initialize() + +//-------------------------------------------------------------------------- + +void btCudaBroadphase::_finalize() +{ + assert(m_bInitialized); + delete [] m_hBodiesHash; + delete [] m_hCellStart; + delete [] m_hPairBuffStartCurr; + delete [] m_hAABB; + delete [] m_hPairBuff; + delete [] m_hPairScan; + delete [] m_hPairOut; + btCuda_freeArray(m_dBodiesHash[0]); + btCuda_freeArray(m_dBodiesHash[1]); + btCuda_freeArray(m_dCellStart); + btCuda_freeArray(m_dPairBuffStartCurr); + btCuda_freeArray(m_dAABB); + btCuda_freeArray(m_dPairBuff); + btCuda_freeArray(m_dPairScan); + btCuda_freeArray(m_dPairOut); + + btAlignedFree(m_pLargeHandlesRawPtr); + + m_bInitialized = false; +} // btCudaBroadphase::_finalize() + +//-------------------------------------------------------------------------- + +void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher) +{ + if(m_numHandles <= 0) + { + BT_PROFILE("addLarge2LargePairsToCache -- CPU"); + addLarge2LargePairsToCache(dispatcher); + return; + } + // update constants + btCuda_setParameters(&m_params); + // move AABB array to GPU + { + BT_PROFILE("copy AABB"); + // do it faster ? + btCuda3F1U* pBB = m_hAABB; int i; - for (i=0;im_clientObject) { - btSimpleBroadphaseProxy* proxy1 = &m_pHandles[i]; - - if (proxy0 != proxy1) - { - btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0); - btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1); - - if (aabbOverlap(p0,p1)) - { - if ( !m_pairCache->findPair(proxy0,proxy1)) - { - m_pairCache->addOverlappingPair(proxy0,proxy1); - } - } else - { - if (!m_pairCache->hasDeferredRemoval()) - { - if ( m_pairCache->findPair(proxy0,proxy1)) - { - m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); - } - } - - } - } - proxy1 = &m_pHandles[proxy1->GetNextAllocated()]; - + continue; } - proxy0 = &m_pHandles[proxy0->GetNextAllocated()]; - + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = proxy0->m_aabbMax.getZ(); + pBB->uw = num_small; + pBB++; + num_small++; } -#else //_USE_BRUTEFORCE_N - - // update constants - setParameters(&m_simParams); - - float deltaTime = 1./60.f; - - /* - - // integrate - integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite], - m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite], - deltaTime, - m_numParticles); - - - - - btSwap(m_currentPosRead, m_currentPosWrite); - btSwap(m_currentVelRead, m_currentVelWrite); -*/ - -#if USE_SORT - // sort and search method - - // calculate hash - { - BT_PROFILE("calcHash-- CUDA"); - calcHash( m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles); - } - -#if DEBUG_GRID - copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); - printf("particle hash:\n"); - for(uint i=0; im_aabbMin + proxy0->m_aabbMax)*0.5f; - -// float4* p = (float4*)&m_hSortedPos[index*4]; - - - int3 particleGridPos; - particleGridPos.x = floor((mypos.x() - m_simParams.worldOrigin.x) / m_simParams.cellSize.x); - particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y); - particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z); - - int numRejected=0; - - //for(int z=0; z<1; z++) - for(int z=-1; z<=1; z++) - { - // for(int y=0; y<1; y++) - for(int y=-1; y<=1; y++) - { - // for(int x=0; x<1; x++) - for(int x=-1; x<=1; x++) - { - int3 gridPos; - gridPos.x = particleGridPos.x + x; - gridPos.y = particleGridPos.y + y; - gridPos.z = particleGridPos.z + z; - - if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1)) - { - continue; - } - - - gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1)); - gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1)); - gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1)); - uint gridHash = ((gridPos.z*m_simParams.gridSize.y)* m_simParams.gridSize.x) + (gridPos.y* m_simParams.gridSize.x) + gridPos.x; - - // get start of bucket for this cell - unsigned int bucketStart = m_hCellStart[gridHash]; - if (bucketStart == 0xffffffff) - continue; - - // iterate over particles in this cell - for(uint q=0; qaddOverlappingPair(proxy0,proxy1); - else - { - numRejected++; - } - - } - } - - - - //int numOverlap += myCollideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart); - } - } - } - } - -#else // USE_OLD - btBroadphasePairArray& overlappingPairArrayA = m_pairCache->getOverlappingPairArray(); - findOverlappingPairs(dispatcher); -#endif - -#endif //_USE_BRUTEFORCE_N - -#if USE_OLD - ///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache - if (m_ownsPairCache && m_pairCache->hasDeferredRemoval()) + m_LastHandleIndex = new_largest_index; + new_largest_index = -1; + unsigned int num_large = 0; + for(i = 0; i <= m_LastLargeHandleIndex; i++) { - BT_PROFILE("Cleaning-- CPU"); - - btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray(); - - //perform a sort, to find duplicates and to sort 'invalid' pairs to the end - //overlappingPairArray.quickSort(btBroadphasePairSortPredicate()); - overlappingPairArray.heapSort(btBroadphasePairSortPredicate()); - //printf("A) overlappingPairArray.size()=%d\n",overlappingPairArray.size()); - - overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair); - m_invalidPair = 0; - - - btBroadphasePair previousPair; - previousPair.m_pProxy0 = 0; - previousPair.m_pProxy1 = 0; - previousPair.m_algorithm = 0; - - - int i; - for (i=0;im_clientObject) { - - btBroadphasePair& pair = overlappingPairArray[i]; - - bool isDuplicate = (pair == previousPair); - - previousPair = pair; - - bool needsRemoval = false; - - if (!isDuplicate) - { - bool hasOverlap = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1); - - if (hasOverlap) - { - needsRemoval = false;//callback->processOverlap(pair); - } else - { - bool hasOverlapA = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1); - needsRemoval = true; - } - } else - { - //remove duplicate - needsRemoval = true; - //should have no algorithm -// btAssert(!pair.m_algorithm); - } - - if (needsRemoval) - { - m_pairCache->cleanOverlappingPair(pair,dispatcher); - - // m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1); - // m_overlappingPairArray.pop_back(); - pair.m_pProxy0 = 0; - pair.m_pProxy1 = 0; - m_invalidPair++; - - } - + continue; } - - ///if you don't like to skip the invalid pairs in the array, execute following code: - #define CLEAN_INVALID_PAIRS 1 - #ifdef CLEAN_INVALID_PAIRS - - //perform a sort, to sort 'invalid' pairs to the end - //overlappingPairArray.quickSort(btBroadphasePairSortPredicate()); - overlappingPairArray.heapSort(btBroadphasePairSortPredicate()); - //printf("B) overlappingPairArray.size()=%d\n",overlappingPairArray.size()); - - overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair); -// printf("C) overlappingPairArray.size()=%d\n",overlappingPairArray.size()); - m_invalidPair = 0; - #endif//CLEAN_INVALID_PAIRS - + new_largest_index = i; + pBB->fx = proxy0->m_aabbMin.getX(); + pBB->fy = proxy0->m_aabbMin.getY(); + pBB->fz = proxy0->m_aabbMin.getZ(); + pBB->uw = i + m_maxHandles; + pBB++; + pBB->fx = proxy0->m_aabbMax.getX(); + pBB->fy = proxy0->m_aabbMax.getY(); + pBB->fz = proxy0->m_aabbMax.getZ(); + pBB->uw = num_large + m_maxHandles; + pBB++; + num_large++; } -#endif // USE_OLD + m_LastLargeHandleIndex = new_largest_index; + // paranoid checks + btAssert(num_small == m_numHandles); + btAssert(num_large == m_numLargeHandles); } - - //printf("numRejected=%d\n",numRejected); -} - -static inline float frand() -{ - return rand() / (float) RAND_MAX; -} - - -void btCudaBroadphase::initGrid(unsigned int* size, float spacing, float jitter, unsigned int numParticles) -{ - srand(1973); -#ifdef CONTROLLED_START - float extra=0.01f; - for(uint z=0; z maxPerCell) - maxPerCell = m_hGridCounters[i]; - if (m_hGridCounters[i] > 0) { - printf("%d (%d): ", i, m_hGridCounters[i]); - for(uint j=0; jm_aabbMin.getX(); - *pVec++ = proxy0->m_aabbMin.getY(); - *pVec++ = proxy0->m_aabbMin.getZ(); - *pVec++ = 0.0F; - *pVec++ = proxy0->m_aabbMax.getX(); - *pVec++ = proxy0->m_aabbMax.getY(); - *pVec++ = proxy0->m_aabbMax.getZ(); - *pVec++ = 0.0F; - } - } - -#if USE_CUDA -{ - { BT_PROFILE("CopyBB to CUDA"); - copyArrayToDevice(m_dAABB, m_hAABB, 0, sizeof(float)*4*2*m_numParticles); + btCuda_copyArrayToDevice(m_dAABB, m_hAABB, sizeof(btCuda3F1U) * 2 * (m_numHandles + m_numLargeHandles)); + } + // calculate hash + { + BT_PROFILE("calcHash -- CUDA"); + btCuda_calcHashAABB(m_dAABB, m_dBodiesHash[0], m_numHandles); + } +// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles); + // sort bodies based on hash + { + BT_PROFILE("RadixSort-- CUDA"); + RadixSort((KeyValuePair*)m_dBodiesHash[0], (KeyValuePair*)m_dBodiesHash[1], m_numHandles, 32); + } + // find start of each cell + { + BT_PROFILE("Find cell start -- CUDA"); + btCuda_findCellStart(m_dBodiesHash[0], m_dCellStart, m_numHandles, m_params.m_numCells); + } +// btCuda_copyArrayFromDevice((void*)m_hBodiesHash, (void*)m_dBodiesHash[0], sizeof(unsigned int) * 2 * m_numHandles); +// btCuda_copyArrayFromDevice((void*)m_hCellStart, (void*)m_dCellStart, sizeof(unsigned int) * m_params.m_numCells); + { + BT_PROFILE("FindOverlappingPairs -- CUDA"); + btCuda_findOverlappingPairs(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles); } { - BT_PROFILE("btCudaFindOverlappingPairs"); - btCudaFindOverlappingPairs( m_dAABB, - m_dParticleHash[0], - m_dCellStart, - m_dPairBuff, - m_dPairBuffStartCurr, - m_numParticles - ); + BT_PROFILE("FindPairsLarge -- CUDA"); + btCuda_findPairsLarge(m_dAABB, m_dBodiesHash[0], m_dCellStart, m_dPairBuff, m_dPairBuffStartCurr, m_numHandles, m_numLargeHandles); } { - BT_PROFILE("btCudaComputePairCacheChanges"); - btCudaComputePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_numParticles); + BT_PROFILE("ComputePairCacheChanges -- CUDA"); + btCuda_computePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dAABB, m_numHandles); } { - BT_PROFILE("scanOverlappingPairBuffCPU"); - copyArrayFromDevice(m_hPairScan, m_dPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1)); + BT_PROFILE("scanOverlappingPairBuff -- CPU"); + btCuda_copyArrayFromDevice(m_hPairScan, m_dPairScan, sizeof(unsigned int)*(m_numHandles + 1)); scanOverlappingPairBuffCPU(); - copyArrayToDevice(m_dPairScan, m_hPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1)); + btCuda_copyArrayToDevice(m_dPairScan, m_hPairScan, sizeof(unsigned int)*(m_numHandles + 1)); } { - BT_PROFILE("btCudaSqueezeOverlappingPairBuff"); - btCudaSqueezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_numParticles); + BT_PROFILE("SqueezeOverlappingPairBuff -- CUDA"); + btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_dAABB, m_numHandles); } { - BT_PROFILE("btCudaSqueezeOverlappingPairBuff"); - copyArrayFromDevice(m_hPairOut, m_dPairOut, 0, sizeof(unsigned int) * m_hPairScan[m_numParticles]); + BT_PROFILE("SqueezeOverlappingPairBuff -- CUDA"); + btCuda_copyArrayFromDevice(m_hPairOut, m_dPairOut, sizeof(unsigned int) * m_hPairScan[m_numHandles]); } - -} -#else - findOverlappingPairsCPU( m_hAABB, - m_hParticleHash, - m_hCellStart, - m_hPairBuff, - m_hPairBuffStartCurr, - m_numParticles); - computePairCacheChangesCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_numParticles); - scanOverlappingPairBuffCPU(); - squeezeOverlappingPairBuffCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_numParticles); -#endif { - BT_PROFILE("addPairsToCache"); + BT_PROFILE("addPairsToCache -- CPU"); addPairsToCacheCPU(dispatcher); } -} // btCudaBroadphase::fillOverlappingPairCache() - - - -// calculate position in uniform grid -int3 btCudaBroadphase::calcGridPosCPU(float4 p) -{ - int3 gridPos; - gridPos.x = floor((p.x - m_simParams.worldOrigin.x) / m_simParams.cellSize.x); - gridPos.y = floor((p.y - m_simParams.worldOrigin.y) / m_simParams.cellSize.y); - gridPos.z = floor((p.z - m_simParams.worldOrigin.z) / m_simParams.cellSize.z); - return gridPos; -} // btCudaBroadphase::calcGridPos() - -// calculate address in grid from position (clamping to edges) -uint btCudaBroadphase::calcGridHashCPU(int3 gridPos) -{ - gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1)); - gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1)); - gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1)); - return (gridPos.z * m_simParams.gridSize.y) * m_simParams.gridSize.x + gridPos.y * m_simParams.gridSize.x + gridPos.x; -} - -void btCudaBroadphase::computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles) -{ - for(uint i = 0; i < numParticles; i++) { - computePairCacheChangesCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan); + BT_PROFILE("addLarge2LargePairsToCache -- CPU"); + addLarge2LargePairsToCache(dispatcher); } -} - -void btCudaBroadphase::computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan) -{ - uint2 start_curr = pPairBuffStartCurr[index]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint *pInp = pPairBuff + start; - uint num_changes = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - num_changes++; - } - } - pPairScan[index+1] = num_changes; -} - -void btCudaBroadphase::findOverlappingPairsCPU( float* pAABB, - uint* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint* pPairBuffStartCurr, - uint numParticles) -{ - BT_PROFILE("findOverlappingPairsCPU -- CPU"); - for(uint i = 0; i < numParticles; i++) - { - findOverlappingPairsCPU_D( - i, - (float4 *)pAABB, - (uint2*)pParticleHash, - (uint*)pCellStart, - (uint*)pPairBuff, - (uint2*)pPairBuffStartCurr, - numParticles); - } -} // btCudaBroadphase::findOverlappingPairsCPU() - -void btCudaBroadphase::findOverlappingPairsCPU_D( uint index, - float4* pAABB, - uint2* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles) -{ - float4 bbMin = pAABB[index*2]; - float4 bbMax = pAABB[index*2+1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - - // get address in grid - int3 gridPos = calcGridPosCPU(pos); - // examine only neighbouring cells - for(int z=-1; z<=1; z++) { - for(int y=-1; y<=1; y++) { - for(int x=-1; x<=1; x++) { - int3 gridPos2; - gridPos2.x = gridPos.x + x; - gridPos2.y = gridPos.y + y; - gridPos2.z = gridPos.z + z; - findPairsInCellCPU(gridPos2, index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles); - } - } - } -} // btCudaBroadphase::findOverlappingPairsCPU_D() - - -void btCudaBroadphase::findPairsInCellCPU( int3 gridPos, - uint index, - uint2* pParticleHash, - uint* pCellStart, - float4* pAABB, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles) -{ - if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1)) { - return; - } - uint gridHash = calcGridHashCPU(gridPos); - // get start of bucket for this cell - uint bucketStart = pCellStart[gridHash]; - if (bucketStart == 0xffffffff) - return; // cell empty - // iterate over particles in this cell - float4 min0 = pAABB[index*2]; - float4 max0 = pAABB[index*2+1]; - - uint2 sortedData = pParticleHash[index]; - uint unsorted_indx = sortedData.y; - uint2 start_curr = pPairBuffStartCurr[unsorted_indx]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint curr1 = curr; - uint bucketEnd = bucketStart + m_simParams.maxParticlesPerCell; - bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd; - for(uint index2=bucketStart; index2 < bucketEnd; index2++) - { - uint2 cellData = pParticleHash[index2]; - if (cellData.x != gridHash) break; // no longer in same bucket - if (index2 != index) // check not colliding with self - { - float4 min1 = pAABB[index2*2]; - float4 max1 = pAABB[index2*2 + 1]; - if(cudaTestAABBOverlapCPU(min0, max0, min1, max1)) - { - uint k; - uint unsorted_indx2 = cellData.y; - for(k = 0; k < curr1; k++) - { - uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); - if(old_pair == unsorted_indx2) - { - pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; - break; - } - } - if(k == curr1) - { - pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG; - curr++; - } - } - } - } - pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr); - return; -} // btCudaBroadphase::findPairsInCellCPU() - -uint btCudaBroadphase::cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} // btCudaBroadphase::cudaTestAABBOverlapCPU() + return; +} // btCudaBroadphase::calculateOverlappingPairs() +//-------------------------------------------------------------------------- void btCudaBroadphase::scanOverlappingPairBuffCPU() { m_hPairScan[0] = 0; - for(uint i = 1; i <= m_numParticles; i++) + for(int i = 1; i <= m_numHandles; i++) { unsigned int delta = m_hPairScan[i]; m_hPairScan[i] = m_hPairScan[i-1] + delta; } } // btCudaBroadphase::scanOverlappingPairBuffCPU() -void btCudaBroadphase::squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles) -{ - for(uint i = 0; i < numParticles; i++) - { - squeezeOverlappingPairBuffCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan, pPairOut); - } -} // btCudaBroadphase::squeezeOverlappingPairBuffCPU() - -void btCudaBroadphase::squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut) -{ - uint2 start_curr = pPairBuffStartCurr[index]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint* pInp = pPairBuff + start; - uint* pOut = pPairOut + pPairScan[index]; - uint* pOut2 = pInp; - uint num = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - *pOut = *pInp; - pOut++; - } - if((*pInp) & BT_CUDA_PAIR_ANY_FLG) - { - *pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG); - pOut2++; - num++; - } - } - pPairBuffStartCurr[index] = make_uint2(start, num); -} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D() - -unsigned int gNumPairsAdded = 0; +//-------------------------------------------------------------------------- void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher) { - gNumPairsAdded = 0; - for(uint i = 0; i < m_numParticles; i++) + m_numPairsAdded = 0; + m_numPairsRemoved = 0; + for(int i = 0; i < m_numHandles; i++) { unsigned int num = m_hPairScan[i+1] - m_hPairScan[i]; if(!num) @@ -1523,22 +330,163 @@ void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher) continue; } unsigned int* pInp = m_hPairOut + m_hPairScan[i]; - unsigned int index0 = i; + unsigned int index0 = m_hAABB[i * 2].uw; btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0]; - for(uint j = 0; j < num; j++) + for(unsigned int j = 0; j < num; j++) { unsigned int indx1_s = pInp[j]; unsigned int index1 = indx1_s & (~BT_CUDA_PAIR_ANY_FLG); - btSimpleBroadphaseProxy* proxy1 = &m_pHandles[index1]; + btSimpleBroadphaseProxy* proxy1; + if(index1 < (unsigned int)m_maxHandles) + { + proxy1 = &m_pHandles[index1]; + } + else + { + index1 -= m_maxHandles; + btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles)); + proxy1 = &m_pLargeHandles[index1]; + } if(indx1_s & BT_CUDA_PAIR_NEW_FLG) { m_pairCache->addOverlappingPair(proxy0,proxy1); - gNumPairsAdded++; + m_numPairsAdded++; } else { m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + m_numPairsRemoved++; } } } } // btCudaBroadphase::addPairsToCacheCPU() + +//-------------------------------------------------------------------------- + +btBroadphaseProxy* btCudaBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) +{ + btBroadphaseProxy* proxy; + bool bIsLarge = isLargeProxy(aabbMin, aabbMax); + if(bIsLarge) + { + if (m_numLargeHandles >= m_maxLargeHandles) + { + btAssert(0); + return 0; //should never happen, but don't let the game crash ;-) + } + btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2])); + int newHandleIndex = allocLargeHandle(); + proxy = new (&m_pLargeHandles[newHandleIndex])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy); + } + else + { + proxy = btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy); + } + return proxy; +} // btCudaBroadphase::createProxy() + +//-------------------------------------------------------------------------- + +void btCudaBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher) +{ + bool bIsLarge = isLargeProxy(proxy); + if(bIsLarge) + { + + btSimpleBroadphaseProxy* proxy0 = static_cast(proxy); + freeLargeHandle(proxy0); + // TODO : remove pair from cache on GPU as well !!! + // UPD: they will not be used anyway, so don't waste time + m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher); + } + else + { + btSimpleBroadphase::destroyProxy(proxy, dispatcher); + } + return; +} // btCudaBroadphase::destroyProxy() + +//-------------------------------------------------------------------------- + +bool btCudaBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax) +{ + btVector3 diag = aabbMax - aabbMin; + btScalar radius = diag.length() * btScalar(0.5f); + + radius *= m_cellFactorAABB; // user-defined factor + + return (radius > m_maxRadius); +} // btCudaBroadphase::isLargeProxy() + +//-------------------------------------------------------------------------- + +bool btCudaBroadphase::isLargeProxy(btBroadphaseProxy* proxy) +{ + return (proxy->getUid() >= (m_maxHandles+2)); +} // btCudaBroadphase::isLargeProxy() + +//-------------------------------------------------------------------------- + +void btCudaBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher) +{ + int i,j; + if (m_numLargeHandles <= 0) + { + return; + } + int new_largest_index = -1; + for(i = 0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i]; + if(!proxy0->m_clientObject) + { + continue; + } + new_largest_index = i; + for(j = i + 1; j <= m_LastLargeHandleIndex; j++) + { + btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[j]; + if(!proxy1->m_clientObject) + { + continue; + } + btAssert(proxy0 != proxy1); + btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0); + btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1); + if(aabbOverlap(p0,p1)) + { + if (!m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->addOverlappingPair(proxy0,proxy1); + } + } + else + { + if(m_pairCache->findPair(proxy0,proxy1)) + { + m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher); + } + } + } + } + m_LastLargeHandleIndex = new_largest_index; + return; +} // btCudaBroadphase::addLarge2LargePairsToCache() + +//-------------------------------------------------------------------------- + +void btCudaBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback) +{ + btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback); + for (int i=0; i <= m_LastLargeHandleIndex; i++) + { + btSimpleBroadphaseProxy* proxy = &m_pLargeHandles[i]; + if(!proxy->m_clientObject) + { + continue; + } + rayCallback.process(proxy); + } +} // btCudaBroadphase::rayTest() + +//-------------------------------------------------------------------------- diff --git a/Extras/CUDA/btCudaBroadphase.cu b/Extras/CUDA/btCudaBroadphase.cu new file mode 100644 index 000000000..e21807af8 --- /dev/null +++ b/Extras/CUDA/btCudaBroadphase.cu @@ -0,0 +1,589 @@ +/* + * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO USER: + * + * This source code is subject to NVIDIA ownership rights under U.S. and + * international Copyright laws. + * + * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE + * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR + * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOURCE CODE. + * + * U.S. Government End Users. This source code is a "commercial item" as + * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + * "commercial computer software" and "commercial computer software + * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) + * and is provided to the U.S. Government only as a commercial end item. + * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through + * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the + * source code with only those rights set forth herein. + */ + +#include +#include +#include + +#include "cutil_math.h" +#include "math_constants.h" + +#if defined(__APPLE__) || defined(MACOSX) +#include +#else +#include +#endif + +#include + +#include "btCudaBroadphaseKernel.h" +//#include "radixsort.cu" + + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + +#ifdef __DEVICE_EMULATION__ + #define B_CUDA_USE_TEX 0 +#else + #define B_CUDA_USE_TEX 1 +#endif + +__device__ inline btCuda3F1U tex_fetch3F1U(float4 a) { return *((btCuda3F1U*)(&a)); } + +#if B_CUDA_USE_TEX + #define FETCH(t, i) tex_fetch3F1U(tex1Dfetch(t##Tex, i)) +#else + #define FETCH(t, i) t[i] +#endif + +texture particleHashTex; +texture cellStartTex; +texture pAABBTex; + +//---------------------------------------------------------------------------------------- + +__constant__ btCudaBroadphaseParams params; + +//---------------------------------------------------------------------------------------- + +// calculate position in uniform grid +__device__ int3 btCuda_calcGridPos(float4 p) +{ + int3 gridPos; + gridPos.x = floor((p.x - params.m_worldOriginX) / params.m_cellSizeX); + gridPos.y = floor((p.y - params.m_worldOriginY) / params.m_cellSizeY); + gridPos.z = floor((p.z - params.m_worldOriginZ) / params.m_cellSizeZ); + return gridPos; +} + +//---------------------------------------------------------------------------------------- + +// calculate address in grid from position (clamping to edges) +__device__ uint btCuda_calcGridHash(int3 gridPos) +{ + gridPos.x = max(0, min(gridPos.x, params.m_gridSizeX - 1)); + gridPos.y = max(0, min(gridPos.y, params.m_gridSizeY - 1)); + gridPos.z = max(0, min(gridPos.z, params.m_gridSizeZ - 1)); + return __mul24(__mul24(gridPos.z, params.m_gridSizeY), params.m_gridSizeX) + __mul24(gridPos.y, params.m_gridSizeX) + gridPos.x; +} + +//---------------------------------------------------------------------------------------- + +// calculate grid hash value for each body using its AABB +__global__ void calcHashAABBD(btCuda3F1U* pAABB, uint2* pHash, uint numBodies) +{ + int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + btCuda3F1U bbMin = pAABB[index*2]; + btCuda3F1U bbMax = pAABB[index*2 + 1]; + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = btCuda_calcGridPos(pos); + uint gridHash = btCuda_calcGridHash(gridPos); + // store grid hash and body index + pHash[index] = make_uint2(gridHash, index); +} + +//---------------------------------------------------------------------------------------- + +__global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies) +{ + int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + // Load hash data into shared memory so that we can look + // at neighboring body's hash value without loading + // two hash values per thread + __shared__ uint sharedHash[257]; + sharedHash[threadIdx.x+1] = sortedData.x; + if((index > 0) && (threadIdx.x == 0)) + { + // first thread in block must load neighbor body hash + volatile uint2 prevData = pHash[index-1]; + sharedHash[0] = prevData.x; + } + __syncthreads(); + if((index == 0) || (sortedData.x != sharedHash[threadIdx.x])) + { + cellStart[sortedData.x] = index; + } +} + +//---------------------------------------------------------------------------------------- + +__device__ uint cudaTestAABBOverlap(btCuda3F1U min0, btCuda3F1U max0, btCuda3F1U min1, btCuda3F1U max1) +{ + return (min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && + (min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && + (min0.fz <= max1.fz)&& (min1.fz <= max0.fz); +} + +//---------------------------------------------------------------------------------------- + +__device__ void findPairsInCell(int3 gridPos, + uint index, + uint2* pHash, + uint* pCellStart, + btCuda3F1U* pAABB, + uint* pPairBuff, + uint2* pPairBuffStartCurr, + uint numBodies) +{ + if ( (gridPos.x < 0) || (gridPos.x > params.m_gridSizeX - 1) + || (gridPos.y < 0) || (gridPos.y > params.m_gridSizeY - 1) + || (gridPos.z < 0) || (gridPos.z > params.m_gridSizeZ - 1)) + { + return; + } + uint gridHash = btCuda_calcGridHash(gridPos); + // get start of bucket for this cell + uint bucketStart = pCellStart[gridHash]; + if (bucketStart == 0xffffffff) + { + return; // cell empty + } + // iterate over bodies in this cell + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + btCuda3F1U min0 = FETCH(pAABB, unsorted_indx*2); + btCuda3F1U max0 = FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + uint bucketEnd = bucketStart + params.m_maxBodiesPerCell; + bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd; + for(uint index2 = bucketStart; index2 < bucketEnd; index2++) + { + uint2 cellData = pHash[index2]; + if (cellData.x != gridHash) + { + break; // no longer in same bucket + } + uint unsorted_indx2 = cellData.y; + if (unsorted_indx2 < unsorted_indx) // check not colliding with self + { + btCuda3F1U min1 = FETCH(pAABB, unsorted_indx2*2); + btCuda3F1U max1 = FETCH(pAABB, unsorted_indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint handleIndex2 = min1.uw; + uint k; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG; + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + curr++; + } + } + } + } + pPairBuffStartCurr[handleIndex] = make_uint2(start, curr); + return; +} + +//---------------------------------------------------------------------------------------- + +__global__ void +findOverlappingPairsD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, + uint2* pPairBuffStartCurr, uint numBodies) +{ + int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + btCuda3F1U bbMin = FETCH(pAABB, unsorted_indx*2); + btCuda3F1U bbMax = FETCH(pAABB, unsorted_indx*2 + 1); + float4 pos; + pos.x = (bbMin.fx + bbMax.fx) * 0.5f; + pos.y = (bbMin.fy + bbMax.fy) * 0.5f; + pos.z = (bbMin.fz + bbMax.fz) * 0.5f; + // get address in grid + int3 gridPos = btCuda_calcGridPos(pos); + // examine only neighbouring cells + for(int z=-1; z<=1; z++) { + for(int y=-1; y<=1; y++) { + for(int x=-1; x<=1; x++) { + findPairsInCell(gridPos + make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies); + } + } + } +} + +//---------------------------------------------------------------------------------------- + +__global__ void +findPairsLargeD( btCuda3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, + uint2* pPairBuffStartCurr, uint numBodies, uint numLarge) +{ + int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + uint2 sortedData = pHash[index]; + uint unsorted_indx = sortedData.y; + btCuda3F1U min0 = FETCH(pAABB, unsorted_indx*2); + btCuda3F1U max0 = FETCH(pAABB, unsorted_indx*2 + 1); + uint handleIndex = min0.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1]; + uint curr_max = start_curr_next.x - start - 1; + for(uint i = 0; i < numLarge; i++) + { + uint indx2 = numBodies + i; + btCuda3F1U min1 = FETCH(pAABB, indx2*2); + btCuda3F1U max1 = FETCH(pAABB, indx2*2 + 1); + if(cudaTestAABBOverlap(min0, max0, min1, max1)) + { + uint k; + uint handleIndex2 = min1.uw; + for(k = 0; k < curr; k++) + { + uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); + if(old_pair == handleIndex2) + { + pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; + break; + } + } + if(k == curr) + { + pPairBuff[start+curr] = handleIndex2 | BT_CUDA_PAIR_NEW_FLG; + if(curr >= curr_max) + { // not a good solution, but let's avoid crash + break; + } + curr++; + } + } + } + pPairBuffStartCurr[handleIndex] = make_uint2(start, curr); + return; +} + +//---------------------------------------------------------------------------------------- + +__global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, btCuda3F1U* pAABB, uint numBodies) +{ + int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + btCuda3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint *pInp = pPairBuff + start; + uint num_changes = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) + { + num_changes++; + } + } + pPairScan[index+1] = num_changes; +} + +//---------------------------------------------------------------------------------------- + +__global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, btCuda3F1U* pAABB, uint numBodies) +{ + int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; + if(index >= numBodies) + { + return; + } + btCuda3F1U bbMin = pAABB[index * 2]; + uint handleIndex = bbMin.uw; + uint2 start_curr = pPairBuffStartCurr[handleIndex]; + uint start = start_curr.x; + uint curr = start_curr.y; + uint* pInp = pPairBuff + start; + uint* pOut = pPairOut + pPairScan[index]; + uint* pOut2 = pInp; + uint num = 0; + for(uint k = 0; k < curr; k++, pInp++) + { + if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) + { + *pOut = *pInp; + pOut++; + } + if((*pInp) & BT_CUDA_PAIR_ANY_FLG) + { + *pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG); + pOut2++; + num++; + } + } + pPairBuffStartCurr[handleIndex] = make_uint2(start, num); +} // squeezeOverlappingPairBuffD() + + +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +// E N D O F K E R N E L F U N C T I O N S +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- +//---------------------------------------------------------------------------------------- + + +//! Check for CUDA error +# define CUT_CHECK_ERROR(errorMessage) do { \ + cudaError_t err = cudaGetLastError(); \ + if( cudaSuccess != err) { \ + fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ + errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ + btCuda_exit(EXIT_FAILURE); \ + } \ + err = cudaThreadSynchronize(); \ + if( cudaSuccess != err) { \ + fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ + errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ + btCuda_exit(EXIT_FAILURE); \ + } } while (0) + + +# define MY_CUDA_SAFE_CALL_NO_SYNC( call) do { \ + cudaError err = call; \ + if( cudaSuccess != err) { \ + fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ + __FILE__, __LINE__, cudaGetErrorString( err) ); \ + btCuda_exit(EXIT_FAILURE); \ + } } while (0) + +# define MY_CUDA_SAFE_CALL( call) do { \ + MY_CUDA_SAFE_CALL_NO_SYNC(call); \ + cudaError err = cudaThreadSynchronize(); \ + if( cudaSuccess != err) { \ + fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \ + __FILE__, __LINE__, cudaGetErrorString( err) ); \ + btCuda_exit(EXIT_FAILURE); \ + } } while (0) + + +extern "C" +{ + +void btCuda_exit(int val) +{ + exit(val); +} + +void btCuda_allocateArray(void** devPtr, unsigned int size) +{ + MY_CUDA_SAFE_CALL(cudaMalloc(devPtr, size)); +} + +void btCuda_freeArray(void* devPtr) +{ + MY_CUDA_SAFE_CALL(cudaFree(devPtr)); +} + +void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size) +{ + MY_CUDA_SAFE_CALL(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost)); +} + +void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size) +{ + MY_CUDA_SAFE_CALL(cudaMemcpy((char*)device, host, size, cudaMemcpyHostToDevice)); +} + +void btCuda_setParameters(btCudaBroadphaseParams* hostParams) +{ + // copy parameters to constant memory + MY_CUDA_SAFE_CALL(cudaMemcpyToSymbol(params, hostParams, sizeof(btCudaBroadphaseParams))); +} + +//Round a / b to nearest higher integer value +int btCuda_iDivUp(int a, int b) +{ + return (a % b != 0) ? (a / b + 1) : (a / b); +} + +// compute grid and thread block size for a given number of elements +void btCuda_computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads) +{ + numThreads = min(blockSize, n); + numBlocks = btCuda_iDivUp(n, numThreads); +} + +void btCuda_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies) +{ + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 256, numBlocks, numThreads); + // execute the kernel + calcHashAABBD<<< numBlocks, numThreads >>>(pAABB, (uint2*)hash, numBodies); + // check if kernel invocation generated an error + CUT_CHECK_ERROR("calcHashAABBD kernel execution failed"); +} + +void btCuda_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells) +{ + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 256, numBlocks, numThreads); + MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint))); + findCellStartD<<< numBlocks, numThreads >>>((uint2*)hash, (uint*)cellStart, numBodies); + CUT_CHECK_ERROR("Kernel execution failed: findCellStartD"); +} + +void btCuda_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash, + unsigned int* pCellStart, + unsigned int* pPairBuff, + unsigned int* pPairBuffStartCurr, + unsigned int numBodies) +{ +#if B_CUDA_USE_TEX + MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(btCuda3F1U))); +#endif + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 64, numBlocks, numThreads); + findOverlappingPairsD<<< numBlocks, numThreads >>>( + pAABB, + (uint2*)pHash, + (uint*)pCellStart, + (uint*)pPairBuff, + (uint2*)pPairBuffStartCurr, + numBodies + ); + CUT_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD"); +#if B_CUDA_USE_TEX + MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif + } // btCuda_findOverlappingPairs() + + + +void btCuda_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash, + unsigned int* pCellStart, + unsigned int* pPairBuff, + unsigned int* pPairBuffStartCurr, + unsigned int numBodies, + unsigned int numLarge) +{ +#if B_CUDA_USE_TEX + MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(btCuda3F1U))); +#endif + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 64, numBlocks, numThreads); + findPairsLargeD<<< numBlocks, numThreads >>>( + pAABB, + (uint2*)pHash, + (uint*)pCellStart, + (uint*)pPairBuff, + (uint2*)pPairBuffStartCurr, + numBodies, + numLarge + ); + CUT_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD"); +#if B_CUDA_USE_TEX + MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex)); +#endif + } // btCuda_findPairsLarge() + + + +void btCuda_computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, + unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies) +{ + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 256, numBlocks, numThreads); + computePairCacheChangesD<<< numBlocks, numThreads >>>( + (uint*)pPairBuff, + (uint2*)pPairBuffStartCurr, + (uint*)pPairScan, + pAABB, + numBodies + ); + CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); + } // btCuda_computePairCacheChanges() + + +void btCuda_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, + unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies) +{ + int numThreads, numBlocks; + btCuda_computeGridSize(numBodies, 256, numBlocks, numThreads); + squeezeOverlappingPairBuffD<<< numBlocks, numThreads >>>( + (uint*)pPairBuff, + (uint2*)pPairBuffStartCurr, + (uint*)pPairScan, + (uint*)pPairOut, + pAABB, + numBodies + ); + CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); +} // btCuda_squeezeOverlappingPairBuff() + + +} // extern "C" diff --git a/Extras/CUDA/btCudaBroadphase.h b/Extras/CUDA/btCudaBroadphase.h index f03a1ea25..1cdc3b790 100644 --- a/Extras/CUDA/btCudaBroadphase.h +++ b/Extras/CUDA/btCudaBroadphase.h @@ -18,169 +18,95 @@ subject to the following restrictions: #include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" +#include "btCudaBroadphaseKernel.h" + + ///The btCudaBroadphase uses CUDA to compute overlapping pairs using a GPU. class btCudaBroadphase : public btSimpleBroadphase { - - - - bool m_bInitialized; - int m_numParticles; - - // CPU data - float* m_hPos; - float* m_hVel; - float* m_hSortedPos; - - unsigned int* m_hGridCounters; - unsigned int* m_hGridCells; - - unsigned int* m_hParticleHash; - unsigned int* m_hCellStart; - + bool m_bInitialized; + unsigned int m_numBodies; + unsigned int m_numCells; + unsigned int m_maxPairsPerBody; + btScalar m_cellFactorAABB; + // CPU data + unsigned int* m_hBodiesHash; + unsigned int* m_hCellStart; unsigned int* m_hPairBuffStartCurr; - float* m_hAABB; - + btCuda3F1U* m_hAABB; unsigned int* m_hPairBuff; unsigned int* m_hPairScan; unsigned int* m_hPairOut; - // GPU data - float* m_dPos[2]; - float* m_dVel[2]; - - float* m_dSortedPos; - float* m_dSortedVel; - - // uniform grid data - unsigned int* m_dGridCounters; // counts number of entries per grid cell - unsigned int* m_dGridCells; // contains indices of up to "m_maxParticlesPerCell" particles per cell - - unsigned int* m_dParticleHash[2]; - unsigned int* m_dCellStart; - - unsigned int m_posVbo[2]; - unsigned int m_colorVBO; - - unsigned int m_currentPosRead, m_currentVelRead; - unsigned int m_currentPosWrite, m_currentVelWrite; - - // buffers on GPU + unsigned int* m_dBodiesHash[2]; + unsigned int* m_dCellStart; unsigned int* m_dPairBuff; unsigned int* m_dPairBuffStartCurr; - float* m_dAABB; - + btCuda3F1U* m_dAABB; unsigned int* m_dPairScan; unsigned int* m_dPairOut; - - // params - struct SimParams& m_simParams; - - - - unsigned int m_maxParticlesPerCell; + unsigned int m_maxBodiesPerCell; + btCudaBroadphaseParams m_params; + btScalar m_maxRadius; +// large proxies + int m_numLargeHandles; + int m_maxLargeHandles; + int m_LastLargeHandleIndex; + btSimpleBroadphaseProxy* m_pLargeHandles; + void* m_pLargeHandlesRawPtr; + int m_firstFreeLargeHandle; + int allocLargeHandle() + { + btAssert(m_numLargeHandles < m_maxLargeHandles); + int freeLargeHandle = m_firstFreeLargeHandle; + m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree(); + m_numLargeHandles++; + if(freeLargeHandle > m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex = freeLargeHandle; + } + return freeLargeHandle; + } + void freeLargeHandle(btSimpleBroadphaseProxy* proxy) + { + int handle = int(proxy - m_pLargeHandles); + btAssert((handle >= 0) && (handle < m_maxHandles)); + if(handle == m_LastLargeHandleIndex) + { + m_LastLargeHandleIndex--; + } + proxy->SetNextFree(m_firstFreeLargeHandle); + m_firstFreeLargeHandle = handle; + proxy->m_clientObject = 0; + m_numLargeHandles--; + } + bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax); + bool isLargeProxy(btBroadphaseProxy* proxy); // debug unsigned int m_numPairsAdded; - unsigned int m_maxPairsPerParticle; + unsigned int m_numPairsRemoved; unsigned int m_numOverflows; - -protected: - - unsigned int createVBO(unsigned int size); - - void _initialize(int numParticles); - - void _finalize(); - - - +// public: - - enum ParticleArray - { - POSITION, - VELOCITY, - }; - - enum ParticleConfig - { - CONFIG_RANDOM, - CONFIG_GRID, - _NUM_CONFIGS - }; - - btCudaBroadphase(SimParams& simParams,int maxProxies); - + btCudaBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, + int gridSizeX, int gridSizeY, int gridSizeZ, + int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody, + int maxBodiesPerCell = 8, + btScalar cellFactorAABB = btScalar(1.0f)); virtual ~btCudaBroadphase(); - - void initGrid(unsigned int* size, float spacing, float jitter, unsigned int numParticles); - - void reset(ParticleConfig config); - - void setArray(ParticleArray array, const float* data, int start, int count); - - float* getArray(ParticleArray array); - - void addSphere(int start, float *pos, float *vel, int r, float spacing); - virtual void calculateOverlappingPairs(btDispatcher* dispatcher); - unsigned int getCurrentReadBuffer() const { return m_posVbo[m_currentPosRead]; } - unsigned int getColorBuffer() const { return m_colorVBO; } - void dumpParticles(unsigned int start, unsigned int count); - void dumpGrid(); - - float* copyBuffersFromDeviceToHost(); - void copyBuffersFromHostToDevice(); - float* getHvelPtr(); - float* getHposPtr(); - void quickHack(float deltaTime); - void quickHack2(); - void integrate(); - - - void findOverlappingPairs(btDispatcher* dispatcher); - - int3 calcGridPosCPU(float4 p); - uint calcGridHashCPU(int3 gridPos); - - void computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles); - void computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan); - - void findOverlappingPairsCPU( float* pAABB, - uint* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint* pPairBuffStartCurr, - uint numParticles); - void findOverlappingPairsCPU_D( uint index, - float4* pAABB, - uint2* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles); - -void findPairsInCellCPU(int3 gridPos, - uint index, - uint2* pParticleHash, - uint* pCellStart, - float4* pAABB, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles); -uint cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1); + virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy); + virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher); + virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback); +protected: + void _initialize(); + void _finalize(); void scanOverlappingPairBuffCPU(); - - void squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles); - void squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut); - void addPairsToCacheCPU(btDispatcher* dispatcher); - void resetOverlappingPairBuffCPU(); - - + void addLarge2LargePairsToCache(btDispatcher* dispatcher); }; #endif //CUDA_BROADPHASE_H \ No newline at end of file diff --git a/Extras/CUDA/btCudaBroadphase.sln b/Extras/CUDA/btCudaBroadphase.sln deleted file mode 100644 index 74eebc4e4..000000000 --- a/Extras/CUDA/btCudaBroadphase.sln +++ /dev/null @@ -1,113 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 9.00 -# Visual Studio 2005 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "btCudaBroadphase", "btCudaBroadphase.vcproj", "{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}" - ProjectSection(ProjectDependencies) = postProject - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} - {61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319} - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletmath", "..\..\msvc\8\libbulletmath.vcproj", "{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletdynamics", "..\..\msvc\8\libbulletdynamics.vcproj", "{61BD1097-CF2E-B296-DAA9-73A6FE135319}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcollision", "..\..\msvc\8\libbulletcollision.vcproj", "{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - DebugDoublePrecision|Win32 = DebugDoublePrecision|Win32 - DebugDoublePrecision|x64 = DebugDoublePrecision|x64 - EmuDebug|Win32 = EmuDebug|Win32 - EmuDebug|x64 = EmuDebug|x64 - EmuRelease|Win32 = EmuRelease|Win32 - EmuRelease|x64 = EmuRelease|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - ReleaseDoublePrecision|Win32 = ReleaseDoublePrecision|Win32 - ReleaseDoublePrecision|x64 = ReleaseDoublePrecision|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.ActiveCfg = Debug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.Build.0 = Debug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.Build.0 = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|Win32.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.ActiveCfg = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.Build.0 = Debug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.ActiveCfg = EmuDebug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.Build.0 = EmuDebug|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.ActiveCfg = EmuDebug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.Build.0 = EmuDebug|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.ActiveCfg = EmuRelease|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.Build.0 = EmuRelease|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.ActiveCfg = EmuRelease|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.Build.0 = EmuRelease|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.ActiveCfg = Release|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.Build.0 = Release|Win32 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.Build.0 = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|Win32.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.ActiveCfg = Release|x64 - {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.Build.0 = Release|x64 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.Build.0 = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|x64.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.Build.0 = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.Build.0 = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|x64.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.Build.0 = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|x64.ActiveCfg = Release|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.Build.0 = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|x64.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.Build.0 = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.Build.0 = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|x64.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.Build.0 = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|x64.ActiveCfg = Release|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.Build.0 = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|x64.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.Build.0 = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|x64.ActiveCfg = Debug|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.Build.0 = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|x64.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.Build.0 = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|x64.ActiveCfg = Release|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 - {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/Extras/CUDA/btCudaBroadphaseKernel.h b/Extras/CUDA/btCudaBroadphaseKernel.h new file mode 100644 index 000000000..5548c076b --- /dev/null +++ b/Extras/CUDA/btCudaBroadphaseKernel.h @@ -0,0 +1,84 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Keep this file free from Bullet headers +// it is included into CUDA program +//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +#ifndef CUDA_BROADPHASE_KERNEL_H +#define CUDA_BROADPHASE_KERNEL_H + +#define CUDA_BROADPHASE_USE_CUDA 1 + +#define BT_CUDA_PAIR_FOUND_FLG (0x40000000) +#define BT_CUDA_PAIR_NEW_FLG (0x20000000) +#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG) + +struct btCudaBroadphaseParams +{ + unsigned int m_gridSizeX; + unsigned int m_gridSizeY; + unsigned int m_gridSizeZ; + unsigned int m_numCells; + float m_worldOriginX; + float m_worldOriginY; + float m_worldOriginZ; + float m_cellSizeX; + float m_cellSizeY; + float m_cellSizeZ; + unsigned int m_numBodies; + unsigned int m_maxBodiesPerCell; +}; + +struct btCuda3F1U +{ + float fx; + float fy; + float fz; + unsigned int uw; +}; + + +extern "C" +{ + + void btCuda_allocateArray(void** devPtr, unsigned int size); + void btCuda_freeArray(void* devPtr); + void btCuda_copyArrayFromDevice(void* host, const void* device, unsigned int size); + void btCuda_copyArrayToDevice(void* device, const void* host, unsigned int size); + void btCuda_setParameters(btCudaBroadphaseParams* hostParams); + void btCuda_calcHashAABB(btCuda3F1U* pAABB, unsigned int* hash, unsigned int numBodies); + void btCuda_findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells); + void btCuda_findOverlappingPairs( btCuda3F1U* pAABB, unsigned int* pHash, + unsigned int* pCellStart, + unsigned int* pPairBuff, + unsigned int* pPairBuffStartCurr, + unsigned int numBodies); + void btCuda_findPairsLarge( btCuda3F1U* pAABB, unsigned int* pHash, + unsigned int* pCellStart, + unsigned int* pPairBuff, + unsigned int* pPairBuffStartCurr, + unsigned int numBodies, + unsigned int numLarge); + + void btCuda_computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, + unsigned int* pPairScan, btCuda3F1U* pAABB, unsigned int numBodies); + void btCuda_squeezeOverlappingPairBuff( unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, + unsigned int* pPairOut, btCuda3F1U* pAABB, unsigned int numBodies); +} + + +#endif // CUDA_BROADPHASE_KERNEL_H \ No newline at end of file diff --git a/Extras/CUDA/libbulletcuda.vcproj b/Extras/CUDA/libbulletcuda.vcproj new file mode 100644 index 000000000..53a6d1f6a --- /dev/null +++ b/Extras/CUDA/libbulletcuda.vcproj @@ -0,0 +1,593 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Extras/CUDA/particleSystem.cpp b/Extras/CUDA/particleSystem.cpp index d508b80a8..6ce9da8a0 100644 --- a/Extras/CUDA/particleSystem.cpp +++ b/Extras/CUDA/particleSystem.cpp @@ -32,6 +32,7 @@ #include "radixsort.cuh" #include "particles_kernel.cuh" +//#include #include #include @@ -42,6 +43,7 @@ #include #include +#include "../../Demos/OpenGL/GLDebugDrawer.h" #include "btCudaBroadphase.h" @@ -50,22 +52,609 @@ #define CUDART_PI_F 3.141592654f #endif +#define USE_BULLET 1 + +#define VEL_DIR_FACT (30.0F) +#define ACC_DIR_FACT (VEL_DIR_FACT*VEL_DIR_FACT) +#define VEL_INV_FACT (1.0F/VEL_DIR_FACT) +#define ACC_INV_FACT (1.0F/ACC_DIR_FACT) + +GLDebugDrawer debugDrawer; + ParticleSystem::ParticleSystem(uint numParticles, uint3 gridSize) : - m_simulationMode(SIMULATION_BULLET_CPU)//SIMULATION_CUDA) + m_bInitialized(false), + m_numParticles(numParticles), + m_hPos(0), + m_hVel(0), + m_currentPosRead(0), + m_currentVelRead(0), + m_currentPosWrite(1), + m_currentVelWrite(1), + m_gridSize(gridSize), + m_maxParticlesPerCell(4), + m_timer(0), + m_solverIterations(1), +// m_simulationMode(SIMULATION_CUDA) + m_simulationMode(SIMULATION_BULLET_CPU) { - this->m_params.numBodies = numParticles; - this->m_params.m_gridSize = gridSize; + m_dPos[0] = m_dPos[1] = 0; + m_dVel[0] = m_dVel[1] = 0; + + m_numGridCells = m_gridSize.x*m_gridSize.y*m_gridSize.z; + float3 worldSize = make_float3(2.0f, 2.0f, 2.0f); + + // set simulation parameters + m_params.gridSize = m_gridSize; + m_params.numCells = m_numGridCells; + m_params.numBodies = m_numParticles; + m_params.maxParticlesPerCell = m_maxParticlesPerCell; + + m_params.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f); + m_params.cellSize = make_float3(worldSize.x / m_gridSize.x, worldSize.y / m_gridSize.y, worldSize.z / m_gridSize.z); + + m_params.particleRadius = m_params.cellSize.x * 0.5f; + m_params.colliderPos = make_float4(0.0f, -0.7f, 0.0f, 1.0f); + m_params.colliderRadius = 0.2f; + + m_params.spring = 0.5f; + m_params.damping = 0.02f; + m_params.shear = 0.1f; + m_params.attraction = 0.0f; + m_params.boundaryDamping = -0.5f; + + m_params.gravity = make_float3(0.0f, -0.0003f, 0.0f); + m_params.globalDamping = 1.0f; + + _initialize(numParticles); + +#if USE_BULLET initializeBullet(); +#endif } ParticleSystem::~ParticleSystem() { +#if USE_BULLET finalizeBullet(); +#endif + _finalize(); + m_numParticles = 0; +} + +uint +ParticleSystem::createVBO(uint size) +{ + GLuint vbo; + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + registerGLBufferObject(vbo); + return vbo; +} + +inline float lerp(float a, float b, float t) +{ + return a + t*(b-a); +} + +void colorRamp(float t, float *r) +{ + const int ncolors = 7; + float c[ncolors][3] = { + { 1.0, 0.0, 0.0, }, + { 1.0, 0.5, 0.0, }, + { 1.0, 1.0, 0.0, }, + { 0.0, 1.0, 0.0, }, + { 0.0, 1.0, 1.0, }, + { 0.0, 0.0, 1.0, }, + { 1.0, 0.0, 1.0, }, + }; + t = t * (ncolors-1); + int i = (int) t; + float u = t - floor(t); + r[0] = lerp(c[i][0], c[i+1][0], u); + r[1] = lerp(c[i][1], c[i+1][1], u); + r[2] = lerp(c[i][2], c[i+1][2], u); +} + +void +ParticleSystem::_initialize(int numParticles) +{ + assert(!m_bInitialized); + + m_numParticles = numParticles; + + // allocate host storage + m_hPos = new float[m_numParticles*4]; + m_hVel = new float[m_numParticles*4]; + memset(m_hPos, 0, m_numParticles*4*sizeof(float)); + memset(m_hVel, 0, m_numParticles*4*sizeof(float)); + + m_hGridCounters = new uint[m_numGridCells]; + m_hGridCells = new uint[m_numGridCells*m_maxParticlesPerCell]; + memset(m_hGridCounters, 0, m_numGridCells*sizeof(uint)); + memset(m_hGridCells, 0, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); + + m_hParticleHash = new uint[m_numParticles*2]; + memset(m_hParticleHash, 0, m_numParticles*2*sizeof(uint)); + + m_hCellStart = new uint[m_numGridCells]; + memset(m_hCellStart, 0, m_numGridCells*sizeof(uint)); + + // allocate GPU data + unsigned int memSize = sizeof(float) * 4 * m_numParticles; + + m_posVbo[0] = createVBO(memSize); + m_posVbo[1] = createVBO(memSize); + + allocateArray((void**)&m_dVel[0], memSize); + allocateArray((void**)&m_dVel[1], memSize); + + allocateArray((void**)&m_dSortedPos, memSize); + allocateArray((void**)&m_dSortedVel, memSize); + +#if USE_SORT + allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint)); + allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint)); + allocateArray((void**)&m_dCellStart, m_numGridCells*sizeof(uint)); +#else + allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint)); + allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); +#endif + + m_colorVBO = createVBO(m_numParticles*4*sizeof(float)); + +#if 1 + // fill color buffer + glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO); + float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + float *ptr = data; + for(uint i=0; i 1.0f - m_params.particleRadius) { pos.x = 1.0f - m_params.particleRadius; vel.x *= m_params.boundaryDamping; } +// if (pos.x < -1.0f + m_params.particleRadius) { pos.x = -1.0f + m_params.particleRadius; vel.x *= m_params.boundaryDamping;} +// if (pos.y > 1.0f - m_params.particleRadius) { pos.y = 1.0f - m_params.particleRadius; vel.y *= m_params.boundaryDamping; } +// if (pos.y < -1.0f + m_params.particleRadius) { pos.y = -1.0f + m_params.particleRadius; vel.y *= m_params.boundaryDamping;} +// if (pos.z > 1.0f - m_params.particleRadius) { pos.z = 1.0f - m_params.particleRadius; vel.z *= m_params.boundaryDamping; } +// if (pos.z < -1.0f + m_params.particleRadius) { pos.z = -1.0f + m_params.particleRadius; vel.z *= m_params.boundaryDamping;} + btTransform& trans = m_bulletParticles[i]->getWorldTransform(); + trans.setOrigin(btVector3(pos.x, pos.y, pos.z)); + m_bulletParticles[i]->setLinearVelocity(btVector3(vel.x, vel.y, vel.z)*btScalar(VEL_DIR_FACT)); + m_bulletParticles[i]->setAngularVelocity(btVector3(0,0,0)); + } + + glUnmapBufferARB(GL_ARRAY_BUFFER); + + std::swap(m_currentPosRead, m_currentPosWrite); + std::swap(m_currentVelRead, m_currentVelWrite); + + btTransform& collTrans = m_bulletCollider->getWorldTransform(); + collTrans.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z)); + + m_dynamicsWorld->stepSimulation(deltaTime); + + glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]); + hPos = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY); + + //sync transform and velocity from Bullet to particle system + for (uint i=0;igetWorldTransform(); + hPos[i*4] = trans.getOrigin().getX(); + hPos[i*4+1] = trans.getOrigin().getY(); + hPos[i*4+2] = trans.getOrigin().getZ(); + hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX() * VEL_INV_FACT; + hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY() * VEL_INV_FACT; + hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ() * VEL_INV_FACT; + } + copyBuffersFromHostToDevice(); + + collTrans = m_bulletCollider->getWorldTransform(); + m_params.colliderPos.x = collTrans.getOrigin().getX(); + m_params.colliderPos.y = collTrans.getOrigin().getY(); + m_params.colliderPos.z = collTrans.getOrigin().getZ(); + +} + + + +void +ParticleSystem::updateCuda(float deltaTime) +{ +#ifndef BT_NO_PROFILE + CProfileManager::Reset(); +#endif //BT_NO_PROFILE + BT_PROFILE("update CUDA"); + // update constants + setParameters(&m_params); + + // integrate + { + BT_PROFILE("integrate"); + integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite], + m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite], + deltaTime, + m_numParticles); + } + std::swap(m_currentPosRead, m_currentPosWrite); + std::swap(m_currentVelRead, m_currentVelWrite); + +#if USE_SORT + // sort and search method + + // calculate hash + { + BT_PROFILE("calcHash"); + calcHash(m_posVbo[m_currentPosRead], + m_dParticleHash[0], + m_numParticles); + } + +#if DEBUG_GRID + copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); + printf("particle hash:\n"); + for(uint i=0; i maxPerCell) + maxPerCell = m_hGridCounters[i]; + if (m_hGridCounters[i] > 0) { + printf("%d (%d): ", i, m_hGridCounters[i]); + for(uint j=0; jsetGravity(100*btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z)); +// m_dynamicsWorld->setGravity(100*btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z)); + m_dynamicsWorld->setGravity(btScalar(ACC_DIR_FACT) * btVector3(m_params.gravity.x,m_params.gravity.y,m_params.gravity.z)); m_dynamicsWorld->getSolverInfo().m_numIterations=1; - btBoxShape* worldBox = new btBoxShape(btVector3(m_params.worldSize.x/2,m_params.worldSize.y/2,m_params.worldSize.z/2)); - worldBox->setMargin(0.f); - - //create 6 static planes for the world cube - btStaticPlaneShape* planeShape; btRigidBody* body; - btVector3 worldSize(); - int i; + btCollisionShape* boxShape = new btBoxShape(btVector3(btScalar(1.2),btScalar(0.05),btScalar(1.2))); +// boxShape->setMargin(0.03f); - + btScalar mass(0.); + btVector3 localInertia(0,0,0); + btRigidBody::btRigidBodyConstructionInfo boxRbcInfo(mass, 0, boxShape, localInertia); + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, -1.05f,0)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 1.05f,0)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(-1.05f, 0, 0)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(0, 0, SIMD_HALF_PI); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(1.05f, 0, 0)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, -1.05f)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + boxRbcInfo.m_startWorldTransform.setIdentity(); + boxRbcInfo.m_startWorldTransform.getBasis().setEulerZYX(SIMD_HALF_PI, 0, 0); + boxRbcInfo.m_startWorldTransform.setOrigin(btVector3(0, 0, 1.05f)); + boxRbcInfo.m_friction = 0.0f; + body = new btRigidBody(boxRbcInfo); + m_dynamicsWorld->addRigidBody(body); + + + + unsigned int i; btSphereShape* particleSphere = new btSphereShape(m_params.particleRadius); particleSphere->setMargin(0.0); - btVector3 localInertia; particleSphere->calculateLocalInertia(1,localInertia); - float* m_hPos = m_broadphase->getHposPtr(); + reset(CONFIG_GRID); for (i=0;iaddRigidBody(body); } - reset(CONFIG_GRID); + btSphereShape* colliderSphere = new btSphereShape(m_params.colliderRadius); + colliderSphere->setMargin(0.0); + colliderSphere->calculateLocalInertia(10., localInertia); + btRigidBody::btRigidBodyConstructionInfo rbci(5., 0, colliderSphere,localInertia); + rbci.m_startWorldTransform.setOrigin(btVector3(m_params.colliderPos.x, m_params.colliderPos.y, m_params.colliderPos.z)); + body = new btRigidBody(rbci); + body->setActivationState(DISABLE_DEACTIVATION); + m_bulletCollider = body; + m_dynamicsWorld->addRigidBody(body); /* for (i=0;i<6;i++) { @@ -130,7 +766,6 @@ void ParticleSystem::initializeBullet() m_dynamicsWorld->addRigidBody(body); } */ - } void ParticleSystem::finalizeBullet() @@ -142,139 +777,29 @@ void ParticleSystem::finalizeBullet() delete m_collisionConfiguration; } - - -void -ParticleSystem::update(float deltaTime) +float* ParticleSystem::copyBuffersFromDeviceToHost() { - assert(m_bInitialized); - - switch (m_simulationMode) - { - case SIMULATION_CUDA: - { - m_broadphase->quickHack(deltaTime); - //todo - break; - } - case SIMULATION_BULLET_CPU: - { - m_broadphase->integrate(); - - - ///copy particles from device to main memory - { - float* hPosData = m_broadphase->copyBuffersFromDeviceToHost(); - float* m_hVel = m_broadphase->getHvelPtr(); - m_broadphase->copyBuffersFromHostToDevice(); - - - //sync transform and velocity from particle system to Bullet - - for (int i=0;igetWorldTransform(); - trans.setOrigin(btVector3(hPosData[i*4],hPosData[i*4+1],hPosData[i*4+2])); - m_bulletParticles[i]->setLinearVelocity(btVector3(m_hVel[i*4],m_hVel[i*4+1],m_hVel[i*4+2])*10.); - } - } - - m_dynamicsWorld->stepSimulation(deltaTime); - -/* for (int i=0;icopyBuffersFromDeviceToHost(); - float* m_hVel = m_broadphase->getHvelPtr(); - - //sync transform and velocity from Bullet to particle system - for (int i=0;igetWorldTransform(); - hPosData[i*4] = trans.getOrigin().getX(); - hPosData[i*4+1] = trans.getOrigin().getY(); - hPosData[i*4+2] = trans.getOrigin().getZ(); - - m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.f; - m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.f; - m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.f; - } - - m_broadphase->copyBuffersFromHostToDevice(); - } - - break; - - } - - - - default: - { - printf("unknown simulation method\n"); - } - }; + copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*m_numParticles); + // fill color buffer + glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]); + float* hPosData = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY); + return hPosData; +} +void ParticleSystem::copyBuffersFromHostToDevice() +{ + glUnmapBufferARB(GL_ARRAY_BUFFER); + copyArrayToDevice(m_dVel[m_currentVelRead],m_hVel, 0, sizeof(float)*4*m_numParticles); } - -float* ParticleSystem::getArray(ParticleArray array) -{ - return m_broadphase->getArray((btCudaBroadphase::ParticleArray)array); - -} void ParticleSystem::debugDraw() { +#if USE_BULLET glDisable(GL_DEPTH_TEST); m_dynamicsWorld->debugDrawWorld(); glEnable(GL_DEPTH_TEST); +#endif } -void ParticleSystem::reset(ParticleConfig config) -{ - m_broadphase->reset((btCudaBroadphase::ParticleConfig)config); - for (int i=0;isetAngularVelocity(btVector3(0,0,0)); - } - -} - - -void ParticleSystem::addSphere(int start, float *pos, float *vel, int r, float spacing) -{ - m_broadphase->addSphere(start,pos,vel,r,spacing); -} - -unsigned int ParticleSystem::getCurrentReadBuffer() const -{ - return m_broadphase->getCurrentReadBuffer(); -} -unsigned int ParticleSystem::getColorBuffer() const -{ - return m_broadphase->getColorBuffer(); -} - -void ParticleSystem::dumpGrid() -{ - return m_broadphase->dumpGrid(); -} - -void ParticleSystem::dumpParticles(uint start, uint count) -{ - m_broadphase->dumpParticles(start,count); -} - -int ParticleSystem::getNumParticles() const -{ - return m_params.numBodies; -} \ No newline at end of file diff --git a/Extras/CUDA/particleSystem.cu b/Extras/CUDA/particleSystem.cu index 578be979d..e6430cfd2 100644 --- a/Extras/CUDA/particleSystem.cu +++ b/Extras/CUDA/particleSystem.cu @@ -41,7 +41,7 @@ #include #include "particles_kernel.cu" -#include "radixsort.cu" +//#include "radixsort.cu" //! Check for CUDA error # define CUT_CHECK_ERROR(errorMessage) do { \ @@ -77,9 +77,9 @@ } } while (0) + extern "C" { - void mm_exit(int val) { exit(val); @@ -87,7 +87,7 @@ void mm_exit(int val) void cudaInit(int argc, char **argv) { - //CUT_DEVICE_INIT(argc, argv); +// CUT_DEVICE_INIT(argc, argv); } void allocateArray(void **devPtr, size_t size) @@ -117,26 +117,6 @@ void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int s void copyArrayToDevice(void* device, const void* host, int offset, int size) { MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice)); -/* - cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice); - switch(err) - { - case cudaSuccess : - return; - case cudaErrorInvalidValue : - printf("\ncudaErrorInvalidValue : %d\n", err); - return; - case cudaErrorInvalidDevicePointer : - printf("\ncudaErrorInvalidDevicePointer : %d\n", err); - return; - case cudaErrorInvalidMemcpyDirection : - printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err); - return; - default : - printf("\nX3 : %d\n", err); - return; - } -*/ } void registerGLBufferObject(uint vbo) @@ -280,27 +260,6 @@ reorderDataAndFindCellStart(uint* particleHash, MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos)); } -#if 1 -void -findCellStart( uint* particleHash, - uint* cellStart, - uint numBodies, - uint numCells) -{ - int numThreads, numBlocks; - computeGridSize(numBodies, 256, numBlocks, numThreads); - - MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint))); - - findCellStartD<<< numBlocks, numThreads >>>( - (uint2 *) particleHash, - (uint *) cellStart); - CUT_CHECK_ERROR("Kernel execution failed: findCellStartD"); - -} -#endif - - void collide(uint vboOldPos, uint vboNewPos, float* sortedPos, float* sortedVel, @@ -374,71 +333,4 @@ collide(uint vboOldPos, uint vboNewPos, #endif } -void -btCudaFindOverlappingPairs( float* pAABB, - uint* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint* pPairBuffStartCurr, - uint numParticles) -{ - -// cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4); -// if(err != cudaSuccess) -// { -// printf("\nAAAAA\n"); -// } - - - MY_CUDA_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numParticles*2*sizeof(float4))); - - int numThreads, numBlocks; -// computeGridSize(numParticles, 256, numBlocks, numThreads); - computeGridSize(numParticles, 64, numBlocks, numThreads); -// numThreads = 1; -// numBlocks = 1; - btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>( - (float4 *)pAABB, - (uint2*)pParticleHash, - (uint*)pCellStart, - (uint*)pPairBuff, - (uint2*)pPairBuffStartCurr, - numParticles - ); - CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD"); - MY_CUDA_SAFE_CALL(cudaUnbindTexture(pAABBTex)); - } // btCudaFindOverlappingPairs() - - -void -btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles) -{ - int numThreads, numBlocks; - computeGridSize(numParticles, 256, numBlocks, numThreads); - - btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>( - (uint*)pPairBuff, - (uint2*)pPairBuffStartCurr, - (uint*)pPairScan - ); - CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); - } // btCudaFindOverlappingPairs() - - -void -btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles) -{ - int numThreads, numBlocks; - computeGridSize(numParticles, 256, numBlocks, numThreads); - - btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>( - (uint*)pPairBuff, - (uint2*)pPairBuffStartCurr, - (uint*)pPairScan, - pPairOut - ); - CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); -} - - } // extern "C" diff --git a/Extras/CUDA/particleSystem.cuh b/Extras/CUDA/particleSystem.cuh index e58535f0f..e2b297fc6 100644 --- a/Extras/CUDA/particleSystem.cuh +++ b/Extras/CUDA/particleSystem.cuh @@ -42,12 +42,6 @@ reorderDataAndFindCellStart(uint* particleHash, uint numBodies, uint numCells); -void -findCellStart( uint* particleHash, - uint* cellStart, - uint numBodies, - uint numCells); - void collide(uint vboOldPos, uint vboNewPos, float* sortedPos, float* sortedVel, @@ -59,20 +53,5 @@ collide(uint vboOldPos, uint vboNewPos, uint numBodies, uint numCells, uint maxParticlesPerCell); - -void -btCudaFindOverlappingPairs( float* pAABB, - uint* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint* pPairBuffStartCurr, - uint numParticles); - -void -btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles); - - -void btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles); - } diff --git a/Extras/CUDA/particleSystem.h b/Extras/CUDA/particleSystem.h index 0ac75e6b7..cebf01b7d 100644 --- a/Extras/CUDA/particleSystem.h +++ b/Extras/CUDA/particleSystem.h @@ -35,6 +35,7 @@ #include "particles_kernel.cuh" #include "vector_functions.h" + #include "LinearMath/btAlignedObjectArray.h" // CUDA BodySystem: runs on the GPU @@ -44,13 +45,6 @@ public: ParticleSystem(uint numParticles, uint3 gridSize); ~ParticleSystem(); - - enum ParticleArray - { - POSITION, - VELOCITY, - }; - enum ParticleConfig { CONFIG_RANDOM, @@ -58,6 +52,12 @@ public: _NUM_CONFIGS }; + enum ParticleArray + { + POSITION, + VELOCITY, + }; + enum SimulationMode { SIMULATION_CUDA, @@ -65,33 +65,19 @@ public: SIMULATION_NUM_MODES }; - void reset(ParticleConfig config); - void debugDraw(); - - /// - ///Bullet data - /// - - void initializeBullet(); - void finalizeBullet(); - class btDiscreteDynamicsWorld* m_dynamicsWorld; - class btDefaultCollisionConfiguration* m_collisionConfiguration; - class btCollisionDispatcher* m_dispatcher; - class btCudaBroadphase* m_broadphase; -// class btBroadphaseInterface* m_broadphase; - class btSequentialImpulseConstraintSolver* m_constraintSolver; - btAlignedObjectArray m_bulletParticles; - - void update(float deltaTime); + void updateCuda(float deltaTime); + void updateBullet(float deltaTime); + void reset(ParticleConfig config); float* getArray(ParticleArray array); + void setArray(ParticleArray array, const float* data, int start, int count); - int getNumParticles() const; + int getNumParticles() const { return m_numParticles; } - unsigned int getCurrentReadBuffer() const; - unsigned int getColorBuffer() const; + unsigned int getCurrentReadBuffer() const { return m_posVbo[m_currentPosRead]; } + unsigned int getColorBuffer() const { return m_colorVBO; } void dumpGrid(); void dumpParticles(uint start, uint count); @@ -127,21 +113,73 @@ public: m_simulationMode=mode; } + void debugDraw(); + protected: // methods - ParticleSystem() - : m_simulationMode(SIMULATION_CUDA) - {} - + ParticleSystem() {} + uint createVBO(uint size); + + void _initialize(int numParticles); + void _finalize(); + void initGrid(uint *size, float spacing, float jitter, uint numParticles); +protected: + // Bullet data + void initializeBullet(); + void finalizeBullet(); + class btDiscreteDynamicsWorld* m_dynamicsWorld; + class btDefaultCollisionConfiguration* m_collisionConfiguration; + class btCollisionDispatcher* m_dispatcher; +// class btCudaBroadphase* m_broadphase; + class btBroadphaseInterface* m_broadphase; + class btSequentialImpulseConstraintSolver* m_constraintSolver; + btAlignedObjectArray m_bulletParticles; + btRigidBody* m_bulletCollider; + + + float* copyBuffersFromDeviceToHost(); + void copyBuffersFromHostToDevice(); + protected: // data bool m_bInitialized; - + uint m_numParticles; + + // CPU data + float* m_hPos; + float* m_hVel; + + uint* m_hGridCounters; + uint* m_hGridCells; + + uint* m_hParticleHash; + uint* m_hCellStart; + + // GPU data + float* m_dPos[2]; + float* m_dVel[2]; + + float* m_dSortedPos; + float* m_dSortedVel; + + // uniform grid data + uint* m_dGridCounters; // counts number of entries per grid cell + uint* m_dGridCells; // contains indices of up to "m_maxParticlesPerCell" particles per cell + + uint* m_dParticleHash[2]; + uint* m_dCellStart; + + uint m_posVbo[2]; + uint m_colorVBO; + + uint m_currentPosRead, m_currentVelRead; + uint m_currentPosWrite, m_currentVelWrite; - // params SimParams m_params; + uint3 m_gridSize; + uint m_numGridCells; uint m_maxParticlesPerCell; uint m_timer; @@ -149,6 +187,7 @@ protected: // data uint m_solverIterations; SimulationMode m_simulationMode; + }; #endif // __BODYSYSTEMCUDA_H__ diff --git a/Extras/CUDA/particles.cpp b/Extras/CUDA/particles.cpp index 9ebb19efe..09f1bf9f0 100644 --- a/Extras/CUDA/particles.cpp +++ b/Extras/CUDA/particles.cpp @@ -45,7 +45,6 @@ #include #endif - #include "LinearMath/btQuickprof.h" #include "particleSystem.h" @@ -62,8 +61,6 @@ float camera_rot_lag[] = {0, 0, 0}; const float inertia = 0.1; ParticleRenderer::DisplayMode displayMode = ParticleRenderer::PARTICLE_SPHERES; - - int mode = 0; bool displayEnabled = true; bool bPause = false; @@ -91,6 +88,9 @@ float collideAttraction = 0.0f; ParticleSystem *psystem = 0; // fps +static int fpsCount = 0; +static int fpsLimit = 1; +unsigned int timer; ParticleRenderer *renderer = 0; @@ -109,7 +109,7 @@ void init(int numParticles, uint3 gridSize) renderer->setParticleRadius(psystem->getParticleRadius()); renderer->setColorBuffer(psystem->getColorBuffer()); - +// CUT_SAFE_CALL(cutCreateTimer(&timer)); } void initGL() @@ -129,7 +129,6 @@ void initGL() void display() { - // update the simulation if (!bPause) { @@ -143,8 +142,6 @@ void display() psystem->update(timestep); renderer->setVertexBuffer(psystem->getCurrentReadBuffer(), psystem->getNumParticles()); - float* posArray = psystem->getArray(ParticleSystem::POSITION); - renderer->setPositions(posArray,psystem->getNumParticles()); } // render @@ -168,9 +165,7 @@ void display() glColor3f(1.0, 1.0, 1.0); glutWireCube(2.0); - - - // collider + // collider glPushMatrix(); float4 p = psystem->getColliderPos(); glTranslatef(p.x, p.y, p.z); @@ -195,18 +190,12 @@ void display() psystem->debugDraw(); glDisable(GL_DEPTH_TEST); -// glDisable(GL_LIGHTING); -// glColor3f(0, 0, 0); float offsX = 10.f; float offsY = 10.f; renderer->showProfileInfo(offsX, offsY, 20.f); -// glEnable(GL_LIGHTING); glEnable(GL_DEPTH_TEST); - - - glutSwapBuffers(); { @@ -233,6 +222,7 @@ void display() } glutReportErrors(); + } void reshape(int w, int h) @@ -375,7 +365,6 @@ inline float frand() // commented out to remove unused parameter warnings in Linux void key(unsigned char key, int /*x*/, int /*y*/) { - #ifndef BT_NO_PROFILE if (key >= 0x31 && key < 0x37) { @@ -407,12 +396,9 @@ void key(unsigned char key, int /*x*/, int /*y*/) case 'm': mode = M_MOVE; break; - case 's': - psystem->setSimulationMode((ParticleSystem::SimulationMode) ((psystem->getSimulationMode() + 1) % ParticleSystem::SIMULATION_NUM_MODES)); - break; - case 'p': - displayMode = (ParticleRenderer::DisplayMode) ((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES); + displayMode = (ParticleRenderer::DisplayMode) + ((displayMode + 1) % ParticleRenderer::PARTICLE_NUM_MODES); break; case 'd': psystem->dumpGrid(); @@ -474,6 +460,10 @@ void key(unsigned char key, int /*x*/, int /*y*/) case 'h': displaySliders = !displaySliders; break; + case 's': + psystem->setSimulationMode((ParticleSystem::SimulationMode) ((psystem->getSimulationMode() + 1) % ParticleSystem::SIMULATION_NUM_MODES)); + CProfileManager::CleanupMemory(); + break; } glutPostRedisplay(); @@ -536,16 +526,22 @@ void initMenus() int main(int argc, char** argv) { -// numParticles =1024;//1024;//64;//16380;//32768; - numParticles =8192; +// numParticles = 65536*2; +// numParticles = 65536; +// numParticles = 32768; +// numParticles = 8192; +// numParticles = 4096; + numParticles = 2048; +// numParticles = 1024; +// numParticles = 256; +// numParticles = 32; +// numParticles = 2; uint gridDim = 64; numIterations = 0; gridSize.x = gridSize.y = gridSize.z = gridDim; printf("grid: %d x %d x %d = %d cells\n", gridSize.x, gridSize.y, gridSize.z, gridSize.x*gridSize.y*gridSize.z); - bool benchmark = false; - cudaInit(argc, argv); glutInit(&argc, argv); @@ -558,7 +554,6 @@ main(int argc, char** argv) initParams(); initMenus(); - glutDisplayFunc(display); glutReshapeFunc(reshape); glutMouseFunc(mouse); @@ -569,7 +564,6 @@ main(int argc, char** argv) glutMainLoop(); - if (psystem) delete psystem; diff --git a/Extras/CUDA/particles.sln b/Extras/CUDA/particles.sln new file mode 100644 index 000000000..82c494c6d --- /dev/null +++ b/Extras/CUDA/particles.sln @@ -0,0 +1,228 @@ + +Microsoft Visual Studio Solution File, Format Version 9.00 +# Visual Studio 2005 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "particles", "particles.vcproj", "{AF7F45C7-8545-4CA2-B835-FDE8823B7B09}" + ProjectSection(ProjectDependencies) = postProject + {F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165} + {7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3} + {61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319} + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcollision", "..\..\msvc\8\libbulletcollision.vcproj", "{6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletmath", "..\..\msvc\8\libbulletmath.vcproj", "{7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletdynamics", "..\..\msvc\8\libbulletdynamics.vcproj", "{61BD1097-CF2E-B296-DAA9-73A6FE135319}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletcuda", "libbulletcuda.vcproj", "{F74E8E02-0B47-4816-BD0B-FAEAE3343165}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libbulletopenglsupport", "..\..\msvc\8\libbulletopenglsupport.vcproj", "{7C428E76-9271-6284-20F0-9B38ED6931E3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "appBasicDemo", "..\..\msvc\8\appBasicDemo.vcproj", "{3578834A-4B06-DE6F-78AC-FE11F7226D35}" + ProjectSection(ProjectDependencies) = postProject + {F74E8E02-0B47-4816-BD0B-FAEAE3343165} = {F74E8E02-0B47-4816-BD0B-FAEAE3343165} + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} = {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE} + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} = {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A} + {61BD1097-CF2E-B296-DAA9-73A6FE135319} = {61BD1097-CF2E-B296-DAA9-73A6FE135319} + {7C428E76-9271-6284-20F0-9B38ED6931E3} = {7C428E76-9271-6284-20F0-9B38ED6931E3} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + DebugDll|Win32 = DebugDll|Win32 + DebugDll|x64 = DebugDll|x64 + DebugDoublePrecision|Win32 = DebugDoublePrecision|Win32 + DebugDoublePrecision|x64 = DebugDoublePrecision|x64 + EmuDebug|Win32 = EmuDebug|Win32 + EmuDebug|x64 = EmuDebug|x64 + EmuRelease|Win32 = EmuRelease|Win32 + EmuRelease|x64 = EmuRelease|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + ReleaseDll|Win32 = ReleaseDll|Win32 + ReleaseDll|x64 = ReleaseDll|x64 + ReleaseDoublePrecision|Win32 = ReleaseDoublePrecision|Win32 + ReleaseDoublePrecision|x64 = ReleaseDoublePrecision|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.ActiveCfg = Debug|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|Win32.Build.0 = Debug|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.ActiveCfg = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Debug|x64.Build.0 = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|Win32.ActiveCfg = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.ActiveCfg = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDll|x64.Build.0 = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|Win32.ActiveCfg = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.ActiveCfg = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.DebugDoublePrecision|x64.Build.0 = Debug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.ActiveCfg = EmuDebug|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|Win32.Build.0 = EmuDebug|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.ActiveCfg = EmuDebug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuDebug|x64.Build.0 = EmuDebug|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.ActiveCfg = EmuRelease|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|Win32.Build.0 = EmuRelease|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.ActiveCfg = EmuRelease|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.EmuRelease|x64.Build.0 = EmuRelease|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.ActiveCfg = Release|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|Win32.Build.0 = Release|Win32 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.ActiveCfg = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.Release|x64.Build.0 = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|Win32.ActiveCfg = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.ActiveCfg = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDll|x64.Build.0 = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|Win32.ActiveCfg = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.ActiveCfg = Release|x64 + {AF7F45C7-8545-4CA2-B835-FDE8823B7B09}.ReleaseDoublePrecision|x64.Build.0 = Release|x64 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.ActiveCfg = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|Win32.Build.0 = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Debug|x64.ActiveCfg = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|Win32.Build.0 = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|Win32.Build.0 = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.EmuRelease|x64.ActiveCfg = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.ActiveCfg = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|Win32.Build.0 = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.Release|x64.ActiveCfg = Release|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {6ADA430D-009C-2ED4-A787-2AC2D6FEB8CE}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.ActiveCfg = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|Win32.Build.0 = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Debug|x64.ActiveCfg = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|Win32.Build.0 = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|Win32.Build.0 = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.EmuRelease|x64.ActiveCfg = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.ActiveCfg = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|Win32.Build.0 = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.Release|x64.ActiveCfg = Release|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {7D6E339F-9C2C-31DA-FDB0-5EE50973CF2A}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.ActiveCfg = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|Win32.Build.0 = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Debug|x64.ActiveCfg = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|Win32.Build.0 = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|Win32.Build.0 = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.EmuRelease|x64.ActiveCfg = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.ActiveCfg = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|Win32.Build.0 = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.Release|x64.ActiveCfg = Release|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {61BD1097-CF2E-B296-DAA9-73A6FE135319}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.ActiveCfg = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|Win32.Build.0 = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Debug|x64.ActiveCfg = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|Win32.Build.0 = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|Win32.Build.0 = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.EmuRelease|x64.ActiveCfg = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.ActiveCfg = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|Win32.Build.0 = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.Release|x64.ActiveCfg = Release|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {F74E8E02-0B47-4816-BD0B-FAEAE3343165}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.ActiveCfg = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|Win32.Build.0 = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Debug|x64.ActiveCfg = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|Win32.Build.0 = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|Win32.Build.0 = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.EmuRelease|x64.ActiveCfg = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.ActiveCfg = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|Win32.Build.0 = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.Release|x64.ActiveCfg = Release|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {7C428E76-9271-6284-20F0-9B38ED6931E3}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.ActiveCfg = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|Win32.Build.0 = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Debug|x64.ActiveCfg = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.ActiveCfg = DebugDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|Win32.Build.0 = DebugDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDll|x64.ActiveCfg = DebugDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.ActiveCfg = DebugDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|Win32.Build.0 = DebugDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.DebugDoublePrecision|x64.ActiveCfg = DebugDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.ActiveCfg = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|Win32.Build.0 = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuDebug|x64.ActiveCfg = Debug|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.ActiveCfg = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|Win32.Build.0 = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.EmuRelease|x64.ActiveCfg = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.ActiveCfg = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|Win32.Build.0 = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.Release|x64.ActiveCfg = Release|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.ActiveCfg = ReleaseDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|Win32.Build.0 = ReleaseDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDll|x64.ActiveCfg = ReleaseDll|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.ActiveCfg = ReleaseDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|Win32.Build.0 = ReleaseDoublePrecision|Win32 + {3578834A-4B06-DE6F-78AC-FE11F7226D35}.ReleaseDoublePrecision|x64.ActiveCfg = ReleaseDoublePrecision|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Extras/CUDA/btCudaBroadphase.vcproj b/Extras/CUDA/particles.vcproj similarity index 78% rename from Extras/CUDA/btCudaBroadphase.vcproj rename to Extras/CUDA/particles.vcproj index 4e18a89ca..5dc6ab5e6 100644 --- a/Extras/CUDA/btCudaBroadphase.vcproj +++ b/Extras/CUDA/particles.vcproj @@ -2,9 +2,9 @@ @@ -45,7 +45,7 @@ Name="VCCLCompilerTool" Optimization="0" AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src;../../Demos/OpenGL" - PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE" + PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS" MinimalRebuild="true" BasicRuntimeChecks="3" RuntimeLibrary="1" @@ -70,7 +70,7 @@ LinkIncremental="1" AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut" GenerateDebugInformation="true" - ProgramDatabaseFile="$(OutDir)/btCudaBroadphase.pdb" + ProgramDatabaseFile="$(OutDir)/particles.pdb" SubSystem="1" TargetMachine="1" /> @@ -149,11 +149,11 @@ @@ -209,7 +209,7 @@ @@ -475,11 +475,11 @@ @@ -535,7 +535,7 @@ - - - - - - - - - - @@ -793,7 +773,7 @@ > @@ -813,7 +793,7 @@ > @@ -877,95 +857,6 @@ RelativePath=".\particleSystem.h" > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -983,46 +874,6 @@ > - - - - - - - - - - - - - - - - - - - - diff --git a/Extras/CUDA/particles_kernel.cu b/Extras/CUDA/particles_kernel.cu index 2cadb81fe..9817cb384 100644 --- a/Extras/CUDA/particles_kernel.cu +++ b/Extras/CUDA/particles_kernel.cu @@ -50,10 +50,6 @@ texture cellStartTex; texture gridCountersTex; texture gridCellsTex; - -texture pAABBTex; - - #endif __constant__ SimParams params; @@ -203,35 +199,6 @@ reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by has } - -__global__ void -findCellStartD(uint2* particleHash, // particle id sorted by hash - uint* cellStart) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - - uint2 sortedData = particleHash[index]; - - // Load hash data into shared memory so that we can look - // at neighboring particle's hash value without loading - // two hash values per thread - __shared__ uint sharedHash[257]; - sharedHash[threadIdx.x+1] = sortedData.x; - if (index > 0 && threadIdx.x == 0) - { - // first thread in block must load neighbor particle hash - volatile uint2 prevData = particleHash[index-1]; - sharedHash[0] = prevData.x; - } - - __syncthreads(); - if (index == 0 || sortedData.x != sharedHash[threadIdx.x]) - { - cellStart[sortedData.x] = index; - } -} - - // collide two spheres using DEM method __device__ float3 collideSpheres(float4 posA, float4 posB, float4 velA, float4 velB, @@ -411,167 +378,4 @@ collideD(float4* newPos, float4* newVel, #endif } -__device__ -uint cudaTestAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} - - -__device__ -void findPairsInCell( int3 gridPos, - uint index, - uint2* pParticleHash, - uint* pCellStart, - float4* pAABB, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles) -{ - if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) || - (gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) || - (gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) { - return; - } - uint gridHash = calcGridHash(gridPos); - // get start of bucket for this cell - uint bucketStart = pCellStart[gridHash]; - if (bucketStart == 0xffffffff) - return; // cell empty - // iterate over particles in this cell -// float4 min0 = pAABB[index*2]; -// float4 max0 = pAABB[index*2+1]; - float4 min0 = FETCH(pAABB, index*2); // pAABB[index*2]; - float4 max0 = FETCH(pAABB, index*2 + 1); // pAABB[index*2+1]; - - uint2 sortedData = pParticleHash[index]; - uint unsorted_indx = sortedData.y; - uint2 start_curr = pPairBuffStartCurr[unsorted_indx]; -// uint2 start_curr = pPairBuffStartCurr[index]; - - uint start = start_curr.x; - uint curr = start_curr.y; - uint bucketEnd = bucketStart + params.maxParticlesPerCell; - bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd; - for(uint index2=bucketStart; index2 < bucketEnd; index2++) - { - uint2 cellData = pParticleHash[index2]; - if (cellData.x != gridHash) break; // no longer in same bucket - if (index2 != index) // check not colliding with self - { - uint unsorted_indx2 = cellData.y; - if(unsorted_indx2 < unsorted_indx) - { - continue; - } -// float4 min1 = pAABB[index2*2]; -// float4 max1 = pAABB[index2*2 + 1]; - float4 min1 = FETCH(pAABB, index2*2);// pAABB[index2*2]; - float4 max1 = FETCH(pAABB, index2*2 + 1); // pAABB[index2*2 + 1]; - if(cudaTestAABBOverlap(min0, max0, min1, max1)) - { - uint k; - for(k = 0; k < curr; k++) - { - uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG); - if(old_pair == unsorted_indx2) - { - pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG; - break; - } - } - if(k == curr) - { - pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG; - curr++; - } - } - } - } - pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr); - return; -} - - - -__global__ void -btCudaFindOverlappingPairsD( - float4* pAABB, - uint2* pParticleHash, - uint* pCellStart, - uint* pPairBuff, - uint2* pPairBuffStartCurr, - uint numParticles -) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - float4 bbMin = pAABB[index*2]; - float4 bbMax = pAABB[index*2+1]; - float4 pos = (bbMin + bbMax) * 0.5f; - // get address in grid - int3 gridPos = calcGridPos(pos); - // examine only neighbouring cells - for(int z=-1; z<=1; z++) { - for(int y=-1; y<=1; y++) { - for(int x=-1; x<=1; x++) { - findPairsInCell(gridPos + make_int3(x, y, z), index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles); - } - } - } -} - - - -__global__ void -btCudaComputePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - uint2 start_curr = pPairBuffStartCurr[index]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint *pInp = pPairBuff + start; - uint num_changes = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - num_changes++; - } - } - pPairScan[index+1] = num_changes; -} - - -__global__ void -btCudaSqueezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut) -{ - int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; - uint2 start_curr = pPairBuffStartCurr[index]; - uint start = start_curr.x; - uint curr = start_curr.y; - uint* pInp = pPairBuff + start; - uint* pOut = pPairOut + pPairScan[index]; - uint* pOut2 = pInp; - uint num = 0; - for(uint k = 0; k < curr; k++, pInp++) - { - if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG)) - { - *pOut = *pInp; - pOut++; - } - if((*pInp) & BT_CUDA_PAIR_ANY_FLG) - { - *pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG); - pOut2++; - num++; - } - } - pPairBuffStartCurr[index] = make_uint2(start, num); -} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D() - - - #endif diff --git a/Extras/CUDA/particles_kernel.cuh b/Extras/CUDA/particles_kernel.cuh index 34da7825c..34728ec27 100644 --- a/Extras/CUDA/particles_kernel.cuh +++ b/Extras/CUDA/particles_kernel.cuh @@ -14,12 +14,6 @@ #define FETCH(t, i) t[i] #endif - -#define BT_CUDA_PAIR_FOUND_FLG (0x40000000) -#define BT_CUDA_PAIR_NEW_FLG (0x20000000) -#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG) - - #include "vector_types.h" typedef unsigned int uint; @@ -47,7 +41,6 @@ struct SimParams { float shear; float attraction; float boundaryDamping; - }; #endif diff --git a/Extras/CUDA/render_particles.cpp b/Extras/CUDA/render_particles.cpp index ce5bdb3df..9722f308c 100644 --- a/Extras/CUDA/render_particles.cpp +++ b/Extras/CUDA/render_particles.cpp @@ -33,12 +33,13 @@ #include #include +#include #include "BMF_Api.h" +#include "LinearMath/btQuickprof.h" + #include "render_particles.h" #include "shaders.h" -#include "LinearMath/btQuickprof.h" -#include "paramgl.h" #ifndef M_PI #define M_PI 3.1415926535897932384626433832795 @@ -186,7 +187,7 @@ void ParticleRenderer::_initGL() #endif } -#if 1 + void ParticleRenderer::showProfileInfo(float& xOffset,float& yStart, float yIncr) { #ifndef BT_NO_PROFILE @@ -257,5 +258,3 @@ void ParticleRenderer::displayProfileString(int xOffset,int yStart,char* message glRasterPos3f(xOffset,yStart,0); BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),message); } - -#endif \ No newline at end of file diff --git a/Extras/CUDA/render_particles.h b/Extras/CUDA/render_particles.h index 5ec788dbc..6511478b1 100644 --- a/Extras/CUDA/render_particles.h +++ b/Extras/CUDA/render_particles.h @@ -30,9 +30,6 @@ #ifndef __RENDER_PARTICLES__ #define __RENDER_PARTICLES__ - -class CProfileIterator; - class ParticleRenderer { public: @@ -62,7 +59,6 @@ public: void displayProfileString(int xOffset,int yStart,char* message); class CProfileIterator* m_profileIterator; - protected: // methods void _initGL(); void _drawPoints();