more CUDA work

This commit is contained in:
rponom
2008-10-28 23:25:59 +00:00
parent f5e16847df
commit 309a12836d
12 changed files with 1325 additions and 357 deletions

View File

@@ -34,17 +34,21 @@ subject to the following restrictions:
#include <GL/glut.h> #include <GL/glut.h>
#endif #endif
#define MAX_COLL_PAIR_PER_PARTICLE 64
#define USE_SORT 1 #define USE_SORT 1
#define USE_OLD 0
#define USE_CUDA 1
#include "btCudaBroadphase.h" #include "btCudaBroadphase.h"
#include "LinearMath/btAlignedAllocator.h" #include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h" #include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) : btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) :
btSimpleBroadphase(maxProxies, btSimpleBroadphase(maxProxies,
new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache), // new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
m_bInitialized(false), m_bInitialized(false),
m_numParticles(simParams.numBodies), m_numParticles(simParams.numBodies),
m_hPos(0), m_hPos(0),
@@ -155,6 +159,26 @@ void btCudaBroadphase::_initialize(int numParticles)
m_hCellStart = new uint[m_simParams.numCells]; m_hCellStart = new uint[m_simParams.numCells];
memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint)); memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint));
m_hPairBuffStartCurr = new unsigned int[m_numParticles * 2 + 1];
// --------------- for now, init with MAX_COLL_PAIR_PER_PARTICLE for each particle
m_hPairBuffStartCurr[0] = 0;
m_hPairBuffStartCurr[1] = 0;
for(uint i = 1; i <= m_numParticles; i++)
{
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + MAX_COLL_PAIR_PER_PARTICLE;
// m_hPairBuffStartCurr[i * 2 + 1] = m_hPairBuffStartCurr[i * 2];
m_hPairBuffStartCurr[i * 2 + 1] = 0;
}
//----------------
m_hAABB = new float[numParticles*4*2]; // BB Min & Max
m_hPairBuff = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
memset(m_hPairBuff, 0x00, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*4);
m_hPairScan = new unsigned int[m_numParticles + 1];
m_hPairOut = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
// allocate GPU data // allocate GPU data
unsigned int memSize = sizeof(float) * 4 * m_numParticles; unsigned int memSize = sizeof(float) * 4 * m_numParticles;
@@ -176,7 +200,18 @@ void btCudaBroadphase::_initialize(int numParticles)
allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint)); allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
#endif #endif
m_colorVBO = createVBO(m_numParticles*4*sizeof(float)); allocateArray((void**)&m_dPairBuff, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
copyArrayToDevice(m_dPairBuff, m_hPairBuff, 0, sizeof(unsigned int)*m_numParticles*MAX_COLL_PAIR_PER_PARTICLE);
allocateArray((void**)&m_dPairBuffStartCurr, (m_numParticles*2 + 1)*sizeof(unsigned int));
allocateArray((void**)&m_dAABB, memSize*2);
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, 0, sizeof(unsigned int)*(m_numParticles*2 + 1));
allocateArray((void**)&m_dPairScan, (m_numParticles + 1)*sizeof(unsigned int));
allocateArray((void**)&m_dPairOut, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
#if 1 #if 1
// fill color buffer // fill color buffer
@@ -201,6 +236,10 @@ void btCudaBroadphase::_initialize(int numParticles)
setParameters(&m_simParams); setParameters(&m_simParams);
// Pair cache data
m_maxPairsPerParticle = 0;
m_numOverflows = 0;
m_bInitialized = true; m_bInitialized = true;
} }
@@ -217,6 +256,14 @@ void btCudaBroadphase::_finalize()
delete [] m_hGridCounters; delete [] m_hGridCounters;
delete [] m_hGridCells; delete [] m_hGridCells;
delete [] m_dPairBuff;
delete [] m_dPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScan;
delete [] m_hPairOut;
freeArray(m_dVel[0]); freeArray(m_dVel[0]);
freeArray(m_dVel[1]); freeArray(m_dVel[1]);
@@ -231,12 +278,20 @@ void btCudaBroadphase::_finalize()
freeArray(m_dGridCounters); freeArray(m_dGridCounters);
freeArray(m_dGridCells); freeArray(m_dGridCells);
#endif #endif
freeArray(m_dPairBuff);
freeArray(m_dPairBuffStartCurr);
freeArray(m_dAABB);
freeArray(m_hPairBuff);
freeArray(m_hPairScan);
freeArray(m_hPairOut);
unregisterGLBufferObject(m_posVbo[0]); unregisterGLBufferObject(m_posVbo[0]);
unregisterGLBufferObject(m_posVbo[1]); unregisterGLBufferObject(m_posVbo[1]);
glDeleteBuffers(2, (const GLuint*)m_posVbo); glDeleteBuffers(2, (const GLuint*)m_posVbo);
glDeleteBuffers(1, (const GLuint*)&m_colorVBO); glDeleteBuffers(1, (const GLuint*)&m_colorVBO);
} }
btCudaBroadphase::~btCudaBroadphase() btCudaBroadphase::~btCudaBroadphase()
@@ -300,9 +355,6 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
static int frameCount = 0; static int frameCount = 0;
//printf("framecount=%d\n",frameCount++); //printf("framecount=%d\n",frameCount++);
int numRejected=0;
if (m_numHandles >= 0) if (m_numHandles >= 0)
{ {
@@ -373,9 +425,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
// sort and search method // sort and search method
// calculate hash // calculate hash
calcHash(m_posVbo[m_currentPosRead], {
m_dParticleHash[0], BT_PROFILE("calcHash-- CUDA");
m_numParticles); calcHash( m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
}
#if DEBUG_GRID #if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
@@ -388,7 +441,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
// sort particles based on hash // sort particles based on hash
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32); {
BT_PROFILE("RadixSort-- CUDA");
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
}
#if DEBUG_GRID #if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
@@ -401,14 +457,24 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
// reorder particle arrays into sorted order and // reorder particle arrays into sorted order and
// find start of each cell // find start of each cell
reorderDataAndFindCellStart(m_dParticleHash[0], {
m_posVbo[m_currentPosRead], BT_PROFILE("Reorder-- CUDA");
m_dVel[m_currentVelRead], #if USE_OLD
m_dSortedPos, reorderDataAndFindCellStart(m_dParticleHash[0],
m_dSortedVel, m_posVbo[m_currentPosRead],
m_dCellStart, m_dVel[m_currentVelRead],
m_numParticles, m_dSortedPos,
m_simParams.numCells); m_dSortedVel,
m_dCellStart,
m_numParticles,
m_simParams.numCells);
#else
findCellStart(m_dParticleHash[0],
m_dCellStart,
m_numParticles,
m_simParams.numCells);
#endif
}
//#define DEBUG_GRID2 //#define DEBUG_GRID2
#ifdef DEBUG_GRID2 #ifdef DEBUG_GRID2
@@ -455,9 +521,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
} }
*/ */
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles); copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells); copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
copyArrayFromDevice((void *) m_hSortedPos, (void*) m_dSortedPos,0 , sizeof(float)*4*m_numParticles);
// copyArrayFromDevice((void *) m_hSortedPos, (void*) m_dSortedPos,0 , sizeof(float)*4*m_numParticles);
//#define DEBUG_INDICES 1 //#define DEBUG_INDICES 1
#ifdef DEBUG_INDICES #ifdef DEBUG_INDICES
@@ -482,6 +549,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
// } // }
} }
#if USE_OLD
//printf("particle hash sorted:\n"); //printf("particle hash sorted:\n");
for(uint pi=0; pi<m_numParticles; pi++) for(uint pi=0; pi<m_numParticles; pi++)
{ {
@@ -494,7 +562,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index]; btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
btVector3 mypos = (proxy0->m_min+proxy0->m_max)*0.5f; btVector3 mypos = (proxy0->m_aabbMin + proxy0->m_aabbMax)*0.5f;
// float4* p = (float4*)&m_hSortedPos[index*4]; // float4* p = (float4*)&m_hSortedPos[index*4];
@@ -504,6 +572,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y); particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z); particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
int numRejected=0;
//for(int z=0; z<1; z++) //for(int z=0; z<1; z++)
for(int z=-1; z<=1; z++) for(int z=-1; z<=1; z++)
@@ -575,15 +644,19 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
} }
} }
#else // USE_OLD
btBroadphasePairArray& overlappingPairArrayA = m_pairCache->getOverlappingPairArray();
findOverlappingPairs(dispatcher);
#endif
#endif //_USE_BRUTEFORCE_N #endif //_USE_BRUTEFORCE_N
#if USE_OLD
///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache ///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache
if (m_ownsPairCache && m_pairCache->hasDeferredRemoval()) if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
{ {
BT_PROFILE("Cleaning-- CPU");
btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray(); btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray();
//perform a sort, to find duplicates and to sort 'invalid' pairs to the end //perform a sort, to find duplicates and to sort 'invalid' pairs to the end
@@ -622,6 +695,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
needsRemoval = false;//callback->processOverlap(pair); needsRemoval = false;//callback->processOverlap(pair);
} else } else
{ {
bool hasOverlapA = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
needsRemoval = true; needsRemoval = true;
} }
} else } else
@@ -629,7 +703,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
//remove duplicate //remove duplicate
needsRemoval = true; needsRemoval = true;
//should have no algorithm //should have no algorithm
btAssert(!pair.m_algorithm); // btAssert(!pair.m_algorithm);
} }
if (needsRemoval) if (needsRemoval)
@@ -661,6 +735,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
#endif//CLEAN_INVALID_PAIRS #endif//CLEAN_INVALID_PAIRS
} }
#endif // USE_OLD
} }
//printf("numRejected=%d\n",numRejected); //printf("numRejected=%d\n",numRejected);
@@ -1137,3 +1212,333 @@ void btCudaBroadphase::quickHack2()
} }
void btCudaBroadphase::findOverlappingPairs(btDispatcher* dispatcher)
{
BT_PROFILE("findOverlappingPairs -- CPU");
int numRejected=0;
m_numPairsAdded = 0;
{
BT_PROFILE("copy AABB -- CPU");
// do it faster ?
float* pVec = m_hAABB;
for(uint pi=0; pi<m_numParticles; pi++)
{
int index = m_hParticleHash[pi*2+1];
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
*pVec++ = proxy0->m_aabbMin.getX();
*pVec++ = proxy0->m_aabbMin.getY();
*pVec++ = proxy0->m_aabbMin.getZ();
*pVec++ = 0.0F;
*pVec++ = proxy0->m_aabbMax.getX();
*pVec++ = proxy0->m_aabbMax.getY();
*pVec++ = proxy0->m_aabbMax.getZ();
*pVec++ = 0.0F;
}
}
#if USE_CUDA
{
{
BT_PROFILE("CopyBB to CUDA");
copyArrayToDevice(m_dAABB, m_hAABB, 0, sizeof(float)*4*2*m_numParticles);
}
{
BT_PROFILE("btCudaFindOverlappingPairs");
btCudaFindOverlappingPairs( m_dAABB,
m_dParticleHash[0],
m_dCellStart,
m_dPairBuff,
m_dPairBuffStartCurr,
m_numParticles
);
}
{
BT_PROFILE("btCudaComputePairCacheChanges");
btCudaComputePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_numParticles);
}
{
BT_PROFILE("scanOverlappingPairBuffCPU");
copyArrayFromDevice(m_hPairScan, m_dPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
scanOverlappingPairBuffCPU();
copyArrayToDevice(m_dPairScan, m_hPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
}
{
BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
btCudaSqueezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_numParticles);
}
{
BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
copyArrayFromDevice(m_hPairOut, m_dPairOut, 0, sizeof(unsigned int) * m_hPairScan[m_numParticles]);
}
}
#else
findOverlappingPairsCPU( m_hAABB,
m_hParticleHash,
m_hCellStart,
m_hPairBuff,
m_hPairBuffStartCurr,
m_numParticles);
computePairCacheChangesCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_numParticles);
scanOverlappingPairBuffCPU();
squeezeOverlappingPairBuffCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_numParticles);
#endif
{
BT_PROFILE("addPairsToCache");
addPairsToCacheCPU(dispatcher);
}
} // btCudaBroadphase::fillOverlappingPairCache()
// calculate position in uniform grid
int3 btCudaBroadphase::calcGridPosCPU(float4 p)
{
int3 gridPos;
gridPos.x = floor((p.x - m_simParams.worldOrigin.x) / m_simParams.cellSize.x);
gridPos.y = floor((p.y - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
gridPos.z = floor((p.z - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
return gridPos;
} // btCudaBroadphase::calcGridPos()
// calculate address in grid from position (clamping to edges)
uint btCudaBroadphase::calcGridHashCPU(int3 gridPos)
{
gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1));
gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1));
gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1));
return (gridPos.z * m_simParams.gridSize.y) * m_simParams.gridSize.x + gridPos.y * m_simParams.gridSize.x + gridPos.x;
}
void btCudaBroadphase::computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
for(uint i = 0; i < numParticles; i++)
{
computePairCacheChangesCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan);
}
}
void btCudaBroadphase::computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
{
uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
uint *pInp = pPairBuff + start;
uint num_changes = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
num_changes++;
}
}
pPairScan[index+1] = num_changes;
}
void btCudaBroadphase::findOverlappingPairsCPU( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles)
{
BT_PROFILE("findOverlappingPairsCPU -- CPU");
for(uint i = 0; i < numParticles; i++)
{
findOverlappingPairsCPU_D(
i,
(float4 *)pAABB,
(uint2*)pParticleHash,
(uint*)pCellStart,
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
numParticles);
}
} // btCudaBroadphase::findOverlappingPairsCPU()
void btCudaBroadphase::findOverlappingPairsCPU_D( uint index,
float4* pAABB,
uint2* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles)
{
float4 bbMin = pAABB[index*2];
float4 bbMax = pAABB[index*2+1];
float4 pos;
pos.x = (bbMin.x + bbMax.x) * 0.5f;
pos.y = (bbMin.y + bbMax.y) * 0.5f;
pos.z = (bbMin.z + bbMax.z) * 0.5f;
// get address in grid
int3 gridPos = calcGridPosCPU(pos);
// examine only neighbouring cells
for(int z=-1; z<=1; z++) {
for(int y=-1; y<=1; y++) {
for(int x=-1; x<=1; x++) {
int3 gridPos2;
gridPos2.x = gridPos.x + x;
gridPos2.y = gridPos.y + y;
gridPos2.z = gridPos.z + z;
findPairsInCellCPU(gridPos2, index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
}
}
}
} // btCudaBroadphase::findOverlappingPairsCPU_D()
void btCudaBroadphase::findPairsInCellCPU( int3 gridPos,
uint index,
uint2* pParticleHash,
uint* pCellStart,
float4* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles)
{
if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1)) {
return;
}
uint gridHash = calcGridHashCPU(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
return; // cell empty
// iterate over particles in this cell
float4 min0 = pAABB[index*2];
float4 max0 = pAABB[index*2+1];
uint2 sortedData = pParticleHash[index];
uint unsorted_indx = sortedData.y;
uint2 start_curr = pPairBuffStartCurr[unsorted_indx];
uint start = start_curr.x;
uint curr = start_curr.y;
uint curr1 = curr;
uint bucketEnd = bucketStart + m_simParams.maxParticlesPerCell;
bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd;
for(uint index2=bucketStart; index2 < bucketEnd; index2++)
{
uint2 cellData = pParticleHash[index2];
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 != index) // check not colliding with self
{
float4 min1 = pAABB[index2*2];
float4 max1 = pAABB[index2*2 + 1];
if(cudaTestAABBOverlapCPU(min0, max0, min1, max1))
{
uint k;
uint unsorted_indx2 = cellData.y;
for(k = 0; k < curr1; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
if(old_pair == unsorted_indx2)
{
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
break;
}
}
if(k == curr1)
{
pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG;
curr++;
}
}
}
}
pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr);
return;
} // btCudaBroadphase::findPairsInCellCPU()
uint btCudaBroadphase::cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1)
{
return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
(min0.y <= max1.y)&& (min1.y <= max0.y) &&
(min0.z <= max1.z)&& (min1.z <= max0.z);
} // btCudaBroadphase::cudaTestAABBOverlapCPU()
void btCudaBroadphase::scanOverlappingPairBuffCPU()
{
m_hPairScan[0] = 0;
for(uint i = 1; i <= m_numParticles; i++)
{
unsigned int delta = m_hPairScan[i];
m_hPairScan[i] = m_hPairScan[i-1] + delta;
}
} // btCudaBroadphase::scanOverlappingPairBuffCPU()
void btCudaBroadphase::squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
for(uint i = 0; i < numParticles; i++)
{
squeezeOverlappingPairBuffCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan, pPairOut);
}
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU()
void btCudaBroadphase::squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
{
uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
uint* pInp = pPairBuff + start;
uint* pOut = pPairOut + pPairScan[index];
uint* pOut2 = pInp;
uint num = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
*pOut = *pInp;
pOut++;
}
if((*pInp) & BT_CUDA_PAIR_ANY_FLG)
{
*pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG);
pOut2++;
num++;
}
}
pPairBuffStartCurr[index] = make_uint2(start, num);
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
unsigned int gNumPairsAdded = 0;
void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher)
{
gNumPairsAdded = 0;
for(uint i = 0; i < m_numParticles; i++)
{
unsigned int num = m_hPairScan[i+1] - m_hPairScan[i];
if(!num)
{
continue;
}
unsigned int* pInp = m_hPairOut + m_hPairScan[i];
unsigned int index0 = i;
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0];
for(uint j = 0; j < num; j++)
{
unsigned int indx1_s = pInp[j];
unsigned int index1 = indx1_s & (~BT_CUDA_PAIR_ANY_FLG);
btSimpleBroadphaseProxy* proxy1 = &m_pHandles[index1];
if(indx1_s & BT_CUDA_PAIR_NEW_FLG)
{
m_pairCache->addOverlappingPair(proxy0,proxy1);
gNumPairsAdded++;
}
else
{
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
}
}
}
} // btCudaBroadphase::addPairsToCacheCPU()

View File

@@ -37,7 +37,13 @@ class btCudaBroadphase : public btSimpleBroadphase
unsigned int* m_hParticleHash; unsigned int* m_hParticleHash;
unsigned int* m_hCellStart; unsigned int* m_hCellStart;
unsigned int* m_hPairBuffStartCurr;
float* m_hAABB;
unsigned int* m_hPairBuff;
unsigned int* m_hPairScan;
unsigned int* m_hPairOut;
// GPU data // GPU data
float* m_dPos[2]; float* m_dPos[2];
@@ -59,6 +65,14 @@ class btCudaBroadphase : public btSimpleBroadphase
unsigned int m_currentPosRead, m_currentVelRead; unsigned int m_currentPosRead, m_currentVelRead;
unsigned int m_currentPosWrite, m_currentVelWrite; unsigned int m_currentPosWrite, m_currentVelWrite;
// buffers on GPU
unsigned int* m_dPairBuff;
unsigned int* m_dPairBuffStartCurr;
float* m_dAABB;
unsigned int* m_dPairScan;
unsigned int* m_dPairOut;
// params // params
struct SimParams& m_simParams; struct SimParams& m_simParams;
@@ -66,6 +80,11 @@ class btCudaBroadphase : public btSimpleBroadphase
unsigned int m_maxParticlesPerCell; unsigned int m_maxParticlesPerCell;
// debug
unsigned int m_numPairsAdded;
unsigned int m_maxPairsPerParticle;
unsigned int m_numOverflows;
protected: protected:
unsigned int createVBO(unsigned int size); unsigned int createVBO(unsigned int size);
@@ -120,5 +139,48 @@ public:
void quickHack2(); void quickHack2();
void integrate(); void integrate();
void findOverlappingPairs(btDispatcher* dispatcher);
int3 calcGridPosCPU(float4 p);
uint calcGridHashCPU(int3 gridPos);
void computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles);
void computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan);
void findOverlappingPairsCPU( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles);
void findOverlappingPairsCPU_D( uint index,
float4* pAABB,
uint2* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles);
void findPairsInCellCPU(int3 gridPos,
uint index,
uint2* pParticleHash,
uint* pCellStart,
float4* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles);
uint cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1);
void scanOverlappingPairBuffCPU();
void squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles);
void squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut);
void addPairsToCacheCPU(btDispatcher* dispatcher);
void resetOverlappingPairBuffCPU();
}; };
#endif //CUDA_BROADPHASE_H #endif //CUDA_BROADPHASE_H

View File

@@ -44,7 +44,7 @@
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
Optimization="0" Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src" AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE" PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true" MinimalRebuild="true"
BasicRuntimeChecks="3" BasicRuntimeChecks="3"
@@ -100,249 +100,6 @@
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\" CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/> />
</Configuration> </Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../ReleaseCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuDebug|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../EmuDebugCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/btCudaBroadphase.pdb"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration
Name="EmuRelease|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../EmuReleaseCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration <Configuration
Name="Debug|x64" Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)" OutputDirectory="$(PlatformName)\$(ConfigurationName)"
@@ -426,6 +183,86 @@
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\" CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/> />
</Configuration> </Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../ReleaseCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration <Configuration
Name="Release|x64" Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)" OutputDirectory="$(PlatformName)\$(ConfigurationName)"
@@ -507,6 +344,88 @@
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\" CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/> />
</Configuration> </Configuration>
<Configuration
Name="EmuDebug|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../EmuDebugCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/btCudaBroadphase.pdb"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration <Configuration
Name="EmuDebug|x64" Name="EmuDebug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)" OutputDirectory="$(PlatformName)\$(ConfigurationName)"
@@ -590,6 +509,87 @@
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\" CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/> />
</Configuration> </Configuration>
<Configuration
Name="EmuRelease|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../../Glut;&quot;$(CUDA_INC_PATH)&quot;;./;../../src;../../Demos/OpenGL"
PreprocessorDefinitions="WIN32;_CONSOLE"
RuntimeLibrary="0"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="cudart.lib glew32.lib"
OutputFile="../../EmuReleaseCudaBroadphase.exe"
LinkIncremental="1"
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
/>
</Configuration>
<Configuration <Configuration
Name="EmuRelease|x64" Name="EmuRelease|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)" OutputDirectory="$(PlatformName)\$(ConfigurationName)"
@@ -720,30 +720,6 @@
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="Debug|x64" Name="Debug|x64"
ExcludedFromBuild="true" ExcludedFromBuild="true"
@@ -752,6 +728,14 @@
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="Release|x64" Name="Release|x64"
ExcludedFromBuild="true" ExcludedFromBuild="true"
@@ -760,6 +744,14 @@
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="EmuDebug|x64" Name="EmuDebug|x64"
ExcludedFromBuild="true" ExcludedFromBuild="true"
@@ -768,6 +760,14 @@
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="EmuRelease|x64" Name="EmuRelease|x64"
ExcludedFromBuild="true" ExcludedFromBuild="true"
@@ -793,37 +793,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -Icommon/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -Icommon/inc -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -Icommon/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu" AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj" Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/> />
@@ -838,6 +808,16 @@
Outputs="$(ConfigurationName)\particleSystem_cu.obj" Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -use_fast_math -ccbin &quot;$(VCInstallDir)\bin&quot; -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -Icommon/inc -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="Release|x64" Name="Release|x64"
> >
@@ -848,6 +828,16 @@
Outputs="$(ConfigurationName)\particleSystem_cu.obj" Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="EmuDebug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="EmuDebug|x64" Name="EmuDebug|x64"
> >
@@ -858,6 +848,16 @@
Outputs="$(ConfigurationName)\particleSystem_cu.obj" Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="EmuRelease|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -arch=sm_10 -ccbin &quot;$(VCInstallDir)\bin&quot; -deviceemu -c -I &quot;$(CUDA_INC_PATH)&quot;; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu&#x0D;&#x0A;"
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="EmuRelease|x64" Name="EmuRelease|x64"
> >
@@ -899,6 +899,16 @@
Outputs="" Outputs=""
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine=""
AdditionalDependencies=""
Outputs=""
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="EmuDebug|Win32" Name="EmuDebug|Win32"
ExcludedFromBuild="true" ExcludedFromBuild="true"
@@ -915,16 +925,6 @@
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine=""
AdditionalDependencies=""
Outputs=""
/>
</FileConfiguration>
</File> </File>
<File <File
RelativePath=".\radixsort.cuh" RelativePath=".\radixsort.cuh"
@@ -1003,6 +1003,10 @@
RelativePath=".\btCudaBroadphase.h" RelativePath=".\btCudaBroadphase.h"
> >
</File> </File>
<File
RelativePath=".\VTune\btCudaBroadphase.vpj"
>
</File>
<File <File
RelativePath=".\cutil_math.h" RelativePath=".\cutil_math.h"
> >
@@ -1019,10 +1023,6 @@
RelativePath=".\paramgl.h" RelativePath=".\paramgl.h"
> >
</File> </File>
<File
RelativePath=".\VTune\btCudaBroadphase.vpj"
>
</File>
</Files> </Files>
<Globals> <Globals>
</Globals> </Globals>

View File

@@ -72,7 +72,7 @@ void ParticleSystem::initializeBullet()
m_collisionConfiguration = new btDefaultCollisionConfiguration(); m_collisionConfiguration = new btDefaultCollisionConfiguration();
m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration); m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration);
//m_broadphase = new btDbvtBroadphase(); // m_broadphase = new btDbvtBroadphase();
//m_broadphase = new btAxisSweep3(btVector3(-3,-3,-3),btVector3(3,3,3)); //m_broadphase = new btAxisSweep3(btVector3(-3,-3,-3),btVector3(3,3,3));
m_broadphase = new btCudaBroadphase(m_params,m_params.numBodies+6); m_broadphase = new btCudaBroadphase(m_params,m_params.numBodies+6);
@@ -202,9 +202,9 @@ ParticleSystem::update(float deltaTime)
hPosData[i*4+1] = trans.getOrigin().getY(); hPosData[i*4+1] = trans.getOrigin().getY();
hPosData[i*4+2] = trans.getOrigin().getZ(); hPosData[i*4+2] = trans.getOrigin().getZ();
m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.; m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.f;
m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.; m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.f;
m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.; m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.f;
} }
m_broadphase->copyBuffersFromHostToDevice(); m_broadphase->copyBuffersFromHostToDevice();

View File

@@ -49,13 +49,13 @@
if( cudaSuccess != err) { \ if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \ mm_exit(EXIT_FAILURE); \
} \ } \
err = cudaThreadSynchronize(); \ err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \ if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \ mm_exit(EXIT_FAILURE); \
} } while (0) } } while (0)
@@ -64,22 +64,27 @@
if( cudaSuccess != err) { \ if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \ __FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \ mm_exit(EXIT_FAILURE); \
} } while (0) } } while (0)
# define MY_CUDA_SAFE_CALL( call) do { \ # define MY_CUDA_SAFE_CALL( call) do { \
MY_CUDA_SAFE_CALL_NO_SYNC(call); \ MY_CUDA_SAFE_CALL_NO_SYNC(call); \
cudaError err = cudaThreadSynchronize(); \ cudaError err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \ if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \ __FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \ mm_exit(EXIT_FAILURE); \
} } while (0) } } while (0)
extern "C" extern "C"
{ {
void mm_exit(int val)
{
exit(val);
}
void cudaInit(int argc, char **argv) void cudaInit(int argc, char **argv)
{ {
//CUT_DEVICE_INIT(argc, argv); //CUT_DEVICE_INIT(argc, argv);
@@ -112,6 +117,26 @@ void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int s
void copyArrayToDevice(void* device, const void* host, int offset, int size) void copyArrayToDevice(void* device, const void* host, int offset, int size)
{ {
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice)); MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
/*
cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice);
switch(err)
{
case cudaSuccess :
return;
case cudaErrorInvalidValue :
printf("\ncudaErrorInvalidValue : %d\n", err);
return;
case cudaErrorInvalidDevicePointer :
printf("\ncudaErrorInvalidDevicePointer : %d\n", err);
return;
case cudaErrorInvalidMemcpyDirection :
printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err);
return;
default :
printf("\nX3 : %d\n", err);
return;
}
*/
} }
void registerGLBufferObject(uint vbo) void registerGLBufferObject(uint vbo)
@@ -255,6 +280,27 @@ reorderDataAndFindCellStart(uint* particleHash,
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos)); MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
} }
#if 1
void
findCellStart( uint* particleHash,
uint* cellStart,
uint numBodies,
uint numCells)
{
int numThreads, numBlocks;
computeGridSize(numBodies, 256, numBlocks, numThreads);
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
findCellStartD<<< numBlocks, numThreads >>>(
(uint2 *) particleHash,
(uint *) cellStart);
CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
}
#endif
void void
collide(uint vboOldPos, uint vboNewPos, collide(uint vboOldPos, uint vboNewPos,
float* sortedPos, float* sortedVel, float* sortedPos, float* sortedVel,
@@ -328,4 +374,67 @@ collide(uint vboOldPos, uint vboNewPos,
#endif #endif
} }
void
btCudaFindOverlappingPairs( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles)
{
// cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4);
// if(err != cudaSuccess)
// {
// printf("\nAAAAA\n");
// }
int numThreads, numBlocks;
// computeGridSize(numParticles, 256, numBlocks, numThreads);
computeGridSize(numParticles, 64, numBlocks, numThreads);
// numThreads = 1;
// numBlocks = 1;
btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>(
(float4 *)pAABB,
(uint2*)pParticleHash,
(uint*)pCellStart,
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
numParticles
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD");
} // btCudaFindOverlappingPairs()
void
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
int numThreads, numBlocks;
computeGridSize(numParticles, 256, numBlocks, numThreads);
btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>(
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
(uint*)pPairScan
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // btCudaFindOverlappingPairs()
void
btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
int numThreads, numBlocks;
computeGridSize(numParticles, 256, numBlocks, numThreads);
btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>(
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
(uint*)pPairScan,
pPairOut
);
CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
}
} // extern "C" } // extern "C"

View File

@@ -42,6 +42,12 @@ reorderDataAndFindCellStart(uint* particleHash,
uint numBodies, uint numBodies,
uint numCells); uint numCells);
void
findCellStart( uint* particleHash,
uint* cellStart,
uint numBodies,
uint numCells);
void void
collide(uint vboOldPos, uint vboNewPos, collide(uint vboOldPos, uint vboNewPos,
float* sortedPos, float* sortedVel, float* sortedPos, float* sortedVel,
@@ -53,5 +59,20 @@ collide(uint vboOldPos, uint vboNewPos,
uint numBodies, uint numBodies,
uint numCells, uint numCells,
uint maxParticlesPerCell); uint maxParticlesPerCell);
void
btCudaFindOverlappingPairs( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles);
void
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles);
void btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles);
} }

View File

@@ -79,6 +79,7 @@ public:
class btDefaultCollisionConfiguration* m_collisionConfiguration; class btDefaultCollisionConfiguration* m_collisionConfiguration;
class btCollisionDispatcher* m_dispatcher; class btCollisionDispatcher* m_dispatcher;
class btCudaBroadphase* m_broadphase; class btCudaBroadphase* m_broadphase;
// class btBroadphaseInterface* m_broadphase;
class btSequentialImpulseConstraintSolver* m_constraintSolver; class btSequentialImpulseConstraintSolver* m_constraintSolver;
btAlignedObjectArray<class btRigidBody*> m_bulletParticles; btAlignedObjectArray<class btRigidBody*> m_bulletParticles;

View File

@@ -45,6 +45,9 @@
#include <GL/glut.h> #include <GL/glut.h>
#endif #endif
#include "LinearMath/btQuickprof.h"
#include "particleSystem.h" #include "particleSystem.h"
#include "render_particles.h" #include "render_particles.h"
#include "paramgl.h" #include "paramgl.h"
@@ -165,7 +168,9 @@ void display()
glColor3f(1.0, 1.0, 1.0); glColor3f(1.0, 1.0, 1.0);
glutWireCube(2.0); glutWireCube(2.0);
// collider
// collider
glPushMatrix(); glPushMatrix();
float4 p = psystem->getColliderPos(); float4 p = psystem->getColliderPos();
glTranslatef(p.x, p.y, p.z); glTranslatef(p.x, p.y, p.z);
@@ -188,6 +193,19 @@ void display()
} }
psystem->debugDraw(); psystem->debugDraw();
glDisable(GL_DEPTH_TEST);
// glDisable(GL_LIGHTING);
// glColor3f(0, 0, 0);
float offsX = 10.f;
float offsY = 10.f;
renderer->showProfileInfo(offsX, offsY, 20.f);
// glEnable(GL_LIGHTING);
glEnable(GL_DEPTH_TEST);
glutSwapBuffers(); glutSwapBuffers();
@@ -357,6 +375,19 @@ inline float frand()
// commented out to remove unused parameter warnings in Linux // commented out to remove unused parameter warnings in Linux
void key(unsigned char key, int /*x*/, int /*y*/) void key(unsigned char key, int /*x*/, int /*y*/)
{ {
#ifndef BT_NO_PROFILE
if (key >= 0x31 && key < 0x37)
{
int child = key-0x31;
renderer->m_profileIterator->Enter_Child(child);
}
if (key==0x30)
{
renderer->m_profileIterator->Enter_Parent();
}
#endif //BT_NO_PROFILE
switch (key) switch (key)
{ {
case ' ': case ' ':
@@ -394,13 +425,13 @@ void key(unsigned char key, int /*x*/, int /*y*/)
displayEnabled = !displayEnabled; displayEnabled = !displayEnabled;
break; break;
case '1': case 'g':
psystem->reset(ParticleSystem::CONFIG_GRID); psystem->reset(ParticleSystem::CONFIG_GRID);
break; break;
case '2': case 'a':
psystem->reset(ParticleSystem::CONFIG_RANDOM); psystem->reset(ParticleSystem::CONFIG_RANDOM);
break; break;
case '3': case 'e':
{ {
// inject a sphere of particles // inject a sphere of particles
float pr = psystem->getParticleRadius(); float pr = psystem->getParticleRadius();
@@ -414,7 +445,7 @@ void key(unsigned char key, int /*x*/, int /*y*/)
psystem->addSphere(0, pos, vel, ballr, pr*2.0f); psystem->addSphere(0, pos, vel, ballr, pr*2.0f);
} }
break; break;
case '4': case 'b':
{ {
// shoot ball from camera // shoot ball from camera
float pr = psystem->getParticleRadius(); float pr = psystem->getParticleRadius();
@@ -484,9 +515,10 @@ void mainMenu(int i)
void initMenus() void initMenus()
{ {
glutCreateMenu(mainMenu); glutCreateMenu(mainMenu);
glutAddMenuEntry("Reset block [1]", '1'); glutAddMenuEntry("Reset block [g]", 'g');
glutAddMenuEntry("Reset random [2]", '2'); glutAddMenuEntry("Reset random [a]", 'a');
glutAddMenuEntry("Add sphere [3]", '3'); glutAddMenuEntry("Add sphere [e]", 'e');
glutAddMenuEntry("Shoot ball [b]", 'b');
glutAddMenuEntry("View mode [v]", 'v'); glutAddMenuEntry("View mode [v]", 'v');
glutAddMenuEntry("Move cursor mode [m]", 'm'); glutAddMenuEntry("Move cursor mode [m]", 'm');
glutAddMenuEntry("Toggle point rendering [p]", 'p'); glutAddMenuEntry("Toggle point rendering [p]", 'p');
@@ -504,7 +536,8 @@ void initMenus()
int int
main(int argc, char** argv) main(int argc, char** argv)
{ {
numParticles =1024;//1024;//64;//16380;//32768; // numParticles =1024;//1024;//64;//16380;//32768;
numParticles =8192;
uint gridDim = 64; uint gridDim = 64;
numIterations = 0; numIterations = 0;

View File

@@ -199,6 +199,35 @@ reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by has
} }
__global__ void
findCellStartD(uint2* particleHash, // particle id sorted by hash
uint* cellStart)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
uint2 sortedData = particleHash[index];
// Load hash data into shared memory so that we can look
// at neighboring particle's hash value without loading
// two hash values per thread
__shared__ uint sharedHash[257];
sharedHash[threadIdx.x+1] = sortedData.x;
if (index > 0 && threadIdx.x == 0)
{
// first thread in block must load neighbor particle hash
volatile uint2 prevData = particleHash[index-1];
sharedHash[0] = prevData.x;
}
__syncthreads();
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
{
cellStart[sortedData.x] = index;
}
}
// collide two spheres using DEM method // collide two spheres using DEM method
__device__ float3 collideSpheres(float4 posA, float4 posB, __device__ float3 collideSpheres(float4 posA, float4 posB,
float4 velA, float4 velB, float4 velA, float4 velB,
@@ -378,4 +407,217 @@ collideD(float4* newPos, float4* newVel,
#endif #endif
} }
__device__
uint cudaTestAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
(min0.y <= max1.y)&& (min1.y <= max0.y) &&
(min0.z <= max1.z)&& (min1.z <= max0.z);
}
#if 0
__device__
void findPairsInCell( int3 gridPos,
uint index,
uint2* pParticleHash,
uint* pCellStart,
float4* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr)
{
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return;
}
uint gridHash = calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
return; // cell empty
// iterate over particles in this cell
float4 min0 = pAABB[index*2];
float4 max0 = pAABB[index*2 + 1];
uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
// uint* pBuf = pPairBuff; // + curr;
for(uint q=0; q < params.maxParticlesPerCell; q++)
{
uint index2 = bucketStart + q;
uint2 cellData = pParticleHash[index2];
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 != index) // check not colliding with self
{
float4 min1 = pAABB[index2*2];
float4 max1 = pAABB[index2*2 + 1];
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
curr++;
}
//*pBuf = index2;
//pPairBuff[curr] = index2;
//pPairBuff[0] = index2;
//pBuf++;
}
}
// uint del = curr - start;
if(curr != start)
{
curr = start;
}
pPairBuffStartCurr[index] = make_uint2(start, curr);
//
return;
}
#endif
__device__
void findPairsInCell( int3 gridPos,
uint index,
uint2* pParticleHash,
uint* pCellStart,
float4* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles)
{
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return;
}
uint gridHash = calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
return; // cell empty
// iterate over particles in this cell
float4 min0 = pAABB[index*2];
float4 max0 = pAABB[index*2+1];
uint2 sortedData = pParticleHash[index];
uint unsorted_indx = sortedData.y;
uint2 start_curr = pPairBuffStartCurr[unsorted_indx];
// uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
uint bucketEnd = bucketStart + params.maxParticlesPerCell;
bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd;
for(uint index2=bucketStart; index2 < bucketEnd; index2++)
{
uint2 cellData = pParticleHash[index2];
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 < index) // check not colliding with self
{
float4 min1 = pAABB[index2*2];
float4 max1 = pAABB[index2*2 + 1];
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
uint k;
uint unsorted_indx2 = cellData.y;
for(k = 0; k < curr; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
if(old_pair == unsorted_indx2)
{
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
break;
}
}
if(k == curr)
{
pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG;
curr++;
}
}
}
}
pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr);
// pPairBuffStartCurr[index] = make_uint2(start, curr);
return;
}
__global__ void
btCudaFindOverlappingPairsD(
float4* pAABB,
uint2* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numParticles
)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4 bbMin = pAABB[index*2];
float4 bbMax = pAABB[index*2+1];
float4 pos = (bbMin + bbMax) * 0.5f;
// get address in grid
int3 gridPos = calcGridPos(pos);
// examine only neighbouring cells
for(int z=-1; z<=1; z++) {
for(int y=-1; y<=1; y++) {
for(int x=-1; x<=1; x++) {
findPairsInCell(gridPos + make_int3(x, y, z), index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
}
}
}
}
__global__ void
btCudaComputePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
uint *pInp = pPairBuff + start;
uint num_changes = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
num_changes++;
}
}
pPairScan[index+1] = num_changes;
}
__global__ void
btCudaSqueezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
{
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
uint2 start_curr = pPairBuffStartCurr[index];
uint start = start_curr.x;
uint curr = start_curr.y;
uint* pInp = pPairBuff + start;
uint* pOut = pPairOut + pPairScan[index];
uint* pOut2 = pInp;
uint num = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
{
*pOut = *pInp;
pOut++;
}
if((*pInp) & BT_CUDA_PAIR_ANY_FLG)
{
*pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG);
pOut2++;
num++;
}
}
pPairBuffStartCurr[index] = make_uint2(start, num);
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
#endif #endif

View File

@@ -14,6 +14,12 @@
#define FETCH(t, i) t[i] #define FETCH(t, i) t[i]
#endif #endif
#define BT_CUDA_PAIR_FOUND_FLG (0x40000000)
#define BT_CUDA_PAIR_NEW_FLG (0x20000000)
#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG)
#include "vector_types.h" #include "vector_types.h"
typedef unsigned int uint; typedef unsigned int uint;

View File

@@ -33,8 +33,12 @@
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include "BMF_Api.h"
#include "render_particles.h" #include "render_particles.h"
#include "shaders.h" #include "shaders.h"
#include "LinearMath/btQuickprof.h"
#include "paramgl.h"
#ifndef M_PI #ifndef M_PI
#define M_PI 3.1415926535897932384626433832795 #define M_PI 3.1415926535897932384626433832795
@@ -50,6 +54,9 @@ ParticleRenderer::ParticleRenderer()
m_colorVBO(0) m_colorVBO(0)
{ {
_initGL(); _initGL();
#ifndef BT_NO_PROFILE
m_profileIterator = CProfileManager::Get_Iterator();
#endif //BT_NO_PROFILE
} }
ParticleRenderer::~ParticleRenderer() ParticleRenderer::~ParticleRenderer()
@@ -178,3 +185,77 @@ void ParticleRenderer::_initGL()
glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE); glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
#endif #endif
} }
#if 1
void ParticleRenderer::showProfileInfo(float& xOffset,float& yStart, float yIncr)
{
#ifndef BT_NO_PROFILE
static double time_since_reset = 0.f;
// if (!m_idle)
{
time_since_reset = CProfileManager::Get_Time_Since_Reset();
}
beginWinCoords();
{
//recompute profiling data, and store profile strings
char blockTime[128];
double totalTime = 0;
int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
m_profileIterator->First();
double parent_time = m_profileIterator->Is_Root() ? time_since_reset : m_profileIterator->Get_Current_Parent_Total_Time();
{
sprintf(blockTime,"--- Profiling: %s (total running time: %.3f ms) ---", m_profileIterator->Get_Current_Parent_Name(), parent_time );
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
sprintf(blockTime,"press number (1,2...) to display child timings, or 0 to go up to parent" );
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
}
double accumulated_time = 0.f;
for (int i = 0; !m_profileIterator->Is_Done(); m_profileIterator->Next())
{
double current_total_time = m_profileIterator->Get_Current_Total_Time();
accumulated_time += current_total_time;
double fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
sprintf(blockTime,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)",
++i, m_profileIterator->Get_Current_Name(), fraction,
(current_total_time / (double)frames_since_reset),m_profileIterator->Get_Current_Total_Calls());
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
totalTime += current_total_time;
}
sprintf(blockTime,"%s (%.3f %%) :: %.3f ms", "Unaccounted",
// (min(0, time_since_reset - totalTime) / time_since_reset) * 100);
parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
sprintf(blockTime,"-------------------------------------------------");
displayProfileString(xOffset,yStart,blockTime);
yStart += yIncr;
}
endWinCoords();
#endif//BT_NO_PROFILE
}
void ParticleRenderer::displayProfileString(int xOffset,int yStart,char* message)
{
glRasterPos3f(xOffset,yStart,0);
BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),message);
}
#endif

View File

@@ -30,6 +30,9 @@
#ifndef __RENDER_PARTICLES__ #ifndef __RENDER_PARTICLES__
#define __RENDER_PARTICLES__ #define __RENDER_PARTICLES__
class CProfileIterator;
class ParticleRenderer class ParticleRenderer
{ {
public: public:
@@ -55,6 +58,11 @@ public:
void setFOV(float fov) { m_fov = fov; } void setFOV(float fov) { m_fov = fov; }
void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; } void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; }
void showProfileInfo(float& xOffset,float& yStart, float yIncr);
void displayProfileString(int xOffset,int yStart,char* message);
class CProfileIterator* m_profileIterator;
protected: // methods protected: // methods
void _initGL(); void _initGL();
void _drawPoints(); void _drawPoints();