more CUDA work
This commit is contained in:
@@ -34,17 +34,21 @@ subject to the following restrictions:
|
||||
#include <GL/glut.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define MAX_COLL_PAIR_PER_PARTICLE 64
|
||||
|
||||
#define USE_SORT 1
|
||||
#define USE_OLD 0
|
||||
#define USE_CUDA 1
|
||||
|
||||
#include "btCudaBroadphase.h"
|
||||
#include "LinearMath/btAlignedAllocator.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
||||
|
||||
btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) :
|
||||
btSimpleBroadphase(maxProxies,
|
||||
new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
|
||||
// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
|
||||
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
|
||||
m_bInitialized(false),
|
||||
m_numParticles(simParams.numBodies),
|
||||
m_hPos(0),
|
||||
@@ -155,6 +159,26 @@ void btCudaBroadphase::_initialize(int numParticles)
|
||||
m_hCellStart = new uint[m_simParams.numCells];
|
||||
memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint));
|
||||
|
||||
|
||||
m_hPairBuffStartCurr = new unsigned int[m_numParticles * 2 + 1];
|
||||
// --------------- for now, init with MAX_COLL_PAIR_PER_PARTICLE for each particle
|
||||
m_hPairBuffStartCurr[0] = 0;
|
||||
m_hPairBuffStartCurr[1] = 0;
|
||||
for(uint i = 1; i <= m_numParticles; i++)
|
||||
{
|
||||
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + MAX_COLL_PAIR_PER_PARTICLE;
|
||||
// m_hPairBuffStartCurr[i * 2 + 1] = m_hPairBuffStartCurr[i * 2];
|
||||
m_hPairBuffStartCurr[i * 2 + 1] = 0;
|
||||
}
|
||||
//----------------
|
||||
m_hAABB = new float[numParticles*4*2]; // BB Min & Max
|
||||
|
||||
m_hPairBuff = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
|
||||
memset(m_hPairBuff, 0x00, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*4);
|
||||
|
||||
m_hPairScan = new unsigned int[m_numParticles + 1];
|
||||
m_hPairOut = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
|
||||
|
||||
// allocate GPU data
|
||||
unsigned int memSize = sizeof(float) * 4 * m_numParticles;
|
||||
|
||||
@@ -176,7 +200,18 @@ void btCudaBroadphase::_initialize(int numParticles)
|
||||
allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
|
||||
#endif
|
||||
|
||||
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
|
||||
allocateArray((void**)&m_dPairBuff, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
|
||||
copyArrayToDevice(m_dPairBuff, m_hPairBuff, 0, sizeof(unsigned int)*m_numParticles*MAX_COLL_PAIR_PER_PARTICLE);
|
||||
|
||||
allocateArray((void**)&m_dPairBuffStartCurr, (m_numParticles*2 + 1)*sizeof(unsigned int));
|
||||
allocateArray((void**)&m_dAABB, memSize*2);
|
||||
|
||||
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, 0, sizeof(unsigned int)*(m_numParticles*2 + 1));
|
||||
|
||||
allocateArray((void**)&m_dPairScan, (m_numParticles + 1)*sizeof(unsigned int));
|
||||
allocateArray((void**)&m_dPairOut, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
|
||||
|
||||
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
|
||||
|
||||
#if 1
|
||||
// fill color buffer
|
||||
@@ -201,6 +236,10 @@ void btCudaBroadphase::_initialize(int numParticles)
|
||||
|
||||
setParameters(&m_simParams);
|
||||
|
||||
// Pair cache data
|
||||
m_maxPairsPerParticle = 0;
|
||||
m_numOverflows = 0;
|
||||
|
||||
m_bInitialized = true;
|
||||
}
|
||||
|
||||
@@ -217,6 +256,14 @@ void btCudaBroadphase::_finalize()
|
||||
delete [] m_hGridCounters;
|
||||
delete [] m_hGridCells;
|
||||
|
||||
delete [] m_dPairBuff;
|
||||
delete [] m_dPairBuffStartCurr;
|
||||
delete [] m_hAABB;
|
||||
|
||||
delete [] m_hPairBuff;
|
||||
delete [] m_hPairScan;
|
||||
delete [] m_hPairOut;
|
||||
|
||||
freeArray(m_dVel[0]);
|
||||
freeArray(m_dVel[1]);
|
||||
|
||||
@@ -231,12 +278,20 @@ void btCudaBroadphase::_finalize()
|
||||
freeArray(m_dGridCounters);
|
||||
freeArray(m_dGridCells);
|
||||
#endif
|
||||
freeArray(m_dPairBuff);
|
||||
freeArray(m_dPairBuffStartCurr);
|
||||
freeArray(m_dAABB);
|
||||
|
||||
freeArray(m_hPairBuff);
|
||||
freeArray(m_hPairScan);
|
||||
freeArray(m_hPairOut);
|
||||
|
||||
unregisterGLBufferObject(m_posVbo[0]);
|
||||
unregisterGLBufferObject(m_posVbo[1]);
|
||||
glDeleteBuffers(2, (const GLuint*)m_posVbo);
|
||||
|
||||
glDeleteBuffers(1, (const GLuint*)&m_colorVBO);
|
||||
|
||||
}
|
||||
|
||||
btCudaBroadphase::~btCudaBroadphase()
|
||||
@@ -300,9 +355,6 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
static int frameCount = 0;
|
||||
//printf("framecount=%d\n",frameCount++);
|
||||
|
||||
|
||||
int numRejected=0;
|
||||
|
||||
if (m_numHandles >= 0)
|
||||
{
|
||||
|
||||
@@ -373,9 +425,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
// sort and search method
|
||||
|
||||
// calculate hash
|
||||
calcHash(m_posVbo[m_currentPosRead],
|
||||
m_dParticleHash[0],
|
||||
m_numParticles);
|
||||
{
|
||||
BT_PROFILE("calcHash-- CUDA");
|
||||
calcHash( m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
|
||||
}
|
||||
|
||||
#if DEBUG_GRID
|
||||
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
||||
@@ -388,7 +441,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
|
||||
|
||||
// sort particles based on hash
|
||||
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
|
||||
{
|
||||
BT_PROFILE("RadixSort-- CUDA");
|
||||
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
|
||||
}
|
||||
|
||||
#if DEBUG_GRID
|
||||
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
||||
@@ -401,14 +457,24 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
|
||||
// reorder particle arrays into sorted order and
|
||||
// find start of each cell
|
||||
reorderDataAndFindCellStart(m_dParticleHash[0],
|
||||
m_posVbo[m_currentPosRead],
|
||||
m_dVel[m_currentVelRead],
|
||||
m_dSortedPos,
|
||||
m_dSortedVel,
|
||||
m_dCellStart,
|
||||
m_numParticles,
|
||||
m_simParams.numCells);
|
||||
{
|
||||
BT_PROFILE("Reorder-- CUDA");
|
||||
#if USE_OLD
|
||||
reorderDataAndFindCellStart(m_dParticleHash[0],
|
||||
m_posVbo[m_currentPosRead],
|
||||
m_dVel[m_currentVelRead],
|
||||
m_dSortedPos,
|
||||
m_dSortedVel,
|
||||
m_dCellStart,
|
||||
m_numParticles,
|
||||
m_simParams.numCells);
|
||||
#else
|
||||
findCellStart(m_dParticleHash[0],
|
||||
m_dCellStart,
|
||||
m_numParticles,
|
||||
m_simParams.numCells);
|
||||
#endif
|
||||
}
|
||||
|
||||
//#define DEBUG_GRID2
|
||||
#ifdef DEBUG_GRID2
|
||||
@@ -455,9 +521,10 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
}
|
||||
*/
|
||||
|
||||
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
||||
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
|
||||
copyArrayFromDevice((void *) m_hSortedPos, (void*) m_dSortedPos,0 , sizeof(float)*4*m_numParticles);
|
||||
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
||||
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
|
||||
|
||||
// copyArrayFromDevice((void *) m_hSortedPos, (void*) m_dSortedPos,0 , sizeof(float)*4*m_numParticles);
|
||||
|
||||
//#define DEBUG_INDICES 1
|
||||
#ifdef DEBUG_INDICES
|
||||
@@ -482,6 +549,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
// }
|
||||
}
|
||||
|
||||
#if USE_OLD
|
||||
//printf("particle hash sorted:\n");
|
||||
for(uint pi=0; pi<m_numParticles; pi++)
|
||||
{
|
||||
@@ -494,7 +562,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
|
||||
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
|
||||
btVector3 mypos = (proxy0->m_min+proxy0->m_max)*0.5f;
|
||||
btVector3 mypos = (proxy0->m_aabbMin + proxy0->m_aabbMax)*0.5f;
|
||||
|
||||
// float4* p = (float4*)&m_hSortedPos[index*4];
|
||||
|
||||
@@ -504,6 +572,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
|
||||
particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
|
||||
|
||||
int numRejected=0;
|
||||
|
||||
//for(int z=0; z<1; z++)
|
||||
for(int z=-1; z<=1; z++)
|
||||
@@ -575,15 +644,19 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#else // USE_OLD
|
||||
btBroadphasePairArray& overlappingPairArrayA = m_pairCache->getOverlappingPairArray();
|
||||
findOverlappingPairs(dispatcher);
|
||||
#endif
|
||||
|
||||
#endif //_USE_BRUTEFORCE_N
|
||||
|
||||
#if USE_OLD
|
||||
///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache
|
||||
if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
|
||||
{
|
||||
|
||||
BT_PROFILE("Cleaning-- CPU");
|
||||
|
||||
btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray();
|
||||
|
||||
//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
|
||||
@@ -622,6 +695,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
needsRemoval = false;//callback->processOverlap(pair);
|
||||
} else
|
||||
{
|
||||
bool hasOverlapA = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
|
||||
needsRemoval = true;
|
||||
}
|
||||
} else
|
||||
@@ -629,7 +703,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
//remove duplicate
|
||||
needsRemoval = true;
|
||||
//should have no algorithm
|
||||
btAssert(!pair.m_algorithm);
|
||||
// btAssert(!pair.m_algorithm);
|
||||
}
|
||||
|
||||
if (needsRemoval)
|
||||
@@ -661,6 +735,7 @@ void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
|
||||
#endif//CLEAN_INVALID_PAIRS
|
||||
|
||||
}
|
||||
#endif // USE_OLD
|
||||
}
|
||||
|
||||
//printf("numRejected=%d\n",numRejected);
|
||||
@@ -1137,3 +1212,333 @@ void btCudaBroadphase::quickHack2()
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void btCudaBroadphase::findOverlappingPairs(btDispatcher* dispatcher)
|
||||
{
|
||||
BT_PROFILE("findOverlappingPairs -- CPU");
|
||||
int numRejected=0;
|
||||
m_numPairsAdded = 0;
|
||||
|
||||
{
|
||||
BT_PROFILE("copy AABB -- CPU");
|
||||
|
||||
// do it faster ?
|
||||
float* pVec = m_hAABB;
|
||||
for(uint pi=0; pi<m_numParticles; pi++)
|
||||
{
|
||||
int index = m_hParticleHash[pi*2+1];
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
|
||||
*pVec++ = proxy0->m_aabbMin.getX();
|
||||
*pVec++ = proxy0->m_aabbMin.getY();
|
||||
*pVec++ = proxy0->m_aabbMin.getZ();
|
||||
*pVec++ = 0.0F;
|
||||
*pVec++ = proxy0->m_aabbMax.getX();
|
||||
*pVec++ = proxy0->m_aabbMax.getY();
|
||||
*pVec++ = proxy0->m_aabbMax.getZ();
|
||||
*pVec++ = 0.0F;
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_CUDA
|
||||
{
|
||||
|
||||
{
|
||||
BT_PROFILE("CopyBB to CUDA");
|
||||
copyArrayToDevice(m_dAABB, m_hAABB, 0, sizeof(float)*4*2*m_numParticles);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("btCudaFindOverlappingPairs");
|
||||
btCudaFindOverlappingPairs( m_dAABB,
|
||||
m_dParticleHash[0],
|
||||
m_dCellStart,
|
||||
m_dPairBuff,
|
||||
m_dPairBuffStartCurr,
|
||||
m_numParticles
|
||||
);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("btCudaComputePairCacheChanges");
|
||||
btCudaComputePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_numParticles);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("scanOverlappingPairBuffCPU");
|
||||
copyArrayFromDevice(m_hPairScan, m_dPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
|
||||
scanOverlappingPairBuffCPU();
|
||||
copyArrayToDevice(m_dPairScan, m_hPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
|
||||
}
|
||||
{
|
||||
BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
|
||||
btCudaSqueezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_numParticles);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
|
||||
copyArrayFromDevice(m_hPairOut, m_dPairOut, 0, sizeof(unsigned int) * m_hPairScan[m_numParticles]);
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
findOverlappingPairsCPU( m_hAABB,
|
||||
m_hParticleHash,
|
||||
m_hCellStart,
|
||||
m_hPairBuff,
|
||||
m_hPairBuffStartCurr,
|
||||
m_numParticles);
|
||||
computePairCacheChangesCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_numParticles);
|
||||
scanOverlappingPairBuffCPU();
|
||||
squeezeOverlappingPairBuffCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_numParticles);
|
||||
#endif
|
||||
{
|
||||
BT_PROFILE("addPairsToCache");
|
||||
addPairsToCacheCPU(dispatcher);
|
||||
}
|
||||
} // btCudaBroadphase::fillOverlappingPairCache()
|
||||
|
||||
|
||||
|
||||
// calculate position in uniform grid
|
||||
int3 btCudaBroadphase::calcGridPosCPU(float4 p)
|
||||
{
|
||||
int3 gridPos;
|
||||
gridPos.x = floor((p.x - m_simParams.worldOrigin.x) / m_simParams.cellSize.x);
|
||||
gridPos.y = floor((p.y - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
|
||||
gridPos.z = floor((p.z - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
|
||||
return gridPos;
|
||||
} // btCudaBroadphase::calcGridPos()
|
||||
|
||||
// calculate address in grid from position (clamping to edges)
|
||||
uint btCudaBroadphase::calcGridHashCPU(int3 gridPos)
|
||||
{
|
||||
gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1));
|
||||
gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1));
|
||||
gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1));
|
||||
return (gridPos.z * m_simParams.gridSize.y) * m_simParams.gridSize.x + gridPos.y * m_simParams.gridSize.x + gridPos.x;
|
||||
}
|
||||
|
||||
void btCudaBroadphase::computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
|
||||
{
|
||||
for(uint i = 0; i < numParticles; i++)
|
||||
{
|
||||
computePairCacheChangesCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan);
|
||||
}
|
||||
}
|
||||
|
||||
void btCudaBroadphase::computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
|
||||
{
|
||||
uint2 start_curr = pPairBuffStartCurr[index];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint *pInp = pPairBuff + start;
|
||||
uint num_changes = 0;
|
||||
for(uint k = 0; k < curr; k++, pInp++)
|
||||
{
|
||||
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
|
||||
{
|
||||
num_changes++;
|
||||
}
|
||||
}
|
||||
pPairScan[index+1] = num_changes;
|
||||
}
|
||||
|
||||
void btCudaBroadphase::findOverlappingPairsCPU( float* pAABB,
|
||||
uint* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
BT_PROFILE("findOverlappingPairsCPU -- CPU");
|
||||
for(uint i = 0; i < numParticles; i++)
|
||||
{
|
||||
findOverlappingPairsCPU_D(
|
||||
i,
|
||||
(float4 *)pAABB,
|
||||
(uint2*)pParticleHash,
|
||||
(uint*)pCellStart,
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
numParticles);
|
||||
}
|
||||
} // btCudaBroadphase::findOverlappingPairsCPU()
|
||||
|
||||
void btCudaBroadphase::findOverlappingPairsCPU_D( uint index,
|
||||
float4* pAABB,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
float4 bbMin = pAABB[index*2];
|
||||
float4 bbMax = pAABB[index*2+1];
|
||||
float4 pos;
|
||||
pos.x = (bbMin.x + bbMax.x) * 0.5f;
|
||||
pos.y = (bbMin.y + bbMax.y) * 0.5f;
|
||||
pos.z = (bbMin.z + bbMax.z) * 0.5f;
|
||||
|
||||
// get address in grid
|
||||
int3 gridPos = calcGridPosCPU(pos);
|
||||
// examine only neighbouring cells
|
||||
for(int z=-1; z<=1; z++) {
|
||||
for(int y=-1; y<=1; y++) {
|
||||
for(int x=-1; x<=1; x++) {
|
||||
int3 gridPos2;
|
||||
gridPos2.x = gridPos.x + x;
|
||||
gridPos2.y = gridPos.y + y;
|
||||
gridPos2.z = gridPos.z + z;
|
||||
findPairsInCellCPU(gridPos2, index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // btCudaBroadphase::findOverlappingPairsCPU_D()
|
||||
|
||||
|
||||
void btCudaBroadphase::findPairsInCellCPU( int3 gridPos,
|
||||
uint index,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
float4* pAABB,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) ||
|
||||
(gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) ||
|
||||
(gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1)) {
|
||||
return;
|
||||
}
|
||||
uint gridHash = calcGridHashCPU(gridPos);
|
||||
// get start of bucket for this cell
|
||||
uint bucketStart = pCellStart[gridHash];
|
||||
if (bucketStart == 0xffffffff)
|
||||
return; // cell empty
|
||||
// iterate over particles in this cell
|
||||
float4 min0 = pAABB[index*2];
|
||||
float4 max0 = pAABB[index*2+1];
|
||||
|
||||
uint2 sortedData = pParticleHash[index];
|
||||
uint unsorted_indx = sortedData.y;
|
||||
uint2 start_curr = pPairBuffStartCurr[unsorted_indx];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint curr1 = curr;
|
||||
uint bucketEnd = bucketStart + m_simParams.maxParticlesPerCell;
|
||||
bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd;
|
||||
for(uint index2=bucketStart; index2 < bucketEnd; index2++)
|
||||
{
|
||||
uint2 cellData = pParticleHash[index2];
|
||||
if (cellData.x != gridHash) break; // no longer in same bucket
|
||||
if (index2 != index) // check not colliding with self
|
||||
{
|
||||
float4 min1 = pAABB[index2*2];
|
||||
float4 max1 = pAABB[index2*2 + 1];
|
||||
if(cudaTestAABBOverlapCPU(min0, max0, min1, max1))
|
||||
{
|
||||
uint k;
|
||||
uint unsorted_indx2 = cellData.y;
|
||||
for(k = 0; k < curr1; k++)
|
||||
{
|
||||
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
|
||||
if(old_pair == unsorted_indx2)
|
||||
{
|
||||
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(k == curr1)
|
||||
{
|
||||
pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG;
|
||||
curr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr);
|
||||
return;
|
||||
} // btCudaBroadphase::findPairsInCellCPU()
|
||||
|
||||
uint btCudaBroadphase::cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1)
|
||||
{
|
||||
return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
|
||||
(min0.y <= max1.y)&& (min1.y <= max0.y) &&
|
||||
(min0.z <= max1.z)&& (min1.z <= max0.z);
|
||||
} // btCudaBroadphase::cudaTestAABBOverlapCPU()
|
||||
|
||||
|
||||
void btCudaBroadphase::scanOverlappingPairBuffCPU()
|
||||
{
|
||||
m_hPairScan[0] = 0;
|
||||
for(uint i = 1; i <= m_numParticles; i++)
|
||||
{
|
||||
unsigned int delta = m_hPairScan[i];
|
||||
m_hPairScan[i] = m_hPairScan[i-1] + delta;
|
||||
}
|
||||
} // btCudaBroadphase::scanOverlappingPairBuffCPU()
|
||||
|
||||
void btCudaBroadphase::squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
|
||||
{
|
||||
for(uint i = 0; i < numParticles; i++)
|
||||
{
|
||||
squeezeOverlappingPairBuffCPU_D(i, pPairBuff, (uint2*)pPairBuffStartCurr, pPairScan, pPairOut);
|
||||
}
|
||||
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU()
|
||||
|
||||
void btCudaBroadphase::squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
|
||||
{
|
||||
uint2 start_curr = pPairBuffStartCurr[index];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint* pInp = pPairBuff + start;
|
||||
uint* pOut = pPairOut + pPairScan[index];
|
||||
uint* pOut2 = pInp;
|
||||
uint num = 0;
|
||||
for(uint k = 0; k < curr; k++, pInp++)
|
||||
{
|
||||
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
|
||||
{
|
||||
*pOut = *pInp;
|
||||
pOut++;
|
||||
}
|
||||
if((*pInp) & BT_CUDA_PAIR_ANY_FLG)
|
||||
{
|
||||
*pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG);
|
||||
pOut2++;
|
||||
num++;
|
||||
}
|
||||
}
|
||||
pPairBuffStartCurr[index] = make_uint2(start, num);
|
||||
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
|
||||
|
||||
unsigned int gNumPairsAdded = 0;
|
||||
|
||||
void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher)
|
||||
{
|
||||
gNumPairsAdded = 0;
|
||||
for(uint i = 0; i < m_numParticles; i++)
|
||||
{
|
||||
unsigned int num = m_hPairScan[i+1] - m_hPairScan[i];
|
||||
if(!num)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
unsigned int* pInp = m_hPairOut + m_hPairScan[i];
|
||||
unsigned int index0 = i;
|
||||
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index0];
|
||||
for(uint j = 0; j < num; j++)
|
||||
{
|
||||
unsigned int indx1_s = pInp[j];
|
||||
unsigned int index1 = indx1_s & (~BT_CUDA_PAIR_ANY_FLG);
|
||||
btSimpleBroadphaseProxy* proxy1 = &m_pHandles[index1];
|
||||
if(indx1_s & BT_CUDA_PAIR_NEW_FLG)
|
||||
{
|
||||
m_pairCache->addOverlappingPair(proxy0,proxy1);
|
||||
gNumPairsAdded++;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // btCudaBroadphase::addPairsToCacheCPU()
|
||||
|
||||
@@ -37,7 +37,13 @@ class btCudaBroadphase : public btSimpleBroadphase
|
||||
|
||||
unsigned int* m_hParticleHash;
|
||||
unsigned int* m_hCellStart;
|
||||
|
||||
unsigned int* m_hPairBuffStartCurr;
|
||||
float* m_hAABB;
|
||||
|
||||
unsigned int* m_hPairBuff;
|
||||
unsigned int* m_hPairScan;
|
||||
unsigned int* m_hPairOut;
|
||||
|
||||
// GPU data
|
||||
float* m_dPos[2];
|
||||
@@ -59,6 +65,14 @@ class btCudaBroadphase : public btSimpleBroadphase
|
||||
unsigned int m_currentPosRead, m_currentVelRead;
|
||||
unsigned int m_currentPosWrite, m_currentVelWrite;
|
||||
|
||||
// buffers on GPU
|
||||
unsigned int* m_dPairBuff;
|
||||
unsigned int* m_dPairBuffStartCurr;
|
||||
float* m_dAABB;
|
||||
|
||||
unsigned int* m_dPairScan;
|
||||
unsigned int* m_dPairOut;
|
||||
|
||||
// params
|
||||
struct SimParams& m_simParams;
|
||||
|
||||
@@ -66,6 +80,11 @@ class btCudaBroadphase : public btSimpleBroadphase
|
||||
|
||||
unsigned int m_maxParticlesPerCell;
|
||||
|
||||
// debug
|
||||
unsigned int m_numPairsAdded;
|
||||
unsigned int m_maxPairsPerParticle;
|
||||
unsigned int m_numOverflows;
|
||||
|
||||
protected:
|
||||
|
||||
unsigned int createVBO(unsigned int size);
|
||||
@@ -120,5 +139,48 @@ public:
|
||||
void quickHack2();
|
||||
void integrate();
|
||||
|
||||
|
||||
void findOverlappingPairs(btDispatcher* dispatcher);
|
||||
|
||||
int3 calcGridPosCPU(float4 p);
|
||||
uint calcGridHashCPU(int3 gridPos);
|
||||
|
||||
void computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles);
|
||||
void computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan);
|
||||
|
||||
void findOverlappingPairsCPU( float* pAABB,
|
||||
uint* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint* pPairBuffStartCurr,
|
||||
uint numParticles);
|
||||
void findOverlappingPairsCPU_D( uint index,
|
||||
float4* pAABB,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles);
|
||||
|
||||
void findPairsInCellCPU(int3 gridPos,
|
||||
uint index,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
float4* pAABB,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles);
|
||||
uint cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1);
|
||||
|
||||
|
||||
void scanOverlappingPairBuffCPU();
|
||||
|
||||
void squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles);
|
||||
void squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut);
|
||||
|
||||
void addPairsToCacheCPU(btDispatcher* dispatcher);
|
||||
void resetOverlappingPairBuffCPU();
|
||||
|
||||
|
||||
};
|
||||
#endif //CUDA_BROADPHASE_H
|
||||
@@ -44,7 +44,7 @@
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src;../../Demos/OpenGL"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
@@ -100,249 +100,6 @@
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src"
|
||||
PreprocessorDefinitions="WIN32;_CONSOLE"
|
||||
RuntimeLibrary="0"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../ReleaseCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuDebug|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="1"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../EmuDebugCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
ProgramDatabaseFile="$(OutDir)/btCudaBroadphase.pdb"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuRelease|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src"
|
||||
PreprocessorDefinitions="WIN32;_CONSOLE"
|
||||
RuntimeLibrary="0"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../EmuReleaseCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
@@ -426,6 +183,86 @@
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src;../../Demos/OpenGL"
|
||||
PreprocessorDefinitions="WIN32;_CONSOLE"
|
||||
RuntimeLibrary="0"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../ReleaseCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
@@ -507,6 +344,88 @@
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuDebug|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src;../../Demos/OpenGL"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="1"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../EmuDebugCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
ProgramDatabaseFile="$(OutDir)/btCudaBroadphase.pdb"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuDebug|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
@@ -590,6 +509,87 @@
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuRelease|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="../../Glut;"$(CUDA_INC_PATH)";./;../../src;../../Demos/OpenGL"
|
||||
PreprocessorDefinitions="WIN32;_CONSOLE"
|
||||
RuntimeLibrary="0"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalDependencies="cudart.lib glew32.lib"
|
||||
OutputFile="../../EmuReleaseCudaBroadphase.exe"
|
||||
LinkIncremental="1"
|
||||
AdditionalLibraryDirectories="$(CUDA_LIB_PATH);../../Glut"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
CommandLine="IF EXIST cg_variables.h move cg_variables.h $(ConfigurationName)\"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="EmuRelease|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
@@ -720,30 +720,6 @@
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
ExcludedFromBuild="true"
|
||||
@@ -752,6 +728,14 @@
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
ExcludedFromBuild="true"
|
||||
@@ -760,6 +744,14 @@
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|x64"
|
||||
ExcludedFromBuild="true"
|
||||
@@ -768,6 +760,14 @@
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|x64"
|
||||
ExcludedFromBuild="true"
|
||||
@@ -793,37 +793,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I "$(CUDA_INC_PATH)"; -I./ -Icommon/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -use_fast_math -ccbin "$(VCInstallDir)\bin" -c -I "$(CUDA_INC_PATH)"; -I./ -Icommon/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I "$(CUDA_INC_PATH)"; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -deviceemu -c -I "$(CUDA_INC_PATH)"; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I "$(CUDA_INC_PATH)"; -I./ -Icommon/inc -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
@@ -838,6 +808,16 @@
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -use_fast_math -ccbin "$(VCInstallDir)\bin" -c -I "$(CUDA_INC_PATH)"; -I./ -Icommon/inc -I../../Glut -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
@@ -848,6 +828,16 @@
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -deviceemu -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I "$(CUDA_INC_PATH)"; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|x64"
|
||||
>
|
||||
@@ -858,6 +848,16 @@
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""$(CUDA_BIN_PATH)\nvcc.exe" -arch=sm_10 -ccbin "$(VCInstallDir)\bin" -deviceemu -c -I "$(CUDA_INC_PATH)"; -I./ -I../../common/inc -o $(ConfigurationName)\particleSystem_cu.obj particleSystem.cu
"
|
||||
AdditionalDependencies="particleSystem.cuh; particles_kernel.cu; radixsort.cu"
|
||||
Outputs="$(ConfigurationName)\particleSystem_cu.obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuRelease|x64"
|
||||
>
|
||||
@@ -899,6 +899,16 @@
|
||||
Outputs=""
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""
|
||||
AdditionalDependencies=""
|
||||
Outputs=""
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="EmuDebug|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
@@ -915,16 +925,6 @@
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine=""
|
||||
AdditionalDependencies=""
|
||||
Outputs=""
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\radixsort.cuh"
|
||||
@@ -1003,6 +1003,10 @@
|
||||
RelativePath=".\btCudaBroadphase.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\VTune\btCudaBroadphase.vpj"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\cutil_math.h"
|
||||
>
|
||||
@@ -1019,10 +1023,6 @@
|
||||
RelativePath=".\paramgl.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\VTune\btCudaBroadphase.vpj"
|
||||
>
|
||||
</File>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
|
||||
@@ -72,7 +72,7 @@ void ParticleSystem::initializeBullet()
|
||||
|
||||
m_collisionConfiguration = new btDefaultCollisionConfiguration();
|
||||
m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration);
|
||||
//m_broadphase = new btDbvtBroadphase();
|
||||
// m_broadphase = new btDbvtBroadphase();
|
||||
//m_broadphase = new btAxisSweep3(btVector3(-3,-3,-3),btVector3(3,3,3));
|
||||
m_broadphase = new btCudaBroadphase(m_params,m_params.numBodies+6);
|
||||
|
||||
@@ -202,9 +202,9 @@ ParticleSystem::update(float deltaTime)
|
||||
hPosData[i*4+1] = trans.getOrigin().getY();
|
||||
hPosData[i*4+2] = trans.getOrigin().getZ();
|
||||
|
||||
m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.;
|
||||
m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.;
|
||||
m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.;
|
||||
m_hVel[i*4] = m_bulletParticles[i]->getLinearVelocity().getX()/10.f;
|
||||
m_hVel[i*4+1] = m_bulletParticles[i]->getLinearVelocity().getY()/10.f;
|
||||
m_hVel[i*4+2] = m_bulletParticles[i]->getLinearVelocity().getZ()/10.f;
|
||||
}
|
||||
|
||||
m_broadphase->copyBuffersFromHostToDevice();
|
||||
|
||||
@@ -49,13 +49,13 @@
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} \
|
||||
err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
|
||||
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
@@ -64,22 +64,27 @@
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
# define MY_CUDA_SAFE_CALL( call) do { \
|
||||
MY_CUDA_SAFE_CALL_NO_SYNC(call); \
|
||||
cudaError err = cudaThreadSynchronize(); \
|
||||
if( cudaSuccess != err) { \
|
||||
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
|
||||
fprintf(stderr, "Cuda errorSync in file '%s' in line %i : %s.\n", \
|
||||
__FILE__, __LINE__, cudaGetErrorString( err) ); \
|
||||
exit(EXIT_FAILURE); \
|
||||
mm_exit(EXIT_FAILURE); \
|
||||
} } while (0)
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
void mm_exit(int val)
|
||||
{
|
||||
exit(val);
|
||||
}
|
||||
|
||||
void cudaInit(int argc, char **argv)
|
||||
{
|
||||
//CUT_DEVICE_INIT(argc, argv);
|
||||
@@ -112,6 +117,26 @@ void copyArrayFromDevice(void* host, const void* device, unsigned int vbo, int s
|
||||
void copyArrayToDevice(void* device, const void* host, int offset, int size)
|
||||
{
|
||||
MY_CUDA_SAFE_CALL(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
|
||||
/*
|
||||
cudaError_t err = cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice);
|
||||
switch(err)
|
||||
{
|
||||
case cudaSuccess :
|
||||
return;
|
||||
case cudaErrorInvalidValue :
|
||||
printf("\ncudaErrorInvalidValue : %d\n", err);
|
||||
return;
|
||||
case cudaErrorInvalidDevicePointer :
|
||||
printf("\ncudaErrorInvalidDevicePointer : %d\n", err);
|
||||
return;
|
||||
case cudaErrorInvalidMemcpyDirection :
|
||||
printf("\ncudaErrorInvalidMemcpyDirection : %d\n", err);
|
||||
return;
|
||||
default :
|
||||
printf("\nX3 : %d\n", err);
|
||||
return;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void registerGLBufferObject(uint vbo)
|
||||
@@ -255,6 +280,27 @@ reorderDataAndFindCellStart(uint* particleHash,
|
||||
MY_CUDA_SAFE_CALL(cudaGLUnmapBufferObject(vboOldPos));
|
||||
}
|
||||
|
||||
#if 1
|
||||
void
|
||||
findCellStart( uint* particleHash,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numBodies, 256, numBlocks, numThreads);
|
||||
|
||||
MY_CUDA_SAFE_CALL(cudaMemset(cellStart, 0xffffffff, numCells*sizeof(uint)));
|
||||
|
||||
findCellStartD<<< numBlocks, numThreads >>>(
|
||||
(uint2 *) particleHash,
|
||||
(uint *) cellStart);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: findCellStartD");
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
collide(uint vboOldPos, uint vboNewPos,
|
||||
float* sortedPos, float* sortedVel,
|
||||
@@ -328,4 +374,67 @@ collide(uint vboOldPos, uint vboNewPos,
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
btCudaFindOverlappingPairs( float* pAABB,
|
||||
uint* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
|
||||
// cudaError err = cudaMemset(pPairBuff, 0x00, numParticles*32*4);
|
||||
// if(err != cudaSuccess)
|
||||
// {
|
||||
// printf("\nAAAAA\n");
|
||||
// }
|
||||
|
||||
int numThreads, numBlocks;
|
||||
// computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
computeGridSize(numParticles, 64, numBlocks, numThreads);
|
||||
// numThreads = 1;
|
||||
// numBlocks = 1;
|
||||
btCudaFindOverlappingPairsD<<< numBlocks, numThreads >>>(
|
||||
(float4 *)pAABB,
|
||||
(uint2*)pParticleHash,
|
||||
(uint*)pCellStart,
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
numParticles
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaFindOverlappingPairsD");
|
||||
} // btCudaFindOverlappingPairs()
|
||||
|
||||
|
||||
void
|
||||
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
|
||||
btCudaComputePairCacheChangesD<<< numBlocks, numThreads >>>(
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
(uint*)pPairScan
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
|
||||
} // btCudaFindOverlappingPairs()
|
||||
|
||||
|
||||
void
|
||||
btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
|
||||
{
|
||||
int numThreads, numBlocks;
|
||||
computeGridSize(numParticles, 256, numBlocks, numThreads);
|
||||
|
||||
btCudaSqueezeOverlappingPairBuffD<<< numBlocks, numThreads >>>(
|
||||
(uint*)pPairBuff,
|
||||
(uint2*)pPairBuffStartCurr,
|
||||
(uint*)pPairScan,
|
||||
pPairOut
|
||||
);
|
||||
CUT_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
|
||||
}
|
||||
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -42,6 +42,12 @@ reorderDataAndFindCellStart(uint* particleHash,
|
||||
uint numBodies,
|
||||
uint numCells);
|
||||
|
||||
void
|
||||
findCellStart( uint* particleHash,
|
||||
uint* cellStart,
|
||||
uint numBodies,
|
||||
uint numCells);
|
||||
|
||||
void
|
||||
collide(uint vboOldPos, uint vboNewPos,
|
||||
float* sortedPos, float* sortedVel,
|
||||
@@ -53,5 +59,20 @@ collide(uint vboOldPos, uint vboNewPos,
|
||||
uint numBodies,
|
||||
uint numCells,
|
||||
uint maxParticlesPerCell);
|
||||
|
||||
void
|
||||
btCudaFindOverlappingPairs( float* pAABB,
|
||||
uint* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint* pPairBuffStartCurr,
|
||||
uint numParticles);
|
||||
|
||||
void
|
||||
btCudaComputePairCacheChanges(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles);
|
||||
|
||||
|
||||
void btCudaSqueezeOverlappingPairBuff(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -79,6 +79,7 @@ public:
|
||||
class btDefaultCollisionConfiguration* m_collisionConfiguration;
|
||||
class btCollisionDispatcher* m_dispatcher;
|
||||
class btCudaBroadphase* m_broadphase;
|
||||
// class btBroadphaseInterface* m_broadphase;
|
||||
class btSequentialImpulseConstraintSolver* m_constraintSolver;
|
||||
btAlignedObjectArray<class btRigidBody*> m_bulletParticles;
|
||||
|
||||
|
||||
@@ -45,6 +45,9 @@
|
||||
#include <GL/glut.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
|
||||
#include "particleSystem.h"
|
||||
#include "render_particles.h"
|
||||
#include "paramgl.h"
|
||||
@@ -165,7 +168,9 @@ void display()
|
||||
glColor3f(1.0, 1.0, 1.0);
|
||||
glutWireCube(2.0);
|
||||
|
||||
// collider
|
||||
|
||||
|
||||
// collider
|
||||
glPushMatrix();
|
||||
float4 p = psystem->getColliderPos();
|
||||
glTranslatef(p.x, p.y, p.z);
|
||||
@@ -188,6 +193,19 @@ void display()
|
||||
}
|
||||
|
||||
psystem->debugDraw();
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
// glDisable(GL_LIGHTING);
|
||||
// glColor3f(0, 0, 0);
|
||||
float offsX = 10.f;
|
||||
float offsY = 10.f;
|
||||
renderer->showProfileInfo(offsX, offsY, 20.f);
|
||||
// glEnable(GL_LIGHTING);
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
glutSwapBuffers();
|
||||
|
||||
@@ -357,6 +375,19 @@ inline float frand()
|
||||
// commented out to remove unused parameter warnings in Linux
|
||||
void key(unsigned char key, int /*x*/, int /*y*/)
|
||||
{
|
||||
|
||||
#ifndef BT_NO_PROFILE
|
||||
if (key >= 0x31 && key < 0x37)
|
||||
{
|
||||
int child = key-0x31;
|
||||
renderer->m_profileIterator->Enter_Child(child);
|
||||
}
|
||||
if (key==0x30)
|
||||
{
|
||||
renderer->m_profileIterator->Enter_Parent();
|
||||
}
|
||||
#endif //BT_NO_PROFILE
|
||||
|
||||
switch (key)
|
||||
{
|
||||
case ' ':
|
||||
@@ -394,13 +425,13 @@ void key(unsigned char key, int /*x*/, int /*y*/)
|
||||
displayEnabled = !displayEnabled;
|
||||
break;
|
||||
|
||||
case '1':
|
||||
case 'g':
|
||||
psystem->reset(ParticleSystem::CONFIG_GRID);
|
||||
break;
|
||||
case '2':
|
||||
case 'a':
|
||||
psystem->reset(ParticleSystem::CONFIG_RANDOM);
|
||||
break;
|
||||
case '3':
|
||||
case 'e':
|
||||
{
|
||||
// inject a sphere of particles
|
||||
float pr = psystem->getParticleRadius();
|
||||
@@ -414,7 +445,7 @@ void key(unsigned char key, int /*x*/, int /*y*/)
|
||||
psystem->addSphere(0, pos, vel, ballr, pr*2.0f);
|
||||
}
|
||||
break;
|
||||
case '4':
|
||||
case 'b':
|
||||
{
|
||||
// shoot ball from camera
|
||||
float pr = psystem->getParticleRadius();
|
||||
@@ -484,9 +515,10 @@ void mainMenu(int i)
|
||||
void initMenus()
|
||||
{
|
||||
glutCreateMenu(mainMenu);
|
||||
glutAddMenuEntry("Reset block [1]", '1');
|
||||
glutAddMenuEntry("Reset random [2]", '2');
|
||||
glutAddMenuEntry("Add sphere [3]", '3');
|
||||
glutAddMenuEntry("Reset block [g]", 'g');
|
||||
glutAddMenuEntry("Reset random [a]", 'a');
|
||||
glutAddMenuEntry("Add sphere [e]", 'e');
|
||||
glutAddMenuEntry("Shoot ball [b]", 'b');
|
||||
glutAddMenuEntry("View mode [v]", 'v');
|
||||
glutAddMenuEntry("Move cursor mode [m]", 'm');
|
||||
glutAddMenuEntry("Toggle point rendering [p]", 'p');
|
||||
@@ -504,7 +536,8 @@ void initMenus()
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
numParticles =1024;//1024;//64;//16380;//32768;
|
||||
// numParticles =1024;//1024;//64;//16380;//32768;
|
||||
numParticles =8192;
|
||||
uint gridDim = 64;
|
||||
numIterations = 0;
|
||||
|
||||
|
||||
@@ -199,6 +199,35 @@ reorderDataAndFindCellStartD(uint2* particleHash, // particle id sorted by has
|
||||
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
findCellStartD(uint2* particleHash, // particle id sorted by hash
|
||||
uint* cellStart)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
|
||||
uint2 sortedData = particleHash[index];
|
||||
|
||||
// Load hash data into shared memory so that we can look
|
||||
// at neighboring particle's hash value without loading
|
||||
// two hash values per thread
|
||||
__shared__ uint sharedHash[257];
|
||||
sharedHash[threadIdx.x+1] = sortedData.x;
|
||||
if (index > 0 && threadIdx.x == 0)
|
||||
{
|
||||
// first thread in block must load neighbor particle hash
|
||||
volatile uint2 prevData = particleHash[index-1];
|
||||
sharedHash[0] = prevData.x;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
if (index == 0 || sortedData.x != sharedHash[threadIdx.x])
|
||||
{
|
||||
cellStart[sortedData.x] = index;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// collide two spheres using DEM method
|
||||
__device__ float3 collideSpheres(float4 posA, float4 posB,
|
||||
float4 velA, float4 velB,
|
||||
@@ -378,4 +407,217 @@ collideD(float4* newPos, float4* newVel,
|
||||
#endif
|
||||
}
|
||||
|
||||
__device__
|
||||
uint cudaTestAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
|
||||
{
|
||||
return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
|
||||
(min0.y <= max1.y)&& (min1.y <= max0.y) &&
|
||||
(min0.z <= max1.z)&& (min1.z <= max0.z);
|
||||
}
|
||||
|
||||
#if 0
|
||||
__device__
|
||||
void findPairsInCell( int3 gridPos,
|
||||
uint index,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
float4* pAABB,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr)
|
||||
{
|
||||
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
||||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
||||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
||||
return;
|
||||
}
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
// get start of bucket for this cell
|
||||
uint bucketStart = pCellStart[gridHash];
|
||||
if (bucketStart == 0xffffffff)
|
||||
return; // cell empty
|
||||
// iterate over particles in this cell
|
||||
float4 min0 = pAABB[index*2];
|
||||
float4 max0 = pAABB[index*2 + 1];
|
||||
uint2 start_curr = pPairBuffStartCurr[index];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
// uint* pBuf = pPairBuff; // + curr;
|
||||
for(uint q=0; q < params.maxParticlesPerCell; q++)
|
||||
{
|
||||
uint index2 = bucketStart + q;
|
||||
uint2 cellData = pParticleHash[index2];
|
||||
if (cellData.x != gridHash) break; // no longer in same bucket
|
||||
if (index2 != index) // check not colliding with self
|
||||
{
|
||||
float4 min1 = pAABB[index2*2];
|
||||
float4 max1 = pAABB[index2*2 + 1];
|
||||
if(cudaTestAABBOverlap(min0, max0, min1, max1))
|
||||
{
|
||||
curr++;
|
||||
}
|
||||
//*pBuf = index2;
|
||||
//pPairBuff[curr] = index2;
|
||||
//pPairBuff[0] = index2;
|
||||
//pBuf++;
|
||||
}
|
||||
}
|
||||
// uint del = curr - start;
|
||||
if(curr != start)
|
||||
{
|
||||
curr = start;
|
||||
}
|
||||
pPairBuffStartCurr[index] = make_uint2(start, curr);
|
||||
|
||||
//
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
__device__
|
||||
void findPairsInCell( int3 gridPos,
|
||||
uint index,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
float4* pAABB,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles)
|
||||
{
|
||||
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
||||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
||||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
||||
return;
|
||||
}
|
||||
uint gridHash = calcGridHash(gridPos);
|
||||
// get start of bucket for this cell
|
||||
uint bucketStart = pCellStart[gridHash];
|
||||
if (bucketStart == 0xffffffff)
|
||||
return; // cell empty
|
||||
// iterate over particles in this cell
|
||||
float4 min0 = pAABB[index*2];
|
||||
float4 max0 = pAABB[index*2+1];
|
||||
|
||||
uint2 sortedData = pParticleHash[index];
|
||||
uint unsorted_indx = sortedData.y;
|
||||
uint2 start_curr = pPairBuffStartCurr[unsorted_indx];
|
||||
// uint2 start_curr = pPairBuffStartCurr[index];
|
||||
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint bucketEnd = bucketStart + params.maxParticlesPerCell;
|
||||
bucketEnd = (bucketEnd > numParticles) ? numParticles : bucketEnd;
|
||||
for(uint index2=bucketStart; index2 < bucketEnd; index2++)
|
||||
{
|
||||
uint2 cellData = pParticleHash[index2];
|
||||
if (cellData.x != gridHash) break; // no longer in same bucket
|
||||
if (index2 < index) // check not colliding with self
|
||||
{
|
||||
float4 min1 = pAABB[index2*2];
|
||||
float4 max1 = pAABB[index2*2 + 1];
|
||||
if(cudaTestAABBOverlap(min0, max0, min1, max1))
|
||||
{
|
||||
uint k;
|
||||
uint unsorted_indx2 = cellData.y;
|
||||
for(k = 0; k < curr; k++)
|
||||
{
|
||||
uint old_pair = pPairBuff[start+k] & (~BT_CUDA_PAIR_ANY_FLG);
|
||||
if(old_pair == unsorted_indx2)
|
||||
{
|
||||
pPairBuff[start+k] |= BT_CUDA_PAIR_FOUND_FLG;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(k == curr)
|
||||
{
|
||||
pPairBuff[start+curr] = unsorted_indx2 | BT_CUDA_PAIR_NEW_FLG;
|
||||
curr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pPairBuffStartCurr[unsorted_indx] = make_uint2(start, curr);
|
||||
// pPairBuffStartCurr[index] = make_uint2(start, curr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void
|
||||
btCudaFindOverlappingPairsD(
|
||||
float4* pAABB,
|
||||
uint2* pParticleHash,
|
||||
uint* pCellStart,
|
||||
uint* pPairBuff,
|
||||
uint2* pPairBuffStartCurr,
|
||||
uint numParticles
|
||||
)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
float4 bbMin = pAABB[index*2];
|
||||
float4 bbMax = pAABB[index*2+1];
|
||||
float4 pos = (bbMin + bbMax) * 0.5f;
|
||||
// get address in grid
|
||||
int3 gridPos = calcGridPos(pos);
|
||||
// examine only neighbouring cells
|
||||
for(int z=-1; z<=1; z++) {
|
||||
for(int y=-1; y<=1; y++) {
|
||||
for(int x=-1; x<=1; x++) {
|
||||
findPairsInCell(gridPos + make_int3(x, y, z), index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void
|
||||
btCudaComputePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
uint2 start_curr = pPairBuffStartCurr[index];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint *pInp = pPairBuff + start;
|
||||
uint num_changes = 0;
|
||||
for(uint k = 0; k < curr; k++, pInp++)
|
||||
{
|
||||
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
|
||||
{
|
||||
num_changes++;
|
||||
}
|
||||
}
|
||||
pPairScan[index+1] = num_changes;
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
btCudaSqueezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
|
||||
{
|
||||
int index = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
|
||||
uint2 start_curr = pPairBuffStartCurr[index];
|
||||
uint start = start_curr.x;
|
||||
uint curr = start_curr.y;
|
||||
uint* pInp = pPairBuff + start;
|
||||
uint* pOut = pPairOut + pPairScan[index];
|
||||
uint* pOut2 = pInp;
|
||||
uint num = 0;
|
||||
for(uint k = 0; k < curr; k++, pInp++)
|
||||
{
|
||||
if(!((*pInp) & BT_CUDA_PAIR_FOUND_FLG))
|
||||
{
|
||||
*pOut = *pInp;
|
||||
pOut++;
|
||||
}
|
||||
if((*pInp) & BT_CUDA_PAIR_ANY_FLG)
|
||||
{
|
||||
*pOut2 = (*pInp) & (~BT_CUDA_PAIR_ANY_FLG);
|
||||
pOut2++;
|
||||
num++;
|
||||
}
|
||||
}
|
||||
pPairBuffStartCurr[index] = make_uint2(start, num);
|
||||
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,6 +14,12 @@
|
||||
#define FETCH(t, i) t[i]
|
||||
#endif
|
||||
|
||||
|
||||
#define BT_CUDA_PAIR_FOUND_FLG (0x40000000)
|
||||
#define BT_CUDA_PAIR_NEW_FLG (0x20000000)
|
||||
#define BT_CUDA_PAIR_ANY_FLG (BT_CUDA_PAIR_FOUND_FLG | BT_CUDA_PAIR_NEW_FLG)
|
||||
|
||||
|
||||
#include "vector_types.h"
|
||||
typedef unsigned int uint;
|
||||
|
||||
|
||||
@@ -33,8 +33,12 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "BMF_Api.h"
|
||||
|
||||
#include "render_particles.h"
|
||||
#include "shaders.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "paramgl.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.1415926535897932384626433832795
|
||||
@@ -50,6 +54,9 @@ ParticleRenderer::ParticleRenderer()
|
||||
m_colorVBO(0)
|
||||
{
|
||||
_initGL();
|
||||
#ifndef BT_NO_PROFILE
|
||||
m_profileIterator = CProfileManager::Get_Iterator();
|
||||
#endif //BT_NO_PROFILE
|
||||
}
|
||||
|
||||
ParticleRenderer::~ParticleRenderer()
|
||||
@@ -178,3 +185,77 @@ void ParticleRenderer::_initGL()
|
||||
glClampColorARB(GL_CLAMP_FRAGMENT_COLOR_ARB, GL_FALSE);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 1
|
||||
void ParticleRenderer::showProfileInfo(float& xOffset,float& yStart, float yIncr)
|
||||
{
|
||||
#ifndef BT_NO_PROFILE
|
||||
|
||||
static double time_since_reset = 0.f;
|
||||
// if (!m_idle)
|
||||
{
|
||||
time_since_reset = CProfileManager::Get_Time_Since_Reset();
|
||||
}
|
||||
beginWinCoords();
|
||||
|
||||
{
|
||||
//recompute profiling data, and store profile strings
|
||||
|
||||
char blockTime[128];
|
||||
|
||||
double totalTime = 0;
|
||||
|
||||
int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
|
||||
|
||||
m_profileIterator->First();
|
||||
|
||||
double parent_time = m_profileIterator->Is_Root() ? time_since_reset : m_profileIterator->Get_Current_Parent_Total_Time();
|
||||
|
||||
{
|
||||
sprintf(blockTime,"--- Profiling: %s (total running time: %.3f ms) ---", m_profileIterator->Get_Current_Parent_Name(), parent_time );
|
||||
displayProfileString(xOffset,yStart,blockTime);
|
||||
yStart += yIncr;
|
||||
sprintf(blockTime,"press number (1,2...) to display child timings, or 0 to go up to parent" );
|
||||
displayProfileString(xOffset,yStart,blockTime);
|
||||
yStart += yIncr;
|
||||
|
||||
}
|
||||
double accumulated_time = 0.f;
|
||||
|
||||
for (int i = 0; !m_profileIterator->Is_Done(); m_profileIterator->Next())
|
||||
{
|
||||
double current_total_time = m_profileIterator->Get_Current_Total_Time();
|
||||
accumulated_time += current_total_time;
|
||||
double fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
|
||||
|
||||
sprintf(blockTime,"%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)",
|
||||
++i, m_profileIterator->Get_Current_Name(), fraction,
|
||||
(current_total_time / (double)frames_since_reset),m_profileIterator->Get_Current_Total_Calls());
|
||||
displayProfileString(xOffset,yStart,blockTime);
|
||||
yStart += yIncr;
|
||||
totalTime += current_total_time;
|
||||
}
|
||||
|
||||
sprintf(blockTime,"%s (%.3f %%) :: %.3f ms", "Unaccounted",
|
||||
// (min(0, time_since_reset - totalTime) / time_since_reset) * 100);
|
||||
parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
|
||||
|
||||
displayProfileString(xOffset,yStart,blockTime);
|
||||
yStart += yIncr;
|
||||
sprintf(blockTime,"-------------------------------------------------");
|
||||
displayProfileString(xOffset,yStart,blockTime);
|
||||
yStart += yIncr;
|
||||
|
||||
}
|
||||
endWinCoords();
|
||||
#endif//BT_NO_PROFILE
|
||||
}
|
||||
|
||||
|
||||
void ParticleRenderer::displayProfileString(int xOffset,int yStart,char* message)
|
||||
{
|
||||
glRasterPos3f(xOffset,yStart,0);
|
||||
BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),message);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -30,6 +30,9 @@
|
||||
#ifndef __RENDER_PARTICLES__
|
||||
#define __RENDER_PARTICLES__
|
||||
|
||||
|
||||
class CProfileIterator;
|
||||
|
||||
class ParticleRenderer
|
||||
{
|
||||
public:
|
||||
@@ -55,6 +58,11 @@ public:
|
||||
void setFOV(float fov) { m_fov = fov; }
|
||||
void setWindowSize(int w, int h) { m_window_w = w; m_window_h = h; }
|
||||
|
||||
void showProfileInfo(float& xOffset,float& yStart, float yIncr);
|
||||
void displayProfileString(int xOffset,int yStart,char* message);
|
||||
class CProfileIterator* m_profileIterator;
|
||||
|
||||
|
||||
protected: // methods
|
||||
void _initGL();
|
||||
void _drawPoints();
|
||||
|
||||
Reference in New Issue
Block a user