Files
bullet3/Extras/CUDA/btCudaBroadphase.cpp
2008-10-28 23:25:59 +00:00

1545 lines
45 KiB
C++

/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "particles_kernel.cuh"
#include "particleSystem.cuh"
#include "radixsort.cuh"
#include "vector_functions.h"
#include <stdio.h>
#ifdef WIN32//for glut.h
#include <windows.h>
#endif
#include <GL/glew.h>
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif
#define MAX_COLL_PAIR_PER_PARTICLE 64
#define USE_SORT 1
#define USE_OLD 0
#define USE_CUDA 1
#include "btCudaBroadphase.h"
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
/// Constructs the CUDA grid broadphase on top of btSimpleBroadphase.
/// A copy of simParams is stored and then overwritten with the fixed grid, world and
/// particle settings below; the particle count is taken from simParams.numBodies.
/// The pair cache is a btHashedOverlappingPairCache created here and marked as owned.
/// Finishes by calling _initialize() to allocate all buffers.
btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) :
	btSimpleBroadphase(maxProxies,
		new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
	m_bInitialized(false),
	m_numParticles(simParams.numBodies),
	m_hPos(0),
	m_hVel(0),
	m_currentPosRead(0),
	m_currentVelRead(0),
	m_currentPosWrite(1),
	m_currentVelWrite(1),
	m_maxParticlesPerCell(4),
	m_simParams(simParams)
{
	m_ownsPairCache = true;

	// device position/velocity slots start out empty
	m_dPos[0] = 0;
	m_dPos[1] = 0;
	m_dVel[0] = 0;
	m_dVel[1] = 0;

	// uniform grid: same resolution along every axis
	const int cellsPerAxis = 64;
	m_simParams.gridSize.x = cellsPerAxis;
	m_simParams.gridSize.y = cellsPerAxis;
	m_simParams.gridSize.z = cellsPerAxis;
	m_simParams.numCells = m_simParams.gridSize.x*m_simParams.gridSize.y*m_simParams.gridSize.z;

	// world box and the cell size derived from it
	m_simParams.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f);
	m_simParams.worldSize = make_float3(2.0f, 2.0f, 2.0f);
	m_simParams.cellSize = make_float3(m_simParams.worldSize.x / m_simParams.gridSize.x, m_simParams.worldSize.y / m_simParams.gridSize.y, m_simParams.worldSize.z / m_simParams.gridSize.z);

	// particle bookkeeping
	m_simParams.numBodies = m_numParticles;
	m_simParams.maxParticlesPerCell = m_maxParticlesPerCell;
	m_simParams.particleRadius = m_simParams.cellSize.x * 0.5f;

	// collider sphere
	m_simParams.colliderPos = make_float4(-1.2f, -0.8f, 0.8f, 1.0f);
	m_simParams.colliderRadius = 0.2f;

	// interaction coefficients
	m_simParams.spring = 0.5f;
	m_simParams.damping = 0.02f;
	m_simParams.shear = 0.1f;
	m_simParams.attraction = 0.0f;
	m_simParams.boundaryDamping = -0.5f;
	m_simParams.gravity = make_float3(0.0f, -0.0003f, 0.0f);
	m_simParams.globalDamping = 1.0f;

	_initialize(m_numParticles);
}
/// Linear interpolation: returns a for t == 0 and b for t == 1.
static inline float lerp(float a, float b, float t)
{
	return a + t*(b-a);
}
/// Maps t onto a 7-stop colour ramp (red, orange, yellow, green, cyan, blue, magenta)
/// and writes the interpolated RGB triple to r[0], r[1], r[2].
/// @param t ramp position; values outside [0,1] (and NaN) are clamped so the table
///          is never indexed out of bounds (t == 1 yields the last colour).
/// @param r output array with room for at least 3 floats.
static void colorRamp(float t, float *r)
{
	const int ncolors = 7;
	static const float c[ncolors][3] = {
		{ 1.0f, 0.0f, 0.0f, },
		{ 1.0f, 0.5f, 0.0f, },
		{ 1.0f, 1.0f, 0.0f, },
		{ 0.0f, 1.0f, 0.0f, },
		{ 0.0f, 1.0f, 1.0f, },
		{ 0.0f, 0.0f, 1.0f, },
		{ 1.0f, 0.0f, 1.0f, },
	};
	// clamp to the valid ramp range; the negated comparison also catches NaN
	if (!(t > 0.0f))
		t = 0.0f;
	else if (t > 1.0f)
		t = 1.0f;
	t = t * (ncolors-1);
	int i = (int) t;
	// the last valid segment is [ncolors-2, ncolors-1]; t == ncolors-1 maps to its end (u == 1)
	if (i > ncolors-2)
		i = ncolors-2;
	const float u = t - (float) i;
	r[0] = lerp(c[i][0], c[i+1][0], u);
	r[1] = lerp(c[i][1], c[i+1][1], u);
	r[2] = lerp(c[i][2], c[i+1][2], u);
}
unsigned int btCudaBroadphase::createVBO(unsigned int size)
{
GLuint vbo;
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
registerGLBufferObject(vbo);
return vbo;
}
void btCudaBroadphase::_initialize(int numParticles)
{
assert(!m_bInitialized);
// allocate host storage
m_hPos = new float[numParticles*4];
m_hVel = new float[numParticles*4];
m_hSortedPos = new float[numParticles*4];
memset(m_hPos, 0, numParticles*4*sizeof(float));
memset(m_hVel, 0, numParticles*4*sizeof(float));
memset(m_hSortedPos, 0, numParticles*4*sizeof(float));
m_hGridCounters = new uint[m_simParams.numCells];
m_hGridCells = new uint[m_simParams.numCells*m_maxParticlesPerCell];
memset(m_hGridCounters, 0, m_simParams.numCells*sizeof(uint));
memset(m_hGridCells, 0, m_simParams.numCells*m_maxParticlesPerCell*sizeof(uint));
m_hParticleHash = new uint[numParticles*2];
memset(m_hParticleHash, 0, numParticles*2*sizeof(uint));
m_hCellStart = new uint[m_simParams.numCells];
memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint));
m_hPairBuffStartCurr = new unsigned int[m_numParticles * 2 + 1];
// --------------- for now, init with MAX_COLL_PAIR_PER_PARTICLE for each particle
m_hPairBuffStartCurr[0] = 0;
m_hPairBuffStartCurr[1] = 0;
for(uint i = 1; i <= m_numParticles; i++)
{
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + MAX_COLL_PAIR_PER_PARTICLE;
// m_hPairBuffStartCurr[i * 2 + 1] = m_hPairBuffStartCurr[i * 2];
m_hPairBuffStartCurr[i * 2 + 1] = 0;
}
//----------------
m_hAABB = new float[numParticles*4*2]; // BB Min & Max
m_hPairBuff = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
memset(m_hPairBuff, 0x00, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*4);
m_hPairScan = new unsigned int[m_numParticles + 1];
m_hPairOut = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
// allocate GPU data
unsigned int memSize = sizeof(float) * 4 * m_numParticles;
m_posVbo[0] = createVBO(memSize);
m_posVbo[1] = createVBO(memSize);
allocateArray((void**)&m_dVel[0], memSize);
allocateArray((void**)&m_dVel[1], memSize);
allocateArray((void**)&m_dSortedPos, memSize);
allocateArray((void**)&m_dSortedVel, memSize);
#if USE_SORT
allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint));
allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint));
allocateArray((void**)&m_dCellStart, m_simParams.numCells*sizeof(uint));
#else
allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint));
allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
#endif
allocateArray((void**)&m_dPairBuff, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
copyArrayToDevice(m_dPairBuff, m_hPairBuff, 0, sizeof(unsigned int)*m_numParticles*MAX_COLL_PAIR_PER_PARTICLE);
allocateArray((void**)&m_dPairBuffStartCurr, (m_numParticles*2 + 1)*sizeof(unsigned int));
allocateArray((void**)&m_dAABB, memSize*2);
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, 0, sizeof(unsigned int)*(m_numParticles*2 + 1));
allocateArray((void**)&m_dPairScan, (m_numParticles + 1)*sizeof(unsigned int));
allocateArray((void**)&m_dPairOut, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
#if 1
// fill color buffer
glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO);
float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
float *ptr = data;
for(uint i=0; i<m_numParticles; i++) {
float t = i / (float) m_numParticles;
#if 0
*ptr++ = rand() / (float) RAND_MAX;
*ptr++ = rand() / (float) RAND_MAX;
*ptr++ = rand() / (float) RAND_MAX;
#else
colorRamp(t, ptr);
ptr+=3;
#endif
*ptr++ = 1.0f;
}
glUnmapBufferARB(GL_ARRAY_BUFFER);
#endif
setParameters(&m_simParams);
// Pair cache data
m_maxPairsPerParticle = 0;
m_numOverflows = 0;
m_bInitialized = true;
}
void btCudaBroadphase::_finalize()
{
assert(m_bInitialized);
delete [] m_hPos;
delete [] m_hVel;
delete [] m_hSortedPos;
delete [] m_hGridCounters;
delete [] m_hGridCells;
delete [] m_dPairBuff;
delete [] m_dPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScan;
delete [] m_hPairOut;
freeArray(m_dVel[0]);
freeArray(m_dVel[1]);
freeArray(m_dSortedPos);
freeArray(m_dSortedVel);
#if USE_SORT
freeArray(m_dParticleHash[0]);
freeArray(m_dParticleHash[1]);
freeArray(m_dCellStart);
#else
freeArray(m_dGridCounters);
freeArray(m_dGridCells);
#endif
freeArray(m_dPairBuff);
freeArray(m_dPairBuffStartCurr);
freeArray(m_dAABB);
freeArray(m_hPairBuff);
freeArray(m_hPairScan);
freeArray(m_hPairOut);
unregisterGLBufferObject(m_posVbo[0]);
unregisterGLBufferObject(m_posVbo[1]);
glDeleteBuffers(2, (const GLuint*)m_posVbo);
glDeleteBuffers(1, (const GLuint*)&m_colorVBO);
}
/// Destroys the broadphase: verifies it was initialised, then releases its buffers
/// through _finalize().
btCudaBroadphase::~btCudaBroadphase()
{
	// The pair cache itself is not released here; m_ownsPairCache was set in the
	// constructor so that btSimpleBroadphase takes care of it.
	assert(m_bInitialized);
	this->_finalize();
}
/*
int btCudaBroadphase::myCollideCell2(int3 gridPos,
uint index,
unsigned int* particleHash,
unsigned int* cellStart)
{
int numOverlap = 0;
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
return force;
}
uint gridHash = calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = FETCH(cellStart, gridHash);
if (bucketStart == 0xffffffff)
return force; // cell empty
// iterate over particles in this cell
for(uint i=0; i<params.maxParticlesPerCell; i++) {
uint index2 = bucketStart + i;
uint2 cellData = FETCH(particleHash, index2);
if (cellData.x != gridHash) break; // no longer in same bucket
if (index2 != index) { // check not colliding with self
float4 pos2 = FETCH(oldPos, index2);
float4 vel2 = FETCH(oldVel, index2);
// collide two spheres
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
force += projVec;
}
}
return force;
}
*/
/// Recomputes the overlapping pairs for the current proxies.
/// With the macros as defined at the top of this file (USE_SORT 1, USE_OLD 0,
/// _USE_BRUTEFORCE_N undefined) the compiled path is:
///   setParameters -> calcHash -> RadixSort -> findCellStart ->
///   copy the hash and cell-start tables back to the host -> findOverlappingPairs(dispatcher).
/// The remaining preprocessor branches (brute-force all-pairs test, the USE_OLD host-side
/// neighbour-cell search, and the USE_OLD deferred-removal cleanup) are kept but not compiled.
/// @param dispatcher passed on to findOverlappingPairs() and to the pair-cache
///        removal/cleanup calls in the disabled branches.
void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
//first check for new overlapping pairs
// j is only used by the brute-force branch; frameCount only by the commented-out printf
int j;
static int frameCount = 0;
//printf("framecount=%d\n",frameCount++);
// NOTE(review): this condition also holds for zero handles — confirm that is intended
if (m_numHandles >= 0)
{
//#define _USE_BRUTEFORCE_N 1
#ifdef _USE_BRUTEFORCE_N
// disabled reference path: test every proxy against every other proxy
int i;
for (i=0;i<m_numHandles;i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
for (j=i+1;j<m_numHandles;j++)
{
// NOTE(review): indexes with i, not j, so proxy1 starts equal to proxy0 — looks like a typo, confirm
btSimpleBroadphaseProxy* proxy1 = &m_pHandles[i];
if (proxy0 != proxy1)
{
btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
if (aabbOverlap(p0,p1))
{
if ( !m_pairCache->findPair(proxy0,proxy1))
{
m_pairCache->addOverlappingPair(proxy0,proxy1);
}
} else
{
if (!m_pairCache->hasDeferredRemoval())
{
if ( m_pairCache->findPair(proxy0,proxy1))
{
m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
}
}
}
}
proxy1 = &m_pHandles[proxy1->GetNextAllocated()];
}
proxy0 = &m_pHandles[proxy0->GetNextAllocated()];
}
#else //_USE_BRUTEFORCE_N
// update constants
setParameters(&m_simParams);
// only referenced by the commented-out integration step below
float deltaTime = 1./60.f;
/*
// integrate
integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
deltaTime,
m_numParticles);
btSwap(m_currentPosRead, m_currentPosWrite);
btSwap(m_currentVelRead, m_currentVelWrite);
*/
#if USE_SORT
// sort and search method
// calculate hash
{
BT_PROFILE("calcHash-- CUDA");
calcHash( m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
}
#if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
printf("particle hash:\n");
for(uint i=0; i<m_numParticles; i++) {
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
}
#endif
// sort particles based on hash
{
BT_PROFILE("RadixSort-- CUDA");
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
}
#if DEBUG_GRID
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
printf("particle hash sorted:\n");
for(uint i=0; i<m_numParticles; i++) {
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
}
#endif
// reorder particle arrays into sorted order and
// find start of each cell
{
BT_PROFILE("Reorder-- CUDA");
#if USE_OLD
reorderDataAndFindCellStart(m_dParticleHash[0],
m_posVbo[m_currentPosRead],
m_dVel[m_currentVelRead],
m_dSortedPos,
m_dSortedVel,
m_dCellStart,
m_numParticles,
m_simParams.numCells);
#else
// active path: only the cell-start table is built, no particle data is reordered here
findCellStart(m_dParticleHash[0],
m_dCellStart,
m_numParticles,
m_simParams.numCells);
#endif
}
//#define DEBUG_GRID2
#ifdef DEBUG_GRID2
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
printf("cell start:\n");
for(uint i=0; i<16; i++) {
printf("%d: %d//", i, m_hCellStart[i]);
}
#endif
#else
// update grid using atomics
updateGrid(m_posVbo[m_currentPosRead],
m_dGridCounters,
m_dGridCells,
m_numParticles,
m_numGridCells);
#endif
/*
int m_solverIterations = 1;
// process collisions
for(uint i=0; i<m_solverIterations; i++) {
collide(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
m_dSortedPos, m_dSortedVel,
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
m_dGridCounters,
m_dGridCells,
m_dParticleHash[0],
m_dCellStart,
m_numParticles,
m_simParams.numCells,
m_maxParticlesPerCell
);
btSwap(m_currentVelRead, m_currentVelWrite);
}
*/
// bring the hash and cell-start tables to the host; findOverlappingPairs() reads m_hParticleHash
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
// copyArrayFromDevice((void *) m_hSortedPos, (void*) m_dSortedPos,0 , sizeof(float)*4*m_numParticles);
//#define DEBUG_INDICES 1
#ifdef DEBUG_INDICES
{
printf("cell start:\n");
for(uint i=0; i<16; i++) {
printf("%d: %d\n", i, m_hCellStart[i]);
}
}
{
printf("particle hash sorted:\n");
for(uint i=0; i<m_numParticles; i++) {
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
}
}
#endif //DEBUG_INDICES
{
// printf("cell start:\n");
// for(uint i=0; i<m_simParams.numCells; i++) {
// printf("%d: %d\n", i, m_hCellStart[i]);
// }
}
#if USE_OLD
// disabled host-side search: for every entry of the hash table, visit the 3x3x3 block of
// cells around the proxy's AABB centre and AABB-test against the entries stored in each cell
//printf("particle hash sorted:\n");
for(uint pi=0; pi<m_numParticles; pi++)
{
int index = m_hParticleHash[pi*2+1];
//printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
//perform an AABB check?
// examine only neighbouring cells
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
btVector3 mypos = (proxy0->m_aabbMin + proxy0->m_aabbMax)*0.5f;
// float4* p = (float4*)&m_hSortedPos[index*4];
int3 particleGridPos;
particleGridPos.x = floor((mypos.x() - m_simParams.worldOrigin.x) / m_simParams.cellSize.x);
particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
int numRejected=0;
//for(int z=0; z<1; z++)
for(int z=-1; z<=1; z++)
{
// for(int y=0; y<1; y++)
for(int y=-1; y<=1; y++)
{
// for(int x=0; x<1; x++)
for(int x=-1; x<=1; x++)
{
int3 gridPos;
gridPos.x = particleGridPos.x + x;
gridPos.y = particleGridPos.y + y;
gridPos.z = particleGridPos.z + z;
// skip neighbour cells that fall outside the grid
if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) ||
(gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) ||
(gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1))
{
continue;
}
gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1));
gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1));
gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1));
uint gridHash = ((gridPos.z*m_simParams.gridSize.y)* m_simParams.gridSize.x) + (gridPos.y* m_simParams.gridSize.x) + gridPos.x;
// get start of bucket for this cell
unsigned int bucketStart = m_hCellStart[gridHash];
// 0xffffffff is treated as "no entries in this cell"
if (bucketStart == 0xffffffff)
continue;
// iterate over particles in this cell
for(uint q=0; q<m_simParams.maxParticlesPerCell; q++)
{
///add overlap with planes
uint cellIndex2 = bucketStart + q;
int cellData = m_hParticleHash[cellIndex2*2];
if (cellData != gridHash)
break; // no longer in same bucket
int particleIndex2 = m_hParticleHash[cellIndex2*2+1];
// only the lower-index partner is accepted, so each pair is considered once
if (particleIndex2!= index && particleIndex2<index)
{ // check not colliding with self
//add an overlapping pair
//printf("add pair (%d,%d)\n",particleIndex2,index);
btSimpleBroadphaseProxy* proxy1 = &m_pHandles[particleIndex2];
//do a more exact AABB overlap test before adding the pair
bool hasOverlap = testAabbOverlap(proxy0,proxy1);
if (hasOverlap)
m_pairCache->addOverlappingPair(proxy0,proxy1);
else
{
numRejected++;
}
}
}
//int numOverlap += myCollideCell2(gridPos + make_int3(x, y, z), index, pos, vel, oldPos, oldVel, particleHash, cellStart);
}
}
}
}
#else // USE_OLD
// active path; overlappingPairArrayA is fetched but not used afterwards in this function
btBroadphasePairArray& overlappingPairArrayA = m_pairCache->getOverlappingPairArray();
findOverlappingPairs(dispatcher);
#endif
#endif //_USE_BRUTEFORCE_N
#if USE_OLD
// disabled cleanup pass: sort the pair array, drop duplicates and pairs whose AABBs no longer overlap
///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache
if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
{
BT_PROFILE("Cleaning-- CPU");
btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray();
//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
//overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
overlappingPairArray.heapSort(btBroadphasePairSortPredicate());
//printf("A) overlappingPairArray.size()=%d\n",overlappingPairArray.size());
overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
m_invalidPair = 0;
btBroadphasePair previousPair;
previousPair.m_pProxy0 = 0;
previousPair.m_pProxy1 = 0;
previousPair.m_algorithm = 0;
int i;
for (i=0;i<overlappingPairArray.size();i++)
{
btBroadphasePair& pair = overlappingPairArray[i];
// after sorting, equal pairs are adjacent, so comparing with the previous entry finds duplicates
bool isDuplicate = (pair == previousPair);
previousPair = pair;
bool needsRemoval = false;
if (!isDuplicate)
{
bool hasOverlap = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
if (hasOverlap)
{
needsRemoval = false;//callback->processOverlap(pair);
} else
{
// hasOverlapA repeats the test above and its result is not used
bool hasOverlapA = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
needsRemoval = true;
}
} else
{
//remove duplicate
needsRemoval = true;
//should have no algorithm
// btAssert(!pair.m_algorithm);
}
if (needsRemoval)
{
m_pairCache->cleanOverlappingPair(pair,dispatcher);
// m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1);
// m_overlappingPairArray.pop_back();
// mark the slot invalid; it is counted here and trimmed by the sort/resize below
pair.m_pProxy0 = 0;
pair.m_pProxy1 = 0;
m_invalidPair++;
}
}
///if you don't like to skip the invalid pairs in the array, execute following code:
#define CLEAN_INVALID_PAIRS 1
#ifdef CLEAN_INVALID_PAIRS
//perform a sort, to sort 'invalid' pairs to the end
//overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
overlappingPairArray.heapSort(btBroadphasePairSortPredicate());
//printf("B) overlappingPairArray.size()=%d\n",overlappingPairArray.size());
overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
// printf("C) overlappingPairArray.size()=%d\n",overlappingPairArray.size());
m_invalidPair = 0;
#endif//CLEAN_INVALID_PAIRS
}
#endif // USE_OLD
}
//printf("numRejected=%d\n",numRejected);
}
/// Returns the next rand() value scaled into [0, 1] as a float.
static inline float frand()
{
	const float scale = (float) RAND_MAX;
	const int sample = rand();
	return sample / scale;
}
/// Fills the host position/velocity arrays with particles laid out on a regular lattice.
/// @param size         lattice extent along x, y, z (size[0], size[1], size[2]).
/// @param spacing      distance between neighbouring lattice points.
/// @param jitter       amplitude of the random offset added per coordinate
///                     (not used when CONTROLLED_START is defined).
/// @param numParticles lattice slots with a linear index >= numParticles are skipped.
/// The random generator is re-seeded with a fixed value, so the layout is repeatable.
/// Only the host arrays are written; nothing is uploaded here.
void btCudaBroadphase::initGrid(unsigned int* size, float spacing, float jitter, unsigned int numParticles)
{
	srand(1973);
#ifdef CONTROLLED_START
	// deterministic layout: no jitter, only the first written particle gets a small x offset
	float extra=0.01f;
	for(uint gz=0; gz<size[2]; ++gz) {
		for(uint gy=0; gy<size[1]; ++gy) {
			for(uint gx=0; gx<size[0]; ++gx) {
				const uint slot = (gz*size[1]*size[0]) + (gy*size[0]) + gx;
				if (slot >= numParticles)
					continue;
				float* pos = &m_hPos[slot*4];
				float* vel = &m_hVel[slot*4];
				pos[0] = (spacing * gx) + m_simParams.particleRadius - 1.0f+extra;//+ (frand()*2.0f-1.0f)*jitter;
				pos[1] = (spacing * gy) + m_simParams.particleRadius - 1.0f;//+ (frand()*2.0f-1.0f)*jitter;
				pos[2] = (spacing * gz) + m_simParams.particleRadius - 1.0f;//+ (frand()*2.0f-1.0f)*jitter;
				pos[3] = 1.0f;
				extra=0.f;
				vel[0] = 0.0f;
				vel[1] = 0.0f;
				vel[2] = 0.0f;
				vel[3] = 0.0f;
			}
			extra=0.f;
		}
	}
#else
	for(uint gz=0; gz<size[2]; ++gz) {
		for(uint gy=0; gy<size[1]; ++gy) {
			for(uint gx=0; gx<size[0]; ++gx) {
				const uint slot = (gz*size[1]*size[0]) + (gy*size[0]) + gx;
				if (slot >= numParticles)
					continue;
				float* pos = &m_hPos[slot*4];
				float* vel = &m_hVel[slot*4];
				// one frand() per coordinate, drawn in x, y, z order
				pos[0] = (spacing * gx) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
				pos[1] = (spacing * gy) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
				pos[2] = (spacing * gz) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
				pos[3] = 1.0f;
				vel[0] = 0.0f;
				vel[1] = 0.0f;
				vel[2] = 0.0f;
				vel[3] = 0.0f;
			}
		}
	}
#endif
}
/// Re-initialises all particle positions and velocities on the host and uploads them.
/// @param config CONFIG_GRID places particles on a jittered cubic lattice via initGrid();
///               CONFIG_RANDOM (and any other value) scatters them uniformly in [-1,1]^3.
/// Velocities are zeroed in both cases.
void btCudaBroadphase::reset(ParticleConfig config)
{
	if (config == CONFIG_GRID)
	{
		const float jitter = m_simParams.particleRadius*0.01f;
		// smallest cube edge whose volume holds every particle
		const uint edge = (int) ceilf(powf((float) m_numParticles, 1.0f / 3.0f));
		uint gridSize[3];
		gridSize[0] = edge;
		gridSize[1] = edge;
		gridSize[2] = edge;
		initGrid(gridSize, m_simParams.particleRadius*2.0f, jitter, m_numParticles);
	}
	else
	{
		// CONFIG_RANDOM, also the fallback for unknown values
		for(uint n=0; n < m_numParticles; n++)
		{
			float* pos = m_hPos + n*4;
			float* vel = m_hVel + n*4;
			const float rx = frand();
			const float ry = frand();
			const float rz = frand();
			pos[0] = 2 * (rx - 0.5f);
			pos[1] = 2 * (ry - 0.5f);
			pos[2] = 2 * (rz - 0.5f);
			pos[3] = 1.0f; // radius
			vel[0] = 0.0f;
			vel[1] = 0.0f;
			vel[2] = 0.0f;
			vel[3] = 0.0f;
		}
	}
	setArray(POSITION, m_hPos, 0, m_numParticles);
	setArray(VELOCITY, m_hVel, 0, m_numParticles);
}
/// Writes a ball of particles into the host arrays, beginning at particle index `start`,
/// and uploads exactly the particles that were written.
/// @param start   index of the first particle slot to overwrite.
/// @param pos     centre of the ball; pos[3] is copied into every particle's 4th component.
/// @param vel     velocity (4 floats) assigned to every particle of the ball.
/// @param r       ball radius measured in lattice steps; lattice points in [-r, r]^3 are tested.
/// @param spacing distance between lattice points.
/// Lattice points farther than particleRadius*2*r from the centre are skipped, and writing
/// stops once m_numParticles slots are used.
void btCudaBroadphase::addSphere(int start, float *pos, float *vel, int r, float spacing)
{
	const uint first = start;
	uint index = first;
	for(int z=-r; z<=r; z++) {
		for(int y=-r; y<=r; y++) {
			for(int x=-r; x<=r; x++) {
				float dx = x*spacing;
				float dy = y*spacing;
				float dz = z*spacing;
				float l = sqrtf(dx*dx + dy*dy + dz*dz);
				if ((l <= m_simParams.particleRadius*2.0f*r) && (index < m_numParticles)) {
					m_hPos[index*4] = pos[0] + dx;
					m_hPos[index*4+1] = pos[1] + dy;
					m_hPos[index*4+2] = pos[2] + dz;
					m_hPos[index*4+3] = pos[3];
					m_hVel[index*4] = vel[0];
					m_hVel[index*4+1] = vel[1];
					m_hVel[index*4+2] = vel[2];
					m_hVel[index*4+3] = vel[3];
					index++;
				}
			}
		}
	}
	// setArray() takes (data, start, count) and reads `count` particles beginning at `data`,
	// so hand it the host address of the first new particle and the number actually written
	// (not the host base pointer and the end index).
	const uint added = index - first;
	if (added == 0)
		return;
	setArray(POSITION, m_hPos + first*4, start, added);
	setArray(VELOCITY, m_hVel + first*4, start, added);
}
/// Uploads `count` particles (4 floats each) from `data` into the current read buffer,
/// beginning at particle index `start` of the destination.
/// @param array VELOCITY writes to the device velocity array; POSITION (and any other
///              value) writes to the position VBO through OpenGL.
/// @param data  host pointer to the first float to upload.
void btCudaBroadphase::setArray(ParticleArray array, const float* data, int start, int count)
{
	assert(m_bInitialized);

	if (array == VELOCITY)
	{
		copyArrayToDevice(m_dVel[m_currentVelRead], data, start*4*sizeof(float), count*4*sizeof(float));
		return;
	}

	// POSITION and fallback: the VBO is unregistered around the GL-side write and
	// registered again afterwards
	unregisterGLBufferObject(m_posVbo[m_currentPosRead]);
	glBindBuffer(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
	glBufferSubData(GL_ARRAY_BUFFER, start*4*sizeof(float), count*4*sizeof(float), data);
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	registerGLBufferObject(m_posVbo[m_currentPosRead]);
}
/// Copies all particles of the requested array from the current read buffer into the
/// matching host array and returns that host array.
/// @param array VELOCITY reads the device velocity array into m_hVel; POSITION (and any
///              other value) reads into m_hPos, passing the position VBO id as well.
/// @return m_hVel or m_hPos, owned by this object.
float* btCudaBroadphase::getArray(ParticleArray array)
{
	assert(m_bInitialized);

	float* hostDst = 0;
	float* deviceSrc = 0;
	unsigned int vboId = 0;

	if (array == VELOCITY)
	{
		hostDst = m_hVel;
		deviceSrc = m_dVel[m_currentVelRead];
	}
	else
	{
		hostDst = m_hPos;
		deviceSrc = m_dPos[m_currentPosRead];
		vboId = m_posVbo[m_currentPosRead];
	}

	copyArrayFromDevice(hostDst, deviceSrc, vboId, m_numParticles*4*sizeof(float));
	return hostDst;
}
/// Debug helper: copies the per-cell counters and cell contents to the host and prints
/// every non-empty cell, followed by the largest cell population and the total count.
/// The device grid arrays are only allocated when USE_SORT is 0 (see _initialize), so
/// in a USE_SORT build this only prints a notice instead of reading unallocated memory.
void btCudaBroadphase::dumpGrid()
{
#if USE_SORT
	printf("dumpGrid: grid counters are not available when USE_SORT is enabled\n");
#else
	// debug
	copyArrayFromDevice(m_hGridCounters, m_dGridCounters, 0, sizeof(uint)*m_simParams.numCells);
	copyArrayFromDevice(m_hGridCells, m_dGridCells, 0, sizeof(uint)*m_simParams.numCells*m_maxParticlesPerCell);
	uint total = 0;
	uint maxPerCell = 0;
	for(uint i=0; i<m_simParams.numCells; i++) {
		if (m_hGridCounters[i] > maxPerCell)
			maxPerCell = m_hGridCounters[i];
		if (m_hGridCounters[i] > 0) {
			// all printed values are unsigned, hence %u
			printf("%u (%u): ", i, m_hGridCounters[i]);
			for(uint j=0; j<m_hGridCounters[i]; j++) {
				printf("%u ", m_hGridCells[i*m_maxParticlesPerCell + j]);
			}
			total += m_hGridCounters[i];
			printf("\n");
		}
	}
	printf("max per cell = %u\n", maxPerCell);
	printf("total = %u\n", total);
#endif
}
void btCudaBroadphase::dumpParticles(unsigned int start, unsigned int count)
{
// debug
copyArrayFromDevice(m_hPos, 0, m_posVbo[m_currentPosRead], sizeof(float)*4*count);
copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*count);
for(uint i=start; i<start+count; i++) {
// printf("%d: ", i);
printf("pos: (%.4f, %.4f, %.4f, %.4f)\n", m_hPos[i*4+0], m_hPos[i*4+1], m_hPos[i*4+2], m_hPos[i*4+3]);
printf("vel: (%.4f, %.4f, %.4f, %.4f)\n", m_hVel[i*4+0], m_hVel[i*4+1], m_hVel[i*4+2], m_hVel[i*4+3]);
}
}
/// Copies the current velocities into m_hVel and maps the current position VBO for
/// host read/write access.
/// @return pointer returned by glMapBufferARB for the position VBO. The buffer is left
///         bound and mapped; copyBuffersFromHostToDevice() performs the unmap.
float* btCudaBroadphase::copyBuffersFromDeviceToHost()
{
	// positions are not copied; they are exposed through the mapped VBO instead
	copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*m_numParticles);

	glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
	return (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);
}
/// Counterpart of copyBuffersFromDeviceToHost(): unmaps whatever buffer is bound to
/// GL_ARRAY_BUFFER and uploads m_hVel into the current velocity read buffer.
void btCudaBroadphase::copyBuffersFromHostToDevice()
{
	// no buffer is bound here, so this relies on the binding made before the map call
	glUnmapBufferARB(GL_ARRAY_BUFFER);

	copyArrayToDevice(m_dVel[m_currentVelRead], m_hVel, 0,
		sizeof(float)*4*m_numParticles);
}
float* btCudaBroadphase::getHvelPtr()
{
return m_hVel;
}
float* btCudaBroadphase::getHposPtr()
{
return m_hPos;
}
/// Runs one full particle step: integrate by deltaTime, rebuild the grid, then one
/// collision pass. Read/write buffer indices are swapped after integration, and the
/// velocity indices once more after the collision pass.
/// @param deltaTime time step handed to integrateSystem().
void btCudaBroadphase::quickHack(float deltaTime)
{
	// hand the current simulation parameters to the CUDA side
	setParameters(&m_simParams);

	// integrate, then make the freshly written buffers the read buffers
	integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
		m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
		deltaTime,
		m_numParticles);
	btSwap(m_currentPosRead, m_currentPosWrite);
	btSwap(m_currentVelRead, m_currentVelWrite);

#if USE_SORT
	// sort-and-search grid: hash, sort by hash, reorder + find cell starts
	calcHash(m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash:\n");
	for(uint n=0; n<m_numParticles; n++) {
		printf("%d: %d, %d\n", n, m_hParticleHash[n*2], m_hParticleHash[n*2+1]);
	}
#endif
	RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash sorted:\n");
	for(uint n=0; n<m_numParticles; n++) {
		printf("%d: %d, %d\n", n, m_hParticleHash[n*2], m_hParticleHash[n*2+1]);
	}
#endif
	reorderDataAndFindCellStart(m_dParticleHash[0],
		m_posVbo[m_currentPosRead],
		m_dVel[m_currentVelRead],
		m_dSortedPos,
		m_dSortedVel,
		m_dCellStart,
		m_numParticles,
		m_simParams.numCells);
	//#define DEBUG_GRID2
#ifdef DEBUG_GRID2
	copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
	printf("cell start:\n");
	for(uint cell=0; cell<m_simParams.numCells; cell++) {
		printf("%d: %d\n", cell, m_hCellStart[cell]);
	}
#endif
#else
	// atomics-based grid update
	updateGrid(m_posVbo[m_currentPosRead],
		m_dGridCounters,
		m_dGridCells,
		m_numParticles,
		m_numGridCells);
#endif

	// collision response: a single solver pass
	const int solverIterations = 1;
	for(int pass=0; pass<solverIterations; ++pass) {
		collide(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
			m_dSortedPos, m_dSortedVel,
			m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
			m_dGridCounters,
			m_dGridCells,
			m_dParticleHash[0],
			m_dCellStart,
			m_numParticles,
			m_simParams.numCells,
			m_maxParticlesPerCell
			);
		btSwap(m_currentVelRead, m_currentVelWrite);
	}
}
/// Advances all particles by a fixed 1/60 time step and swaps the read/write buffer
/// indices for positions and velocities.
void btCudaBroadphase::integrate()
{
	const float deltaTime = 1./60.f;

	// hand the current simulation parameters to the CUDA side before stepping
	setParameters(&m_simParams);

	integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
		m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
		deltaTime,
		m_numParticles);

	// the buffers just written become the ones to read from
	btSwap(m_currentPosRead, m_currentPosWrite);
	btSwap(m_currentVelRead, m_currentVelWrite);
}
/// Variant of quickHack() that integrates with a zero time step and rebuilds the grid,
/// but performs no collision pass. Read/write buffer indices are swapped after the
/// integration call.
void btCudaBroadphase::quickHack2()
{
	// hand the current simulation parameters to the CUDA side
	setParameters(&m_simParams);

	// integrate with deltaTime == 0, then make the written buffers the read buffers
	integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
		m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
		0.f,
		m_numParticles);
	btSwap(m_currentPosRead, m_currentPosWrite);
	btSwap(m_currentVelRead, m_currentVelWrite);

#if USE_SORT
	// sort-and-search grid: hash, sort by hash, reorder + find cell starts
	calcHash(m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash:\n");
	for(uint n=0; n<m_numParticles; n++) {
		printf("%d: %d, %d\n", n, m_hParticleHash[n*2], m_hParticleHash[n*2+1]);
	}
#endif
	RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash sorted:\n");
	for(uint n=0; n<m_numParticles; n++) {
		printf("%d: %d, %d\n", n, m_hParticleHash[n*2], m_hParticleHash[n*2+1]);
	}
#endif
	reorderDataAndFindCellStart(m_dParticleHash[0],
		m_posVbo[m_currentPosRead],
		m_dVel[m_currentVelRead],
		m_dSortedPos,
		m_dSortedVel,
		m_dCellStart,
		m_numParticles,
		m_simParams.numCells);
	//#define DEBUG_GRID2
#ifdef DEBUG_GRID2
	copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
	printf("cell start:\n");
	for(uint cell=0; cell<m_simParams.numCells; cell++) {
		printf("%d: %d\n", cell, m_hCellStart[cell]);
	}
#endif
#else
	// atomics-based grid update
	updateGrid(m_posVbo[m_currentPosRead],
		m_dGridCounters,
		m_dGridCells,
		m_numParticles,
		m_numGridCells);
#endif
	// no collide() pass here, unlike quickHack()
}
/// Gathers the proxy AABBs in hash-table order, runs the pair search (on the GPU when
/// USE_CUDA is set, otherwise with the CPU reference routines) and finally hands the
/// result to addPairsToCacheCPU().
/// @param dispatcher forwarded to addPairsToCacheCPU().
void btCudaBroadphase::findOverlappingPairs(btDispatcher* dispatcher)
{
	BT_PROFILE("findOverlappingPairs -- CPU");
	m_numPairsAdded = 0;
	{
		BT_PROFILE("copy AABB -- CPU");
		// 8 floats per entry: min.xyz, 0, max.xyz, 0 — entry order follows m_hParticleHash
		for(uint slot=0; slot<m_numParticles; slot++)
		{
			const int handleIndex = m_hParticleHash[slot*2+1];
			btSimpleBroadphaseProxy* proxy = &m_pHandles[handleIndex];
			float* box = m_hAABB + slot*8;
			box[0] = proxy->m_aabbMin.getX();
			box[1] = proxy->m_aabbMin.getY();
			box[2] = proxy->m_aabbMin.getZ();
			box[3] = 0.0F;
			box[4] = proxy->m_aabbMax.getX();
			box[5] = proxy->m_aabbMax.getY();
			box[6] = proxy->m_aabbMax.getZ();
			box[7] = 0.0F;
		}
	}
#if USE_CUDA
	{
		{
			BT_PROFILE("CopyBB to CUDA");
			copyArrayToDevice(m_dAABB, m_hAABB, 0, sizeof(float)*4*2*m_numParticles);
		}
		{
			BT_PROFILE("btCudaFindOverlappingPairs");
			btCudaFindOverlappingPairs(m_dAABB, m_dParticleHash[0], m_dCellStart,
				m_dPairBuff, m_dPairBuffStartCurr, m_numParticles);
		}
		{
			BT_PROFILE("btCudaComputePairCacheChanges");
			btCudaComputePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_numParticles);
		}
		{
			// the scan itself runs on the host, so the counts make a round trip
			BT_PROFILE("scanOverlappingPairBuffCPU");
			copyArrayFromDevice(m_hPairScan, m_dPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
			scanOverlappingPairBuffCPU();
			copyArrayToDevice(m_dPairScan, m_hPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
		}
		{
			BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
			btCudaSqueezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_numParticles);
		}
		{
			// fetch only as many entries as the last scan element reports
			BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
			copyArrayFromDevice(m_hPairOut, m_dPairOut, 0, sizeof(unsigned int) * m_hPairScan[m_numParticles]);
		}
	}
#else
	findOverlappingPairsCPU(m_hAABB, m_hParticleHash, m_hCellStart,
		m_hPairBuff, m_hPairBuffStartCurr, m_numParticles);
	computePairCacheChangesCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_numParticles);
	scanOverlappingPairBuffCPU();
	squeezeOverlappingPairBuffCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_numParticles);
#endif
	{
		BT_PROFILE("addPairsToCache");
		addPairsToCacheCPU(dispatcher);
	}
} // btCudaBroadphase::findOverlappingPairs()
// Convert a position into integer cell coordinates of the uniform grid.
// Per axis, the offset of p from m_simParams.worldOrigin is divided by
// m_simParams.cellSize and rounded down with floor(). p.w is not read.
// The result is NOT clamped here, so it may lie outside the grid.
int3 btCudaBroadphase::calcGridPosCPU(float4 p)
{
    int3 cell;
    cell.x = static_cast<int>(floor((p.x - m_simParams.worldOrigin.x) / m_simParams.cellSize.x));
    cell.y = static_cast<int>(floor((p.y - m_simParams.worldOrigin.y) / m_simParams.cellSize.y));
    cell.z = static_cast<int>(floor((p.z - m_simParams.worldOrigin.z) / m_simParams.cellSize.z));
    return cell;
} // btCudaBroadphase::calcGridPosCPU()
// calculate address in grid from position (clamping to edges)
// Takes integer cell coordinates (gridPos is received by value, so the caller's
// copy is not modified) and returns a single flat cell index.
// NOTE(review): the declared type of m_simParams.gridSize is not visible here;
// the clamp below behaves as a signed clamp to [0, gridSize-1] only if min/max
// resolve to signed-int overloads -- confirm against the SimParams declaration.
uint btCudaBroadphase::calcGridHashCPU(int3 gridPos)
{
// clamp each coordinate so that out-of-range cells map onto the nearest edge cell
gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1));
gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1));
gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1));
// flatten with x varying fastest, then y, then z
return (gridPos.z * m_simParams.gridSize.y) * m_simParams.gridSize.x + gridPos.y * m_simParams.gridSize.x + gridPos.x;
}
// Run computePairCacheChangesCPU_D() once for every index in [0, numParticles).
// pPairBuffStartCurr is passed on reinterpreted as an array of uint2
// (start, current count) records.
void btCudaBroadphase::computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
    uint2* startCurr = (uint2*)pPairBuffStartCurr;
    uint particle = 0;
    while(particle < numParticles)
    {
        computePairCacheChangesCPU_D(particle, pPairBuff, startCurr, pPairScan);
        ++particle;
    }
} // btCudaBroadphase::computePairCacheChangesCPU()
// Count the entries in the pair list of `index` that do NOT carry
// BT_CUDA_PAIR_FOUND_FLG and store that count in pPairScan[index + 1].
// The list occupies pPairBuff[start .. start + curr), where (start, curr) is
// the uint2 record pPairBuffStartCurr[index]. Nothing in pPairBuff is modified.
void btCudaBroadphase::computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
{
    const uint2 listRange = pPairBuffStartCurr[index];
    const uint* entries = pPairBuff + listRange.x;
    const uint entryCount = listRange.y;

    uint changed = 0;
    for(uint slot = 0; slot < entryCount; ++slot)
    {
        if(!(entries[slot] & BT_CUDA_PAIR_FOUND_FLG))
        {
            ++changed;
        }
    }
    // slot index+1 (not index) is written; slot 0 is left for the scan pass
    pPairScan[index + 1] = changed;
} // btCudaBroadphase::computePairCacheChangesCPU_D()
void btCudaBroadphase::findOverlappingPairsCPU( float* pAABB,
uint* pParticleHash,
uint* pCellStart,
uint* pPairBuff,
uint* pPairBuffStartCurr,
uint numParticles)
{
BT_PROFILE("findOverlappingPairsCPU -- CPU");
for(uint i = 0; i < numParticles; i++)
{
findOverlappingPairsCPU_D(
i,
(float4 *)pAABB,
(uint2*)pParticleHash,
(uint*)pCellStart,
(uint*)pPairBuff,
(uint2*)pPairBuffStartCurr,
numParticles);
}
} // btCudaBroadphase::findOverlappingPairsCPU()
// Collect overlapping pairs for one entry of the AABB / particle-hash arrays.
//
// index              - entry to process; its box is pAABB[index*2] (min) and
//                      pAABB[index*2+1] (max)
// pAABB              - min/max boxes, two float4 per entry
// pParticleHash, pCellStart, pPairBuff, pPairBuffStartCurr, numParticles
//                    - forwarded unchanged to findPairsInCellCPU()
//
// The centre of the box selects a grid cell via calcGridPosCPU(); that cell and
// its 26 neighbours (offsets -1..1 on each axis) are then searched with
// findPairsInCellCPU().
void btCudaBroadphase::findOverlappingPairsCPU_D(	uint index,
                                                    float4* pAABB,
                                                    uint2* pParticleHash,
                                                    uint* pCellStart,
                                                    uint* pPairBuff,
                                                    uint2* pPairBuffStartCurr,
                                                    uint numParticles)
{
    float4 bbMin = pAABB[index*2];
    float4 bbMax = pAABB[index*2+1];
    float4 pos;
    pos.x = (bbMin.x + bbMax.x) * 0.5f;
    pos.y = (bbMin.y + bbMax.y) * 0.5f;
    pos.z = (bbMin.z + bbMax.z) * 0.5f;
    // pos is copied by value into calcGridPosCPU(); give w a defined value so
    // the copy does not read an uninitialized member (w itself is not used there)
    pos.w = 0.0f;
    // get address in grid
    int3 gridPos = calcGridPosCPU(pos);
    // examine only neighbouring cells
    for(int z=-1; z<=1; z++) {
        for(int y=-1; y<=1; y++) {
            for(int x=-1; x<=1; x++) {
                int3 gridPos2;
                gridPos2.x = gridPos.x + x;
                gridPos2.y = gridPos.y + y;
                gridPos2.z = gridPos.z + z;
                findPairsInCellCPU(gridPos2, index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
            }
        }
    }
} // btCudaBroadphase::findOverlappingPairsCPU_D()
// Test the box of entry `index` against every entry stored in grid cell
// `gridPos` and update the pair list of `index` accordingly.
//
// gridPos            - cell to search; cells outside the grid are ignored
// index              - entry whose box is pAABB[index*2] / pAABB[index*2+1]
// pParticleHash      - uint2 records: .x is compared against the cell hash,
//                      .y is the id used to address pPairBuffStartCurr and the
//                      value stored in the pair list
// pCellStart         - first entry of each cell, 0xffffffff for an empty cell
// pPairBuff          - storage for all pair lists
// pPairBuffStartCurr - uint2 (start, current count) record per id
// numParticles       - upper bound for the entries scanned
//
// An overlapping partner that is already among the entries present when this
// call started gets BT_CUDA_PAIR_FOUND_FLG set; any other overlapping partner
// is appended with BT_CUDA_PAIR_NEW_FLG. The updated count is written back to
// pPairBuffStartCurr.
// NOTE(review): no capacity check on the pair list is visible in this function;
// presumably the caller guarantees enough room per id -- confirm.
void btCudaBroadphase::findPairsInCellCPU(	int3 gridPos,
                                            uint index,
                                            uint2* pParticleHash,
                                            uint* pCellStart,
                                            float4* pAABB,
                                            uint* pPairBuff,
                                            uint2* pPairBuffStartCurr,
                                            uint numParticles)
{
    // reject cells that fall outside the grid, one axis at a time
    if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1)) return;
    if ((gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1)) return;
    if ((gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1)) return;

    const uint cellHash = calcGridHashCPU(gridPos);
    const uint firstInCell = pCellStart[cellHash];
    if (firstInCell == 0xffffffff)
    {
        return; // cell empty
    }

    // box and pair list of the entry being processed
    const float4 selfMin = pAABB[index*2];
    const float4 selfMax = pAABB[index*2+1];
    const uint selfId = pParticleHash[index].y;
    const uint2 listRange = pPairBuffStartCurr[selfId];
    const uint listStart = listRange.x;
    const uint knownCount = listRange.y; // entries present before this call
    uint listCount = knownCount;         // grows as new pairs are appended

    // never scan more than maxParticlesPerCell entries or past the array end
    uint scanEnd = firstInCell + m_simParams.maxParticlesPerCell;
    if (scanEnd > numParticles)
    {
        scanEnd = numParticles;
    }

    for(uint other = firstInCell; other < scanEnd; ++other)
    {
        const uint2 otherData = pParticleHash[other];
        if (otherData.x != cellHash)
        {
            break; // no longer in same bucket
        }
        if (other == index)
        {
            continue; // do not pair an entry with itself
        }
        const float4 otherMin = pAABB[other*2];
        const float4 otherMax = pAABB[other*2 + 1];
        if(!cudaTestAABBOverlapCPU(selfMin, selfMax, otherMin, otherMax))
        {
            continue;
        }

        const uint otherId = otherData.y;
        bool alreadyListed = false;
        for(uint k = 0; k < knownCount; ++k)
        {
            const uint listedId = pPairBuff[listStart+k] & (~BT_CUDA_PAIR_ANY_FLG);
            if(listedId == otherId)
            {
                pPairBuff[listStart+k] |= BT_CUDA_PAIR_FOUND_FLG;
                alreadyListed = true;
                break;
            }
        }
        if(!alreadyListed)
        {
            pPairBuff[listStart+listCount] = otherId | BT_CUDA_PAIR_NEW_FLG;
            ++listCount;
        }
    }
    pPairBuffStartCurr[selfId] = make_uint2(listStart, listCount);
} // btCudaBroadphase::findPairsInCellCPU()
// Axis-aligned box overlap test on the x, y and z components (w is ignored).
// Returns 1 when the intervals overlap on all three axes, 0 otherwise.
// Boxes that merely touch count as overlapping because the comparisons are <=.
uint btCudaBroadphase::cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1)
{
    const bool overlapX = (min0.x <= max1.x) && (min1.x <= max0.x);
    const bool overlapY = (min0.y <= max1.y) && (min1.y <= max0.y);
    const bool overlapZ = (min0.z <= max1.z) && (min1.z <= max0.z);
    return overlapX && overlapY && overlapZ;
} // btCudaBroadphase::cudaTestAABBOverlapCPU()
void btCudaBroadphase::scanOverlappingPairBuffCPU()
{
m_hPairScan[0] = 0;
for(uint i = 1; i <= m_numParticles; i++)
{
unsigned int delta = m_hPairScan[i];
m_hPairScan[i] = m_hPairScan[i-1] + delta;
}
} // btCudaBroadphase::scanOverlappingPairBuffCPU()
// Run squeezeOverlappingPairBuffCPU_D() once for every index in
// [0, numParticles). pPairBuffStartCurr is passed on reinterpreted as an array
// of uint2 (start, current count) records.
void btCudaBroadphase::squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
    uint2* startCurr = (uint2*)pPairBuffStartCurr;
    uint particle = 0;
    while(particle < numParticles)
    {
        squeezeOverlappingPairBuffCPU_D(particle, pPairBuff, startCurr, pPairScan, pPairOut);
        ++particle;
    }
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU()
// Split the pair list of `index` into "changes" and "survivors".
//
// The list is pPairBuff[start .. start + curr), with (start, curr) taken from
// pPairBuffStartCurr[index]. For every entry, in order:
//   - if BT_CUDA_PAIR_FOUND_FLG is not set, the entry (flags included) is
//     copied to pPairOut, starting at offset pPairScan[index];
//   - if any bit of BT_CUDA_PAIR_ANY_FLG is set, the entry is kept: it is
//     written back to the front of the same list with those flag bits cleared.
// Finally the count in pPairBuffStartCurr[index] is replaced by the number of
// kept entries; the start offset is unchanged.
void btCudaBroadphase::squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
{
    const uint2 listRange = pPairBuffStartCurr[index];
    const uint listStart = listRange.x;
    const uint listCount = listRange.y;

    uint* readPos = pPairBuff + listStart;
    uint* keepPos = readPos;                        // compacts in place; never ahead of readPos
    uint* changePos = pPairOut + pPairScan[index];
    uint keptCount = 0;

    for(uint k = 0; k < listCount; ++k, ++readPos)
    {
        const uint entry = *readPos;
        if(!(entry & BT_CUDA_PAIR_FOUND_FLG))
        {
            *changePos++ = entry;
        }
        if(entry & BT_CUDA_PAIR_ANY_FLG)
        {
            *keepPos++ = entry & (~BT_CUDA_PAIR_ANY_FLG);
            ++keptCount;
        }
    }
    pPairBuffStartCurr[index] = make_uint2(listStart, keptCount);
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
// Number of addOverlappingPair() calls made by the most recent
// addPairsToCacheCPU() run.
unsigned int gNumPairsAdded = 0;

// Apply the pair changes listed in m_hPairOut to the overlapping pair cache.
//
// For each i in [0, m_numParticles) the changes are the entries
// m_hPairOut[m_hPairScan[i] .. m_hPairScan[i+1]). Each entry holds the index of
// the second proxy in its non-flag bits; an entry carrying BT_CUDA_PAIR_NEW_FLG
// is added to m_pairCache, every other entry is removed from it.
//
// dispatcher - forwarded to removeOverlappingPair()
void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher)
{
    gNumPairsAdded = 0;
    for(uint handle = 0; handle < m_numParticles; ++handle)
    {
        const unsigned int changeCount = m_hPairScan[handle + 1] - m_hPairScan[handle];
        if(changeCount == 0)
        {
            continue;
        }
        const unsigned int* change = m_hPairOut + m_hPairScan[handle];
        const unsigned int* const changeEnd = change + changeCount;
        btSimpleBroadphaseProxy* proxy0 = &m_pHandles[handle];
        for(; change != changeEnd; ++change)
        {
            const unsigned int flagged = *change;
            const unsigned int partner = flagged & (~BT_CUDA_PAIR_ANY_FLG);
            btSimpleBroadphaseProxy* proxy1 = &m_pHandles[partner];
            const bool isNewPair = (flagged & BT_CUDA_PAIR_NEW_FLG) != 0;
            if(!isNewPair)
            {
                m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
                continue;
            }
            m_pairCache->addOverlappingPair(proxy0,proxy1);
            gNumPairsAdded++;
        }
    }
} // btCudaBroadphase::addPairsToCacheCPU()