1545 lines
45 KiB
C++
1545 lines
45 KiB
C++
|
|
/*
|
|
Bullet Continuous Collision Detection and Physics Library
|
|
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it freely,
|
|
subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
|
|
#include "particles_kernel.cuh"
|
|
#include "particleSystem.cuh"
|
|
#include "radixsort.cuh"
|
|
#include "vector_functions.h"
|
|
#include <stdio.h>
|
|
|
|
#ifdef WIN32//for glut.h
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#include <GL/glew.h>
|
|
//think different
|
|
#if defined(__APPLE__) && !defined (VMDMESA)
|
|
#include <OpenGL/gl.h>
|
|
#include <OpenGL/glu.h>
|
|
#include <GLUT/glut.h>
|
|
#else
|
|
#include <GL/glut.h>
|
|
#endif
|
|
|
|
#define MAX_COLL_PAIR_PER_PARTICLE 64
|
|
|
|
#define USE_SORT 1
|
|
#define USE_OLD 0
|
|
#define USE_CUDA 1
|
|
|
|
#include "btCudaBroadphase.h"
|
|
#include "LinearMath/btAlignedAllocator.h"
|
|
#include "LinearMath/btQuickprof.h"
|
|
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
|
|
|
|
/// Constructs the CUDA broadphase on top of btSimpleBroadphase.
///
/// The base class is handed a btHashedOverlappingPairCache that is
/// placement-constructed in 16-byte aligned memory, and m_ownsPairCache is set
/// to true. Every field of m_simParams except the body count is then
/// overwritten with fixed defaults (64^3 grid over a 2x2x2 world whose origin is
/// (-1,-1,-1)), and _initialize() is called to allocate the buffers.
///
/// @param simParams   source of the body count (numBodies); also used to
///                    initialise m_simParams
/// @param maxProxies  forwarded to the btSimpleBroadphase constructor
btCudaBroadphase::btCudaBroadphase(SimParams& simParams,int maxProxies) :
	btSimpleBroadphase(maxProxies,
		// (a btSortedOverlappingPairCache was used here previously)
		new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
	m_bInitialized(false),
	m_numParticles(simParams.numBodies),
	m_hPos(0),
	m_hVel(0),
	m_currentPosRead(0),
	m_currentVelRead(0),
	m_currentPosWrite(1),
	m_currentVelWrite(1),
	m_maxParticlesPerCell(4),
	m_simParams(simParams)
{
	m_ownsPairCache = true;

	// no device-side position/velocity arrays yet
	m_dPos[0] = 0;
	m_dPos[1] = 0;
	m_dVel[0] = 0;
	m_dVel[1] = 0;

	SimParams& params = m_simParams;

	// uniform grid layout
	params.gridSize.x = 64;
	params.gridSize.y = 64;
	params.gridSize.z = 64;
	params.numCells = params.gridSize.x*params.gridSize.y*params.gridSize.z;
	params.worldSize = make_float3(2.0f, 2.0f, 2.0f);

	// particle counts
	params.numBodies = m_numParticles;
	params.maxParticlesPerCell = m_maxParticlesPerCell;

	// world placement; one cell is worldSize / gridSize along each axis
	params.worldOrigin = make_float3(-1.0f, -1.0f, -1.0f);
	params.cellSize = make_float3(params.worldSize.x / params.gridSize.x,
	                              params.worldSize.y / params.gridSize.y,
	                              params.worldSize.z / params.gridSize.z);

	// a particle diameter equals one cell width
	params.particleRadius = params.cellSize.x * 0.5f;
	params.colliderPos = make_float4(-1.2f, -0.8f, 0.8f, 1.0f);
	params.colliderRadius = 0.2f;

	// contact model coefficients
	params.spring = 0.5f;
	params.damping = 0.02f;
	params.shear = 0.1f;
	params.attraction = 0.0f;
	params.boundaryDamping = -0.5f;

	params.gravity = make_float3(0.0f, -0.0003f, 0.0f);
	params.globalDamping = 1.0f;

	_initialize(m_numParticles);
}
|
|
|
|
/// Linear interpolation: yields a for t == 0 and b for t == 1.
static inline float lerp(float a, float b, float t)
{
	const float span = b - a;
	return a + t*span;
}
|
|
|
|
static void colorRamp(float t, float *r)
|
|
{
|
|
const int ncolors = 7;
|
|
float c[ncolors][3] = {
|
|
{ 1.0, 0.0, 0.0, },
|
|
{ 1.0, 0.5, 0.0, },
|
|
{ 1.0, 1.0, 0.0, },
|
|
{ 0.0, 1.0, 0.0, },
|
|
{ 0.0, 1.0, 1.0, },
|
|
{ 0.0, 0.0, 1.0, },
|
|
{ 1.0, 0.0, 1.0, },
|
|
};
|
|
t = t * (ncolors-1);
|
|
int i = (int) t;
|
|
float u = t - floor(t);
|
|
r[0] = lerp(c[i][0], c[i+1][0], u);
|
|
r[1] = lerp(c[i][1], c[i+1][1], u);
|
|
r[2] = lerp(c[i][2], c[i+1][2], u);
|
|
}
|
|
|
|
|
|
unsigned int btCudaBroadphase::createVBO(unsigned int size)
|
|
{
|
|
GLuint vbo;
|
|
glGenBuffers(1, &vbo);
|
|
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
|
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
|
registerGLBufferObject(vbo);
|
|
return vbo;
|
|
}
|
|
|
|
|
|
void btCudaBroadphase::_initialize(int numParticles)
|
|
{
|
|
assert(!m_bInitialized);
|
|
|
|
// allocate host storage
|
|
m_hPos = new float[numParticles*4];
|
|
m_hVel = new float[numParticles*4];
|
|
m_hSortedPos = new float[numParticles*4];
|
|
memset(m_hPos, 0, numParticles*4*sizeof(float));
|
|
memset(m_hVel, 0, numParticles*4*sizeof(float));
|
|
memset(m_hSortedPos, 0, numParticles*4*sizeof(float));
|
|
|
|
m_hGridCounters = new uint[m_simParams.numCells];
|
|
m_hGridCells = new uint[m_simParams.numCells*m_maxParticlesPerCell];
|
|
memset(m_hGridCounters, 0, m_simParams.numCells*sizeof(uint));
|
|
memset(m_hGridCells, 0, m_simParams.numCells*m_maxParticlesPerCell*sizeof(uint));
|
|
|
|
m_hParticleHash = new uint[numParticles*2];
|
|
memset(m_hParticleHash, 0, numParticles*2*sizeof(uint));
|
|
|
|
m_hCellStart = new uint[m_simParams.numCells];
|
|
memset(m_hCellStart, 0, m_simParams.numCells*sizeof(uint));
|
|
|
|
|
|
m_hPairBuffStartCurr = new unsigned int[m_numParticles * 2 + 1];
|
|
// --------------- for now, init with MAX_COLL_PAIR_PER_PARTICLE for each particle
|
|
m_hPairBuffStartCurr[0] = 0;
|
|
m_hPairBuffStartCurr[1] = 0;
|
|
for(uint i = 1; i <= m_numParticles; i++)
|
|
{
|
|
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + MAX_COLL_PAIR_PER_PARTICLE;
|
|
// m_hPairBuffStartCurr[i * 2 + 1] = m_hPairBuffStartCurr[i * 2];
|
|
m_hPairBuffStartCurr[i * 2 + 1] = 0;
|
|
}
|
|
//----------------
|
|
m_hAABB = new float[numParticles*4*2]; // BB Min & Max
|
|
|
|
m_hPairBuff = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
|
|
memset(m_hPairBuff, 0x00, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*4);
|
|
|
|
m_hPairScan = new unsigned int[m_numParticles + 1];
|
|
m_hPairOut = new unsigned int[m_numParticles * MAX_COLL_PAIR_PER_PARTICLE];
|
|
|
|
// allocate GPU data
|
|
unsigned int memSize = sizeof(float) * 4 * m_numParticles;
|
|
|
|
m_posVbo[0] = createVBO(memSize);
|
|
m_posVbo[1] = createVBO(memSize);
|
|
|
|
allocateArray((void**)&m_dVel[0], memSize);
|
|
allocateArray((void**)&m_dVel[1], memSize);
|
|
|
|
allocateArray((void**)&m_dSortedPos, memSize);
|
|
allocateArray((void**)&m_dSortedVel, memSize);
|
|
|
|
#if USE_SORT
|
|
allocateArray((void**)&m_dParticleHash[0], m_numParticles*2*sizeof(uint));
|
|
allocateArray((void**)&m_dParticleHash[1], m_numParticles*2*sizeof(uint));
|
|
allocateArray((void**)&m_dCellStart, m_simParams.numCells*sizeof(uint));
|
|
#else
|
|
allocateArray((void**)&m_dGridCounters, m_numGridCells*sizeof(uint));
|
|
allocateArray((void**)&m_dGridCells, m_numGridCells*m_maxParticlesPerCell*sizeof(uint));
|
|
#endif
|
|
|
|
allocateArray((void**)&m_dPairBuff, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
|
|
copyArrayToDevice(m_dPairBuff, m_hPairBuff, 0, sizeof(unsigned int)*m_numParticles*MAX_COLL_PAIR_PER_PARTICLE);
|
|
|
|
allocateArray((void**)&m_dPairBuffStartCurr, (m_numParticles*2 + 1)*sizeof(unsigned int));
|
|
allocateArray((void**)&m_dAABB, memSize*2);
|
|
|
|
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, 0, sizeof(unsigned int)*(m_numParticles*2 + 1));
|
|
|
|
allocateArray((void**)&m_dPairScan, (m_numParticles + 1)*sizeof(unsigned int));
|
|
allocateArray((void**)&m_dPairOut, m_numParticles*MAX_COLL_PAIR_PER_PARTICLE*sizeof(unsigned int));
|
|
|
|
m_colorVBO = createVBO(m_numParticles*4*sizeof(float));
|
|
|
|
#if 1
|
|
// fill color buffer
|
|
glBindBufferARB(GL_ARRAY_BUFFER, m_colorVBO);
|
|
float *data = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
|
|
float *ptr = data;
|
|
for(uint i=0; i<m_numParticles; i++) {
|
|
float t = i / (float) m_numParticles;
|
|
#if 0
|
|
*ptr++ = rand() / (float) RAND_MAX;
|
|
*ptr++ = rand() / (float) RAND_MAX;
|
|
*ptr++ = rand() / (float) RAND_MAX;
|
|
#else
|
|
colorRamp(t, ptr);
|
|
ptr+=3;
|
|
#endif
|
|
*ptr++ = 1.0f;
|
|
}
|
|
glUnmapBufferARB(GL_ARRAY_BUFFER);
|
|
#endif
|
|
|
|
|
|
setParameters(&m_simParams);
|
|
|
|
// Pair cache data
|
|
m_maxPairsPerParticle = 0;
|
|
m_numOverflows = 0;
|
|
|
|
m_bInitialized = true;
|
|
}
|
|
|
|
|
|
|
|
void btCudaBroadphase::_finalize()
|
|
{
|
|
assert(m_bInitialized);
|
|
|
|
delete [] m_hPos;
|
|
delete [] m_hVel;
|
|
delete [] m_hSortedPos;
|
|
|
|
delete [] m_hGridCounters;
|
|
delete [] m_hGridCells;
|
|
|
|
delete [] m_dPairBuff;
|
|
delete [] m_dPairBuffStartCurr;
|
|
delete [] m_hAABB;
|
|
|
|
delete [] m_hPairBuff;
|
|
delete [] m_hPairScan;
|
|
delete [] m_hPairOut;
|
|
|
|
freeArray(m_dVel[0]);
|
|
freeArray(m_dVel[1]);
|
|
|
|
freeArray(m_dSortedPos);
|
|
freeArray(m_dSortedVel);
|
|
|
|
#if USE_SORT
|
|
freeArray(m_dParticleHash[0]);
|
|
freeArray(m_dParticleHash[1]);
|
|
freeArray(m_dCellStart);
|
|
#else
|
|
freeArray(m_dGridCounters);
|
|
freeArray(m_dGridCells);
|
|
#endif
|
|
freeArray(m_dPairBuff);
|
|
freeArray(m_dPairBuffStartCurr);
|
|
freeArray(m_dAABB);
|
|
|
|
freeArray(m_hPairBuff);
|
|
freeArray(m_hPairScan);
|
|
freeArray(m_hPairOut);
|
|
|
|
unregisterGLBufferObject(m_posVbo[0]);
|
|
unregisterGLBufferObject(m_posVbo[1]);
|
|
glDeleteBuffers(2, (const GLuint*)m_posVbo);
|
|
|
|
glDeleteBuffers(1, (const GLuint*)&m_colorVBO);
|
|
|
|
}
|
|
|
|
/// Tears the broadphase down by delegating to _finalize(); the object must
/// have been initialised.
btCudaBroadphase::~btCudaBroadphase()
{
	// The overlapping pair cache is not released here: btSimpleBroadphase is
	// expected to free it because m_ownsPairCache is set.
	assert(m_bInitialized);

	_finalize();
}
|
|
|
|
/*
|
|
int btCudaBroadphase::myCollideCell2(int3 gridPos,
|
|
uint index,
|
|
unsigned int* particleHash,
|
|
unsigned int* cellStart)
|
|
{
|
|
int numOverlap = 0;
|
|
|
|
|
|
if ((gridPos.x < 0) || (gridPos.x > params.gridSize.x-1) ||
|
|
(gridPos.y < 0) || (gridPos.y > params.gridSize.y-1) ||
|
|
(gridPos.z < 0) || (gridPos.z > params.gridSize.z-1)) {
|
|
return force;
|
|
}
|
|
|
|
uint gridHash = calcGridHash(gridPos);
|
|
|
|
// get start of bucket for this cell
|
|
uint bucketStart = FETCH(cellStart, gridHash);
|
|
if (bucketStart == 0xffffffff)
|
|
return force; // cell empty
|
|
|
|
// iterate over particles in this cell
|
|
for(uint i=0; i<params.maxParticlesPerCell; i++) {
|
|
uint index2 = bucketStart + i;
|
|
uint2 cellData = FETCH(particleHash, index2);
|
|
if (cellData.x != gridHash) break; // no longer in same bucket
|
|
|
|
if (index2 != index) { // check not colliding with self
|
|
float4 pos2 = FETCH(oldPos, index2);
|
|
float4 vel2 = FETCH(oldVel, index2);
|
|
|
|
// collide two spheres
|
|
float3 projVec = collideSpheres(pos, pos2, vel, vel2, params.particleRadius, params.particleRadius, params.attraction);
|
|
force += projVec;
|
|
}
|
|
}
|
|
|
|
return force;
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
|
|
/// Rebuilds the spatial hash on the GPU and then collects overlapping pairs.
///
/// Default configuration (USE_SORT 1, USE_OLD 0, _USE_BRUTEFORCE_N undefined):
///   1. upload the simulation parameters,
///   2. hash every particle position, radix-sort the (hash, index) pairs and
///      locate the start of each cell,
///   3. copy the hash table and cell starts back to the host,
///   4. call findOverlappingPairs().
/// The alternative preprocessor branches are kept: a brute-force all-pairs
/// test, an atomics-based grid update, and (USE_OLD) a CPU walk over the 27
/// neighbouring cells followed by a pair-cache clean-up.
///
/// Fixes relative to the previous version:
///  - the brute-force branch took proxy1 from index i instead of j, so
///    proxy0 == proxy1 always held and no pair was ever tested;
///  - the USE_OLD bucket walk could index m_hParticleHash past the last
///    particle when a bucket sits at the end of the sorted list;
///  - unused locals, a clamp made redundant by the preceding range check and
///    commented-out code were removed.
///
/// @param dispatcher  forwarded to the pair cache / findOverlappingPairs()
void btCudaBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
	if (m_numHandles < 0)
	{
		return;
	}

//#define _USE_BRUTEFORCE_N 1
#ifdef _USE_BRUTEFORCE_N

	// Reference path: test every handle against every later handle.
	for (int i=0;i<m_numHandles;i++)
	{
		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];

		for (int j=i+1;j<m_numHandles;j++)
		{
			// j > i, so proxy1 is always distinct from proxy0
			btSimpleBroadphaseProxy* proxy1 = &m_pHandles[j];

			btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
			btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);

			if (aabbOverlap(p0,p1))
			{
				if ( !m_pairCache->findPair(proxy0,proxy1))
				{
					m_pairCache->addOverlappingPair(proxy0,proxy1);
				}
			} else
			{
				if (!m_pairCache->hasDeferredRemoval())
				{
					if ( m_pairCache->findPair(proxy0,proxy1))
					{
						m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
					}
				}
			}
		}
	}

#else //_USE_BRUTEFORCE_N

	// update constants
	setParameters(&m_simParams);

#if USE_SORT
	// sort and search method

	// calculate hash
	{
		BT_PROFILE("calcHash-- CUDA");
		calcHash( m_posVbo[m_currentPosRead], m_dParticleHash[0], m_numParticles);
	}

#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash:\n");
	for(uint i=0; i<m_numParticles; i++) {
		printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
	}
#endif

	// sort particles based on hash
	{
		BT_PROFILE("RadixSort-- CUDA");
		RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
	}

#if DEBUG_GRID
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	printf("particle hash sorted:\n");
	for(uint i=0; i<m_numParticles; i++) {
		printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
	}
#endif

	// reorder particle arrays into sorted order and
	// find start of each cell
	{
		BT_PROFILE("Reorder-- CUDA");
#if USE_OLD
		reorderDataAndFindCellStart(m_dParticleHash[0],
									m_posVbo[m_currentPosRead],
									m_dVel[m_currentVelRead],
									m_dSortedPos,
									m_dSortedVel,
									m_dCellStart,
									m_numParticles,
									m_simParams.numCells);
#else
		findCellStart(m_dParticleHash[0],
					  m_dCellStart,
					  m_numParticles,
					  m_simParams.numCells);
#endif
	}

//#define DEBUG_GRID2
#ifdef DEBUG_GRID2
	copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
	printf("cell start:\n");
	for(uint i=0; i<16; i++) {
		printf("%d: %d//", i, m_hCellStart[i]);
	}
#endif

#else
	// update grid using atomics
	updateGrid(m_posVbo[m_currentPosRead],
			   m_dGridCounters,
			   m_dGridCells,
			   m_numParticles,
			   m_numGridCells);
#endif

	// bring the sorted hash table and the cell starts back to the host
	copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
	copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);

//#define DEBUG_INDICES 1
#ifdef DEBUG_INDICES
	{
		printf("cell start:\n");
		for(uint i=0; i<16; i++) {
			printf("%d: %d\n", i, m_hCellStart[i]);
		}
	}
	{
		printf("particle hash sorted:\n");
		for(uint i=0; i<m_numParticles; i++) {
			printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
		}
	}
#endif //DEBUG_INDICES

#if USE_OLD
	// CPU neighbour search: for every particle, visit the 3x3x3 block of
	// cells around it and test the particles stored in each bucket.
	for(uint pi=0; pi<m_numParticles; pi++)
	{
		int index = m_hParticleHash[pi*2+1];

		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
		btVector3 mypos = (proxy0->m_aabbMin + proxy0->m_aabbMax)*0.5f;

		// grid cell containing the AABB centre
		int3 particleGridPos;
		particleGridPos.x = floor((mypos.x() - m_simParams.worldOrigin.x) / m_simParams.cellSize.x);
		particleGridPos.y = floor((mypos.y() - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
		particleGridPos.z = floor((mypos.z() - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);

		for(int z=-1; z<=1; z++)
		{
			for(int y=-1; y<=1; y++)
			{
				for(int x=-1; x<=1; x++)
				{
					int3 gridPos;
					gridPos.x = particleGridPos.x + x;
					gridPos.y = particleGridPos.y + y;
					gridPos.z = particleGridPos.z + z;

					// skip cells outside the grid (this also makes any
					// further clamping of gridPos unnecessary)
					if ((gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1) ||
						(gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1) ||
						(gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1))
					{
						continue;
					}

					uint gridHash = ((gridPos.z*m_simParams.gridSize.y)* m_simParams.gridSize.x) + (gridPos.y* m_simParams.gridSize.x) + gridPos.x;

					// get start of bucket for this cell
					unsigned int bucketStart = m_hCellStart[gridHash];
					if (bucketStart == 0xffffffff)
						continue;	// cell empty

					// iterate over particles in this cell
					for(uint q=0; q<m_simParams.maxParticlesPerCell; q++)
					{
						uint cellIndex2 = bucketStart + q;
						if (cellIndex2 >= m_numParticles)
							break;	// ran off the end of the sorted list

						uint cellData = m_hParticleHash[cellIndex2*2];
						if (cellData != gridHash)
							break;	// no longer in same bucket

						int particleIndex2 = m_hParticleHash[cellIndex2*2+1];
						// only the ordered pair (lower index second) is added,
						// which also excludes the particle itself
						if (particleIndex2<index)
						{
							btSimpleBroadphaseProxy* proxy1 = &m_pHandles[particleIndex2];

							//do a more exact AABB overlap test before adding the pair
							if (testAabbOverlap(proxy0,proxy1))
							{
								m_pairCache->addOverlappingPair(proxy0,proxy1);
							}
						}
					}
				}
			}
		}
	}

#else // USE_OLD
	findOverlappingPairs(dispatcher);
#endif

#endif //_USE_BRUTEFORCE_N

#if USE_OLD
	///if this broadphase is used in a btMultiSapBroadphase, we shouldn't sort the overlapping paircache
	if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
	{
		BT_PROFILE("Cleaning-- CPU");

		btBroadphasePairArray& overlappingPairArray = m_pairCache->getOverlappingPairArray();

		//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
		overlappingPairArray.heapSort(btBroadphasePairSortPredicate());

		overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
		m_invalidPair = 0;

		btBroadphasePair previousPair;
		previousPair.m_pProxy0 = 0;
		previousPair.m_pProxy1 = 0;
		previousPair.m_algorithm = 0;

		for (int i=0;i<overlappingPairArray.size();i++)
		{
			btBroadphasePair& pair = overlappingPairArray[i];

			bool isDuplicate = (pair == previousPair);

			previousPair = pair;

			// a pair is dropped when it repeats its predecessor or when
			// its AABBs no longer overlap
			bool needsRemoval = true;
			if (!isDuplicate)
			{
				needsRemoval = !testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
			}

			if (needsRemoval)
			{
				m_pairCache->cleanOverlappingPair(pair,dispatcher);

				// mark as invalid; the sort below moves it to the end
				pair.m_pProxy0 = 0;
				pair.m_pProxy1 = 0;
				m_invalidPair++;
			}
		}

		///if you don't like to skip the invalid pairs in the array, execute following code:
#define CLEAN_INVALID_PAIRS 1
#ifdef CLEAN_INVALID_PAIRS

		//perform a sort, to sort 'invalid' pairs to the end
		overlappingPairArray.heapSort(btBroadphasePairSortPredicate());

		overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
		m_invalidPair = 0;
#endif//CLEAN_INVALID_PAIRS
	}
#endif // USE_OLD
}
|
|
|
|
/// Returns rand() divided by RAND_MAX as a float, i.e. a value in [0, 1].
static inline float frand()
{
	const float maxValue = (float) RAND_MAX;
	return rand() / maxValue;
}
|
|
|
|
|
|
void btCudaBroadphase::initGrid(unsigned int* size, float spacing, float jitter, unsigned int numParticles)
|
|
{
|
|
srand(1973);
|
|
#ifdef CONTROLLED_START
|
|
float extra=0.01f;
|
|
for(uint z=0; z<size[2]; z++) {
|
|
for(uint y=0; y<size[1]; y++) {
|
|
for(uint x=0; x<size[0]; x++) {
|
|
uint i = (z*size[1]*size[0]) + (y*size[0]) + x;
|
|
if (i < numParticles) {
|
|
m_hPos[i*4] = (spacing * x) + m_simParams.particleRadius - 1.0f+extra;//+ (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+1] = (spacing * y) + m_simParams.particleRadius - 1.0f;//+ (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+2] = (spacing * z) + m_simParams.particleRadius - 1.0f;//+ (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+3] = 1.0f;
|
|
extra=0.f;
|
|
|
|
m_hVel[i*4] = 0.0f;
|
|
m_hVel[i*4+1] = 0.0f;
|
|
m_hVel[i*4+2] = 0.0f;
|
|
m_hVel[i*4+3] = 0.0f;
|
|
}
|
|
}
|
|
extra=0.f;
|
|
}
|
|
}
|
|
#else
|
|
for(uint z=0; z<size[2]; z++) {
|
|
for(uint y=0; y<size[1]; y++) {
|
|
for(uint x=0; x<size[0]; x++) {
|
|
uint i = (z*size[1]*size[0]) + (y*size[0]) + x;
|
|
if (i < numParticles) {
|
|
m_hPos[i*4] = (spacing * x) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+1] = (spacing * y) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+2] = (spacing * z) + m_simParams.particleRadius - 1.0f + (frand()*2.0f-1.0f)*jitter;
|
|
m_hPos[i*4+3] = 1.0f;
|
|
|
|
m_hVel[i*4] = 0.0f;
|
|
m_hVel[i*4+1] = 0.0f;
|
|
m_hVel[i*4+2] = 0.0f;
|
|
m_hVel[i*4+3] = 0.0f;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Re-seeds the particle state on the host and uploads it.
///
/// CONFIG_GRID places the particles on a cubic lattice with a spacing of one
/// particle diameter and a jitter of 1% of the radius. Every other value
/// (CONFIG_RANDOM included) scatters them uniformly in [-1, 1]^3. In both cases
/// the fourth position component is 1 and all velocities are zero. The result
/// is pushed through setArray() for POSITION and VELOCITY.
///
/// @param config  initial layout to generate
void btCudaBroadphase::reset(ParticleConfig config)
{
	if (config == CONFIG_GRID)
	{
		const float radius = m_simParams.particleRadius;
		const float jitter = radius*0.01f;

		// smallest cube edge that can hold all particles
		uint s = (int) ceilf(powf((float) m_numParticles, 1.0f / 3.0f));
		uint gridSize[3] = { s, s, s };

		initGrid(gridSize, radius*2.0f, jitter, m_numParticles);
	}
	else
	{
		for(uint i=0; i < m_numParticles; i++)
		{
			float* pos = &m_hPos[i*4];
			float* vel = &m_hVel[i*4];

			// three draws in [0, 1], taken in x, y, z order
			const float px = frand();
			const float py = frand();
			const float pz = frand();

			pos[0] = 2 * (px - 0.5f);
			pos[1] = 2 * (py - 0.5f);
			pos[2] = 2 * (pz - 0.5f);
			pos[3] = 1.0f; // radius

			vel[0] = 0.0f;
			vel[1] = 0.0f;
			vel[2] = 0.0f;
			vel[3] = 0.0f;
		}
	}

	setArray(POSITION, m_hPos, 0, m_numParticles);
	setArray(VELOCITY, m_hVel, 0, m_numParticles);
}
|
|
|
|
|
|
|
|
void btCudaBroadphase::addSphere(int start, float *pos, float *vel, int r, float spacing)
|
|
{
|
|
uint index = start;
|
|
for(int z=-r; z<=r; z++) {
|
|
for(int y=-r; y<=r; y++) {
|
|
for(int x=-r; x<=r; x++) {
|
|
float dx = x*spacing;
|
|
float dy = y*spacing;
|
|
float dz = z*spacing;
|
|
float l = sqrtf(dx*dx + dy*dy + dz*dz);
|
|
if ((l <= m_simParams.particleRadius*2.0f*r) && (index < m_numParticles)) {
|
|
m_hPos[index*4] = pos[0] + dx;
|
|
m_hPos[index*4+1] = pos[1] + dy;
|
|
m_hPos[index*4+2] = pos[2] + dz;
|
|
m_hPos[index*4+3] = pos[3];
|
|
|
|
m_hVel[index*4] = vel[0];
|
|
m_hVel[index*4+1] = vel[1];
|
|
m_hVel[index*4+2] = vel[2];
|
|
m_hVel[index*4+3] = vel[3];
|
|
index++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
setArray(POSITION, m_hPos, start, index);
|
|
setArray(VELOCITY, m_hVel, start, index);
|
|
}
|
|
|
|
|
|
/// Uploads `count` four-float elements from `data` into the current read
/// buffer of the selected particle array, starting at element `start`.
///
/// VELOCITY goes through copyArrayToDevice() into m_dVel. POSITION — and any
/// unrecognised value — is written into the position VBO with
/// glBufferSubData(); the VBO is unregistered before and re-registered after
/// the GL update.
///
/// @param array  which particle array to write
/// @param data   first source element
/// @param start  destination element index
/// @param count  number of elements to copy
void btCudaBroadphase::setArray(ParticleArray array, const float* data, int start, int count)
{
	assert(m_bInitialized);

	if (array == VELOCITY)
	{
		copyArrayToDevice(m_dVel[m_currentVelRead], data, start*4*sizeof(float), count*4*sizeof(float));
		return;
	}

	// POSITION (also the fallback for unknown values)
	unregisterGLBufferObject(m_posVbo[m_currentPosRead]);

	glBindBuffer(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
	glBufferSubData(GL_ARRAY_BUFFER, start*4*sizeof(float), count*4*sizeof(float), data);
	glBindBuffer(GL_ARRAY_BUFFER, 0);

	registerGLBufferObject(m_posVbo[m_currentPosRead]);
}
|
|
|
|
|
|
/// Copies the whole current read buffer of the selected particle array to its
/// host mirror and returns that mirror.
///
/// VELOCITY reads from m_dVel into m_hVel with no VBO. POSITION — and any
/// unrecognised value — passes both m_dPos and the position VBO handle to
/// copyArrayFromDevice() and fills m_hPos.
///
/// @param array  which particle array to read back
/// @return       host array holding m_numParticles * 4 floats
float* btCudaBroadphase::getArray(ParticleArray array)
{
	assert(m_bInitialized);

	if (array == VELOCITY)
	{
		copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, m_numParticles*4*sizeof(float));
		return m_hVel;
	}

	// POSITION (also the fallback for unknown values)
	copyArrayFromDevice(m_hPos, m_dPos[m_currentPosRead], m_posVbo[m_currentPosRead], m_numParticles*4*sizeof(float));
	return m_hPos;
}
|
|
|
|
void btCudaBroadphase::dumpGrid()
|
|
{
|
|
// debug
|
|
copyArrayFromDevice(m_hGridCounters, m_dGridCounters, 0, sizeof(uint)*m_simParams.numCells);
|
|
copyArrayFromDevice(m_hGridCells, m_dGridCells, 0, sizeof(uint)*m_simParams.numCells*m_maxParticlesPerCell);
|
|
uint total = 0;
|
|
uint maxPerCell = 0;
|
|
for(uint i=0; i<m_simParams.numCells; i++) {
|
|
if (m_hGridCounters[i] > maxPerCell)
|
|
maxPerCell = m_hGridCounters[i];
|
|
if (m_hGridCounters[i] > 0) {
|
|
printf("%d (%d): ", i, m_hGridCounters[i]);
|
|
for(uint j=0; j<m_hGridCounters[i]; j++) {
|
|
printf("%d ", m_hGridCells[i*m_maxParticlesPerCell + j]);
|
|
}
|
|
total += m_hGridCounters[i];
|
|
printf("\n");
|
|
}
|
|
}
|
|
printf("max per cell = %d\n", maxPerCell);
|
|
printf("total = %d\n", total);
|
|
}
|
|
|
|
void btCudaBroadphase::dumpParticles(unsigned int start, unsigned int count)
|
|
{
|
|
// debug
|
|
copyArrayFromDevice(m_hPos, 0, m_posVbo[m_currentPosRead], sizeof(float)*4*count);
|
|
copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*count);
|
|
|
|
for(uint i=start; i<start+count; i++) {
|
|
// printf("%d: ", i);
|
|
printf("pos: (%.4f, %.4f, %.4f, %.4f)\n", m_hPos[i*4+0], m_hPos[i*4+1], m_hPos[i*4+2], m_hPos[i*4+3]);
|
|
printf("vel: (%.4f, %.4f, %.4f, %.4f)\n", m_hVel[i*4+0], m_hVel[i*4+1], m_hVel[i*4+2], m_hVel[i*4+3]);
|
|
}
|
|
}
|
|
|
|
float* btCudaBroadphase::copyBuffersFromDeviceToHost()
|
|
{
|
|
// copyArrayFromDevice(m_hPos, 0, m_posVbo[m_currentPosRead], sizeof(float)*4*m_numParticles);
|
|
copyArrayFromDevice(m_hVel, m_dVel[m_currentVelRead], 0, sizeof(float)*4*m_numParticles);
|
|
// fill color buffer
|
|
glBindBufferARB(GL_ARRAY_BUFFER, m_posVbo[m_currentPosRead]);
|
|
float* hPosData = (float *) glMapBufferARB(GL_ARRAY_BUFFER, GL_READ_WRITE);//GL_WRITE_ONLY);
|
|
return hPosData;
|
|
}
|
|
|
|
void btCudaBroadphase::copyBuffersFromHostToDevice()
|
|
{
|
|
glUnmapBufferARB(GL_ARRAY_BUFFER);
|
|
copyArrayToDevice(m_dVel[m_currentVelRead],m_hVel, 0, sizeof(float)*4*m_numParticles);
|
|
}
|
|
|
|
float* btCudaBroadphase::getHvelPtr()
|
|
{
|
|
return m_hVel;
|
|
}
|
|
|
|
float* btCudaBroadphase::getHposPtr()
|
|
{
|
|
return m_hPos;
|
|
}
|
|
|
|
void btCudaBroadphase::quickHack(float deltaTime)
|
|
{
|
|
// update constants
|
|
setParameters(&m_simParams);
|
|
|
|
|
|
|
|
|
|
// integrate
|
|
integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
|
|
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
|
|
deltaTime,
|
|
m_numParticles);
|
|
|
|
|
|
|
|
|
|
btSwap(m_currentPosRead, m_currentPosWrite);
|
|
btSwap(m_currentVelRead, m_currentVelWrite);
|
|
|
|
#if USE_SORT
|
|
// sort and search method
|
|
|
|
// calculate hash
|
|
calcHash(m_posVbo[m_currentPosRead],
|
|
m_dParticleHash[0],
|
|
m_numParticles);
|
|
|
|
#if DEBUG_GRID
|
|
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
|
printf("particle hash:\n");
|
|
for(uint i=0; i<m_numParticles; i++) {
|
|
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
|
|
}
|
|
#endif
|
|
|
|
// sort particles based on hash
|
|
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
|
|
|
|
#if DEBUG_GRID
|
|
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
|
printf("particle hash sorted:\n");
|
|
for(uint i=0; i<m_numParticles; i++) {
|
|
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
|
|
}
|
|
#endif
|
|
|
|
// reorder particle arrays into sorted order and
|
|
// find start of each cell
|
|
reorderDataAndFindCellStart(m_dParticleHash[0],
|
|
m_posVbo[m_currentPosRead],
|
|
m_dVel[m_currentVelRead],
|
|
m_dSortedPos,
|
|
m_dSortedVel,
|
|
m_dCellStart,
|
|
m_numParticles,
|
|
m_simParams.numCells);
|
|
|
|
//#define DEBUG_GRID2
|
|
#ifdef DEBUG_GRID2
|
|
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
|
|
printf("cell start:\n");
|
|
for(uint i=0; i<m_simParams.numCells; i++) {
|
|
printf("%d: %d\n", i, m_hCellStart[i]);
|
|
}
|
|
#endif
|
|
|
|
#else
|
|
// update grid using atomics
|
|
updateGrid(m_posVbo[m_currentPosRead],
|
|
m_dGridCounters,
|
|
m_dGridCells,
|
|
m_numParticles,
|
|
m_numGridCells);
|
|
#endif
|
|
|
|
/*
|
|
dsadsa
|
|
*/
|
|
|
|
|
|
|
|
int m_solverIterations = 1;
|
|
|
|
// process collisions
|
|
for(uint i=0; i<m_solverIterations; i++) {
|
|
collide(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
|
|
m_dSortedPos, m_dSortedVel,
|
|
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
|
|
m_dGridCounters,
|
|
m_dGridCells,
|
|
m_dParticleHash[0],
|
|
m_dCellStart,
|
|
m_numParticles,
|
|
m_simParams.numCells,
|
|
m_maxParticlesPerCell
|
|
);
|
|
|
|
btSwap(m_currentVelRead, m_currentVelWrite);
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
void btCudaBroadphase::integrate()
|
|
{
|
|
// update constants
|
|
setParameters(&m_simParams);
|
|
|
|
float deltaTime = 1./60.f;
|
|
|
|
|
|
// integrate
|
|
integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
|
|
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
|
|
deltaTime,
|
|
m_numParticles);
|
|
|
|
btSwap(m_currentPosRead, m_currentPosWrite);
|
|
btSwap(m_currentVelRead, m_currentVelWrite);
|
|
}
|
|
|
|
void btCudaBroadphase::quickHack2()
|
|
{
|
|
// update constants
|
|
setParameters(&m_simParams);
|
|
|
|
|
|
// integrate
|
|
integrateSystem(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
|
|
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
|
|
0.f,
|
|
m_numParticles);
|
|
|
|
|
|
|
|
|
|
|
|
btSwap(m_currentPosRead, m_currentPosWrite);
|
|
btSwap(m_currentVelRead, m_currentVelWrite);
|
|
|
|
#if USE_SORT
|
|
// sort and search method
|
|
|
|
// calculate hash
|
|
calcHash(m_posVbo[m_currentPosRead],
|
|
m_dParticleHash[0],
|
|
m_numParticles);
|
|
|
|
#if DEBUG_GRID
|
|
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
|
printf("particle hash:\n");
|
|
for(uint i=0; i<m_numParticles; i++) {
|
|
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
|
|
}
|
|
#endif
|
|
|
|
// sort particles based on hash
|
|
RadixSort((KeyValuePair *) m_dParticleHash[0], (KeyValuePair *) m_dParticleHash[1], m_numParticles, 32);
|
|
|
|
#if DEBUG_GRID
|
|
copyArrayFromDevice((void *) m_hParticleHash, (void *) m_dParticleHash[0], 0, sizeof(uint)*2*m_numParticles);
|
|
printf("particle hash sorted:\n");
|
|
for(uint i=0; i<m_numParticles; i++) {
|
|
printf("%d: %d, %d\n", i, m_hParticleHash[i*2], m_hParticleHash[i*2+1]);
|
|
}
|
|
#endif
|
|
|
|
// reorder particle arrays into sorted order and
|
|
// find start of each cell
|
|
reorderDataAndFindCellStart(m_dParticleHash[0],
|
|
m_posVbo[m_currentPosRead],
|
|
m_dVel[m_currentVelRead],
|
|
m_dSortedPos,
|
|
m_dSortedVel,
|
|
m_dCellStart,
|
|
m_numParticles,
|
|
m_simParams.numCells);
|
|
|
|
//#define DEBUG_GRID2
|
|
#ifdef DEBUG_GRID2
|
|
copyArrayFromDevice((void *) m_hCellStart, (void *) m_dCellStart, 0, sizeof(uint)*m_simParams.numCells);
|
|
printf("cell start:\n");
|
|
for(uint i=0; i<m_simParams.numCells; i++) {
|
|
printf("%d: %d\n", i, m_hCellStart[i]);
|
|
}
|
|
#endif
|
|
|
|
#else
|
|
// update grid using atomics
|
|
updateGrid(m_posVbo[m_currentPosRead],
|
|
m_dGridCounters,
|
|
m_dGridCells,
|
|
m_numParticles,
|
|
m_numGridCells);
|
|
#endif
|
|
|
|
/*
|
|
dsadsa
|
|
*/
|
|
|
|
|
|
/*
|
|
int m_solverIterations = 1;
|
|
|
|
// process collisions
|
|
for(uint i=0; i<m_solverIterations; i++) {
|
|
collide(m_posVbo[m_currentPosRead], m_posVbo[m_currentPosWrite],
|
|
m_dSortedPos, m_dSortedVel,
|
|
m_dVel[m_currentVelRead], m_dVel[m_currentVelWrite],
|
|
m_dGridCounters,
|
|
m_dGridCells,
|
|
m_dParticleHash[0],
|
|
m_dCellStart,
|
|
m_numParticles,
|
|
m_simParams.numCells,
|
|
m_maxParticlesPerCell
|
|
);
|
|
|
|
btSwap(m_currentVelRead, m_currentVelWrite);
|
|
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recomputes the overlapping pairs and applies the changes to the pair cache.
//
// Steps, in order:
//   1. Pack the AABB of every proxy into m_hAABB. Entry pi is the AABB of the
//      handle whose index is stored in m_hParticleHash[pi*2+1]. Each box takes
//      8 floats: min x,y,z + one zero pad, max x,y,z + one zero pad.
//   2. With USE_CUDA non-zero: upload the boxes, run the CUDA overlap search,
//      compute per-particle change counts, prefix-sum those counts on the
//      host (device -> host -> device round trip), compact the changes on the
//      device and download m_hPairScan[m_numParticles] entries into m_hPairOut.
//      Otherwise the CPU reference routines perform the same stages.
//   3. addPairsToCacheCPU() adds / removes the pairs listed in m_hPairOut.
//
// dispatcher is only forwarded to addPairsToCacheCPU().
void btCudaBroadphase::findOverlappingPairs(btDispatcher* dispatcher)
{
	BT_PROFILE("findOverlappingPairs -- CPU");
	m_numPairsAdded = 0;

	{
		BT_PROFILE("copy AABB -- CPU");

		// do it faster ?
		float* pVec = m_hAABB;
		for(uint pi=0; pi<m_numParticles; pi++)
		{
			// second value of the hash entry selects the proxy handle
			int index = m_hParticleHash[pi*2+1];
			btSimpleBroadphaseProxy* proxy0 = &m_pHandles[index];
			*pVec++ = proxy0->m_aabbMin.getX();
			*pVec++ = proxy0->m_aabbMin.getY();
			*pVec++ = proxy0->m_aabbMin.getZ();
			*pVec++ = 0.0F;	// pad to 4 floats
			*pVec++ = proxy0->m_aabbMax.getX();
			*pVec++ = proxy0->m_aabbMax.getY();
			*pVec++ = proxy0->m_aabbMax.getZ();
			*pVec++ = 0.0F;	// pad to 4 floats
		}
	}

#if USE_CUDA
	{
		BT_PROFILE("CopyBB to CUDA");
		copyArrayToDevice(m_dAABB, m_hAABB, 0, sizeof(float)*4*2*m_numParticles);
	}
	{
		BT_PROFILE("btCudaFindOverlappingPairs");
		btCudaFindOverlappingPairs(	m_dAABB,
									m_dParticleHash[0],
									m_dCellStart,
									m_dPairBuff,
									m_dPairBuffStartCurr,
									m_numParticles
									);
	}
	{
		BT_PROFILE("btCudaComputePairCacheChanges");
		btCudaComputePairCacheChanges(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_numParticles);
	}
	{
		// the prefix sum runs on the host, hence the round trip
		BT_PROFILE("scanOverlappingPairBuffCPU");
		copyArrayFromDevice(m_hPairScan, m_dPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
		scanOverlappingPairBuffCPU();
		copyArrayToDevice(m_dPairScan, m_hPairScan, 0, sizeof(unsigned int)*(m_numParticles + 1));
	}
	{
		BT_PROFILE("btCudaSqueezeOverlappingPairBuff");
		btCudaSqueezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScan, m_dPairOut, m_numParticles);
	}
	{
		// distinct label: this zone previously reused the squeeze label, so
		// the download could not be told apart from the squeeze in the profile
		BT_PROFILE("CopyPairs from CUDA");
		// the last scan entry is the total number of changed pairs
		copyArrayFromDevice(m_hPairOut, m_dPairOut, 0, sizeof(unsigned int) * m_hPairScan[m_numParticles]);
	}
#else
	findOverlappingPairsCPU(	m_hAABB,
								m_hParticleHash,
								m_hCellStart,
								m_hPairBuff,
								m_hPairBuffStartCurr,
								m_numParticles);
	computePairCacheChangesCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_numParticles);
	scanOverlappingPairBuffCPU();
	squeezeOverlappingPairBuffCPU(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_numParticles);
#endif

	{
		BT_PROFILE("addPairsToCache");
		addPairsToCacheCPU(dispatcher);
	}
} // btCudaBroadphase::findOverlappingPairs()
|
|
|
|
|
|
|
|
// calculate position in uniform grid
|
|
int3 btCudaBroadphase::calcGridPosCPU(float4 p)
|
|
{
|
|
int3 gridPos;
|
|
gridPos.x = floor((p.x - m_simParams.worldOrigin.x) / m_simParams.cellSize.x);
|
|
gridPos.y = floor((p.y - m_simParams.worldOrigin.y) / m_simParams.cellSize.y);
|
|
gridPos.z = floor((p.z - m_simParams.worldOrigin.z) / m_simParams.cellSize.z);
|
|
return gridPos;
|
|
} // btCudaBroadphase::calcGridPos()
|
|
|
|
// calculate address in grid from position (clamping to edges)
|
|
uint btCudaBroadphase::calcGridHashCPU(int3 gridPos)
|
|
{
|
|
gridPos.x = max(0, min(gridPos.x, m_simParams.gridSize.x-1));
|
|
gridPos.y = max(0, min(gridPos.y, m_simParams.gridSize.y-1));
|
|
gridPos.z = max(0, min(gridPos.z, m_simParams.gridSize.z-1));
|
|
return (gridPos.z * m_simParams.gridSize.y) * m_simParams.gridSize.x + gridPos.y * m_simParams.gridSize.x + gridPos.x;
|
|
}
|
|
|
|
// CPU driver: calls computePairCacheChangesCPU_D() once for every index in
// [0, numParticles), in ascending order. pPairBuffStartCurr is reinterpreted
// as an array of uint2 for the per-index routine.
void btCudaBroadphase::computePairCacheChangesCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint numParticles)
{
	uint2* const startCurr = (uint2*)pPairBuffStartCurr;
	uint particle = 0;
	while(particle < numParticles)
	{
		computePairCacheChangesCPU_D(particle, pPairBuff, startCurr, pPairScan);
		++particle;
	}
}
|
|
|
|
// Counts the changed pair entries of one particle.
// pPairBuffStartCurr[index] holds (x = first entry in pPairBuff, y = number of
// entries). An entry counts as a change when BT_CUDA_PAIR_FOUND_FLG is not set
// on it. The count is stored at pPairScan[index+1]; slot 0 is left untouched.
void btCudaBroadphase::computePairCacheChangesCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan)
{
	const uint2 range = pPairBuffStartCurr[index];
	const uint* const entries = pPairBuff + range.x;

	uint changed = 0;
	for(uint k = 0; k < range.y; ++k)
	{
		if((entries[k] & BT_CUDA_PAIR_FOUND_FLG) == 0)
		{
			++changed;
		}
	}
	pPairScan[index+1] = changed;
}
|
|
|
|
void btCudaBroadphase::findOverlappingPairsCPU( float* pAABB,
|
|
uint* pParticleHash,
|
|
uint* pCellStart,
|
|
uint* pPairBuff,
|
|
uint* pPairBuffStartCurr,
|
|
uint numParticles)
|
|
{
|
|
BT_PROFILE("findOverlappingPairsCPU -- CPU");
|
|
for(uint i = 0; i < numParticles; i++)
|
|
{
|
|
findOverlappingPairsCPU_D(
|
|
i,
|
|
(float4 *)pAABB,
|
|
(uint2*)pParticleHash,
|
|
(uint*)pCellStart,
|
|
(uint*)pPairBuff,
|
|
(uint2*)pPairBuffStartCurr,
|
|
numParticles);
|
|
}
|
|
} // btCudaBroadphase::findOverlappingPairsCPU()
|
|
|
|
// Searches the overlaps of one particle.
// The centre of the particle's AABB (pAABB[index*2] = min, pAABB[index*2+1] =
// max) selects a grid cell via calcGridPosCPU(); findPairsInCellCPU() is then
// called for that cell and its 26 neighbours, iterating z outermost and x
// innermost. Cells outside the grid are rejected inside findPairsInCellCPU().
void btCudaBroadphase::findOverlappingPairsCPU_D(	uint index,
													float4* pAABB,
													uint2* pParticleHash,
													uint* pCellStart,
													uint* pPairBuff,
													uint2* pPairBuffStartCurr,
													uint numParticles)
{
	const float4 bbMin = pAABB[index*2];
	const float4 bbMax = pAABB[index*2+1];

	// centre of the box; w is set explicitly so that no uninitialized
	// component is copied when the value is passed on
	const float4 pos = make_float4(	(bbMin.x + bbMax.x) * 0.5f,
									(bbMin.y + bbMax.y) * 0.5f,
									(bbMin.z + bbMax.z) * 0.5f,
									0.0f);

	// get address in grid
	const int3 gridPos = calcGridPosCPU(pos);

	// examine only neighbouring cells
	for(int z=-1; z<=1; z++)
	{
		for(int y=-1; y<=1; y++)
		{
			for(int x=-1; x<=1; x++)
			{
				const int3 gridPos2 = make_int3(gridPos.x + x, gridPos.y + y, gridPos.z + z);
				findPairsInCellCPU(gridPos2, index, pParticleHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numParticles);
			}
		}
	}
} // btCudaBroadphase::findOverlappingPairsCPU_D()
|
|
|
|
|
|
// Tests one particle against the particles of one grid cell and records the
// overlaps in that particle's slice of the pair buffer.
//
//   gridPos            cell to examine; nothing happens when it lies outside
//                      the grid or when pCellStart marks the cell as empty
//                      (0xffffffff)
//   index              position of the particle in pParticleHash / pAABB
//   pParticleHash      entries with x = cell hash, y = handle index
//   pCellStart         first pParticleHash position of every cell
//   pAABB              two float4 per position: [2*i] = min, [2*i+1] = max
//   pPairBuff          pair storage, shared by all particles
//   pPairBuffStartCurr per handle index: x = slice start, y = slice length
//   numParticles       upper bound for positions in pParticleHash
//
// An overlap that is already stored in the slice gets BT_CUDA_PAIR_FOUND_FLG;
// any other overlap is appended with BT_CUDA_PAIR_NEW_FLG and the slice length
// is written back. Only the entries present on entry are searched.
// NOTE(review): no capacity check is made before appending - presumably the
// slice is sized by the allocator; confirm against the buffer setup code.
void btCudaBroadphase::findPairsInCellCPU(	int3 gridPos,
											uint index,
											uint2* pParticleHash,
											uint* pCellStart,
											float4* pAABB,
											uint* pPairBuff,
											uint2* pPairBuffStartCurr,
											uint numParticles)
{
	// reject cells that are not part of the grid
	const bool outsideX = (gridPos.x < 0) || (gridPos.x > m_simParams.gridSize.x-1);
	const bool outsideY = (gridPos.y < 0) || (gridPos.y > m_simParams.gridSize.y-1);
	const bool outsideZ = (gridPos.z < 0) || (gridPos.z > m_simParams.gridSize.z-1);
	if(outsideX || outsideY || outsideZ)
	{
		return;
	}

	// get start of bucket for this cell
	const uint gridHash = calcGridHashCPU(gridPos);
	const uint bucketStart = pCellStart[gridHash];
	if(bucketStart == 0xffffffff)
	{
		return;	// cell empty
	}

	// box of the particle being tested
	const float4 min0 = pAABB[index*2];
	const float4 max0 = pAABB[index*2+1];

	// slice of the pair buffer that belongs to this particle's handle
	const uint ownHandle = pParticleHash[index].y;
	const uint2 slice = pPairBuffStartCurr[ownHandle];
	uint* const pairs = pPairBuff + slice.x;
	const uint numKnown = slice.y;	// entries present before this call
	uint numStored = slice.y;		// grows as new pairs are appended

	// never look past the end of the hash array
	uint bucketEnd = bucketStart + m_simParams.maxParticlesPerCell;
	if(bucketEnd > numParticles)
	{
		bucketEnd = numParticles;
	}

	// iterate over particles in this cell
	for(uint other = bucketStart; other < bucketEnd; ++other)
	{
		const uint2 cellData = pParticleHash[other];
		if(cellData.x != gridHash)
		{
			break;	// no longer in same bucket
		}
		if(other == index)
		{
			continue;	// check not colliding with self
		}
		if(!cudaTestAABBOverlapCPU(min0, max0, pAABB[other*2], pAABB[other*2 + 1]))
		{
			continue;
		}

		// look for this partner among the previously stored pairs
		const uint otherHandle = cellData.y;
		bool alreadyStored = false;
		for(uint k = 0; k < numKnown; ++k)
		{
			if((pairs[k] & (~BT_CUDA_PAIR_ANY_FLG)) == otherHandle)
			{
				pairs[k] |= BT_CUDA_PAIR_FOUND_FLG;
				alreadyStored = true;
				break;
			}
		}
		if(!alreadyStored)
		{
			pairs[numStored] = otherHandle | BT_CUDA_PAIR_NEW_FLG;
			++numStored;
		}
	}

	pPairBuffStartCurr[ownHandle] = make_uint2(slice.x, numStored);
} // btCudaBroadphase::findPairsInCellCPU()
|
|
|
|
// Axis-aligned box overlap test on the x, y and z components (w is ignored).
// Returns 1 when the boxes [min0, max0] and [min1, max1] overlap or touch on
// all three axes, 0 otherwise.
uint btCudaBroadphase::cudaTestAABBOverlapCPU(float4 min0, float4 max0, float4 min1, float4 max1)
{
	// "<=" is kept on purpose: touching boxes count as overlapping
	const bool overlapX = (min0.x <= max1.x) && (min1.x <= max0.x);
	const bool overlapY = (min0.y <= max1.y) && (min1.y <= max0.y);
	const bool overlapZ = (min0.z <= max1.z) && (min1.z <= max0.z);
	return overlapX && overlapY && overlapZ;
} // btCudaBroadphase::cudaTestAABBOverlapCPU()
|
|
|
|
|
|
void btCudaBroadphase::scanOverlappingPairBuffCPU()
|
|
{
|
|
m_hPairScan[0] = 0;
|
|
for(uint i = 1; i <= m_numParticles; i++)
|
|
{
|
|
unsigned int delta = m_hPairScan[i];
|
|
m_hPairScan[i] = m_hPairScan[i-1] + delta;
|
|
}
|
|
} // btCudaBroadphase::scanOverlappingPairBuffCPU()
|
|
|
|
// CPU driver: calls squeezeOverlappingPairBuffCPU_D() once for every index in
// [0, numParticles), in ascending order. pPairBuffStartCurr is reinterpreted
// as an array of uint2 for the per-index routine.
void btCudaBroadphase::squeezeOverlappingPairBuffCPU(uint* pPairBuff, uint* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut, uint numParticles)
{
	uint2* const startCurr = (uint2*)pPairBuffStartCurr;
	uint particle = 0;
	while(particle < numParticles)
	{
		squeezeOverlappingPairBuffCPU_D(particle, pPairBuff, startCurr, pPairScan, pPairOut);
		++particle;
	}
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU()
|
|
|
|
// Emits the changes of one particle and compacts its slice of the pair buffer.
// For every entry of the slice described by pPairBuffStartCurr[index]
// (x = start, y = length):
//   - an entry without BT_CUDA_PAIR_FOUND_FLG is copied, flags included, to
//     pPairOut, starting at offset pPairScan[index];
//   - an entry with any bit of BT_CUDA_PAIR_ANY_FLG set is kept: it is moved
//     to the front of the slice with those bits cleared.
// Entries with no flag bits are thus emitted and dropped from the slice.
// The new slice length is written back to pPairBuffStartCurr[index].
void btCudaBroadphase::squeezeOverlappingPairBuffCPU_D(uint index, uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan, uint* pPairOut)
{
	const uint2 slice = pPairBuffStartCurr[index];
	uint* const entries = pPairBuff + slice.x;
	uint* outCursor = pPairOut + pPairScan[index];

	uint kept = 0;	// next free slot at the front of the slice (kept <= k)
	for(uint k = 0; k < slice.y; ++k)
	{
		if((entries[k] & BT_CUDA_PAIR_FOUND_FLG) == 0)
		{
			*outCursor = entries[k];
			++outCursor;
		}
		if((entries[k] & BT_CUDA_PAIR_ANY_FLG) != 0)
		{
			entries[kept] = entries[k] & (~BT_CUDA_PAIR_ANY_FLG);
			++kept;
		}
	}
	pPairBuffStartCurr[index] = make_uint2(slice.x, kept);
} // btCudaBroadphase::squeezeOverlappingPairBuffCPU_D()
|
|
|
|
// Number of pairs added to the pair cache by the most recent call to
// btCudaBroadphase::addPairsToCacheCPU(), which resets it to 0 on entry and
// increments it once per added pair.
unsigned int gNumPairsAdded = 0;
|
|
|
|
// Applies the pair changes listed in m_hPairOut to the pair cache.
// The changes of handle i occupy m_hPairOut[m_hPairScan[i] .. m_hPairScan[i+1]).
// Each entry is the partner's handle index combined with flag bits: with
// BT_CUDA_PAIR_NEW_FLG set the pair (i, partner) is added to m_pairCache,
// otherwise it is removed (dispatcher is passed to the removal call).
// gNumPairsAdded is reset on entry and counts the additions.
void btCudaBroadphase::addPairsToCacheCPU(btDispatcher* dispatcher)
{
	gNumPairsAdded = 0;
	for(uint owner = 0; owner < m_numParticles; ++owner)
	{
		const unsigned int firstChange = m_hPairScan[owner];
		const unsigned int numChanges = m_hPairScan[owner+1] - firstChange;
		if(numChanges == 0)
		{
			continue;	// nothing changed for this handle
		}

		unsigned int* changes = m_hPairOut + firstChange;
		btSimpleBroadphaseProxy* ownerProxy = &m_pHandles[owner];
		for(uint c = 0; c < numChanges; ++c)
		{
			const unsigned int tagged = changes[c];
			// strip the flag bits to recover the partner's handle index
			const unsigned int partner = tagged & (~BT_CUDA_PAIR_ANY_FLG);
			btSimpleBroadphaseProxy* partnerProxy = &m_pHandles[partner];

			if((tagged & BT_CUDA_PAIR_NEW_FLG) == 0)
			{
				m_pairCache->removeOverlappingPair(ownerProxy,partnerProxy,dispatcher);
				continue;
			}
			m_pairCache->addOverlappingPair(ownerProxy,partnerProxy);
			gNumPairsAdded++;
		}
	}
} // btCudaBroadphase::addPairsToCacheCPU()
|