fix bug in assignment of contact constraints to solver grid (always use dynamic body to determine constraint assignment, otherwise write conflicts can occur)

implement CPU version of narrowphase convex collision, for comparison/debug purposes
start towards cpu/gpu sync, for adding/removing bodies (work in progress)
This commit is contained in:
erwin coumans
2013-05-02 09:49:16 -07:00
parent de17d6044c
commit 6ee9eb9bb5
15 changed files with 966 additions and 59 deletions

View File

@@ -1,4 +1,7 @@
bool b3GpuBatchContacts = true;
bool b3GpuSolveConstraint = true;
#include "b3GpuBatchingPgsSolver.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
@@ -37,8 +40,6 @@ enum
};
bool b3GpuBatchContacts = true;//true;
bool b3GpuSolveConstraint = true;//true;
struct b3GpuBatchingPgsSolverInternalData
@@ -401,10 +402,14 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
static bool sortfnc(const b3SortData& a,const b3SortData& b)
{
return (a.m_key<b.m_key);
}
void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config)
void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
{
m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
@@ -420,7 +425,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3ConstraintCfg csCfg( dt );
csCfg.m_enableParallelSolve = true;
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
csCfg.m_staticIdx = static0Index;
b3OpenCLArray<b3RigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
@@ -500,6 +505,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
launcher.setConst( cdata.m_nContacts );
launcher.setConst( cdata.m_scale );
launcher.setConst(cdata.m_nSplit);
launcher.setConst(cdata.m_staticIdx);
launcher.launch1D( sortSize, 64 );
@@ -519,12 +525,16 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);
/*b3AlignedObjectArray<b3SortData> hostValues;
keyValuesInOut.copyToHost(hostValues);
printf("hostValues.size=%d\n",hostValues.size());
*/
}
} else
{
b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
b3AlignedObjectArray<b3SortData> hostValues;
keyValuesInOut.copyToHost(hostValues);
hostValues.quickSort(sortfnc);
keyValuesInOut.copyFromHost(hostValues);
}
{
// 4. find entries
@@ -630,7 +640,9 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
int simdWidth =64;//-1;//32;
//int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
int numBatches = sortConstraintByBatch2( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
//int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
maxNumBatches = b3Max(numBatches,maxNumBatches);
@@ -724,10 +736,6 @@ void b3GpuBatchingPgsSolver::batchContacts( b3OpenCLArray<b3Contact4>* contacts,
static bool sortfnc(const b3SortData& a,const b3SortData& b)
{
return (a.m_key<b.m_key);
}

View File

@@ -32,7 +32,7 @@ public:
b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity);
virtual ~b3GpuBatchingPgsSolver();
void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config);
void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index);
};

View File

@@ -156,7 +156,9 @@ m_queue(queue)
//m_data->m_contactCGPU = new b3OpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false);
//m_data->m_frictionCGPU = new b3OpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs);
}

View File

@@ -76,6 +76,10 @@ public:
int allocateCollidable();
int getStatic0Index() const
{
return m_static0Index;
}
b3Collidable& getCollidableCpu(int collidableIndex);
const b3Collidable& getCollidableCpu(int collidableIndex) const;

View File

@@ -276,7 +276,9 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
#endif //TEST_OTHER_GPU_SOLVER
{
b3Config config;
m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),config);
int static0Index = m_data->m_narrowphase->getStatic0Index();
m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),config, static0Index);
//m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
@@ -366,10 +368,11 @@ void b3GpuRigidBodyPipeline::writeAllInstancesToGpu()
int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu)
{
b3Vector3 aabbMin(0,0,0),aabbMax(0,0,0);
int bodyIndex = m_data->m_narrowphase->getNumRigidBodies();
if (collidableIndex>=0)
{

View File

@@ -452,20 +452,21 @@ typedef struct
__kernel
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
int nContacts,
float scale,
int N_SPLIT
)
int nContacts,float scale,int N_SPLIT, int staticIdx)
{
int gIdx = GET_GLOBAL_IDX;
if( gIdx < nContacts )
{
int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
int aIdx = abs(aPtrAndSignBit );
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
int idx = (gContact[gIdx].m_bodyAPtrAndSignBit<0)? bIdx: aIdx;
bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);
int idx = (aStatic)? bIdx: aIdx;
float4 p = gBodies[idx].m_pos;
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);

View File

@@ -454,20 +454,21 @@ static const char* solverSetup2CL= \
"__kernel\n"
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
"int nContacts,\n"
"float scale,\n"
"int N_SPLIT\n"
")\n"
"int nContacts,float scale,int N_SPLIT, int staticIdx)\n"
"\n"
"{\n"
" int gIdx = GET_GLOBAL_IDX;\n"
" \n"
" if( gIdx < nContacts )\n"
" {\n"
" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n"
" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n"
"\n"
" int aIdx = abs(aPtrAndSignBit );\n"
" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n"
"\n"
" int idx = (gContact[gIdx].m_bodyAPtrAndSignBit<0)? bIdx: aIdx;\n"
" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n"
" \n"
" int idx = (aStatic)? bIdx: aIdx;\n"
" float4 p = gBodies[idx].m_pos;\n"
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"