Deterministic simulation for concave and compound collision shapes: added childShapeA/B to b3Contact4 + sort on them

Use tetrahedra instead of barrel for convex demo (until performance for edge-edge is improved)
Increased #overlapping pair capacity from 12 to 16 / objec
This commit is contained in:
erwincoumans
2013-07-17 22:42:50 -07:00
parent ab125fbb6d
commit 733f9027fb
26 changed files with 358 additions and 70 deletions

View File

@@ -29,7 +29,7 @@ struct b3Config
m_maxTriConvexPairCapacity(256*1024)
{
m_maxConvexShapes = m_maxConvexBodies;
m_maxBroadphasePairs = 12*m_maxConvexBodies;
m_maxBroadphasePairs = 16*m_maxConvexBodies;
m_maxContactCapacity = m_maxBroadphasePairs;
}
};

View File

@@ -3,8 +3,9 @@ bool b3GpuBatchContacts = true;
bool b3GpuSolveConstraint = true;
bool gpuRadixSort=true;
bool gpuSetSortData = true;
bool gpuSortContacts = true;
bool optionalSortContactsDeterminism = true;
bool gpuSortContactsDeterminism = true;
#include "b3GpuBatchingPgsSolver.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
@@ -61,6 +62,10 @@ struct b3GpuBatchingPgsSolverInternalData
cl_kernel m_setDeterminismSortDataBodyAKernel;
cl_kernel m_setDeterminismSortDataBodyBKernel;
cl_kernel m_setDeterminismSortDataChildShapeAKernel;
cl_kernel m_setDeterminismSortDataChildShapeBKernel;
class b3RadixSort32CL* m_sort32;
@@ -143,7 +148,9 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
b3Assert(solveFrictionProg);
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
//cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH,true);
b3Assert(solverSetup2Prog);
@@ -168,6 +175,13 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
m_data->m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyB", &pErrNum, solverSetup2Prog,additionalMacros );
b3Assert(m_data->m_setDeterminismSortDataBodyBKernel);
m_data->m_setDeterminismSortDataChildShapeAKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeA", &pErrNum, solverSetup2Prog,additionalMacros );
b3Assert(m_data->m_setDeterminismSortDataChildShapeAKernel);
m_data->m_setDeterminismSortDataChildShapeBKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeB", &pErrNum, solverSetup2Prog,additionalMacros );
b3Assert(m_data->m_setDeterminismSortDataChildShapeBKernel);
m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
b3Assert(m_data->m_reorderContactKernel);
@@ -234,6 +248,13 @@ b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver()
clReleaseKernel( m_data->m_reorderContactKernel);
clReleaseKernel( m_data->m_copyConstraintKernel);
clReleaseKernel(m_data->m_setDeterminismSortDataBodyAKernel);
clReleaseKernel(m_data->m_setDeterminismSortDataBodyBKernel);
clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeAKernel);
clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeBKernel);
delete m_data;
}
@@ -444,7 +465,11 @@ static bool sortfnc(const b3SortData& a,const b3SortData& b)
static bool b3ContactCmp(const b3Contact4& p, const b3Contact4& q)
{
return ((p.m_bodyAPtrAndSignBit<q.m_bodyAPtrAndSignBit) ||
(p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit<q.m_bodyBPtrAndSignBit));
((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit<q.m_bodyBPtrAndSignBit)) ||
((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA<q.m_childIndexA ) ||
((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA<q.m_childIndexA ) ||
((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA==q.m_childIndexA && p.m_childIndexB<q.m_childIndexB)
);
}
@@ -552,7 +577,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (optionalSortContactsDeterminism)
{
if (gpuSortContacts)
if (gpuSortContactsDeterminism)
{
B3_PROFILE("GPU Sort contact constraints (determinism)");
@@ -561,9 +586,24 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);
{
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel);
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel);
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts);
launcher.launch1D( numContacts, 64 );
}
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel);
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts);
launcher.launch1D( numContacts, 64 );
}
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel);
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts);
@@ -573,7 +613,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel);
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel);
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts);
@@ -582,8 +622,6 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
//__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )
{
B3_PROFILE("gpu reorderContactKernel (determinism)");
@@ -886,9 +924,9 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
//int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
//int numBatches = sortConstraintByBatch2( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
//int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU

View File

@@ -513,6 +513,7 @@ struct SolveTask// : public ThreadPool::Task
if (bodyA.m_invMass)
{
b3Assert(usedBodies[aIdx]==0);
usedBodies[aIdx]++;
}
if (m_wgUsedBodies)
{
@@ -537,12 +538,15 @@ struct SolveTask// : public ThreadPool::Task
}
}
}
usedBodies[aIdx]++;
if (bodyB.m_invMass)
{
b3Assert(usedBodies[bIdx]==0);
usedBodies[bIdx]++;
}
usedBodies[bIdx]++;
if( !m_solveFriction )
{

View File

@@ -73,6 +73,12 @@ typedef struct
int m_bodyA;//sign bit set for fixed objects
int m_bodyB;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
}Contact4;
typedef struct

View File

@@ -75,6 +75,12 @@ static const char* batchingKernelsCL= \
"\n"
" int m_bodyA;//sign bit set for fixed objects\n"
" int m_bodyB;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"}Contact4;\n"
"\n"
"typedef struct \n"

View File

@@ -74,6 +74,12 @@ typedef struct
int m_bodyAPtrAndSignBit;//sign bit set for fixed objects
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
}Contact4;
typedef struct

View File

@@ -76,6 +76,12 @@ static const char* batchingKernelsNewCL= \
"\n"
" int m_bodyAPtrAndSignBit;//sign bit set for fixed objects\n"
" int m_bodyBPtrAndSignBit;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"}Contact4;\n"
"\n"
"typedef struct \n"

View File

@@ -357,12 +357,10 @@ __kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* co
int numRows = numConstraintRows[cid];
if (numRows)
{
// printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold);
for (int i=0;i<numRows;i++)
{
int rowIndex = rowOffsets[cid]+i;
float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;
// printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse);
if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)
{
constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;

View File

@@ -359,12 +359,10 @@ static const char* solveConstraintRowsCL= \
" int numRows = numConstraintRows[cid];\n"
" if (numRows)\n"
" {\n"
" // printf(\"cid=%d, breakingThreshold =%f\n\",cid,breakingThreshold);\n"
" for (int i=0;i<numRows;i++)\n"
" {\n"
" int rowIndex = rowOffsets[cid]+i;\n"
" float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;\n"
" // printf(\"rows[%d].m_appliedImpulse=%f\n\",rowIndex,rows[rowIndex].m_appliedImpulse);\n"
" if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)\n"
" {\n"
" constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n"

View File

@@ -213,6 +213,12 @@ typedef struct
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
} Contact4;
typedef struct

View File

@@ -215,6 +215,12 @@ static const char* solveContactCL= \
"\n"
" int m_bodyAPtrAndSignBit;\n"
" int m_bodyBPtrAndSignBit;\n"
" \n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"} Contact4;\n"
"\n"
"typedef struct\n"

View File

@@ -213,6 +213,12 @@ typedef struct
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
} Contact4;
typedef struct

View File

@@ -215,6 +215,12 @@ static const char* solveFrictionCL= \
"\n"
" int m_bodyAPtrAndSignBit;\n"
" int m_bodyBPtrAndSignBit;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"} Contact4;\n"
"\n"
"typedef struct\n"

View File

@@ -412,6 +412,12 @@ typedef struct
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
} Contact4;
typedef struct

View File

@@ -414,6 +414,12 @@ static const char* solverSetupCL= \
"\n"
" int m_bodyAPtrAndSignBit;\n"
" int m_bodyBPtrAndSignBit;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"} Contact4;\n"
"\n"
"typedef struct\n"

View File

@@ -386,6 +386,12 @@ typedef struct
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
} Contact4;
typedef struct
@@ -441,22 +447,53 @@ void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __globa
}
}
__kernel
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)
{
int gIdx = GET_GLOBAL_IDX;
if( gIdx < nContacts )
{
int2 sd;
sd.x = contactsIn[gIdx].m_bodyAPtrAndSignBit;
sd.x = contactsIn[gIdx].m_childIndexB;
sd.y = gIdx;
sortDataOut[gIdx] = sd;
}
}
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
{
int gIdx = GET_GLOBAL_IDX;
if( gIdx < nContacts )
{
int2 sdIn;
sdIn = sortDataInOut[gIdx];
int2 sdOut;
sdOut.x = contactsIn[sdIn.y].m_childIndexA;
sdOut.y = sdIn.y;
sortDataInOut[gIdx] = sdOut;
}
}
__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
{
int gIdx = GET_GLOBAL_IDX;
if( gIdx < nContacts )
{
int2 sdIn;
sdIn = sortDataInOut[gIdx];
int2 sdOut;
sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;
sdOut.y = sdIn.y;
sortDataInOut[gIdx] = sdOut;
}
}
__kernel
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)

View File

@@ -388,6 +388,12 @@ static const char* solverSetup2CL= \
"\n"
" int m_bodyAPtrAndSignBit;\n"
" int m_bodyBPtrAndSignBit;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"} Contact4;\n"
"\n"
"typedef struct\n"
@@ -443,22 +449,53 @@ static const char* solverSetup2CL= \
" }\n"
"}\n"
"\n"
"\n"
"__kernel\n"
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)\n"
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetDeterminismSortDataChildShapeA(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)\n"
"{\n"
" int gIdx = GET_GLOBAL_IDX;\n"
"\n"
" if( gIdx < nContacts )\n"
" {\n"
" int2 sd;\n"
" sd.x = contactsIn[gIdx].m_bodyAPtrAndSignBit;\n"
" sd.x = contactsIn[gIdx].m_childIndexA;\n"
" sd.y = gIdx;\n"
" sortDataOut[gIdx] = sd;\n"
" }\n"
"}\n"
"\n"
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetDeterminismSortDataChildShapeB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
"{\n"
" int gIdx = GET_GLOBAL_IDX;\n"
"\n"
" if( gIdx < nContacts )\n"
" {\n"
" int2 sdIn;\n"
" sdIn = sortDataInOut[gIdx];\n"
" int2 sdOut;\n"
" sdOut.x = contactsIn[sdIn.y].m_childIndexB;\n"
" sdOut.y = sdIn.y;\n"
" sortDataInOut[gIdx] = sdOut;\n"
" }\n"
"}\n"
"\n"
"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
"{\n"
" int gIdx = GET_GLOBAL_IDX;\n"
"\n"
" if( gIdx < nContacts )\n"
" {\n"
" int2 sdIn;\n"
" sdIn = sortDataInOut[gIdx];\n"
" int2 sdOut;\n"
" sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n"
" sdOut.y = sdIn.y;\n"
" sortDataInOut[gIdx] = sdOut;\n"
" }\n"
"}\n"
"\n"
"\n"
"__kernel\n"
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
"void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"

View File

@@ -389,6 +389,12 @@ typedef struct
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;
} Contact4;

View File

@@ -391,6 +391,12 @@ static const char* solverUtilsCL= \
"\n"
" int m_bodyAPtrAndSignBit;\n"
" int m_bodyBPtrAndSignBit;\n"
"\n"
" int m_childIndexA;\n"
" int m_childIndexB;\n"
" int m_unused1;\n"
" int m_unused2;\n"
"\n"
"} Contact4;\n"
"\n"
"\n"