Made the simulation deterministic.
Disabled the 'simdWidth' optimization for determinism (this still needs to be double-checked); made the spatial batching 3D.
This commit is contained in:
@@ -40,7 +40,10 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
|
|||||||
//float camPos[4]={1,12.5,1.5,0};
|
//float camPos[4]={1,12.5,1.5,0};
|
||||||
|
|
||||||
m_instancingRenderer->setCameraTargetPosition(camPos);
|
m_instancingRenderer->setCameraTargetPosition(camPos);
|
||||||
m_instancingRenderer->setCameraDistance(100);
|
m_instancingRenderer->setCameraDistance(120);
|
||||||
|
//m_instancingRenderer->setCameraYaw(85);
|
||||||
|
m_instancingRenderer->setCameraYaw(30);
|
||||||
|
m_instancingRenderer->setCameraPitch(225);
|
||||||
|
|
||||||
|
|
||||||
m_instancingRenderer->updateCamera();
|
m_instancingRenderer->updateCamera();
|
||||||
|
|||||||
@@ -196,6 +196,9 @@ struct InternalDataRenderer : public GLInstanceRendererInternalData
|
|||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//printf("m_azi/pitch = %f\n", m_azi);
|
||||||
|
// printf("m_ele/yaw = %f\n", m_ele);
|
||||||
|
|
||||||
m_mouseXpos = x;
|
m_mouseXpos = x;
|
||||||
m_mouseYpos = y;
|
m_mouseYpos = y;
|
||||||
m_mouseInitialized = true;
|
m_mouseInitialized = true;
|
||||||
|
|||||||
@@ -6,8 +6,17 @@
|
|||||||
|
|
||||||
struct b3SortData
|
struct b3SortData
|
||||||
{
|
{
|
||||||
int m_key;
|
union
|
||||||
int m_value;
|
{
|
||||||
|
int m_key;
|
||||||
|
int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
int m_value;
|
||||||
|
int y;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
#include "b3BufferInfoCL.h"
|
#include "b3BufferInfoCL.h"
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
|
|
||||||
bool b3GpuBatchContacts = true;
|
bool b3GpuBatchContacts = true;
|
||||||
bool b3GpuSolveConstraint = true;
|
bool b3GpuSolveConstraint = true;
|
||||||
|
bool gpuRadixSort=true;
|
||||||
|
bool gpuSetSortData = true;
|
||||||
|
bool gpuSortContacts = true;
|
||||||
|
bool optionalSortContactsDeterminism = true;
|
||||||
|
|
||||||
#include "b3GpuBatchingPgsSolver.h"
|
#include "b3GpuBatchingPgsSolver.h"
|
||||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||||
@@ -31,15 +34,6 @@ bool b3GpuSolveConstraint = true;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
B3_SOLVER_N_SPLIT = 16,
|
|
||||||
B3_SOLVER_N_BATCHES = 4,
|
|
||||||
B3_SOLVER_N_OBJ_PER_SPLIT = 10,
|
|
||||||
B3_SOLVER_N_TASKS_PER_BATCH = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct b3GpuBatchingPgsSolverInternalData
|
struct b3GpuBatchingPgsSolverInternalData
|
||||||
@@ -65,6 +59,10 @@ struct b3GpuBatchingPgsSolverInternalData
|
|||||||
cl_kernel m_reorderContactKernel;
|
cl_kernel m_reorderContactKernel;
|
||||||
cl_kernel m_copyConstraintKernel;
|
cl_kernel m_copyConstraintKernel;
|
||||||
|
|
||||||
|
cl_kernel m_setDeterminismSortDataBodyAKernel;
|
||||||
|
cl_kernel m_setDeterminismSortDataBodyBKernel;
|
||||||
|
|
||||||
|
|
||||||
class b3RadixSort32CL* m_sort32;
|
class b3RadixSort32CL* m_sort32;
|
||||||
class b3BoundSearchCL* m_search;
|
class b3BoundSearchCL* m_search;
|
||||||
class b3PrefixScanCL* m_scan;
|
class b3PrefixScanCL* m_scan;
|
||||||
@@ -76,6 +74,9 @@ struct b3GpuBatchingPgsSolverInternalData
|
|||||||
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
|
b3OpenCLArray<b3InertiaCL>* m_inertiaBufferGPU;
|
||||||
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU;
|
||||||
|
|
||||||
|
b3OpenCLArray<b3Contact4>* m_pBufContactOutGPUCopy;
|
||||||
|
b3OpenCLArray<b3SortData>* m_contactKeyValues;
|
||||||
|
|
||||||
|
|
||||||
b3AlignedObjectArray<unsigned int> m_idxBuffer;
|
b3AlignedObjectArray<unsigned int> m_idxBuffer;
|
||||||
b3AlignedObjectArray<b3SortData> m_sortData;
|
b3AlignedObjectArray<b3SortData> m_sortData;
|
||||||
@@ -86,6 +87,7 @@ struct b3GpuBatchingPgsSolverInternalData
|
|||||||
|
|
||||||
b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity)
|
b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity)
|
||||||
{
|
{
|
||||||
|
m_debugOutput=0;
|
||||||
m_data = new b3GpuBatchingPgsSolverInternalData;
|
m_data = new b3GpuBatchingPgsSolverInternalData;
|
||||||
m_data->m_context = ctx;
|
m_data->m_context = ctx;
|
||||||
m_data->m_device = device;
|
m_data->m_device = device;
|
||||||
@@ -97,24 +99,28 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,q);
|
m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaCL>(ctx,q);
|
||||||
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q);
|
m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q);
|
||||||
|
|
||||||
|
m_data->m_pBufContactOutGPUCopy = new b3OpenCLArray<b3Contact4>(ctx,q);
|
||||||
|
m_data->m_contactKeyValues = new b3OpenCLArray<b3SortData>(ctx,q);
|
||||||
|
|
||||||
|
|
||||||
m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024);
|
m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024);
|
||||||
|
|
||||||
m_data->m_sort32 = new b3RadixSort32CL(ctx,device,m_data->m_queue);
|
m_data->m_sort32 = new b3RadixSort32CL(ctx,device,m_data->m_queue);
|
||||||
m_data->m_scan = new b3PrefixScanCL(ctx,device,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
m_data->m_scan = new b3PrefixScanCL(ctx,device,m_data->m_queue,B3_SOLVER_N_CELLS);
|
||||||
m_data->m_search = new b3BoundSearchCL(ctx,device,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
m_data->m_search = new b3BoundSearchCL(ctx,device,m_data->m_queue,B3_SOLVER_N_CELLS);
|
||||||
|
|
||||||
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||||
|
|
||||||
m_data->m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,m_data->m_queue,sortSize);
|
m_data->m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,m_data->m_queue,sortSize);
|
||||||
m_data->m_contactBuffer = new b3OpenCLArray<b3Contact4>(ctx,m_data->m_queue);
|
m_data->m_contactBuffer = new b3OpenCLArray<b3Contact4>(ctx,m_data->m_queue);
|
||||||
|
|
||||||
m_data->m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,m_data->m_queue,B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT );
|
m_data->m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,m_data->m_queue,B3_SOLVER_N_CELLS);
|
||||||
m_data->m_numConstraints->resize(B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
m_data->m_numConstraints->resize(B3_SOLVER_N_CELLS);
|
||||||
|
|
||||||
m_data->m_contactCGPU = new b3OpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
|
m_data->m_contactCGPU = new b3OpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
|
||||||
|
|
||||||
m_data->m_offsets = new b3OpenCLArray<unsigned int>( ctx,m_data->m_queue, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT );
|
m_data->m_offsets = new b3OpenCLArray<unsigned int>( ctx,m_data->m_queue,B3_SOLVER_N_CELLS);
|
||||||
m_data->m_offsets->resize(B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);
|
m_data->m_offsets->resize(B3_SOLVER_N_CELLS);
|
||||||
const char* additionalMacros = "";
|
const char* additionalMacros = "";
|
||||||
const char* srcFileNameForCaching="";
|
const char* srcFileNameForCaching="";
|
||||||
|
|
||||||
@@ -132,7 +138,7 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH);
|
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH,true);
|
||||||
b3Assert(solveContactProg);
|
b3Assert(solveContactProg);
|
||||||
|
|
||||||
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
|
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
|
||||||
@@ -149,7 +155,7 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
|
||||||
b3Assert(m_data->m_solveFrictionKernel);
|
b3Assert(m_data->m_solveFrictionKernel);
|
||||||
|
|
||||||
m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, 0, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
|
||||||
b3Assert(m_data->m_solveContactKernel);
|
b3Assert(m_data->m_solveContactKernel);
|
||||||
|
|
||||||
m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
|
||||||
@@ -158,6 +164,12 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||||
b3Assert(m_data->m_setSortDataKernel);
|
b3Assert(m_data->m_setSortDataKernel);
|
||||||
|
|
||||||
|
m_data->m_setDeterminismSortDataBodyAKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyA", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||||
|
b3Assert(m_data->m_setDeterminismSortDataBodyAKernel);
|
||||||
|
|
||||||
|
m_data->m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyB", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||||
|
b3Assert(m_data->m_setDeterminismSortDataBodyBKernel);
|
||||||
|
|
||||||
m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
|
||||||
b3Assert(m_data->m_reorderContactKernel);
|
b3Assert(m_data->m_reorderContactKernel);
|
||||||
|
|
||||||
@@ -196,6 +208,10 @@ b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver()
|
|||||||
delete m_data->m_bodyBufferGPU;
|
delete m_data->m_bodyBufferGPU;
|
||||||
delete m_data->m_inertiaBufferGPU;
|
delete m_data->m_inertiaBufferGPU;
|
||||||
delete m_data->m_pBufContactOutGPU;
|
delete m_data->m_pBufContactOutGPU;
|
||||||
|
delete m_data->m_pBufContactOutGPUCopy;
|
||||||
|
delete m_data->m_contactKeyValues;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
delete m_data->m_contactCGPU;
|
delete m_data->m_contactCGPU;
|
||||||
delete m_data->m_numConstraints;
|
delete m_data->m_numConstraints;
|
||||||
@@ -232,23 +248,24 @@ struct b3ConstraintCfg
|
|||||||
float m_positionConstraintCoeff;
|
float m_positionConstraintCoeff;
|
||||||
float m_dt;
|
float m_dt;
|
||||||
bool m_enableParallelSolve;
|
bool m_enableParallelSolve;
|
||||||
float m_averageExtent;
|
float m_batchCellSize;
|
||||||
int m_staticIdx;
|
int m_staticIdx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
|
||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
//sort the contacts
|
||||||
|
|
||||||
|
|
||||||
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
||||||
{
|
{
|
||||||
|
|
||||||
const int nn = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
const int nn = B3_SOLVER_N_CELLS;
|
||||||
|
|
||||||
cdata.x = 0;
|
cdata.x = 0;
|
||||||
cdata.y = maxNumBatches;//250;
|
cdata.y = maxNumBatches;//250;
|
||||||
@@ -276,7 +293,7 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
|
|
||||||
|
|
||||||
cdata.z = ib;
|
cdata.z = ib;
|
||||||
cdata.w = B3_SOLVER_N_SPLIT;
|
|
||||||
|
|
||||||
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel );
|
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel );
|
||||||
#if 1
|
#if 1
|
||||||
@@ -286,8 +303,8 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||||
b3BufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
b3BufferInfoCL( m_data->m_solverGPU->m_numConstraints->getBufferCL() ),
|
||||||
b3BufferInfoCL( m_data->m_offsets->getBufferCL() )
|
b3BufferInfoCL( m_data->m_solverGPU->m_offsets->getBufferCL() )
|
||||||
#ifdef DEBUG_ME
|
#ifdef DEBUG_ME
|
||||||
, b3BufferInfoCL(&gpuDebugInfo)
|
, b3BufferInfoCL(&gpuDebugInfo)
|
||||||
#endif
|
#endif
|
||||||
@@ -299,7 +316,12 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
//launcher.setConst( cdata.x );
|
//launcher.setConst( cdata.x );
|
||||||
launcher.setConst( cdata.y );
|
launcher.setConst( cdata.y );
|
||||||
launcher.setConst( cdata.z );
|
launcher.setConst( cdata.z );
|
||||||
launcher.setConst( cdata.w );
|
b3Int4 nSplit;
|
||||||
|
nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
|
launcher.setConst( nSplit );
|
||||||
launcher.launch1D( numWorkItems, 64 );
|
launcher.launch1D( numWorkItems, 64 );
|
||||||
|
|
||||||
|
|
||||||
@@ -366,14 +388,14 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
||||||
{
|
{
|
||||||
cdata.z = ib;
|
cdata.z = ib;
|
||||||
cdata.w = B3_SOLVER_N_SPLIT;
|
|
||||||
|
|
||||||
b3BufferInfoCL bInfo[] = {
|
b3BufferInfoCL bInfo[] = {
|
||||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||||
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
b3BufferInfoCL( shapeBuf->getBufferCL() ),
|
||||||
b3BufferInfoCL( constraint->getBufferCL() ),
|
b3BufferInfoCL( constraint->getBufferCL() ),
|
||||||
b3BufferInfoCL( m_data->m_numConstraints->getBufferCL() ),
|
b3BufferInfoCL( m_data->m_solverGPU->m_numConstraints->getBufferCL() ),
|
||||||
b3BufferInfoCL( m_data->m_offsets->getBufferCL() )
|
b3BufferInfoCL( m_data->m_solverGPU->m_offsets->getBufferCL() )
|
||||||
#ifdef DEBUG_ME
|
#ifdef DEBUG_ME
|
||||||
,b3BufferInfoCL(&gpuDebugInfo)
|
,b3BufferInfoCL(&gpuDebugInfo)
|
||||||
#endif //DEBUG_ME
|
#endif //DEBUG_ME
|
||||||
@@ -383,7 +405,13 @@ void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray<b3Rigi
|
|||||||
//launcher.setConst( cdata.x );
|
//launcher.setConst( cdata.x );
|
||||||
launcher.setConst( cdata.y );
|
launcher.setConst( cdata.y );
|
||||||
launcher.setConst( cdata.z );
|
launcher.setConst( cdata.z );
|
||||||
launcher.setConst( cdata.w );
|
|
||||||
|
b3Int4 nSplit;
|
||||||
|
nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
|
launcher.setConst( nSplit );
|
||||||
|
|
||||||
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
||||||
}
|
}
|
||||||
@@ -414,14 +442,196 @@ static bool sortfnc(const b3SortData& a,const b3SortData& b)
|
|||||||
return (a.m_key<b.m_key);
|
return (a.m_key<b.m_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool b3ContactCmp(const b3Contact4& p, const b3Contact4& q)
|
||||||
|
{
|
||||||
|
return ((p.m_bodyAPtrAndSignBit<q.m_bodyAPtrAndSignBit) ||
|
||||||
|
(p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit<q.m_bodyBPtrAndSignBit));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Lookup table read by SetSortDataCPU when USE_SPATIAL_BATCHING is disabled
// and USE_4x4_GRID is enabled. It is indexed with (aa + bb*4), where aa and bb
// are the low two bits of the two body indices, and yields the sort key
// ("newIndex") for the contact. The table is symmetric in aa/bb.
static const int gridTable4x4[] =
|
||||||
|
{
|
||||||
|
0,1,17,16,
|
||||||
|
1,2,18,19,
|
||||||
|
17,18,32,3,
|
||||||
|
16,19,3,34
|
||||||
|
};
|
||||||
|
|
||||||
|
// Lookup table read by SetSortDataCPU when both USE_SPATIAL_BATCHING and
// USE_4x4_GRID are disabled. It is indexed with (aa + bb*8), where aa and bb
// are the low three bits of the two body indices, and yields the sort key
// ("newIndex") for the contact.
// NOTE(review): every row except the last is a permutation of two runs of four
// consecutive values (e.g. 192..195 and 208..211). The last row contains
// 196..199 and 212..214 plus the value 27, so 215 may have been intended
// instead of 27 -- confirm before relying on this table.
static const int gridTable8x8[] =
|
||||||
|
{
|
||||||
|
0, 2, 3, 16, 17, 18, 19, 1,
|
||||||
|
66, 64, 80, 67, 82, 81, 65, 83,
|
||||||
|
131,144,128,130,147,129,145,146,
|
||||||
|
208,195,194,192,193,211,210,209,
|
||||||
|
21, 22, 23, 5, 4, 6, 7, 20,
|
||||||
|
86, 85, 69, 87, 70, 68, 84, 71,
|
||||||
|
151,133,149,150,135,148,132,134,
|
||||||
|
197,27,214,213,212,199,198,196
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define USE_SPATIAL_BATCHING 1
|
||||||
|
#define USE_4x4_GRID 1
|
||||||
|
|
||||||
|
|
||||||
|
// CPU path that fills the sort key/value pair for every contact.
//
// For each contact gIdx in [0, nContacts):
//   gSortDataOut[gIdx].x receives the batching cell index ("newIndex"),
//   gSortDataOut[gIdx].y receives gIdx itself.
//
// Parameters:
//   gContact     - array of nContacts contacts (read only here).
//   gBodies      - rigid bodies, indexed by the absolute value of the body
//                  index stored in a contact (used with spatial batching only).
//   gSortDataOut - output array with at least nContacts entries.
//   nContacts    - number of contacts to process.
//   scale        - multiplier applied to a body position to obtain a cell
//                  coordinate (used with spatial batching only).
//   nSplit       - number of cells along x/y/z (used with spatial batching only).
//                  The cell coordinate is wrapped with "& (nSplit - 1)", which
//                  acts as a modulo only when each count is a power of two.
//   staticIdx    - body index that is treated as static in addition to
//                  negative (sign-bit) body indices.
//
// The loop bound already guarantees gIdx < nContacts, so the per-item bounds
// check (and its unreachable 0xffffffff fallback) is not needed on the CPU.
void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyCL* gBodies, b3SortData* gSortDataOut, int nContacts,float scale,const b3Int4& nSplit,int staticIdx)
{
	for (int gIdx=0;gIdx<nContacts;gIdx++)
	{
		int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
		int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;

		int aIdx = abs(aPtrAndSignBit);
		int bIdx = abs(bPtrAndSignBit);

		bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);

#if USE_SPATIAL_BATCHING
		// Bin the contact by the position of body A, or of body B when A is static.
		int idx = (aStatic)? bIdx: aIdx;
		b3Vector3 p = gBodies[idx].m_pos;

		// Negative coordinates are shifted by -1 before scaling and truncation,
		// then the result is wrapped into the grid with a bit mask.
		int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);
		int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);
		int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);

		// Flatten the 3D cell coordinate, x fastest, then y, then z.
		int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);

#else//USE_SPATIAL_BATCHING
		bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);

#if USE_4x4_GRID
		// Table-driven key from the low two bits of each body index; a static
		// body takes over the bits of the other body.
		int aa = aIdx&3;
		int bb = bIdx&3;
		if (aStatic)
			aa = bb;
		if (bStatic)
			bb = aa;

		int gridIndex = aa + bb*4;
		int newIndex = gridTable4x4[gridIndex];
#else//USE_4x4_GRID
		// Same scheme with the low three bits and the 8x8 table.
		int aa = aIdx&7;
		int bb = bIdx&7;
		if (aStatic)
			aa = bb;
		if (bStatic)
			bb = aa;

		int gridIndex = aa + bb*8;
		int newIndex = gridTable8x8[gridIndex];
#endif//USE_4x4_GRID
#endif//USE_SPATIAL_BATCHING

		gSortDataOut[gIdx].x = newIndex;
		gSortDataOut[gIdx].y = gIdx;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
|
void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
|
||||||
{
|
{
|
||||||
|
B3_PROFILE("solveContacts");
|
||||||
m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
|
m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
|
||||||
m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
|
m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
|
||||||
m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);
|
m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);
|
||||||
|
|
||||||
|
if (optionalSortContactsDeterminism)
|
||||||
|
{
|
||||||
|
if (gpuSortContacts)
|
||||||
|
{
|
||||||
|
B3_PROFILE("GPU Sort contact constraints (determinism)");
|
||||||
|
|
||||||
|
m_data->m_pBufContactOutGPUCopy->resize(numContacts);
|
||||||
|
m_data->m_contactKeyValues->resize(numContacts);
|
||||||
|
|
||||||
|
m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel);
|
||||||
|
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||||
|
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||||
|
launcher.setConst(numContacts);
|
||||||
|
launcher.launch1D( numContacts, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
|
||||||
|
|
||||||
|
{
|
||||||
|
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel);
|
||||||
|
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||||
|
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||||
|
launcher.setConst(numContacts);
|
||||||
|
launcher.launch1D( numContacts, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
|
||||||
|
|
||||||
|
//__global Contact4* in, __global Contact4* out, __global int2* sortData, int4 cb )
|
||||||
|
|
||||||
|
{
|
||||||
|
B3_PROFILE("gpu reorderContactKernel (determinism)");
|
||||||
|
|
||||||
|
b3Int4 cdata;
|
||||||
|
cdata.x = numContacts;
|
||||||
|
|
||||||
|
//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||||
|
// , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||||
|
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||||
|
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
|
||||||
|
launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
|
||||||
|
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
|
||||||
|
launcher.setConst( cdata );
|
||||||
|
launcher.launch1D( numContacts, 64 );
|
||||||
|
}
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
B3_PROFILE("CPU Sort contact constraints (determinism)");
|
||||||
|
b3AlignedObjectArray<b3Contact4> cpuConstraints;
|
||||||
|
m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints);
|
||||||
|
bool sort = true;
|
||||||
|
if (sort)
|
||||||
|
{
|
||||||
|
cpuConstraints.quickSort(b3ContactCmp);
|
||||||
|
|
||||||
|
for (int i=0;i<cpuConstraints.size();i++)
|
||||||
|
{
|
||||||
|
cpuConstraints[i].m_batchIdx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints);
|
||||||
|
if (m_debugOutput==100)
|
||||||
|
{
|
||||||
|
for (int i=0;i<cpuConstraints.size();i++)
|
||||||
|
{
|
||||||
|
printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_debugOutput++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int nContactOut = m_data->m_pBufContactOutGPU->size();
|
int nContactOut = m_data->m_pBufContactOutGPU->size();
|
||||||
|
|
||||||
bool useSolver = true;
|
bool useSolver = true;
|
||||||
@@ -431,7 +641,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
float dt=1./60.;
|
float dt=1./60.;
|
||||||
b3ConstraintCfg csCfg( dt );
|
b3ConstraintCfg csCfg( dt );
|
||||||
csCfg.m_enableParallelSolve = true;
|
csCfg.m_enableParallelSolve = true;
|
||||||
csCfg.m_averageExtent = 0.3;//0.1;//2;//.2f;//@TODO m_averageObjExtent;
|
csCfg.m_batchCellSize = 6;
|
||||||
csCfg.m_staticIdx = static0Index;
|
csCfg.m_staticIdx = static0Index;
|
||||||
|
|
||||||
|
|
||||||
@@ -486,6 +696,8 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||||
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
|
||||||
|
|
||||||
|
|
||||||
|
if (gpuSetSortData)
|
||||||
{ // 2. set cell idx
|
{ // 2. set cell idx
|
||||||
B3_PROFILE("GPU set cell idx");
|
B3_PROFILE("GPU set cell idx");
|
||||||
struct CB
|
struct CB
|
||||||
@@ -493,15 +705,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
int m_nContacts;
|
int m_nContacts;
|
||||||
int m_staticIdx;
|
int m_staticIdx;
|
||||||
float m_scale;
|
float m_scale;
|
||||||
int m_nSplit;
|
b3Int4 m_nSplit;
|
||||||
};
|
};
|
||||||
|
|
||||||
b3Assert( sortSize%64 == 0 );
|
b3Assert( sortSize%64 == 0 );
|
||||||
CB cdata;
|
CB cdata;
|
||||||
cdata.m_nContacts = nContacts;
|
cdata.m_nContacts = nContacts;
|
||||||
cdata.m_staticIdx = csCfg.m_staticIdx;
|
cdata.m_staticIdx = csCfg.m_staticIdx;
|
||||||
cdata.m_scale = 1.f/(B3_SOLVER_N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
|
cdata.m_scale = 1.f/csCfg.m_batchCellSize;
|
||||||
cdata.m_nSplit = B3_SOLVER_N_SPLIT;
|
cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
||||||
|
|
||||||
@@ -516,10 +730,30 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
|
|
||||||
launcher.launch1D( sortSize, 64 );
|
launcher.launch1D( sortSize, 64 );
|
||||||
}
|
} else
|
||||||
|
{
|
||||||
|
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
||||||
|
b3AlignedObjectArray<b3SortData> sortDataCPU;
|
||||||
|
m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU);
|
||||||
|
|
||||||
|
b3AlignedObjectArray<b3Contact4> contactCPU;
|
||||||
|
m_data->m_pBufContactOutGPU->copyToHost(contactCPU);
|
||||||
|
b3AlignedObjectArray<b3RigidBodyCL> bodiesCPU;
|
||||||
|
bodyBuf->copyToHost(bodiesCPU);
|
||||||
|
float scale = 1.f/csCfg.m_batchCellSize;
|
||||||
|
b3Int4 nSplit;
|
||||||
|
nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
|
SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx);
|
||||||
|
|
||||||
|
|
||||||
|
m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool gpuRadixSort=true;
|
|
||||||
if (gpuRadixSort)
|
if (gpuRadixSort)
|
||||||
{ // 3. sort by cell idx
|
{ // 3. sort by cell idx
|
||||||
B3_PROFILE("gpuRadixSort");
|
B3_PROFILE("gpuRadixSort");
|
||||||
@@ -543,19 +777,19 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
keyValuesInOut.copyFromHost(hostValues);
|
keyValuesInOut.copyFromHost(hostValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
// 4. find entries
|
// 4. find entries
|
||||||
B3_PROFILE("gpuBoundSearch");
|
B3_PROFILE("gpuBoundSearch");
|
||||||
|
|
||||||
m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,
|
m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
|
||||||
B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT,b3BoundSearchCL::COUNT);
|
|
||||||
|
|
||||||
|
|
||||||
//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
|
//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
|
||||||
// B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );
|
// B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );
|
||||||
|
|
||||||
//unsigned int sum;
|
//unsigned int sum;
|
||||||
m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT);//,&sum );
|
m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
|
||||||
//printf("sum = %d\n",sum);
|
//printf("sum = %d\n",sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -640,7 +874,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
{
|
{
|
||||||
B3_PROFILE("batch grid");
|
B3_PROFILE("batch grid");
|
||||||
for(int i=0; i<B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; i++)
|
for(int i=0; i<B3_SOLVER_N_CELLS; i++)
|
||||||
{
|
{
|
||||||
int n = (nNativeHost)[i];
|
int n = (nNativeHost)[i];
|
||||||
int offset = (offsetsNativeHost)[i];
|
int offset = (offsetsNativeHost)[i];
|
||||||
@@ -648,12 +882,12 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
if( n )
|
if( n )
|
||||||
{
|
{
|
||||||
numNonzeroGrid++;
|
numNonzeroGrid++;
|
||||||
//printf("cpu batch\n");
|
//printf("cpu batch cell %d\n",i);
|
||||||
|
|
||||||
|
|
||||||
int simdWidth =64;//-1;//32;
|
int simdWidth =numBodies+1;//-1;//64;//-1;//32;
|
||||||
//int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||||
int numBatches = sortConstraintByBatch2( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
//int numBatches = sortConstraintByBatch2( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||||
//int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
//int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||||
|
|
||||||
|
|
||||||
@@ -704,17 +938,28 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
m_data->m_solverGPU->m_nIterations = 4;//10
|
int numIter = 4;
|
||||||
|
|
||||||
|
m_data->m_solverGPU->m_nIterations = numIter;//10
|
||||||
if (b3GpuSolveConstraint)
|
if (b3GpuSolveConstraint)
|
||||||
{
|
{
|
||||||
B3_PROFILE("GPU solveContactConstraint");
|
B3_PROFILE("GPU solveContactConstraint");
|
||||||
|
|
||||||
m_data->m_solverGPU->solveContactConstraint(
|
/*m_data->m_solverGPU->solveContactConstraint(
|
||||||
m_data->m_bodyBufferGPU,
|
m_data->m_bodyBufferGPU,
|
||||||
m_data->m_inertiaBufferGPU,
|
m_data->m_inertiaBufferGPU,
|
||||||
m_data->m_contactCGPU,0,
|
m_data->m_contactCGPU,0,
|
||||||
nContactOut ,
|
nContactOut ,
|
||||||
maxNumBatches);
|
maxNumBatches);
|
||||||
|
*/
|
||||||
|
|
||||||
|
solveContactConstraint(
|
||||||
|
m_data->m_bodyBufferGPU,
|
||||||
|
m_data->m_inertiaBufferGPU,
|
||||||
|
m_data->m_contactCGPU,0,
|
||||||
|
nContactOut ,
|
||||||
|
maxNumBatches,numIter);
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -803,6 +1048,8 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
|
|||||||
for(int i=0; i<nIdxSrc; i++)
|
for(int i=0; i<nIdxSrc; i++)
|
||||||
{
|
{
|
||||||
int idx = idxSrc[i];
|
int idx = idxSrc[i];
|
||||||
|
|
||||||
|
|
||||||
b3Assert( idx < n );
|
b3Assert( idx < n );
|
||||||
// check if it can go
|
// check if it can go
|
||||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ class b3GpuBatchingPgsSolver
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
int m_debugOutput;
|
||||||
|
|
||||||
struct b3GpuBatchingPgsSolverInternalData* m_data;
|
struct b3GpuBatchingPgsSolverInternalData* m_data;
|
||||||
|
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
|||||||
|
|
||||||
//update worldspace AABBs from local AABB/worldtransform
|
//update worldspace AABBs from local AABB/worldtransform
|
||||||
{
|
{
|
||||||
|
B3_PROFILE("setupGpuAabbs");
|
||||||
setupGpuAabbsFull();
|
setupGpuAabbsFull();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ subject to the following restrictions:
|
|||||||
#include "b3Solver.h"
|
#include "b3Solver.h"
|
||||||
|
|
||||||
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
|
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
|
||||||
bool useNewBatchingKernel = true;
|
bool useNewBatchingKernel = false;//true;
|
||||||
|
|
||||||
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
|
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
|
||||||
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"
|
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"
|
||||||
@@ -91,19 +91,19 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
|
|||||||
m_queue(queue)
|
m_queue(queue)
|
||||||
{
|
{
|
||||||
m_sort32 = new b3RadixSort32CL(ctx,device,queue);
|
m_sort32 = new b3RadixSort32CL(ctx,device,queue);
|
||||||
m_scan = new b3PrefixScanCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
m_scan = new b3PrefixScanCL(ctx,device,queue,B3_SOLVER_N_CELLS);
|
||||||
m_search = new b3BoundSearchCL(ctx,device,queue,N_SPLIT*N_SPLIT);
|
m_search = new b3BoundSearchCL(ctx,device,queue,B3_SOLVER_N_CELLS);
|
||||||
|
|
||||||
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
|
||||||
|
|
||||||
m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize);
|
m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize);
|
||||||
m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue);
|
m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue);
|
||||||
|
|
||||||
m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,B3_SOLVER_N_CELLS );
|
||||||
m_numConstraints->resize(N_SPLIT*N_SPLIT);
|
m_numConstraints->resize(B3_SOLVER_N_CELLS);
|
||||||
|
|
||||||
m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue, N_SPLIT*N_SPLIT );
|
m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue,B3_SOLVER_N_CELLS);
|
||||||
m_offsets->resize(N_SPLIT*N_SPLIT);
|
m_offsets->resize(B3_SOLVER_N_CELLS);
|
||||||
const char* additionalMacros = "";
|
const char* additionalMacros = "";
|
||||||
const char* srcFileNameForCaching="";
|
const char* srcFileNameForCaching="";
|
||||||
|
|
||||||
@@ -122,7 +122,7 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
|
|||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH);
|
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH,false);
|
||||||
b3Assert(solveContactProg);
|
b3Assert(solveContactProg);
|
||||||
|
|
||||||
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
|
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
|
||||||
@@ -168,8 +168,8 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
|
|||||||
cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH);
|
cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH);
|
||||||
b3Assert(batchingNewProg);
|
b3Assert(batchingNewProg);
|
||||||
|
|
||||||
m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||||
//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
|
m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
|
||||||
b3Assert(m_batchingKernelNew);
|
b3Assert(m_batchingKernelNew);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -454,69 +454,160 @@ void solveContact(b3GpuConstraint4& cs,
|
|||||||
struct SolveTask// : public ThreadPool::Task
|
struct SolveTask// : public ThreadPool::Task
|
||||||
{
|
{
|
||||||
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<b3InertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
SolveTask(b3AlignedObjectArray<b3RigidBodyCL>& bodies, b3AlignedObjectArray<b3InertiaCL>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
|
||||||
int start, int nConstraints)
|
int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx)
|
||||||
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
|
||||||
m_solveFriction( true ){}
|
m_solveFriction( true ),m_maxNumBatches(maxNumBatches),
|
||||||
|
m_wgUsedBodies(wgUsedBodies),m_curWgidx(curWgidx)
|
||||||
|
{}
|
||||||
|
|
||||||
unsigned short int getType(){ return 0; }
|
unsigned short int getType(){ return 0; }
|
||||||
|
|
||||||
void run(int tIdx)
|
void run(int tIdx)
|
||||||
{
|
{
|
||||||
|
b3AlignedObjectArray<int> usedBodies;
|
||||||
|
//printf("run..............\n");
|
||||||
|
|
||||||
|
|
||||||
for(int ic=0; ic<m_nConstraints; ic++)
|
for (int bb=0;bb<m_maxNumBatches;bb++)
|
||||||
{
|
{
|
||||||
int i = m_start + ic;
|
usedBodies.resize(0);
|
||||||
|
for(int ic=m_nConstraints-1; ic>=0; ic--)
|
||||||
float frictionCoeff = m_constraints[i].getFrictionCoeff();
|
//for(int ic=0; ic<m_nConstraints; ic++)
|
||||||
int aIdx = (int)m_constraints[i].m_bodyA;
|
|
||||||
int bIdx = (int)m_constraints[i].m_bodyB;
|
|
||||||
b3RigidBodyCL& bodyA = m_bodies[aIdx];
|
|
||||||
b3RigidBodyCL& bodyB = m_bodies[bIdx];
|
|
||||||
|
|
||||||
if( !m_solveFriction )
|
|
||||||
{
|
{
|
||||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
|
||||||
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
|
||||||
|
|
||||||
solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
|
int i = m_start + ic;
|
||||||
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
|
if (m_constraints[i].m_batchIdx != bb)
|
||||||
maxRambdaDt, minRambdaDt );
|
continue;
|
||||||
|
|
||||||
}
|
float frictionCoeff = m_constraints[i].getFrictionCoeff();
|
||||||
else
|
int aIdx = (int)m_constraints[i].m_bodyA;
|
||||||
{
|
int bIdx = (int)m_constraints[i].m_bodyB;
|
||||||
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
int localBatch = m_constraints[i].m_batchIdx;
|
||||||
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
b3RigidBodyCL& bodyA = m_bodies[aIdx];
|
||||||
|
b3RigidBodyCL& bodyB = m_bodies[bIdx];
|
||||||
|
|
||||||
float sum = 0;
|
if ((bodyA.m_invMass) && (bodyB.m_invMass))
|
||||||
for(int j=0; j<4; j++)
|
|
||||||
{
|
{
|
||||||
sum +=m_constraints[i].m_appliedRambdaDt[j];
|
// printf("aIdx=%d, bIdx=%d\n", aIdx,bIdx);
|
||||||
}
|
}
|
||||||
frictionCoeff = 0.7f;
|
if (bIdx==10)
|
||||||
for(int j=0; j<4; j++)
|
|
||||||
{
|
{
|
||||||
maxRambdaDt[j] = frictionCoeff*sum;
|
//printf("ic(b)=%d, localBatch=%d\n",ic,localBatch);
|
||||||
minRambdaDt[j] = -maxRambdaDt[j];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
|
if (aIdx==10)
|
||||||
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
|
{
|
||||||
maxRambdaDt, minRambdaDt );
|
//printf("ic(a)=%d, localBatch=%d\n",ic,localBatch);
|
||||||
|
}
|
||||||
|
if (usedBodies.size()<(aIdx+1))
|
||||||
|
{
|
||||||
|
usedBodies.resize(aIdx+1,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (usedBodies.size()<(bIdx+1))
|
||||||
|
{
|
||||||
|
usedBodies.resize(bIdx+1,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bodyA.m_invMass)
|
||||||
|
{
|
||||||
|
b3Assert(usedBodies[aIdx]==0);
|
||||||
|
}
|
||||||
|
if (m_wgUsedBodies)
|
||||||
|
{
|
||||||
|
for (int w=0;w<B3_SOLVER_N_CELLS;w++)
|
||||||
|
{
|
||||||
|
if (w!=m_curWgidx)
|
||||||
|
{
|
||||||
|
if (bodyA.m_invMass)
|
||||||
|
{
|
||||||
|
if (m_wgUsedBodies[w].size()>aIdx)
|
||||||
|
{
|
||||||
|
b3Assert(m_wgUsedBodies[w][aIdx]==0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bodyB.m_invMass)
|
||||||
|
{
|
||||||
|
if (m_wgUsedBodies[w].size()>bIdx)
|
||||||
|
{
|
||||||
|
b3Assert(m_wgUsedBodies[w][bIdx]==0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
usedBodies[aIdx]++;
|
||||||
|
if (bodyB.m_invMass)
|
||||||
|
{
|
||||||
|
b3Assert(usedBodies[bIdx]==0);
|
||||||
|
}
|
||||||
|
usedBodies[bIdx]++;
|
||||||
|
|
||||||
|
if( !m_solveFriction )
|
||||||
|
{
|
||||||
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
|
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||||
|
|
||||||
|
solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
|
||||||
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
|
||||||
|
maxRambdaDt, minRambdaDt );
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||||
|
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
|
||||||
|
|
||||||
|
float sum = 0;
|
||||||
|
for(int j=0; j<4; j++)
|
||||||
|
{
|
||||||
|
sum +=m_constraints[i].m_appliedRambdaDt[j];
|
||||||
|
}
|
||||||
|
frictionCoeff = 0.7f;
|
||||||
|
for(int j=0; j<4; j++)
|
||||||
|
{
|
||||||
|
maxRambdaDt[j] = frictionCoeff*sum;
|
||||||
|
minRambdaDt[j] = -maxRambdaDt[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
|
||||||
|
(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
|
||||||
|
maxRambdaDt, minRambdaDt );
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_wgUsedBodies)
|
||||||
|
{
|
||||||
|
if (m_wgUsedBodies[m_curWgidx].size()<usedBodies.size())
|
||||||
|
{
|
||||||
|
m_wgUsedBodies[m_curWgidx].resize(usedBodies.size());
|
||||||
|
}
|
||||||
|
for (int i=0;i<usedBodies.size();i++)
|
||||||
|
{
|
||||||
|
if (usedBodies[i])
|
||||||
|
{
|
||||||
|
//printf("cell %d uses body %d\n", m_curWgidx,i);
|
||||||
|
m_wgUsedBodies[m_curWgidx][i]=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
b3AlignedObjectArray<b3RigidBodyCL>& m_bodies;
|
b3AlignedObjectArray<b3RigidBodyCL>& m_bodies;
|
||||||
b3AlignedObjectArray<b3InertiaCL>& m_shapes;
|
b3AlignedObjectArray<b3InertiaCL>& m_shapes;
|
||||||
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints;
|
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints;
|
||||||
|
b3AlignedObjectArray<int>* m_wgUsedBodies;
|
||||||
|
int m_curWgidx;
|
||||||
int m_start;
|
int m_start;
|
||||||
int m_nConstraints;
|
int m_nConstraints;
|
||||||
bool m_solveFriction;
|
bool m_solveFriction;
|
||||||
|
int m_maxNumBatches;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -524,6 +615,51 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
int nSplitX = B3_SOLVER_N_SPLIT_X;
|
||||||
|
int nSplitY = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES;
|
||||||
|
for (int z=0;z<4;z++)
|
||||||
|
{
|
||||||
|
for (int y=0;y<4;y++)
|
||||||
|
{
|
||||||
|
for (int x=0;x<4;x++)
|
||||||
|
{
|
||||||
|
int newIndex = (x+y*nSplitX+z*nSplitX*nSplitY);
|
||||||
|
// printf("newIndex=%d\n",newIndex);
|
||||||
|
|
||||||
|
int zIdx = newIndex/(nSplitX*nSplitY);
|
||||||
|
int remain = newIndex%(nSplitX*nSplitY);
|
||||||
|
int yIdx = remain/nSplitX;
|
||||||
|
int xIdx = remain%nSplitX;
|
||||||
|
// printf("newIndex=%d\n",newIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--)
|
||||||
|
for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++)
|
||||||
|
{
|
||||||
|
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||||
|
{
|
||||||
|
int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2);
|
||||||
|
int remain= (wgIdx%((nSplitX*nSplitY)/4));
|
||||||
|
int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1);
|
||||||
|
|
||||||
|
/*int zIdx = newIndex/(nSplitX*nSplitY);
|
||||||
|
int remain = newIndex%(nSplitX*nSplitY);
|
||||||
|
int yIdx = remain/nSplitX;
|
||||||
|
int xIdx = remain%nSplitX;
|
||||||
|
*/
|
||||||
|
int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
|
||||||
|
// printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
b3AlignedObjectArray<b3RigidBodyCL> bodyNative;
|
b3AlignedObjectArray<b3RigidBodyCL> bodyNative;
|
||||||
bodyBuf->copyToHost(bodyNative);
|
bodyBuf->copyToHost(bodyNative);
|
||||||
b3AlignedObjectArray<b3InertiaCL> shapeNative;
|
b3AlignedObjectArray<b3InertiaCL> shapeNative;
|
||||||
@@ -531,24 +667,129 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyCL>* bodyBu
|
|||||||
b3AlignedObjectArray<b3GpuConstraint4> constraintNative;
|
b3AlignedObjectArray<b3GpuConstraint4> constraintNative;
|
||||||
constraint->copyToHost(constraintNative);
|
constraint->copyToHost(constraintNative);
|
||||||
|
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
b3AlignedObjectArray<unsigned int> numConstraintsHost;
|
||||||
{
|
m_numConstraints->copyToHost(numConstraintsHost);
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n );
|
|
||||||
task.m_solveFriction = false;
|
|
||||||
task.run(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
//printf("------------------------\n");
|
||||||
|
b3AlignedObjectArray<unsigned int> offsetsHost;
|
||||||
|
m_offsets->copyToHost(offsetsHost);
|
||||||
|
static int frame=0;
|
||||||
|
bool useBatches=true;
|
||||||
|
if (useBatches)
|
||||||
{
|
{
|
||||||
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n );
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
task.m_solveFriction = true;
|
{
|
||||||
task.run(0);
|
for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++)
|
||||||
|
{
|
||||||
|
|
||||||
|
int nSplitX = B3_SOLVER_N_SPLIT_X;
|
||||||
|
int nSplitY = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES;
|
||||||
|
//printf("cell Batch %d\n",cellBatch);
|
||||||
|
b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS];
|
||||||
|
for (int i=0;i<B3_SOLVER_N_CELLS;i++)
|
||||||
|
{
|
||||||
|
usedBodies[i].resize(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--)
|
||||||
|
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||||
|
{
|
||||||
|
int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2);
|
||||||
|
int remain= (wgIdx%((nSplitX*nSplitY)/4));
|
||||||
|
int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1);
|
||||||
|
int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
|
||||||
|
|
||||||
|
|
||||||
|
if( numConstraintsHost[cellIdx] == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
//printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch);
|
||||||
|
//printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]);
|
||||||
|
if (zIdx)
|
||||||
|
{
|
||||||
|
//printf("?\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iter==0)
|
||||||
|
{
|
||||||
|
//printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx);
|
||||||
|
//printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]);
|
||||||
|
}
|
||||||
|
const int start = offsetsHost[cellIdx];
|
||||||
|
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
||||||
|
const int end = start + numConstraintsInCell;
|
||||||
|
|
||||||
|
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx);
|
||||||
|
task.m_solveFriction = false;
|
||||||
|
task.run(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
|
{
|
||||||
|
for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++)
|
||||||
|
{
|
||||||
|
int nSplitX = B3_SOLVER_N_SPLIT_X;
|
||||||
|
int nSplitY = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
|
||||||
|
|
||||||
|
int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES;
|
||||||
|
|
||||||
|
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||||
|
{
|
||||||
|
int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2);
|
||||||
|
int remain= (wgIdx%((nSplitX*nSplitY)/4));
|
||||||
|
int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1);
|
||||||
|
|
||||||
|
int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
|
||||||
|
|
||||||
|
if( numConstraintsHost[cellIdx] == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
//printf("yIdx=%d\n",yIdx);
|
||||||
|
|
||||||
|
const int start = offsetsHost[cellIdx];
|
||||||
|
int numConstraintsInCell = numConstraintsHost[cellIdx];
|
||||||
|
const int end = start + numConstraintsInCell;
|
||||||
|
|
||||||
|
SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0);
|
||||||
|
task.m_solveFriction = true;
|
||||||
|
task.run(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
|
{
|
||||||
|
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0);
|
||||||
|
task.m_solveFriction = false;
|
||||||
|
task.run(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
|
{
|
||||||
|
SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0);
|
||||||
|
task.m_solveFriction = true;
|
||||||
|
task.run(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bodyBuf->copyFromHost(bodyNative);
|
bodyBuf->copyFromHost(bodyNative);
|
||||||
shapeBuf->copyFromHost(shapeNative);
|
shapeBuf->copyFromHost(shapeNative);
|
||||||
constraint->copyFromHost(constraintNative);
|
constraint->copyFromHost(constraintNative);
|
||||||
|
frame++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -563,14 +804,17 @@ void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
|||||||
// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
|
||||||
// b3BufferInfoCL( m_offsets->getBufferCL() )
|
// b3BufferInfoCL( m_offsets->getBufferCL() )
|
||||||
|
|
||||||
const int nn = b3SolverBase::N_SPLIT*b3SolverBase::N_SPLIT;
|
int cellBatch = batchId;
|
||||||
int numWorkItems = 64*nn/b3SolverBase::N_BATCHES;
|
const int nn = B3_SOLVER_N_CELLS;
|
||||||
|
int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
|
||||||
|
|
||||||
b3AlignedObjectArray<unsigned int> gN;
|
b3AlignedObjectArray<unsigned int> gN;
|
||||||
m_numConstraints->copyToHost(gN);
|
m_numConstraints->copyToHost(gN);
|
||||||
b3AlignedObjectArray<unsigned int> gOffsets;
|
b3AlignedObjectArray<unsigned int> gOffsets;
|
||||||
m_offsets->copyToHost(gOffsets);
|
m_offsets->copyToHost(gOffsets);
|
||||||
int nSplit = b3SolverBase::N_SPLIT;
|
int nSplitX = B3_SOLVER_N_SPLIT_X;
|
||||||
|
int nSplitY = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
|
||||||
int bIdx = batchId;
|
int bIdx = batchId;
|
||||||
|
|
||||||
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
|
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
|
||||||
@@ -578,16 +822,21 @@ void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
|||||||
|
|
||||||
printf("batch = %d\n", batchId);
|
printf("batch = %d\n", batchId);
|
||||||
|
|
||||||
int numWorkgroups = nn/b3SolverBase::N_BATCHES;
|
int numWorkgroups = nn/B3_SOLVER_N_BATCHES;
|
||||||
b3AlignedObjectArray<int> usedBodies;
|
b3AlignedObjectArray<int> usedBodies;
|
||||||
|
|
||||||
|
|
||||||
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
|
||||||
{
|
{
|
||||||
printf("wgIdx = %d ", wgIdx);
|
printf("wgIdx = %d ", wgIdx);
|
||||||
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
|
||||||
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
int zIdx = (wgIdx/((nSplitX*nSplitY))/2)*2+((cellBatch&4)>>2);
|
||||||
int cellIdx = xIdx+yIdx*nSplit;
|
int remain = wgIdx%((nSplitX*nSplitY));
|
||||||
|
int yIdx = (remain%(nSplitX/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain/(nSplitX/2))*2 + (cellBatch&1);
|
||||||
|
|
||||||
|
|
||||||
|
int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
|
||||||
printf("cellIdx=%d\n",cellIdx);
|
printf("cellIdx=%d\n",cellIdx);
|
||||||
if( gN[cellIdx] == 0 )
|
if( gN[cellIdx] == 0 )
|
||||||
continue;
|
continue;
|
||||||
@@ -629,13 +878,13 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
|
||||||
{
|
{
|
||||||
|
|
||||||
const int nn = N_SPLIT*N_SPLIT;
|
const int nn = B3_SOLVER_N_CELLS;
|
||||||
|
|
||||||
cdata.x = 0;
|
cdata.x = 0;
|
||||||
cdata.y = maxNumBatches;//250;
|
cdata.y = maxNumBatches;//250;
|
||||||
|
|
||||||
|
|
||||||
int numWorkItems = 64*nn/N_BATCHES;
|
int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
|
||||||
#ifdef DEBUG_ME
|
#ifdef DEBUG_ME
|
||||||
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
|
||||||
adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
|
||||||
@@ -648,7 +897,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
B3_PROFILE("m_batchSolveKernel iterations");
|
B3_PROFILE("m_batchSolveKernel iterations");
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
{
|
{
|
||||||
for(int ib=0; ib<N_BATCHES; ib++)
|
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (verify)
|
if (verify)
|
||||||
@@ -663,7 +912,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
|
|
||||||
|
|
||||||
cdata.z = ib;
|
cdata.z = ib;
|
||||||
cdata.w = N_SPLIT;
|
|
||||||
|
|
||||||
b3LauncherCL launcher( m_queue, m_solveContactKernel );
|
b3LauncherCL launcher( m_queue, m_solveContactKernel );
|
||||||
#if 1
|
#if 1
|
||||||
@@ -686,7 +935,12 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
//launcher.setConst( cdata.x );
|
//launcher.setConst( cdata.x );
|
||||||
launcher.setConst( cdata.y );
|
launcher.setConst( cdata.y );
|
||||||
launcher.setConst( cdata.z );
|
launcher.setConst( cdata.z );
|
||||||
launcher.setConst( cdata.w );
|
b3Int4 nSplit;
|
||||||
|
nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
|
launcher.setConst( nSplit );
|
||||||
launcher.launch1D( numWorkItems, 64 );
|
launcher.launch1D( numWorkItems, 64 );
|
||||||
|
|
||||||
|
|
||||||
@@ -750,10 +1004,10 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
B3_PROFILE("m_batchSolveKernel iterations2");
|
B3_PROFILE("m_batchSolveKernel iterations2");
|
||||||
for(int iter=0; iter<m_nIterations; iter++)
|
for(int iter=0; iter<m_nIterations; iter++)
|
||||||
{
|
{
|
||||||
for(int ib=0; ib<N_BATCHES; ib++)
|
for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
|
||||||
{
|
{
|
||||||
cdata.z = ib;
|
cdata.z = ib;
|
||||||
cdata.w = N_SPLIT;
|
|
||||||
|
|
||||||
b3BufferInfoCL bInfo[] = {
|
b3BufferInfoCL bInfo[] = {
|
||||||
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
b3BufferInfoCL( bodyBuf->getBufferCL() ),
|
||||||
@@ -770,9 +1024,14 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
|
|||||||
//launcher.setConst( cdata.x );
|
//launcher.setConst( cdata.x );
|
||||||
launcher.setConst( cdata.y );
|
launcher.setConst( cdata.y );
|
||||||
launcher.setConst( cdata.z );
|
launcher.setConst( cdata.z );
|
||||||
launcher.setConst( cdata.w );
|
b3Int4 nSplit;
|
||||||
|
nSplit.x = B3_SOLVER_N_SPLIT_X;
|
||||||
|
nSplit.y = B3_SOLVER_N_SPLIT_Y;
|
||||||
|
nSplit.z = B3_SOLVER_N_SPLIT_Z;
|
||||||
|
|
||||||
launcher.launch1D( 64*nn/N_BATCHES, 64 );
|
launcher.setConst( nSplit );
|
||||||
|
|
||||||
|
launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
@@ -861,7 +1120,7 @@ void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
|||||||
cdata.m_nContacts = nContacts;
|
cdata.m_nContacts = nContacts;
|
||||||
cdata.m_staticIdx = cfg.m_staticIdx;
|
cdata.m_staticIdx = cfg.m_staticIdx;
|
||||||
cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
|
cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
|
||||||
cdata.m_nSplit = N_SPLIT;
|
cdata.m_nSplit = B3_SOLVER_N_SPLIT;
|
||||||
|
|
||||||
|
|
||||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) };
|
||||||
@@ -872,16 +1131,16 @@ void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
{ // 3. sort by cell idx
|
{ // 3. sort by cell idx
|
||||||
int n = N_SPLIT*N_SPLIT;
|
int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
|
||||||
int sortBit = 32;
|
int sortBit = 32;
|
||||||
//if( n <= 0xffff ) sortBit = 16;
|
//if( n <= 0xffff ) sortBit = 16;
|
||||||
//if( n <= 0xff ) sortBit = 8;
|
//if( n <= 0xff ) sortBit = 8;
|
||||||
m_sort32->execute(*m_sortDataBuffer,sortSize);
|
m_sort32->execute(*m_sortDataBuffer,sortSize);
|
||||||
}
|
}
|
||||||
{ // 4. find entries
|
{ // 4. find entries
|
||||||
m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, b3BoundSearchCL::COUNT);
|
m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, b3BoundSearchCL::COUNT);
|
||||||
|
|
||||||
m_scan->execute( *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
|
m_scan->execute( *countsNative, *offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT );
|
||||||
}
|
}
|
||||||
|
|
||||||
{ // 5. sort constraints by cellIdx
|
{ // 5. sort constraints by cellIdx
|
||||||
@@ -911,7 +1170,7 @@ void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
|||||||
void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx )
|
||||||
{
|
{
|
||||||
|
|
||||||
int numWorkItems = 64*N_SPLIT*N_SPLIT;
|
int numWorkItems = 64*B3_SOLVER_N_CELLS;
|
||||||
{
|
{
|
||||||
B3_PROFILE("batch generation");
|
B3_PROFILE("batch generation");
|
||||||
|
|
||||||
@@ -962,7 +1221,7 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
|
|||||||
launcher.setConst(staticIdx);
|
launcher.setConst(staticIdx);
|
||||||
|
|
||||||
launcher.launch1D( numWorkItems, 64 );
|
launcher.launch1D( numWorkItems, 64 );
|
||||||
clFinish(m_queue);
|
//clFinish(m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef BATCH_DEBUG
|
#ifdef BATCH_DEBUG
|
||||||
|
|||||||
@@ -32,6 +32,15 @@ subject to the following restrictions:
|
|||||||
|
|
||||||
#define B3NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
|
#define B3NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
B3_SOLVER_N_SPLIT_X = 8,//16,//4,
|
||||||
|
B3_SOLVER_N_SPLIT_Y = 4,//16,//4,
|
||||||
|
B3_SOLVER_N_SPLIT_Z = 8,//,
|
||||||
|
B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X*B3_SOLVER_N_SPLIT_Y*B3_SOLVER_N_SPLIT_Z,
|
||||||
|
B3_SOLVER_N_BATCHES = 8,//4,//8,//4,
|
||||||
|
};
|
||||||
|
|
||||||
class b3SolverBase
|
class b3SolverBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@@ -45,19 +54,10 @@ class b3SolverBase
|
|||||||
float m_positionConstraintCoeff;
|
float m_positionConstraintCoeff;
|
||||||
float m_dt;
|
float m_dt;
|
||||||
bool m_enableParallelSolve;
|
bool m_enableParallelSolve;
|
||||||
float m_averageExtent;
|
float m_batchCellSize;
|
||||||
int m_staticIdx;
|
int m_staticIdx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
N_SPLIT = 16,
|
|
||||||
N_BATCHES = 4,//8,//4,
|
|
||||||
N_OBJ_PER_SPLIT = 10,
|
|
||||||
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class b3Solver : public b3SolverBase
|
class b3Solver : public b3SolverBase
|
||||||
|
|||||||
@@ -411,8 +411,8 @@ void BatchSolveKernelContact(__global Body* gBodies,
|
|||||||
__global int* gN,
|
__global int* gN,
|
||||||
__global int* gOffsets,
|
__global int* gOffsets,
|
||||||
int maxBatch,
|
int maxBatch,
|
||||||
int bIdx,
|
int cellBatch,
|
||||||
int nSplit
|
int4 nSplit
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
//__local int ldsBatchIdx[WG_SIZE+1];
|
//__local int ldsBatchIdx[WG_SIZE+1];
|
||||||
@@ -428,17 +428,29 @@ void BatchSolveKernelContact(__global Body* gBodies,
|
|||||||
//debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
|
//debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
|
||||||
|
|
||||||
|
|
||||||
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);
|
||||||
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
int remain= (wgIdx%((nSplit.x*nSplit.y)/4));
|
||||||
int cellIdx = xIdx+yIdx*nSplit;
|
int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);
|
||||||
|
int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);
|
||||||
|
|
||||||
|
//int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
||||||
|
//int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
||||||
|
//int cellIdx = xIdx+yIdx*nSplit;
|
||||||
|
|
||||||
if( gN[cellIdx] == 0 )
|
if( gN[cellIdx] == 0 )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const int start = gOffsets[cellIdx];
|
const int start = gOffsets[cellIdx];
|
||||||
const int end = start + gN[cellIdx];
|
const int end = start + gN[cellIdx];
|
||||||
|
|
||||||
|
|
||||||
|
//if (lIdx==0)
|
||||||
|
//printf("wgIdx = %d, start = %d, end=%d\n",wgIdx,start,end);
|
||||||
|
|
||||||
|
|
||||||
if( lIdx == 0 )
|
if( lIdx == 0 )
|
||||||
{
|
{
|
||||||
ldsCurBatch = 0;
|
ldsCurBatch = 0;
|
||||||
@@ -456,6 +468,9 @@ void BatchSolveKernelContact(__global Body* gBodies,
|
|||||||
{
|
{
|
||||||
if (gConstraints[idx].m_batchIdx == ldsCurBatch)
|
if (gConstraints[idx].m_batchIdx == ldsCurBatch)
|
||||||
{
|
{
|
||||||
|
//if (wgIdx==0 && lIdx==0)
|
||||||
|
//printf("solved wgIdx=%d, ldsCurBatch=%d idx=%d \n", wgIdx, ldsCurBatch,idx);
|
||||||
|
|
||||||
solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
|
solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );
|
||||||
|
|
||||||
idx+=64;
|
idx+=64;
|
||||||
@@ -465,6 +480,8 @@ void BatchSolveKernelContact(__global Body* gBodies,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
GROUP_LDS_BARRIER;
|
GROUP_LDS_BARRIER;
|
||||||
|
// if (wgIdx==0 && lIdx==0)
|
||||||
|
// printf("-----------------------\n");
|
||||||
if( lIdx == 0 )
|
if( lIdx == 0 )
|
||||||
{
|
{
|
||||||
ldsCurBatch++;
|
ldsCurBatch++;
|
||||||
|
|||||||
@@ -413,8 +413,8 @@ static const char* solveContactCL= \
|
|||||||
" __global int* gN,\n"
|
" __global int* gN,\n"
|
||||||
" __global int* gOffsets,\n"
|
" __global int* gOffsets,\n"
|
||||||
" int maxBatch,\n"
|
" int maxBatch,\n"
|
||||||
" int bIdx,\n"
|
" int cellBatch,\n"
|
||||||
" int nSplit\n"
|
" int4 nSplit\n"
|
||||||
" )\n"
|
" )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" //__local int ldsBatchIdx[WG_SIZE+1];\n"
|
" //__local int ldsBatchIdx[WG_SIZE+1];\n"
|
||||||
@@ -430,17 +430,29 @@ static const char* solveContactCL= \
|
|||||||
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\n"
|
"\n"
|
||||||
" int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"
|
" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n"
|
||||||
" int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"
|
" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n"
|
||||||
" int cellIdx = xIdx+yIdx*nSplit;\n"
|
" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n"
|
||||||
|
" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n"
|
||||||
|
" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n"
|
||||||
|
"\n"
|
||||||
|
" //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"
|
||||||
|
" //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"
|
||||||
|
" //int cellIdx = xIdx+yIdx*nSplit;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" if( gN[cellIdx] == 0 ) \n"
|
" if( gN[cellIdx] == 0 ) \n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
" const int start = gOffsets[cellIdx];\n"
|
" const int start = gOffsets[cellIdx];\n"
|
||||||
" const int end = start + gN[cellIdx];\n"
|
" const int end = start + gN[cellIdx];\n"
|
||||||
"\n"
|
"\n"
|
||||||
" \n"
|
" \n"
|
||||||
|
" //if (lIdx==0)\n"
|
||||||
|
" //printf(\"wgIdx = %d, start = %d, end=%d\n\",wgIdx,start,end);\n"
|
||||||
|
"\n"
|
||||||
|
" \n"
|
||||||
" if( lIdx == 0 )\n"
|
" if( lIdx == 0 )\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" ldsCurBatch = 0;\n"
|
" ldsCurBatch = 0;\n"
|
||||||
@@ -458,6 +470,9 @@ static const char* solveContactCL= \
|
|||||||
" {\n"
|
" {\n"
|
||||||
" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"
|
" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
|
" //if (wgIdx==0 && lIdx==0)\n"
|
||||||
|
" //printf(\"solved wgIdx=%d, ldsCurBatch=%d idx=%d \n\", wgIdx, ldsCurBatch,idx);\n"
|
||||||
|
" \n"
|
||||||
" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n"
|
||||||
"\n"
|
"\n"
|
||||||
" idx+=64;\n"
|
" idx+=64;\n"
|
||||||
@@ -467,6 +482,8 @@ static const char* solveContactCL= \
|
|||||||
" }\n"
|
" }\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" GROUP_LDS_BARRIER;\n"
|
" GROUP_LDS_BARRIER;\n"
|
||||||
|
" // if (wgIdx==0 && lIdx==0)\n"
|
||||||
|
" // printf(\"-----------------------\n\");\n"
|
||||||
" if( lIdx == 0 )\n"
|
" if( lIdx == 0 )\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" ldsCurBatch++;\n"
|
" ldsCurBatch++;\n"
|
||||||
|
|||||||
@@ -440,8 +440,8 @@ void BatchSolveKernelFriction(__global Body* gBodies,
|
|||||||
__global int* gN,
|
__global int* gN,
|
||||||
__global int* gOffsets,
|
__global int* gOffsets,
|
||||||
int maxBatch,
|
int maxBatch,
|
||||||
int bIdx,
|
int cellBatch,
|
||||||
int nSplit
|
int4 nSplit
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
//__local int ldsBatchIdx[WG_SIZE+1];
|
//__local int ldsBatchIdx[WG_SIZE+1];
|
||||||
@@ -457,9 +457,12 @@ void BatchSolveKernelFriction(__global Body* gBodies,
|
|||||||
//debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
|
//debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;
|
||||||
|
|
||||||
|
|
||||||
int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);
|
int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);
|
||||||
int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);
|
int remain= (wgIdx%((nSplit.x*nSplit.y)/4));
|
||||||
int cellIdx = xIdx+yIdx*nSplit;
|
int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);
|
||||||
|
int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);
|
||||||
|
int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);
|
||||||
|
|
||||||
|
|
||||||
if( gN[cellIdx] == 0 )
|
if( gN[cellIdx] == 0 )
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -442,8 +442,8 @@ static const char* solveFrictionCL= \
|
|||||||
" __global int* gN,\n"
|
" __global int* gN,\n"
|
||||||
" __global int* gOffsets,\n"
|
" __global int* gOffsets,\n"
|
||||||
" int maxBatch,\n"
|
" int maxBatch,\n"
|
||||||
" int bIdx,\n"
|
" int cellBatch,\n"
|
||||||
" int nSplit\n"
|
" int4 nSplit\n"
|
||||||
" )\n"
|
" )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" //__local int ldsBatchIdx[WG_SIZE+1];\n"
|
" //__local int ldsBatchIdx[WG_SIZE+1];\n"
|
||||||
@@ -459,9 +459,12 @@ static const char* solveFrictionCL= \
|
|||||||
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\n"
|
"\n"
|
||||||
" int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n"
|
" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n"
|
||||||
" int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n"
|
" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n"
|
||||||
" int cellIdx = xIdx+yIdx*nSplit;\n"
|
" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n"
|
||||||
|
" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n"
|
||||||
|
" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n"
|
||||||
|
"\n"
|
||||||
" \n"
|
" \n"
|
||||||
" if( gN[cellIdx] == 0 ) \n"
|
" if( gN[cellIdx] == 0 ) \n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
|
|||||||
@@ -441,6 +441,42 @@ void ReorderContactKernel(__global Contact4* in, __global Contact4* out, __globa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||||
|
void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)
|
||||||
|
{
|
||||||
|
int gIdx = GET_GLOBAL_IDX;
|
||||||
|
|
||||||
|
if( gIdx < nContacts )
|
||||||
|
{
|
||||||
|
int2 sd;
|
||||||
|
sd.x = contactsIn[gIdx].m_bodyAPtrAndSignBit;
|
||||||
|
sd.y = gIdx;
|
||||||
|
sortDataOut[gIdx] = sd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel
|
||||||
|
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||||
|
void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)
|
||||||
|
{
|
||||||
|
int gIdx = GET_GLOBAL_IDX;
|
||||||
|
|
||||||
|
if( gIdx < nContacts )
|
||||||
|
{
|
||||||
|
int2 sdIn;
|
||||||
|
sdIn = sortDataInOut[gIdx];
|
||||||
|
int2 sdOut;
|
||||||
|
sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;
|
||||||
|
sdOut.y = sdIn.y;
|
||||||
|
sortDataInOut[gIdx] = sdOut;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int m_nContacts;
|
int m_nContacts;
|
||||||
@@ -480,7 +516,7 @@ static __constant const int gridTable8x8[] =
|
|||||||
__kernel
|
__kernel
|
||||||
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
|
||||||
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut,
|
||||||
int nContacts,float scale,int N_SPLIT, int staticIdx)
|
int nContacts,float scale,int4 nSplit,int staticIdx)
|
||||||
|
|
||||||
{
|
{
|
||||||
int gIdx = GET_GLOBAL_IDX;
|
int gIdx = GET_GLOBAL_IDX;
|
||||||
@@ -499,9 +535,10 @@ int nContacts,float scale,int N_SPLIT, int staticIdx)
|
|||||||
#if USE_SPATIAL_BATCHING
|
#if USE_SPATIAL_BATCHING
|
||||||
int idx = (aStatic)? bIdx: aIdx;
|
int idx = (aStatic)? bIdx: aIdx;
|
||||||
float4 p = gBodies[idx].m_pos;
|
float4 p = gBodies[idx].m_pos;
|
||||||
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);
|
||||||
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);
|
int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);
|
||||||
int newIndex = (xIdx+zIdx*N_SPLIT);
|
int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);
|
||||||
|
int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);
|
||||||
|
|
||||||
#else//USE_SPATIAL_BATCHING
|
#else//USE_SPATIAL_BATCHING
|
||||||
#if USE_4x4_GRID
|
#if USE_4x4_GRID
|
||||||
|
|||||||
@@ -443,6 +443,42 @@ static const char* solverSetup2CL= \
|
|||||||
" }\n"
|
" }\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"__kernel\n"
|
||||||
|
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||||
|
"void SetDeterminismSortDataBodyA(__global Contact4* contactsIn, __global int2* sortDataOut, int nContacts)\n"
|
||||||
|
"{\n"
|
||||||
|
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||||
|
"\n"
|
||||||
|
" if( gIdx < nContacts )\n"
|
||||||
|
" {\n"
|
||||||
|
" int2 sd;\n"
|
||||||
|
" sd.x = contactsIn[gIdx].m_bodyAPtrAndSignBit;\n"
|
||||||
|
" sd.y = gIdx;\n"
|
||||||
|
" sortDataOut[gIdx] = sd;\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"__kernel\n"
|
||||||
|
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||||
|
"void SetDeterminismSortDataBodyB(__global Contact4* contactsIn, __global int2* sortDataInOut, int nContacts)\n"
|
||||||
|
"{\n"
|
||||||
|
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||||
|
"\n"
|
||||||
|
" if( gIdx < nContacts )\n"
|
||||||
|
" {\n"
|
||||||
|
" int2 sdIn;\n"
|
||||||
|
" sdIn = sortDataInOut[gIdx];\n"
|
||||||
|
" int2 sdOut;\n"
|
||||||
|
" sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n"
|
||||||
|
" sdOut.y = sdIn.y;\n"
|
||||||
|
" sortDataInOut[gIdx] = sdOut;\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
"typedef struct\n"
|
"typedef struct\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int m_nContacts;\n"
|
" int m_nContacts;\n"
|
||||||
@@ -482,7 +518,7 @@ static const char* solverSetup2CL= \
|
|||||||
"__kernel\n"
|
"__kernel\n"
|
||||||
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n"
|
||||||
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
"void SetSortDataKernel(__global Contact4* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n"
|
||||||
"int nContacts,float scale,int N_SPLIT, int staticIdx)\n"
|
"int nContacts,float scale,int4 nSplit,int staticIdx)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int gIdx = GET_GLOBAL_IDX;\n"
|
" int gIdx = GET_GLOBAL_IDX;\n"
|
||||||
@@ -501,9 +537,10 @@ static const char* solverSetup2CL= \
|
|||||||
"#if USE_SPATIAL_BATCHING \n"
|
"#if USE_SPATIAL_BATCHING \n"
|
||||||
" int idx = (aStatic)? bIdx: aIdx;\n"
|
" int idx = (aStatic)? bIdx: aIdx;\n"
|
||||||
" float4 p = gBodies[idx].m_pos;\n"
|
" float4 p = gBodies[idx].m_pos;\n"
|
||||||
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n"
|
||||||
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (N_SPLIT-1);\n"
|
" int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n"
|
||||||
" int newIndex = (xIdx+zIdx*N_SPLIT);\n"
|
" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n"
|
||||||
|
" int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n"
|
||||||
" \n"
|
" \n"
|
||||||
"#else//USE_SPATIAL_BATCHING\n"
|
"#else//USE_SPATIAL_BATCHING\n"
|
||||||
" #if USE_4x4_GRID\n"
|
" #if USE_4x4_GRID\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user