Work towards a GPU joint solver (non-contact constraints); not working yet.

This commit is contained in:
erwin coumans
2013-07-06 13:08:58 -07:00
parent 01e5472f9f
commit 3a6e1ccff4
5 changed files with 121 additions and 33 deletions

View File

@@ -16,15 +16,19 @@ premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Narrowpha
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/updateAabbsKernel.h" --stringname="updateAabbsKernelCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h" --stringname="updateAabbsKernelCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverSetup.h" --stringname="solverSetupCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h" --stringname="solverSetupCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverUtils.h" --stringname="solverUtilsCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h" --stringname="solverUtilsCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveContact.h" --stringname="solveContactCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.h" --stringname="solveContactCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveFriction.h" --stringname="solveFrictionCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h" --stringname="solveFrictionCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h" --stringname="solveConstraintRowsCL" stringify
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" --headerfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" --stringname="rayCastKernelCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" --headerfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" --stringname="rayCastKernelCL" stringify

View File

@@ -38,7 +38,8 @@ void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc)
b3s_errorMessageFunc = errorMessageFunc; b3s_errorMessageFunc = errorMessageFunc;
} }
#define B3_MAX_DEBUG_STRING_LENGTH 2048 //#define B3_MAX_DEBUG_STRING_LENGTH 2048
#define B3_MAX_DEBUG_STRING_LENGTH 8192
void b3OutputPrintfVarArgsInternal(const char *str, ...) void b3OutputPrintfVarArgsInternal(const char *str, ...)

View File

@@ -7,21 +7,60 @@
#include "Bullet3Common/b3AlignedObjectArray.h" #include "Bullet3Common/b3AlignedObjectArray.h"
#include <string.h> //for memset #include <string.h> //for memset
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (bool usePgs)
#include "Bullet3OpenCL/RigidBody/kernels/jointSolver.h" //solveConstraintRowsCL
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#define B3_JOINT_SOLVER_PATH "src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl"
// Private state of b3GpuPgsJacobiSolver: the OpenCL handles handed to its
// constructor plus the kernel that the constructor builds.
struct b3GpuPgsJacobiSolverInternalData
{
// OpenCL context, device and command queue, stored exactly as passed to the
// solver's constructor. The solver's destructor does not release these.
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_queue;
// Kernel "solveJointConstraintRows"; created in the solver's constructor and
// released with clReleaseKernel in its destructor.
cl_kernel m_solveJointConstraintRowsKernels;
};
// Constructs the GPU joint solver: forwards usePgs to the b3PgsJacobiSolver base,
// stores the caller's OpenCL handles and builds the "solveJointConstraintRows" kernel.
//   ctx, device, queue - OpenCL context, device and command queue; stored in m_gpuData.
//   usePgs             - passed unchanged to the b3PgsJacobiSolver constructor.
b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs)
:b3PgsJacobiSolver (usePgs) :b3PgsJacobiSolver (usePgs)
{ {
// Allocated here; deleted in the destructor.
m_gpuData = new b3GpuPgsJacobiSolverInternalData();
m_gpuData->m_context = ctx;
m_gpuData->m_device = device;
m_gpuData->m_queue = queue;
cl_int errNum=0;
{
// Compile the program from the stringified kernel source solveConstraintRowsCL.
// B3_JOINT_SOLVER_PATH is handed over as the path of the original .cl file;
// how b3OpenCLUtils uses that path is not visible here - TODO confirm.
cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH);
// NOTE(review): compile/kernel failures are only checked through b3Assert;
// there is no other error handling on this path.
b3Assert(errNum==CL_SUCCESS);
m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device,solveConstraintRowsCL, "solveJointConstraintRows",&errNum,prog);
b3Assert(errNum==CL_SUCCESS);
// The local program handle is released as soon as the kernel has been created.
clReleaseProgram(prog);
}
} }
// Releases the joint-solver kernel and frees the internal data allocated by the
// constructor. The context, device and queue stored in m_gpuData are not released here.
b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver ()
{ {
// Release the kernel before deleting the struct that holds its handle.
clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels);
delete m_gpuData;
} }
struct b3BatchConstraint struct b3BatchConstraint
{ {
int m_bodyAPtrAndSignBit; int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit; int m_bodyBPtrAndSignBit;
int m_offset; int m_constraintRowOffset;
short int m_numConstraintRows; short int m_numConstraintRows;
short int m_batchId; short int m_batchId;
@@ -154,7 +193,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodie
} }
batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows; batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows;
batchConstraints[i].m_offset = totalNumRows; batchConstraints[i].m_constraintRowOffset = totalNumRows;
totalNumRows += info1.m_numConstraintRows; totalNumRows += info1.m_numConstraintRows;
} }
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
@@ -409,38 +448,76 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstrai
} }
} }
int maxIterations = infoGlobal.m_numIterations; int maxIterations = infoGlobal.m_numIterations;
bool useBatching = false; bool useBatching = true;
if (useBatching ) if (useBatching )
{ {
b3OpenCLArray<b3SolverConstraint> gpuSolverConstraintRows(m_gpuData->m_context,m_gpuData->m_queue);
gpuSolverConstraintRows.copyFromHost(m_tmpSolverNonContactConstraintPool);
b3OpenCLArray<b3SolverBody> gpuSolverBodies(m_gpuData->m_context,m_gpuData->m_queue);
gpuSolverBodies.copyFromHost(m_tmpSolverBodyPool);
// gpuSolverBodies.copyToHost(m_tmpSolverBodyPool);
b3OpenCLArray<b3BatchConstraint> gpuBatchConstraints(m_gpuData->m_context,m_gpuData->m_queue);
gpuBatchConstraints.copyFromHost(batchConstraints);
b3OpenCLArray<b3SolverConstraint> gpuConstraintRows(m_gpuData->m_context,m_gpuData->m_queue);
gpuConstraintRows.copyFromHost(m_tmpSolverNonContactConstraintPool);
for ( int iteration = 0 ; iteration< maxIterations ; iteration++) for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
{ {
int bc = 0; int batchOffset = 0;
int batchConstraintOffset=0; int constraintOffset=0;
int numBatches = batches.size(); int numBatches = batches.size();
for (int bb=0;bb<numBatches;bb++) for (int bb=0;bb<numBatches;bb++)
{ {
int localConstraintIndex=0;
int numConstraintsInBatch = batches[bb]; int numConstraintsInBatch = batches[bb];
bool useGpu=false;
if (useGpu)
{
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels);
launcher.setBuffer(gpuSolverBodies.getBufferCL());
launcher.setBuffer(gpuBatchConstraints.getBufferCL());
launcher.setBuffer(gpuConstraintRows.getBufferCL());
launcher.setConst(batchOffset);
launcher.setConst(constraintOffset);
launcher.setConst(numConstraintsInBatch);
launcher.launch1D(numConstraintsInBatch);
} else
{
for (int b=0;b<numConstraintsInBatch;b++) for (int b=0;b<numConstraintsInBatch;b++)
{ {
const b3BatchConstraint& c = batchConstraints[bc+b]; const b3BatchConstraint& c = batchConstraints[batchOffset+b];
b3Assert(c.m_batchId==bb); b3Assert(c.m_batchId==bb);
//can be done in parallel... //can be done in parallel...
for (int jj=0;jj<c.m_numConstraintRows;jj++) for (int jj=0;jj<c.m_numConstraintRows;jj++)
{ {
b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[batchConstraintOffset+localConstraintIndex]; //
resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[c.m_constraintRowOffset+jj];
localConstraintIndex++; // resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
} }
} }
bc+=numConstraintsInBatch;
batchConstraintOffset+=numConstraintsInBatch;
} }
batchOffset+=numConstraintsInBatch;
constraintOffset+=numConstraintsInBatch;
} }
}//for (int iteration...
gpuSolverBodies.copyToHost(m_tmpSolverBodyPool);
clFinish(m_gpuData->m_queue);
printf(",,\n");
} else } else
{ {

View File

@@ -2,11 +2,16 @@
#define B3_GPU_PGS_JACOBI_SOLVER_H #define B3_GPU_PGS_JACOBI_SOLVER_H
#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
class b3GpuPgsJacobiSolver : public b3PgsJacobiSolver class b3GpuPgsJacobiSolver : public b3PgsJacobiSolver
{ {
int m_staticIdx; int m_staticIdx;
struct b3GpuPgsJacobiSolverInternalData* m_gpuData;
public: public:
b3GpuPgsJacobiSolver (bool usePgs); b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs);
virtual~b3GpuPgsJacobiSolver (); virtual~b3GpuPgsJacobiSolver ();
virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);

View File

@@ -46,7 +46,7 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
m_data->m_device = device; m_data->m_device = device;
m_data->m_queue = q; m_data->m_queue = q;
m_data->m_solver = new b3GpuPgsJacobiSolver(true);//new b3PgsJacobiSolver(true); m_data->m_solver = new b3GpuPgsJacobiSolver(ctx,device,q,true);//new b3PgsJacobiSolver(true);
m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies); m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs); m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
@@ -237,7 +237,8 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints); m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints);
} }
gpuBodies.copyFromHost(hostBodies); //gpuBodies.copyFromHost(hostBodies);
printf("...\n");
} }
if (numContacts) if (numContacts)