From 3a6e1ccff46c3b26c83be42e79f91fbe216a9a21 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Sat, 6 Jul 2013 13:08:58 -0700 Subject: [PATCH] work towards GPU joint solver (non-contact constraints), not working yet. --- build3/stringify.bat | 22 ++-- src/Bullet3Common/b3Logging.cpp | 3 +- .../RigidBody/b3GpuPgsJacobiSolver.cpp | 117 +++++++++++++++--- .../RigidBody/b3GpuPgsJacobiSolver.h | 7 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 5 +- 5 files changed, 121 insertions(+), 33 deletions(-) diff --git a/build3/stringify.bat b/build3/stringify.bat index 027859ba5..670db8ed5 100644 --- a/build3/stringify.bat +++ b/build3/stringify.bat @@ -16,15 +16,19 @@ premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Narrowpha premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" --headerfile="../src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/updateAabbsKernel.h" --stringname="updateAabbsKernelCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverSetup.h" --stringname="solverSetupCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solverUtils.h" --stringname="solverUtilsCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveContact.h" --stringname="solveContactCL" stringify -premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveFriction.h" --stringname="solveFrictionCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h" --stringname="updateAabbsKernelCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h" --stringname="solverSetupCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h" --stringname="solverUtilsCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solveContact.h" --stringname="solveContactCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h" --stringname="solveFrictionCL" stringify + +premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" --headerfile="../src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h" --stringname="solveConstraintRowsCL" stringify + + premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" --headerfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" --stringname="rayCastKernelCL" stringify diff --git a/src/Bullet3Common/b3Logging.cpp b/src/Bullet3Common/b3Logging.cpp index 5c647d313..b13728e4e 100644 --- a/src/Bullet3Common/b3Logging.cpp +++ b/src/Bullet3Common/b3Logging.cpp @@ -38,7 +38,8 @@ void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc) b3s_errorMessageFunc = errorMessageFunc; } -#define B3_MAX_DEBUG_STRING_LENGTH 2048 +//#define B3_MAX_DEBUG_STRING_LENGTH 2048 +#define B3_MAX_DEBUG_STRING_LENGTH 8192 void b3OutputPrintfVarArgsInternal(const char *str, ...) diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp index a2f236754..f964a61ae 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -7,21 +7,60 @@ #include "Bullet3Common/b3AlignedObjectArray.h" #include //for memset #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (bool usePgs) + + +#include "Bullet3OpenCL/RigidBody/kernels/jointSolver.h" //solveConstraintRowsCL +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" + +#define B3_JOINT_SOLVER_PATH "src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" + + +struct b3GpuPgsJacobiSolverInternalData +{ + + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + cl_kernel m_solveJointConstraintRowsKernels; + + +}; +b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs) :b3PgsJacobiSolver (usePgs) { + m_gpuData = new b3GpuPgsJacobiSolverInternalData(); + m_gpuData->m_context = ctx; + m_gpuData->m_device = device; + m_gpuData->m_queue = queue; + + cl_int errNum=0; + + { + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH); + b3Assert(errNum==CL_SUCCESS); + m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device,solveConstraintRowsCL, "solveJointConstraintRows",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); + clReleaseProgram(prog); + } + + } b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () { + clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels); + + delete m_gpuData; } struct b3BatchConstraint { int m_bodyAPtrAndSignBit; int m_bodyBPtrAndSignBit; - int m_offset; + int m_constraintRowOffset; short int m_numConstraintRows; short int m_batchId; @@ -154,7 +193,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodie } batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows; - batchConstraints[i].m_offset = totalNumRows; + batchConstraints[i].m_constraintRowOffset = totalNumRows; totalNumRows += info1.m_numConstraintRows; } m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); @@ -409,38 +448,76 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstrai } } int maxIterations = infoGlobal.m_numIterations; - bool useBatching = false; + bool useBatching = true; if (useBatching ) { + b3OpenCLArray gpuSolverConstraintRows(m_gpuData->m_context,m_gpuData->m_queue); + gpuSolverConstraintRows.copyFromHost(m_tmpSolverNonContactConstraintPool); + + b3OpenCLArray gpuSolverBodies(m_gpuData->m_context,m_gpuData->m_queue); + gpuSolverBodies.copyFromHost(m_tmpSolverBodyPool); +// gpuSolverBodies.copyToHost(m_tmpSolverBodyPool); + + b3OpenCLArray gpuBatchConstraints(m_gpuData->m_context,m_gpuData->m_queue); + gpuBatchConstraints.copyFromHost(batchConstraints); + + b3OpenCLArray gpuConstraintRows(m_gpuData->m_context,m_gpuData->m_queue); + gpuConstraintRows.copyFromHost(m_tmpSolverNonContactConstraintPool); + + for ( int iteration = 0 ; iteration< maxIterations ; iteration++) { - int bc = 0; - int batchConstraintOffset=0; + int batchOffset = 0; + int constraintOffset=0; int numBatches = batches.size(); for (int bb=0;bbm_queue,m_gpuData->m_solveJointConstraintRowsKernels); + launcher.setBuffer(gpuSolverBodies.getBufferCL()); + launcher.setBuffer(gpuBatchConstraints.getBufferCL()); + launcher.setBuffer(gpuConstraintRows.getBufferCL()); + launcher.setConst(batchOffset); + launcher.setConst(constraintOffset); + launcher.setConst(numConstraintsInBatch); + + launcher.launch1D(numConstraintsInBatch); + + } else + { + + for (int b=0;bm_queue); + printf(",,\n"); + } else { diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h index 60cc909af..5ef255b64 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h @@ -2,11 +2,16 @@ #define B3_GPU_PGS_JACOBI_SOLVER_H #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" + + class b3GpuPgsJacobiSolver : public b3PgsJacobiSolver { int m_staticIdx; + struct b3GpuPgsJacobiSolverInternalData* m_gpuData; + public: - b3GpuPgsJacobiSolver (bool usePgs); + b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs); virtual~b3GpuPgsJacobiSolver (); virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index ef1b2c89b..0fa1f26ec 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -46,7 +46,7 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic m_data->m_device = device; m_data->m_queue = q; - m_data->m_solver = new b3GpuPgsJacobiSolver(true);//new b3PgsJacobiSolver(true); + m_data->m_solver = new b3GpuPgsJacobiSolver(ctx,device,q,true);//new b3PgsJacobiSolver(true); m_data->m_allAabbsGPU = new b3OpenCLArray(ctx,q,config.m_maxConvexBodies); m_data->m_overlappingPairsGPU = new b3OpenCLArray(ctx,q,config.m_maxBroadphasePairs); @@ -237,7 +237,8 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints); } - gpuBodies.copyFromHost(hostBodies); + //gpuBodies.copyFromHost(hostBodies); + printf("...\n"); } if (numContacts)