From 5ce0b3938f826728586441e4e6e8a2ab1eca3853 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Mon, 11 Nov 2013 21:15:06 -0800 Subject: [PATCH] Re-introduce GPU parallel Jacobi solver, called btGpuJacobiContactSolver. There are still some issues, but for basic scene's it works. To avoid confusion, rename GPU contact solver to btGpuContactSolver, and constraint (non-contact) solver to btPgsConstraintSolver. --- Demos3/GpuDemos/main_opengl3core.cpp | 2 +- Demos3/GpuDemos/rigidbody/ConcaveScene.cpp | 7 +- Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp | 36 + .../ConstraintSolver/b3SolverBody.h | 2 +- .../ConstraintSolver/b3SolverConstraint.h | 2 +- .../ConstraintSolver/b3TypedConstraint.h | 2 +- .../b3ConvexHullContact.cpp | 14 +- .../RigidBody/b3GpuJacobiContactSolver.cpp | 1381 +++++++++++++++++ .../RigidBody/b3GpuJacobiContactSolver.h | 62 + ...olver.cpp => b3GpuPgsConstraintSolver.cpp} | 24 +- ...obiSolver.h => b3GpuPgsConstraintSolver.h} | 12 +- ...gsSolver.cpp => b3GpuPgsContactSolver.cpp} | 18 +- ...ingPgsSolver.h => b3GpuPgsContactSolver.h} | 6 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 29 +- .../b3GpuRigidBodyPipelineInternalData.h | 6 +- src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h | 2 +- .../RigidBody/b3GpuSolverConstraint.h | 2 +- .../RigidBody/kernels/solverUtils.cl | 6 +- .../RigidBody/kernels/solverUtils.h | 2 +- 19 files changed, 1557 insertions(+), 58 deletions(-) create mode 100644 src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp create mode 100644 src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h rename src/Bullet3OpenCL/RigidBody/{b3GpuPgsJacobiSolver.cpp => b3GpuPgsConstraintSolver.cpp} (96%) rename src/Bullet3OpenCL/RigidBody/{b3GpuPgsJacobiSolver.h => b3GpuPgsConstraintSolver.h} (92%) rename src/Bullet3OpenCL/RigidBody/{b3GpuBatchingPgsSolver.cpp => b3GpuPgsContactSolver.cpp} (97%) rename src/Bullet3OpenCL/RigidBody/{b3GpuBatchingPgsSolver.h => b3GpuPgsContactSolver.h} (89%) diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index 54fc22541..a4102d246 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -84,7 +84,7 @@ enum }; b3AlignedObjectArray demoNames; -int selectedDemo = 1; +int selectedDemo = 0; GpuDemo::CreateFunc* allDemos[]= { //ConcaveCompound2Scene::MyCreateFunc, diff --git a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp index a12e8f3a1..df62cc15c 100644 --- a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp +++ b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp @@ -22,10 +22,9 @@ #include "../gwenUserInterface.h" #include "OpenGLWindow/GLInstanceGraphicsShape.h" #define CONCAVE_GAPX 14 -#define CONCAVE_GAPY 8 +#define CONCAVE_GAPY 5 #define CONCAVE_GAPZ 14 - GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(std::vector& shapes) { @@ -342,7 +341,9 @@ void ConcaveScene::createDynamicObjects(const ConstructionInfo& ci) float mass = 1; //b3Vector3 position(-2*ci.gapX+i*ci.gapX,25+j*ci.gapY,-2*ci.gapZ+k*ci.gapZ); - b3Vector3 position=b3MakeVector3(-(ci.arraySizeX/2)*CONCAVE_GAPX+i*CONCAVE_GAPX,3+j*CONCAVE_GAPY,-(ci.arraySizeZ/2)*CONCAVE_GAPZ+k*CONCAVE_GAPZ); + b3Vector3 position=b3MakeVector3(-(ci.arraySizeX/2)*CONCAVE_GAPX+i*CONCAVE_GAPX, + 23+j*CONCAVE_GAPY, + -(ci.arraySizeZ/2)*CONCAVE_GAPZ+k*CONCAVE_GAPZ); b3Quaternion orn(0,0,0,1); b3Vector4 color = colors[curColor]; diff --git a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp index 661c77da9..9c992d8df 100644 --- a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp @@ -240,6 +240,42 @@ void GpuConvexPlaneScene::createStaticEnvironment(const ConstructionInfo& ci) } +/* +void GpuConvexPlaneScene::createStaticEnvironment(const ConstructionInfo& ci) +{ + int strideInBytes = 9*sizeof(float); + int numVertices = sizeof(cube_vertices)/strideInBytes; + int numIndices = sizeof(cube_indices)/sizeof(int); + //int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); + int group=1; + int mask=1; + int index=0; + + + { + b3Vector4 scaling=b3MakeVector4(100,0.001,100,1); + + + //int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + b3Vector3 normal=b3MakeVector3(0,1,0); + float constant=0.f; + int colIndex = m_data->m_np->registerPlaneShape(normal,constant);//>registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + b3Vector3 position=b3MakeVector3(0,0,0); + + + + b3Quaternion orn(0,0,0,1); + + b3Vector4 color=b3MakeVector4(0,0,1,1); + + int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); + int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0.f,position,orn,colIndex,index,false); + + } + +} +*/ diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h b/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h index 2cb23bb49..0049317d9 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h +++ b/src/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h @@ -16,7 +16,7 @@ subject to the following restrictions: #ifndef B3_SOLVER_BODY_H #define B3_SOLVER_BODY_H -struct b3RigidBody; + #include "Bullet3Common/b3Vector3.h" #include "Bullet3Common/b3Matrix3x3.h" diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h b/src/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h index edeadb459..bce83d460 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h +++ b/src/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h @@ -16,7 +16,7 @@ subject to the following restrictions: #ifndef B3_SOLVER_CONSTRAINT_H #define B3_SOLVER_CONSTRAINT_H -struct b3RigidBody; + #include "Bullet3Common/b3Vector3.h" #include "Bullet3Common/b3Matrix3x3.h" //#include "b3JacobianEntry.h" diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h b/src/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h index 8069a556d..8b3147e6e 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h +++ b/src/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h @@ -111,7 +111,7 @@ public: int m_numConstraintRows,nub; }; - static b3RigidBody& getFixedBody(); + struct b3ConstraintInfo2 { // integrator parameters: frames per second (1/stepsize), default error diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index f8fd380e4..d927588c4 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -2815,6 +2815,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* } b3AlignedObjectArray oldHostContacts; + if (oldContacts->size()) { oldContacts->copyToHost(oldHostContacts); @@ -2897,11 +2898,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, - // oldHostContacts); + //int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, + oldHostContacts); if (contactIndex>=0) @@ -2926,6 +2927,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* { contactOut->copyFromHost(hostContacts); + } else + { + contactOut->resize(0); } return; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp new file mode 100644 index 000000000..3fe85b187 --- /dev/null +++ b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp @@ -0,0 +1,1381 @@ + +#include "b3GpuJacobiContactSolver.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 +class b3Vector3; +#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h" +#include "Bullet3Common/b3Logging.h" +#include "b3GpuConstraint4.h" +#include "Bullet3Common/shared/b3Int2.h" +#include "Bullet3Common/shared/b3Int4.h" +#define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" + + +struct b3GpuJacobiSolverInternalData +{ + //btRadixSort32CL* m_sort32; + //btBoundSearchCL* m_search; + b3PrefixScanCL* m_scan; + + b3OpenCLArray* m_bodyCount; + b3OpenCLArray* m_contactConstraintOffsets; + b3OpenCLArray* m_offsetSplitBodies; + + b3OpenCLArray* m_deltaLinearVelocities; + b3OpenCLArray* m_deltaAngularVelocities; + + b3AlignedObjectArray m_deltaLinearVelocitiesCPU; + b3AlignedObjectArray m_deltaAngularVelocitiesCPU; + + + + b3OpenCLArray* m_contactConstraints; + + b3FillCL* m_filler; + + + cl_kernel m_countBodiesKernel; + cl_kernel m_contactToConstraintSplitKernel; + cl_kernel m_clearVelocitiesKernel; + cl_kernel m_averageVelocitiesKernel; + cl_kernel m_updateBodyVelocitiesKernel; + cl_kernel m_solveContactKernel; + cl_kernel m_solveFrictionKernel; + + + +}; + + +b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) + :m_context(ctx), + m_device(device), + m_queue(queue) +{ + m_data = new b3GpuJacobiSolverInternalData; + m_data->m_scan = new b3PrefixScanCL(m_context,m_device,m_queue); + m_data->m_bodyCount = new b3OpenCLArray(m_context,m_queue); + m_data->m_filler = new b3FillCL(m_context,m_device,m_queue); + m_data->m_contactConstraintOffsets = new b3OpenCLArray(m_context,m_queue); + m_data->m_offsetSplitBodies = new b3OpenCLArray(m_context,m_queue); + m_data->m_contactConstraints = new b3OpenCLArray(m_context,m_queue); + m_data->m_deltaLinearVelocities = new b3OpenCLArray(m_context,m_queue); + m_data->m_deltaAngularVelocities = new b3OpenCLArray(m_context,m_queue); + + cl_int pErrNum; + const char* additionalMacros=""; + const char* solverUtilsSource = solverUtilsCL; + { + cl_program solverUtilsProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverUtilsSource, &pErrNum,additionalMacros, SOLVER_UTILS_KERNEL_PATH); + b3Assert(solverUtilsProg); + m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_countBodiesKernel); + + m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_contactToConstraintSplitKernel); + m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_clearVelocitiesKernel); + + m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_averageVelocitiesKernel); + + m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_updateBodyVelocitiesKernel); + + + m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_solveContactKernel ); + + m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros ); + b3Assert(m_data->m_solveFrictionKernel); + } + +} + + +b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver() +{ + clReleaseKernel(m_data->m_solveContactKernel); + clReleaseKernel(m_data->m_solveFrictionKernel); + clReleaseKernel(m_data->m_countBodiesKernel); + clReleaseKernel(m_data->m_contactToConstraintSplitKernel); + clReleaseKernel(m_data->m_averageVelocitiesKernel); + clReleaseKernel(m_data->m_updateBodyVelocitiesKernel); + clReleaseKernel(m_data->m_clearVelocitiesKernel ); + + delete m_data->m_deltaLinearVelocities; + delete m_data->m_deltaAngularVelocities; + delete m_data->m_contactConstraints; + delete m_data->m_offsetSplitBodies; + delete m_data->m_contactConstraintOffsets; + delete m_data->m_bodyCount; + delete m_data->m_filler; + delete m_data->m_scan; + delete m_data; +} + + + +b3Vector3 make_float4(float v) +{ + return b3MakeVector3 (v,v,v); +} + +b3Vector4 make_float4(float x,float y, float z, float w) +{ + return b3MakeVector4 (x,y,z,w); +} + + + static + inline + float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, + const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) + { + return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); + } + + + static + inline + void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, + b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) + { + linear = n; + angular0 = b3Cross(r0, n); + angular1 = -b3Cross(r1, n); + } + + +static __inline void solveContact(b3GpuConstraint4& cs, + const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, + const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) +{ + + + for(int ic=0; ic<4; ic++) + { + // dont necessary because this makes change to 0 + if( cs.m_jacCoeffInv[ic] == 0.f ) continue; + + { + b3Vector3 angular0, angular1, linear; + b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; + b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; + setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 ); + + float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, + linVelARO+dLinVelA, angVelARO+dAngVelA, linVelBRO+dLinVelB, angVelBRO+dAngVelB ) + cs.m_b[ic]; + rambdaDt *= cs.m_jacCoeffInv[ic]; + + { + float prevSum = cs.m_appliedRambdaDt[ic]; + float updated = prevSum; + updated += rambdaDt; + updated = b3Max( updated, minRambdaDt[ic] ); + updated = b3Min( updated, maxRambdaDt[ic] ); + rambdaDt = updated - prevSum; + cs.m_appliedRambdaDt[ic] = updated; + } + + b3Vector3 linImp0 = invMassA*linear*rambdaDt; + b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; + b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; + b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; +#ifdef _WIN32 + b3Assert(_finite(linImp0.getX())); + b3Assert(_finite(linImp1.getX())); +#endif + + if (invMassA) + { + dLinVelA += linImp0; + dAngVelA += angImp0; + } + if (invMassB) + { + dLinVelB += linImp1; + dAngVelB += angImp1; + } + } + } +} + + + +void solveContact3(b3GpuConstraint4* cs, + b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA, + b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB, + b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB) +{ + float minRambdaDt = 0; + float maxRambdaDt = FLT_MAX; + + for(int ic=0; ic<4; ic++) + { + if( cs->m_jacCoeffInv[ic] == 0.f ) continue; + + b3Vector3 angular0, angular1, linear; + b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr; + b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr; + setLinearAndAngular( cs->m_linear, r0, r1, linear, angular0, angular1 ); + + float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, + *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; + rambdaDt *= cs->m_jacCoeffInv[ic]; + + { + float prevSum = cs->m_appliedRambdaDt[ic]; + float updated = prevSum; + updated += rambdaDt; + updated = b3Max( updated, minRambdaDt ); + updated = b3Min( updated, maxRambdaDt ); + rambdaDt = updated - prevSum; + cs->m_appliedRambdaDt[ic] = updated; + } + + b3Vector3 linImp0 = invMassA*linear*rambdaDt; + b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; + b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; + b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; + + if (invMassA) + { + *dLinVelA += linImp0; + *dAngVelA += angImp0; + } + if (invMassB) + { + *dLinVelB += linImp1; + *dAngVelB += angImp1; + } + } +} + + +static inline void solveFriction(b3GpuConstraint4& cs, + const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, + const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) +{ + + b3Vector3 linVelA = linVelARO+dLinVelA; + b3Vector3 linVelB = linVelBRO+dLinVelB; + b3Vector3 angVelA = angVelARO+dAngVelA; + b3Vector3 angVelB = angVelBRO+dAngVelB; + + if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return; + const b3Vector3& center = (const b3Vector3&)cs.m_center; + + b3Vector3 n = -(const b3Vector3&)cs.m_linear; + + b3Vector3 tangent[2]; +#if 1 + b3PlaneSpace1 (n, tangent[0],tangent[1]); +#else + b3Vector3 r = cs.m_worldPos[0]-center; + tangent[0] = cross3( n, r ); + tangent[1] = cross3( tangent[0], n ); + tangent[0] = normalize3( tangent[0] ); + tangent[1] = normalize3( tangent[1] ); +#endif + + b3Vector3 angular0, angular1, linear; + b3Vector3 r0 = center - posA; + b3Vector3 r1 = center - posB; + for(int i=0; i<2; i++) + { + setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 ); + float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB ); + rambdaDt *= cs.m_fJacCoeffInv[i]; + + { + float prevSum = cs.m_fAppliedRambdaDt[i]; + float updated = prevSum; + updated += rambdaDt; + updated = b3Max( updated, minRambdaDt[i] ); + updated = b3Min( updated, maxRambdaDt[i] ); + rambdaDt = updated - prevSum; + cs.m_fAppliedRambdaDt[i] = updated; + } + + b3Vector3 linImp0 = invMassA*linear*rambdaDt; + b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; + b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; + b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; +#ifdef _WIN32 + b3Assert(_finite(linImp0.getX())); + b3Assert(_finite(linImp1.getX())); +#endif + if (invMassA) + { + dLinVelA += linImp0; + dAngVelA += angImp0; + } + if (invMassB) + { + dLinVelB += linImp1; + dAngVelB += angImp1; + } + } + + { // angular damping for point constraint + b3Vector3 ab = ( posB - posA ).normalized(); + b3Vector3 ac = ( center - posA ).normalized(); + if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) + { + float angNA = b3Dot( n, angVelA ); + float angNB = b3Dot( n, angVelB ); + + if (invMassA) + dAngVelA -= (angNA*0.1f)*n; + if (invMassB) + dAngVelB -= (angNB*0.1f)*n; + } + } + +} + + + + +float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1, + float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB) +{ + // linear0,1 are normlized + float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; + + float jmj1 = b3Dot(mtMul3(angular0,*invInertia0), angular0); + float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; + float jmj3 = b3Dot(mtMul3(angular1,*invInertia1), angular1); + return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); +// return -1.f/((jmj0+jmj1)+(jmj2+jmj3)); + +} + + +void setConstraint4( const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, + const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, + b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB, + b3GpuConstraint4* dstC ) +{ + dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); + dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); + + float dtInv = 1.f/dt; + for(int ic=0; ic<4; ic++) + { + dstC->m_appliedRambdaDt[ic] = 0.f; + } + dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; + + + dstC->m_linear = src->m_worldNormalOnB; + dstC->m_linear[3] = 0.7f ;//src->getFrictionCoeff() ); + for(int ic=0; ic<4; ic++) + { + b3Vector3 r0 = src->m_worldPosB[ic] - posA; + b3Vector3 r1 = src->m_worldPosB[ic] - posB; + + if( ic >= src->m_worldNormalOnB[3] )//npoints + { + dstC->m_jacCoeffInv[ic] = 0.f; + continue; + } + + float relVelN; + { + b3Vector3 linear, angular0, angular1; + setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1); + + dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB); + + relVelN = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB); + + float e = 0.f;//src->getRestituitionCoeff(); + if( relVelN*relVelN < 0.004f ) + { + e = 0.f; + } + + dstC->m_b[ic] = e*relVelN; + //float penetration = src->m_worldPos[ic].w; + dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift)*positionConstraintCoeff*dtInv; + dstC->m_appliedRambdaDt[ic] = 0.f; + } + } + + if( src->m_worldNormalOnB[3] > 0 )//npoints + { // prepare friction + b3Vector3 center = make_float4(0.f); + for(int i=0; im_worldNormalOnB[3]; i++) + center += src->m_worldPosB[i]; + center /= (float)src->m_worldNormalOnB[3]; + + b3Vector3 tangent[2]; + b3PlaneSpace1(src->m_worldNormalOnB,tangent[0],tangent[1]); + + b3Vector3 r[2]; + r[0] = center - posA; + r[1] = center - posB; + + for(int i=0; i<2; i++) + { + b3Vector3 linear, angular0, angular1; + setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1); + + dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, + invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB); + dstC->m_fAppliedRambdaDt[i] = 0.f; + } + dstC->m_center = center; + } + + for(int i=0; i<4; i++) + { + if( im_worldNormalOnB[3] ) + { + dstC->m_worldPos[i] = src->m_worldPosB[i]; + } + else + { + dstC->m_worldPos[i] = make_float4(0.f); + } + } +} + + + +void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyCL* gBodies, b3InertiaCL* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts, +float dt, +float positionDrift, +float positionConstraintCoeff, int gIdx, b3AlignedObjectArray& bodyCount +) +{ + //int gIdx = 0;//GET_GLOBAL_IDX; + + if( gIdx < nContacts ) + { + int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); + int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); + + b3Vector3 posA = gBodies[aIdx].m_pos; + b3Vector3 linVelA = gBodies[aIdx].m_linVel; + b3Vector3 angVelA = gBodies[aIdx].m_angVel; + float invMassA = gBodies[aIdx].m_invMass; + b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld;//.m_invInertia; + + b3Vector3 posB = gBodies[bIdx].m_pos; + b3Vector3 linVelB = gBodies[bIdx].m_linVel; + b3Vector3 angVelB = gBodies[bIdx].m_angVel; + float invMassB = gBodies[bIdx].m_invMass; + b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;//m_invInertia; + + b3GpuConstraint4 cs; + float countA = invMassA ? (float)(bodyCount[aIdx]) : 1; + float countB = invMassB ? (float)(bodyCount[bIdx]) : 1; + setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, + &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, + &cs ); + + + + cs.m_batchIdx = gContact[gIdx].m_batchIdx; + + gConstraintOut[gIdx] = cs; + } +} + + +void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyCL* bodies,b3InertiaCL* inertias,int numBodies,b3Contact4* manifoldPtr, int numManifolds,const b3JacobiSolverInfo& solverInfo) +{ + B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); + + b3AlignedObjectArray bodyCount; + bodyCount.resize(numBodies); + for (int i=0;i contactConstraintOffsets; + contactConstraintOffsets.resize(numManifolds); + + + for (int i=0;i offsetSplitBodies; + offsetSplitBodies.resize(numBodies); + unsigned int totalNumSplitBodies; + m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodies,&totalNumSplitBodies); + int numlastBody = bodyCount[numBodies-1]; + totalNumSplitBodies += numlastBody; + printf("totalNumSplitBodies = %d\n",totalNumSplitBodies); + + + + + + b3AlignedObjectArray contactConstraints; + contactConstraints.resize(numManifolds); + + for (int i=0;i deltaLinearVelocities; + b3AlignedObjectArray deltaAngularVelocities; + deltaLinearVelocities.resize(totalNumSplitBodies); + deltaAngularVelocities.resize(totalNumSplitBodies); + for (int i=0;i* bodies,b3OpenCLArray* inertias,b3OpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo) +{ + b3JacobiSolverInfo solverInfo; + solverInfo.m_fixedBodyIndex = static0Index; + + + B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); + + //int numBodies = bodies->size(); + int numManifolds = numContacts;//manifoldPtr->size(); + + { + B3_PROFILE("resize"); + m_data->m_bodyCount->resize(numBodies); + } + + unsigned int val=0; + b3Int2 val2; + val2.x=0; + val2.y=0; + + { + B3_PROFILE("m_filler"); + m_data->m_contactConstraintOffsets->resize(numManifolds); + m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); + + + m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); + } + + { + B3_PROFILE("m_countBodiesKernel"); + b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); + launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setConst(numManifolds); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.launch1D(numManifolds); + } + unsigned int totalNumSplitBodies=0; + { + B3_PROFILE("m_scan->execute"); + + m_data->m_offsetSplitBodies->resize(numBodies); + m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); + totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); + } + + { + B3_PROFILE("m_data->m_contactConstraints->resize"); + //int numContacts = manifoldPtr->size(); + m_data->m_contactConstraints->resize(numContacts); + } + + { + B3_PROFILE("contactToConstraintSplitKernel"); + b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); + launcher.setBuffer(contactBuf); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(inertiaBuf); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setConst(numContacts); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.launch1D( numContacts, 64 ); + + } + + + { + B3_PROFILE("m_data->m_deltaLinearVelocities->resize"); + m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); + m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); + } + + + + { + B3_PROFILE("m_clearVelocitiesKernel"); + b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); + launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launch.setConst(totalNumSplitBodies); + launch.launch1D(totalNumSplitBodies); + } + + + int maxIter = solverInfo.m_numIterations; + + for (int iter = 0;iterm_solveContactKernel ); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(inertiaBuf); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + + } + + + + { + B3_PROFILE("average velocities"); + b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + + } + + + { + B3_PROFILE("m_solveFrictionKernel"); + b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(inertiaBuf); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + + } + + + { + B3_PROFILE("average velocities"); + b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + + } + + + + } + + + { + B3_PROFILE("update body velocities"); + b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel); + launcher.setBuffer(bodyBuf); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + + +} + +#if 0 + +void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray* bodiesGPU,b3OpenCLArray* inertiasGPU,b3OpenCLArray* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo) +{ + + b3AlignedObjectArray bodiesCPU; + bodiesGPU->copyToHost(bodiesCPU); + b3AlignedObjectArray inertiasCPU; + inertiasGPU->copyToHost(inertiasCPU); + b3AlignedObjectArray manifoldPtrCPU; + manifoldPtrGPU->copyToHost(manifoldPtrCPU); + + int numBodiesCPU = bodiesGPU->size(); + int numManifoldsCPU = manifoldPtrGPU->size(); + B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed"); + + b3AlignedObjectArray bodyCount; + bodyCount.resize(numBodiesCPU); + for (int i=0;i contactConstraintOffsets; + contactConstraintOffsets.resize(numManifoldsCPU); + + + for (int i=0;i offsetSplitBodies; + offsetSplitBodies.resize(numBodiesCPU); + unsigned int totalNumSplitBodiesCPU; + m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU); + int numlastBody = bodyCount[numBodiesCPU-1]; + totalNumSplitBodiesCPU += numlastBody; + + int numBodies = bodiesGPU->size(); + int numManifolds = manifoldPtrGPU->size(); + + m_data->m_bodyCount->resize(numBodies); + + unsigned int val=0; + b3Int2 val2; + val2.x=0; + val2.y=0; + + { + B3_PROFILE("m_filler"); + m_data->m_contactConstraintOffsets->resize(numManifolds); + m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); + + + m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); + } + + { + B3_PROFILE("m_countBodiesKernel"); + b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); + launcher.setBuffer(manifoldPtrGPU->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setConst(numManifolds); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.launch1D(numManifolds); + } + + unsigned int totalNumSplitBodies=0; + m_data->m_offsetSplitBodies->resize(numBodies); + m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); + totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); + + if (totalNumSplitBodies != totalNumSplitBodiesCPU) + { + printf("error in totalNumSplitBodies!\n"); + } + + int numContacts = manifoldPtrGPU->size(); + m_data->m_contactConstraints->resize(numContacts); + + + { + B3_PROFILE("contactToConstraintSplitKernel"); + b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); + launcher.setBuffer(manifoldPtrGPU->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setConst(numContacts); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.launch1D( numContacts, 64 ); + clFinish(m_queue); + } + + + + b3AlignedObjectArray contactConstraints; + contactConstraints.resize(numManifoldsCPU); + + for (int i=0;i deltaLinearVelocities; + b3AlignedObjectArray deltaAngularVelocities; + deltaLinearVelocities.resize(totalNumSplitBodiesCPU); + deltaAngularVelocities.resize(totalNumSplitBodiesCPU); + for (int i=0;im_deltaLinearVelocities->resize(totalNumSplitBodies); + m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); + + + + { + B3_PROFILE("m_clearVelocitiesKernel"); + b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); + launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launch.setConst(totalNumSplitBodies); + launch.launch1D(totalNumSplitBodies); + } + + + ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + m_data->m_contactConstraints->copyToHost(contactConstraints); + m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); + m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + + for (int iter = 0;iterm_solveContactKernel ); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + clFinish(m_queue); + } + + + int i=0; + for( i=0; im_averageVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + //easy + for (int i=0;im_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); + //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + +#if 0 + + { + B3_PROFILE("m_solveFrictionKernel"); + b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + clFinish(m_queue); + } + + //solve friction + + for(int i=0; im_averageVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + //easy + for (int i=0;im_updateBodyVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + + //easy + for (int i=0;icopyFromHost(bodiesCPU); + + +} +#endif diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h new file mode 100644 index 000000000..3cb384d6a --- /dev/null +++ b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h @@ -0,0 +1,62 @@ + +#ifndef B3_GPU_JACOBI_CONTACT_SOLVER_H +#define B3_GPU_JACOBI_CONTACT_SOLVER_H +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" +//#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" + + +//struct b3InertiaCL; +//b3InertiaData + +class b3TypedConstraint; + +struct b3JacobiSolverInfo +{ + int m_fixedBodyIndex; + + float m_deltaTime; + float m_positionDrift; + float m_positionConstraintCoeff; + int m_numIterations; + + b3JacobiSolverInfo() + :m_fixedBodyIndex(0), + m_deltaTime(1./60.f), + m_positionDrift( 0.005f ), + m_positionConstraintCoeff( 0.99f ), + m_numIterations(7) + { + } +}; +class b3GpuJacobiContactSolver +{ +protected: + + struct b3GpuJacobiSolverInternalData* m_data; + + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + +public: + + b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); + virtual ~b3GpuJacobiContactSolver(); + + + void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); + void solveGroupHost(b3RigidBodyCL* bodies,b3InertiaCL* inertias,int numBodies,struct b3Contact4* manifoldPtr, int numManifolds,const b3JacobiSolverInfo& solverInfo); + //void solveGroupHost(btRigidBodyCL* bodies,b3InertiaData* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); + + //b3Scalar solveGroup(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias, int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); + + //void solveGroup(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); + //void solveGroupMixed(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); + +}; +#endif //B3_GPU_JACOBI_CONTACT_SOLVER_H + diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp similarity index 96% rename from src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp rename to src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp index 50c7250e2..95ce234cb 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp @@ -22,7 +22,7 @@ bool useGpuSolveJointConstraintRows=true; bool useGpuWriteBackVelocities = true; bool gpuBreakConstraints = true; -#include "b3GpuPgsJacobiSolver.h" +#include "b3GpuPgsConstraintSolver.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" @@ -119,7 +119,7 @@ b3Vector3 getVelocityInLocalPoint(b3RigidBodyCL* rb, const b3Vector3& rel_pos) -b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs) +b3GpuPgsConstraintSolver::b3GpuPgsConstraintSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs) { m_usePgs = usePgs; m_gpuData = new b3GpuPgsJacobiSolverInternalData(); @@ -165,7 +165,7 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, } -b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () +b3GpuPgsConstraintSolver::~b3GpuPgsConstraintSolver () { clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels); clReleaseKernel(m_gpuData->m_initSolverBodiesKernel); @@ -196,7 +196,7 @@ struct b3BatchConstraint static b3AlignedObjectArray batchConstraints; static b3AlignedObjectArray batches; -void b3GpuPgsJacobiSolver::recomputeBatches() +void b3GpuPgsConstraintSolver::recomputeBatches() { batches.clear(); } @@ -204,7 +204,7 @@ void b3GpuPgsJacobiSolver::recomputeBatches() -b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) +b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("GPU solveGroupCacheFriendlySetup"); batchConstraints.resize(numConstraints); @@ -655,7 +655,7 @@ void resolveSingleConstraintRowGeneric2( b3GpuSolverBody* body1, b3GpuSolverBod -void b3GpuPgsJacobiSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyCL* rb) +void b3GpuPgsConstraintSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyCL* rb) { solverBody->m_deltaLinearVelocity.setValue(0.f,0.f,0.f); @@ -674,12 +674,12 @@ void b3GpuPgsJacobiSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solver } -void b3GpuPgsJacobiSolver::averageVelocities() +void b3GpuPgsConstraintSolver::averageVelocities() { } -b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal) +b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal) { //only create the batches once. //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated @@ -856,7 +856,7 @@ static b3AlignedObjectArray curUsed; -inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) +inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { int sz = sizeof(b3BatchConstraint); @@ -991,7 +991,7 @@ inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, /// b3PgsJacobiSolver Sequentially applies impulses -b3Scalar b3GpuPgsJacobiSolver::solveGroup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, +b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints,int numConstraints, const b3ContactSolverInfo& infoGlobal) { @@ -1007,7 +1007,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroup(b3OpenCLArray* gpuBodie return 0.f; } -void b3GpuPgsJacobiSolver::solveJoints(int numBodies, b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, +void b3GpuPgsConstraintSolver::solveJoints(int numBodies, b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numConstraints, b3OpenCLArray* gpuConstraints) { b3ContactSolverInfo infoGlobal; @@ -1029,7 +1029,7 @@ void b3GpuPgsJacobiSolver::solveJoints(int numBodies, b3OpenCLArray testBodies; -b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) +b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("solveGroupCacheFriendlyFinish"); int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h similarity index 92% rename from src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h rename to src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h index ca73a48eb..fef4b6003 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h @@ -13,8 +13,8 @@ subject to the following restrictions: */ //Originally written by Erwin Coumans -#ifndef B3_GPU_PGS_JACOBI_SOLVER_H -#define B3_GPU_PGS_JACOBI_SOLVER_H +#ifndef B3_GPU_PGS_CONSTRAINT_SOLVER_H +#define B3_GPU_PGS_CONSTRAINT_SOLVER_H struct b3Contact4; struct b3ContactPoint; @@ -33,7 +33,7 @@ struct b3InertiaCL; #include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" #include "b3GpuGenericConstraint.h" -class b3GpuPgsJacobiSolver +class b3GpuPgsConstraintSolver { protected: int m_staticIdx; @@ -59,8 +59,8 @@ protected: void initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyCL* rb); public: - b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs); - virtual~b3GpuPgsJacobiSolver (); + b3GpuPgsConstraintSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs); + virtual~b3GpuPgsConstraintSolver (); virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal); virtual b3Scalar solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); @@ -75,4 +75,4 @@ public: void recomputeBatches(); }; -#endif //B3_GPU_PGS_JACOBI_SOLVER_H +#endif //B3_GPU_PGS_CONSTRAINT_SOLVER_H diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp similarity index 97% rename from src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp rename to src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp index 439bbcca5..5c49f605c 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp @@ -28,7 +28,7 @@ #endif -#include "b3GpuBatchingPgsSolver.h" +#include "b3GpuPgsContactSolver.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" @@ -111,7 +111,7 @@ struct b3GpuBatchingPgsSolverInternalData -b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity) +b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity) { m_debugOutput=0; m_data = new b3GpuBatchingPgsSolverInternalData; @@ -237,7 +237,7 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic } -b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver() +b3GpuPgsContactSolver::~b3GpuPgsContactSolver() { delete m_data->m_bodyBufferGPU; delete m_data->m_inertiaBufferGPU; @@ -296,7 +296,7 @@ struct b3ConstraintCfg -void b3GpuBatchingPgsSolver::solveContactConstraint( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, +void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, b3OpenCLArray* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations) { @@ -589,7 +589,7 @@ void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyCL* gBodies, b3SortData* gS -void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) +void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) { B3_PROFILE("solveContacts"); m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); @@ -1120,7 +1120,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } -void b3GpuBatchingPgsSolver::batchContacts( b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* n, b3OpenCLArray* offsets, int staticIdx ) +void b3GpuPgsContactSolver::batchContacts( b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* n, b3OpenCLArray* offsets, int staticIdx ) { } @@ -1139,7 +1139,7 @@ b3AlignedObjectArray sortData; b3AlignedObjectArray old; -inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) +inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch"); @@ -1267,7 +1267,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, b3AlignedObjectArray bodyUsed2; -inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) +inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch2"); @@ -1426,7 +1426,7 @@ b3AlignedObjectArray bodyUsed; b3AlignedObjectArray curUsed; -inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) +inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch3"); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h similarity index 89% rename from src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h rename to src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h index 8379588f3..521a3d2ab 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h @@ -8,7 +8,7 @@ #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" #include "b3GpuConstraint4.h" -class b3GpuBatchingPgsSolver +class b3GpuPgsContactSolver { protected: @@ -29,8 +29,8 @@ protected: public: - b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity); - virtual ~b3GpuBatchingPgsSolver(); + b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity); + virtual ~b3GpuPgsContactSolver(); void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index fcd95ae07..145fbf22a 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -33,6 +33,9 @@ subject to the following restrictions: #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" +//choice of contact solver +bool useJacobi = false; + //#define USE_CPU #ifdef USE_CPU bool useDbvt = true; @@ -52,15 +55,16 @@ subject to the following restrictions: #endif +#define TEST_OTHER_GPU_SOLVER 1 #ifdef TEST_OTHER_GPU_SOLVER -#include "b3GpuJacobiSolver.h" +#include "b3GpuJacobiContactSolver.h" #endif //TEST_OTHER_GPU_SOLVER #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h" +#include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h" -#include "b3GpuBatchingPgsSolver.h" +#include "b3GpuPgsContactSolver.h" #include "b3Solver.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" @@ -80,17 +84,17 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic m_data->m_queue = q; m_data->m_solver = new b3PgsJacobiSolver(true);//new b3PgsJacobiSolver(true); - m_data->m_gpuSolver = new b3GpuPgsJacobiSolver(ctx,device,q,true);//new b3PgsJacobiSolver(true); + m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx,device,q,true);//new b3PgsJacobiSolver(true); m_data->m_allAabbsGPU = new b3OpenCLArray(ctx,q,config.m_maxConvexBodies); m_data->m_overlappingPairsGPU = new b3OpenCLArray(ctx,q,config.m_maxBroadphasePairs); m_data->m_gpuConstraints = new b3OpenCLArray(ctx,q); #ifdef TEST_OTHER_GPU_SOLVER - m_data->m_solver3 = new b3GpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs); + m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx,device,q,config.m_maxBroadphasePairs); #endif // TEST_OTHER_GPU_SOLVER - m_data->m_solver2 = new b3GpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs); + m_data->m_solver2 = new b3GpuPgsContactSolver(ctx,device,q,config.m_maxBroadphasePairs); m_data->m_raycaster = new b3GpuRaycast(ctx,device,q); @@ -364,11 +368,13 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) { #ifdef TEST_OTHER_GPU_SOLVER + if (useJacobi) { bool useGpu = true; if (useGpu) { + bool forceHost = false; if (forceHost) { @@ -385,7 +391,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) { b3JacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo); + m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),solverInfo); } @@ -394,9 +400,14 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) gpuBodies.copyFromHost(hostBodies); } } else + + { + int static0Index = m_data->m_narrowphase->getStatic0Index(); b3JacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo); + //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo); + //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); + m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),m_data->m_config, static0Index); } } else { @@ -407,7 +418,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) b3AlignedObjectArray hostContacts; gpuContacts.copyToHost(hostContacts); { - m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); + //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); } gpuBodies.copyFromHost(hostBodies); } diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h index 533c94ce9..2baaaaf55 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h @@ -44,10 +44,10 @@ struct b3GpuRigidBodyPipelineInternalData class b3PgsJacobiSolver* m_solver; - class b3GpuPgsJacobiSolver* m_gpuSolver; + class b3GpuPgsConstraintSolver* m_gpuSolver; - class b3GpuBatchingPgsSolver* m_solver2; - class b3GpuJacobiSolver* m_solver3; + class b3GpuPgsContactSolver* m_solver2; + class b3GpuJacobiContactSolver* m_solver3; class b3GpuRaycast* m_raycaster; class b3GpuBroadphaseInterface* m_broadphaseSap; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h b/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h index 89074b2e4..f2a61801a 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h @@ -17,7 +17,7 @@ subject to the following restrictions: #ifndef B3_GPU_SOLVER_BODY_H #define B3_GPU_SOLVER_BODY_H -class b3RigidBody; + #include "Bullet3Common/b3Vector3.h" #include "Bullet3Common/b3Matrix3x3.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h b/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h index bf6309b61..60d235baa 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h @@ -17,7 +17,7 @@ subject to the following restrictions: #ifndef B3_GPU_SOLVER_CONSTRAINT_H #define B3_GPU_SOLVER_CONSTRAINT_H -class b3RigidBody; + #include "Bullet3Common/b3Vector3.h" #include "Bullet3Common/b3Matrix3x3.h" //#include "b3JacobianEntry.h" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl index 266e2cdfd..9c4ac4ce8 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl @@ -358,6 +358,8 @@ typedef struct float m_frictionCoeff; } Body; + + typedef struct { Matrix3x3 m_invInertia; @@ -385,6 +387,8 @@ typedef struct + + __kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) { int i = GET_GLOBAL_IDX; @@ -527,7 +531,7 @@ void solveContact(__global Constraint4* cs, float4 angular0, angular1, linear; float4 r0 = cs->m_worldPos[ic] - posA; float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); + setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index c0263f4fb..2476d1cab 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -506,7 +506,7 @@ static const char* solverUtilsCL= \ " float4 angular0, angular1, linear;\n" " float4 r0 = cs->m_worldPos[ic] - posA;\n" " float4 r1 = cs->m_worldPos[ic] - posB;\n" -" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" +" setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" " \n" " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n"