diff --git a/demo/gpudemo/GpuDemo.h b/demo/gpudemo/GpuDemo.h index e65296d39..ae36bbe94 100644 --- a/demo/gpudemo/GpuDemo.h +++ b/demo/gpudemo/GpuDemo.h @@ -37,9 +37,9 @@ public: :useOpenCL(true), preferredOpenCLPlatformIndex(-1), preferredOpenCLDeviceIndex(-1), - arraySizeX(41), - arraySizeY(44), - arraySizeZ(41), + arraySizeX(1), + arraySizeY(3), + arraySizeZ(1), m_useConcaveMesh(false), gapX(14.3), gapY(14.0), diff --git a/demo/gpudemo/rigidbody/GpuConvexScene.cpp b/demo/gpudemo/rigidbody/GpuConvexScene.cpp index 9767e51d1..9bc24d656 100644 --- a/demo/gpudemo/rigidbody/GpuConvexScene.cpp +++ b/demo/gpudemo/rigidbody/GpuConvexScene.cpp @@ -63,7 +63,7 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci) { for (int k=0;ksetCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(120); + m_instancingRenderer->setCameraDistance(20); } \ No newline at end of file diff --git a/opencl/gpu_rigidbody/host/btGpuConstraint4.h b/opencl/gpu_rigidbody/host/btGpuConstraint4.h index c00f0b23b..7095218b7 100644 --- a/opencl/gpu_rigidbody/host/btGpuConstraint4.h +++ b/opencl/gpu_rigidbody/host/btGpuConstraint4.h @@ -13,15 +13,13 @@ ATTRIBUTE_ALIGNED16(struct) btGpuConstraint4 float m_jacCoeffInv[4]; float m_b[4]; float m_appliedRambdaDt[4]; - float m_fJacCoeffInv[2]; // friction float m_fAppliedRambdaDt[2]; // friction unsigned int m_bodyA; unsigned int m_bodyB; - - unsigned int m_batchIdx; - unsigned int m_paddings[1]; + int m_batchIdx; + unsigned int m_paddings; inline void setFrictionCoeff(float value) { m_linear[3] = value; } inline float getFrictionCoeff() const { return m_linear[3]; } diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp index e6e026a24..f35584e71 100644 --- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp +++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp @@ -38,7 +38,7 @@ struct btGpuJacobiSolverInternalData cl_kernel m_countBodiesKernel; cl_kernel m_contactToConstraintSplitKernel; cl_kernel m_clearVelocitiesKernel; - + cl_kernel m_averageVelocitiesKernel; cl_kernel m_solveContactKernel; cl_kernel m_solveFrictionKernel; @@ -74,6 +74,12 @@ btGpuJacobiSolver::btGpuJacobiSolver(cl_context ctx, cl_device_id device, cl_com btAssert(m_data->m_contactToConstraintSplitKernel); m_data->m_clearVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); btAssert(m_data->m_clearVelocitiesKernel); + + m_data->m_averageVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); + btAssert(m_data->m_averageVelocitiesKernel); + + + m_data->m_solveContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros ); btAssert(m_data->m_solveContactKernel ); @@ -90,6 +96,7 @@ btGpuJacobiSolver::~btGpuJacobiSolver() clReleaseKernel(m_data->m_solveFrictionKernel); clReleaseKernel(m_data->m_countBodiesKernel); clReleaseKernel(m_data->m_contactToConstraintSplitKernel); + clReleaseKernel(m_data->m_averageVelocitiesKernel); clReleaseKernel(m_data->m_clearVelocitiesKernel ); delete m_data->m_deltaLinearVelocities; @@ -192,6 +199,54 @@ static __inline void solveContact(btGpuConstraint4& cs, +void solveContact3(btGpuConstraint4* cs, + btVector3* posAPtr, btVector3* linVelA, btVector3* angVelA, float invMassA, const btMatrix3x3& invInertiaA, + btVector3* posBPtr, btVector3* linVelB, btVector3* angVelB, float invMassB, const btMatrix3x3& invInertiaB, + btVector3* dLinVelA, btVector3* dAngVelA, btVector3* dLinVelB, btVector3* dAngVelB) +{ + float minRambdaDt = 0; + float maxRambdaDt = FLT_MAX; + + for(int ic=0; ic<4; ic++) + { + if( cs->m_jacCoeffInv[ic] == 0.f ) continue; + + btVector3 angular0, angular1, linear; + btVector3 r0 = cs->m_worldPos[ic] - *posAPtr; + btVector3 r1 = cs->m_worldPos[ic] - *posBPtr; + setLinearAndAngular( -cs->m_linear, r0, r1, linear, angular0, angular1 ); + + float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, + *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; + rambdaDt *= cs->m_jacCoeffInv[ic]; + + { + float prevSum = cs->m_appliedRambdaDt[ic]; + float updated = prevSum; + updated += rambdaDt; + updated = btMax( updated, minRambdaDt ); + updated = btMin( updated, maxRambdaDt ); + rambdaDt = updated - prevSum; + cs->m_appliedRambdaDt[ic] = updated; + } + + btVector3 linImp0 = invMassA*linear*rambdaDt; + btVector3 linImp1 = invMassB*(-linear)*rambdaDt; + btVector3 angImp0 = (invInertiaA* angular0)*rambdaDt; + btVector3 angImp1 = (invInertiaB* angular1)*rambdaDt; + + if (invMassA) + { + *dLinVelA += linImp0; + *dAngVelA += angImp0; + } + if (invMassB) + { + *dLinVelB += linImp1; + *dAngVelB += angImp1; + } + } +} static inline void solveFriction(btGpuConstraint4& cs, @@ -303,6 +358,8 @@ float calcJacCoeff(const btVector3& linear0, const btVector3& linear1, const btV float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; float jmj3 = btDot(mtMul3(angular1,*invInertia1), angular1); return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); +// return -1.f/((jmj0+jmj1)+(jmj2+jmj3)); + } @@ -426,8 +483,8 @@ float positionConstraintCoeff, int gIdx, btAlignedObjectArray& bod btMatrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;//m_invInertia; btGpuConstraint4 cs; - float countA = invMassA ? btScalar(bodyCount[aIdx]) : 1; - float countB = invMassB ? btScalar(bodyCount[bIdx]) : 1; + float countA = invMassA ? (float)(bodyCount[aIdx]) : 1; + float countB = invMassB ? (float)(bodyCount[bIdx]) : 1; setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, &cs ); @@ -499,7 +556,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti solverInfo.m_positionConstraintCoeff, i, bodyCount); } - int maxIter = 14; + int maxIter = 1; btAlignedObjectArray deltaLinearVelocities; @@ -592,7 +649,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti } - +#if 0 //solve friction @@ -674,7 +731,7 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti } } - +#endif } @@ -697,6 +754,422 @@ void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inerti } +void btGpuJacobiSolver::solveGroupMixedHost(btRigidBodyCL* bodiesCPU,btInertiaCL* inertiasCPU,int numBodies,btContact4* manifoldPtrCpu, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo) +{ + BT_PROFILE("btGpuJacobiSolver::solveGroup"); + + btAlignedObjectArray bodyCount; + + + + bool useHost = true; + btAlignedObjectArray contactConstraintOffsets; + btOpenCLArray manifoldGPU(m_context,m_queue); + manifoldGPU.resize(numManifolds); + manifoldGPU.copyFromHostPointer(manifoldPtrCpu,numManifolds); + + btAlignedObjectArray offsetSplitBodies; + unsigned int totalNumSplitBodies; + + btAlignedObjectArray contactConstraints; + contactConstraints.resize(numManifolds); + + + btOpenCLArray bodiesGPU(m_context,m_queue); + bodiesGPU.resize(numBodies); + + btOpenCLArray inertiasGPU(m_context,m_queue); + inertiasGPU.resize(numBodies); + + + if (useHost) + { + bodyCount.resize(numBodies); + for (int i=0;im_scan->executeHost(bodyCount,offsetSplitBodies,numBodies,&totalNumSplitBodies); + int numlastBody = bodyCount[numBodies-1]; + totalNumSplitBodies += numlastBody; + for (int i=0;isize(); + // int numManifolds = manifoldPtr->size(); + + m_data->m_bodyCount->resize(numBodies); + + unsigned int val=0; + btInt2 val2; + val2.x=0; + val2.y=0; + + { + BT_PROFILE("m_filler"); + m_data->m_contactConstraintOffsets->resize(numManifolds); + m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); + + + m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); + } + + { + BT_PROFILE("m_countBodiesKernel"); + btLauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); + launcher.setBuffer(manifoldGPU.getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setConst(numManifolds); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.launch1D(numManifolds); + } + m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); + m_data->m_bodyCount->copyToHost(bodyCount); + +// unsigned int totalNumSplitBodies=0; + m_data->m_offsetSplitBodies->resize(numBodies); + m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); + totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); + m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); + + int numContacts = manifoldGPU.size(); + m_data->m_contactConstraints->resize(numContacts); + + bodiesGPU.copyFromHostPointer(bodiesCPU,numBodies); + inertiasGPU.copyFromHostPointer(inertiasCPU,numBodies); + { + BT_PROFILE("contactToConstraintSplitKernel"); + btLauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); + launcher.setBuffer(manifoldGPU.getBufferCL()); + launcher.setBuffer(bodiesGPU.getBufferCL()); + launcher.setBuffer(inertiasGPU.getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setConst(numContacts); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.launch1D( numContacts, 64 ); + clFinish(m_queue); + } + + m_data->m_contactConstraints->copyToHost(contactConstraints); + + } + + + + + + + + + int maxIter = 1; + + + btAlignedObjectArray deltaLinearVelocities; + btAlignedObjectArray deltaAngularVelocities; + deltaLinearVelocities.resize(totalNumSplitBodies); + deltaAngularVelocities.resize(totalNumSplitBodies); + for (int i=0;im_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); + + + for (int iter = 0;iterm_solveContactKernel ); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodiesGPU.getBufferCL()); + launcher.setBuffer(inertiasGPU.getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + clFinish(m_queue); + } + + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + + + } + + bool useHostAverage=false; + if (useHostAverage) + { + //easy + for (int i=0;im_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); + + BT_PROFILE("average velocities"); + //__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, + //__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) + btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + launcher.setBuffer(bodiesGPU.getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + + } + +#if 0 + + //solve friction + + for(int i=0; i* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo) { @@ -774,11 +1247,17 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray* bodies,btOpenC launch.launch1D(totalNumSplitBodies); } - int maxIter = 4; + int maxIter = 14; for (int iter = 0;iterm_solveContactKernel ); launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); @@ -793,10 +1272,25 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray* bodies,btOpenC launcher.setConst(solverInfo.m_positionConstraintCoeff); launcher.setConst(solverInfo.m_fixedBodyIndex); launcher.setConst(numManifolds); + launcher.launch1D(numManifolds); clFinish(m_queue); } + { + BT_PROFILE("average velocities"); + //__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, + //__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) + btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + launcher.setBuffer(bodies->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } /* for(int i=0; i* bodies,btOpenC } + btAlignedObjectArray dLinVel; + btAlignedObjectArray dAngVel; + m_data->m_deltaLinearVelocities->copyToHost(dLinVel); + m_data->m_deltaAngularVelocities->copyToHost(dAngVel); + + btAlignedObjectArray bodiesCPU; + bodies->copyToHost(bodiesCPU); + btAlignedObjectArray bodyCountCPU; + m_data->m_bodyCount->copyToHost(bodyCountCPU); + btAlignedObjectArray offsetSplitBodiesCPU; + m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodiesCPU); + + for (int i=0;icopyFromHost(bodiesCPU); + + printf("."); /* //easy diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h index c1bcc429f..a38a33d13 100644 --- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h +++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h @@ -45,6 +45,7 @@ public: void solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); void solveGroup(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); + void solveGroupMixedHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); }; #endif //BT_GPU_JACOBI_SOLVER_H diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp index 98faf4f34..3a231ca96 100644 --- a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp +++ b/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp @@ -117,7 +117,7 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime) bool useGpu = true; if (useGpu) { - bool forceHost = false; + bool forceHost = true; if (forceHost) { btAlignedObjectArray hostBodies; @@ -133,7 +133,10 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime) { btJacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo); +// m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo); + m_data->m_solver3->solveGroupMixedHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),0,0,solverInfo); + + } { BT_PROFILE("copyFromHost"); diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.cl b/opencl/gpu_rigidbody/kernels/solverUtils.cl index 5fe68a5e2..514970a9d 100644 --- a/opencl/gpu_rigidbody/kernels/solverUtils.cl +++ b/opencl/gpu_rigidbody/kernels/solverUtils.cl @@ -371,9 +371,8 @@ typedef struct u32 m_bodyA; u32 m_bodyB; - int m_batchIdx; - u32 m_paddings[1]; + u32 m_paddings; } Constraint4; typedef struct @@ -424,6 +423,37 @@ __kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global f } +__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, +__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) +{ + int i = GET_GLOBAL_IDX; + if (im_linear, r0, r1, &linear, &angular0, &angular1 ); float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic]; + *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; rambdaDt *= cs->m_jacCoeffInv[ic]; { @@ -520,16 +550,25 @@ void solveContact(__global Constraint4* cs, float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - *linVelA += linImp0; - *angVelA += angImp0; - *linVelB += linImp1; - *angVelB += angImp1; + if (invMassA) + { + *dLinVelA += linImp0; + *dAngVelA += angImp0; + } + if (invMassB) + { + *dLinVelB += linImp1; + *dAngVelB += angImp1; + } } } +// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); + + void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, -__global int2* contactConstraintOffsets,__global int* offsetSplitBodies, +__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) { @@ -585,7 +624,7 @@ __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) deltaLinearVelocities[splitIndexA] = dLinVelA; deltaAngularVelocities[splitIndexA] = dAngVelA; } - if (gBodies[bIdx].m_invMass) + if (invMassB) { deltaLinearVelocities[splitIndexB] = dLinVelB; deltaAngularVelocities[splitIndexB] = dAngVelB; @@ -595,7 +634,7 @@ __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) __kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , -__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, +__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds ) { @@ -615,7 +654,7 @@ __kernel void SolveFrictionJacobiKernel() void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, + __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, Constraint4* dstC ) { dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); @@ -648,7 +687,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1); dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ); + invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB); relVelN = calcRelVel(linear, -linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB); @@ -683,7 +722,7 @@ void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVe setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ); + invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB); dstC->m_fAppliedRambdaDt[i] = 0.f; } dstC->m_center = center; @@ -734,9 +773,12 @@ float positionConstraintCoeff Constraint4 cs; + float countA = invMassA ? (float)bodyCount[aIdx] : 1; + float countB = invMassB ? (float)bodyCount[bIdx] : 1; + setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, - &cs ); + &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, + &cs ); cs.m_batchIdx = gContact[gIdx].m_batchIdx; diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.h b/opencl/gpu_rigidbody/kernels/solverUtils.h index c7ea815de..732da3ec3 100644 --- a/opencl/gpu_rigidbody/kernels/solverUtils.h +++ b/opencl/gpu_rigidbody/kernels/solverUtils.h @@ -373,9 +373,8 @@ static const char* solverUtilsCL= \ "\n" " u32 m_bodyA;\n" " u32 m_bodyB;\n" -"\n" " int m_batchIdx;\n" -" u32 m_paddings[1];\n" +" u32 m_paddings;\n" "} Constraint4;\n" "\n" "typedef struct\n" @@ -426,6 +425,37 @@ static const char* solverUtilsCL= \ "}\n" "\n" "\n" +"__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" +"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" +"{\n" +" int i = GET_GLOBAL_IDX;\n" +" if (im_linear, r0, r1, &linear, &angular0, &angular1 );\n" "\n" " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" -" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" +" *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" " rambdaDt *= cs->m_jacCoeffInv[ic];\n" "\n" " {\n" @@ -522,16 +552,25 @@ static const char* solverUtilsCL= \ " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" "\n" -" *linVelA += linImp0;\n" -" *angVelA += angImp0;\n" -" *linVelB += linImp1;\n" -" *angVelB += angImp1;\n" +" if (invMassA)\n" +" {\n" +" *dLinVelA += linImp0;\n" +" *dAngVelA += angImp0;\n" +" }\n" +" if (invMassB)\n" +" {\n" +" *dLinVelB += linImp1;\n" +" *dAngVelB += angImp1;\n" +" }\n" " }\n" "}\n" "\n" "\n" +"// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" +"\n" +"\n" "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" -"__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,\n" +"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" "{\n" "\n" @@ -587,7 +626,7 @@ static const char* solverUtilsCL= \ " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" " } \n" -" if (gBodies[bIdx].m_invMass)\n" +" if (invMassB)\n" " {\n" " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" @@ -597,7 +636,7 @@ static const char* solverUtilsCL= \ "\n" "\n" "__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" -"__global int2* contactConstraintOffsets,__global int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" +"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" "float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" ")\n" "{\n" @@ -617,7 +656,7 @@ static const char* solverUtilsCL= \ "\n" "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,\n" +" __global Contact4* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" " Constraint4* dstC )\n" "{\n" " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" @@ -650,7 +689,7 @@ static const char* solverUtilsCL= \ " setLinearAndAngular(src->m_worldNormal, r0, r1, &linear, &angular0, &angular1);\n" "\n" " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" "\n" " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" " linVelA, angVelA, linVelB, angVelB);\n" @@ -685,7 +724,7 @@ static const char* solverUtilsCL= \ " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" "\n" " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" +" invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" " }\n" " dstC->m_center = center;\n" @@ -736,9 +775,12 @@ static const char* solverUtilsCL= \ "\n" " Constraint4 cs;\n" "\n" +" float countA = invMassA ? (float)bodyCount[aIdx] : 1;\n" +" float countB = invMassB ? (float)bodyCount[bIdx] : 1;\n" +"\n" " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" -" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" -" &cs );\n" +" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" +" &cs );\n" " \n" " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" "\n"