diff --git a/demo/gpudemo/GpuDemo.h b/demo/gpudemo/GpuDemo.h index fda035a6b..bd3b1e433 100644 --- a/demo/gpudemo/GpuDemo.h +++ b/demo/gpudemo/GpuDemo.h @@ -39,7 +39,7 @@ public: preferredOpenCLPlatformIndex(-1), preferredOpenCLDeviceIndex(-1), arraySizeX(1), - arraySizeY(5), + arraySizeY(2), arraySizeZ(1), m_useConcaveMesh(false), gapX(14.3), diff --git a/demo/gpudemo/rigidbody/GpuSphereScene.cpp b/demo/gpudemo/rigidbody/GpuSphereScene.cpp index 9d1edcc35..5db426007 100644 --- a/demo/gpudemo/rigidbody/GpuSphereScene.cpp +++ b/demo/gpudemo/rigidbody/GpuSphereScene.cpp @@ -123,7 +123,7 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) mass=0.f; //btVector3 position((j&1)+i*2.2,2+j*2.,(j&1)+k*2.2); - btVector3 position(i*2.2,2+j*2.,k*2.2); + btVector3 position(i*2.2,2+j*4.,k*2.2); btQuaternion orn(0,0,0,1); diff --git a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp b/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp index e877dd235..eef0b1706 100644 --- a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp +++ b/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp @@ -38,7 +38,7 @@ enum bool gpuBatchContacts = true;//true; -bool gpuSolveConstraint = false;//true;//true; +bool gpuSolveConstraint = true;//true; struct btGpuBatchingPgsSolverInternalData diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp index b222a635f..0eed19a9e 100644 --- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp +++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp @@ -921,4 +921,438 @@ void btGpuJacobiSolver::solveGroup(btOpenCLArray* bodies,btOpenC +} + + +void btGpuJacobiSolver::solveGroupMixed(btOpenCLArray* bodiesGPU,btOpenCLArray* inertiasGPU,btOpenCLArray* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo) +{ + + btAlignedObjectArray bodiesCPU; + bodiesGPU->copyToHost(bodiesCPU); + btAlignedObjectArray inertiasCPU; + inertiasGPU->copyToHost(inertiasCPU); + btAlignedObjectArray manifoldPtrCPU; + manifoldPtrGPU->copyToHost(manifoldPtrCPU); + + int numBodiesCPU = bodiesGPU->size(); + int numManifoldsCPU = manifoldPtrGPU->size(); + BT_PROFILE("btGpuJacobiSolver::solveGroupMixed"); + + btAlignedObjectArray bodyCount; + bodyCount.resize(numBodiesCPU); + for (int i=0;i contactConstraintOffsets; + contactConstraintOffsets.resize(numManifoldsCPU); + + + for (int i=0;i offsetSplitBodies; + offsetSplitBodies.resize(numBodiesCPU); + unsigned int totalNumSplitBodiesCPU; + m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU); + int numlastBody = bodyCount[numBodiesCPU-1]; + totalNumSplitBodiesCPU += numlastBody; + + int numBodies = bodiesGPU->size(); + int numManifolds = manifoldPtrGPU->size(); + + m_data->m_bodyCount->resize(numBodies); + + unsigned int val=0; + btInt2 val2; + val2.x=0; + val2.y=0; + + { + BT_PROFILE("m_filler"); + m_data->m_contactConstraintOffsets->resize(numManifolds); + m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); + + + m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); + } + + { + BT_PROFILE("m_countBodiesKernel"); + btLauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); + launcher.setBuffer(manifoldPtrGPU->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setConst(numManifolds); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.launch1D(numManifolds); + } + + unsigned int totalNumSplitBodies=0; + m_data->m_offsetSplitBodies->resize(numBodies); + m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); + totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); + + if (totalNumSplitBodies != totalNumSplitBodiesCPU) + { + printf("error in totalNumSplitBodies!\n"); + } + + int numContacts = manifoldPtrGPU->size(); + m_data->m_contactConstraints->resize(numContacts); + + + { + BT_PROFILE("contactToConstraintSplitKernel"); + btLauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); + launcher.setBuffer(manifoldPtrGPU->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setConst(numContacts); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.launch1D( numContacts, 64 ); + clFinish(m_queue); + } + + + + btAlignedObjectArray contactConstraints; + contactConstraints.resize(numManifoldsCPU); + + for (int i=0;i deltaLinearVelocities; + btAlignedObjectArray deltaAngularVelocities; + deltaLinearVelocities.resize(totalNumSplitBodiesCPU); + deltaAngularVelocities.resize(totalNumSplitBodiesCPU); + for (int i=0;im_deltaLinearVelocities->resize(totalNumSplitBodies); + m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); + + + + { + BT_PROFILE("m_clearVelocitiesKernel"); + btLauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); + launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launch.setConst(totalNumSplitBodies); + launch.launch1D(totalNumSplitBodies); + } + + + ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + m_data->m_contactConstraints->copyToHost(contactConstraints); + m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); + m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + + for (int iter = 0;iterm_solveContactKernel ); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + clFinish(m_queue); + } + + + int i=0; + for( i=0; im_averageVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + //easy + for (int i=0;im_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); + //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); + m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); + m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); + +#if 0 + + { + BT_PROFILE("m_solveFrictionKernel"); + btLauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); + launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(inertiasGPU->getBufferCL()); + launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(solverInfo.m_deltaTime); + launcher.setConst(solverInfo.m_positionDrift); + launcher.setConst(solverInfo.m_positionConstraintCoeff); + launcher.setConst(solverInfo.m_fixedBodyIndex); + launcher.setConst(numManifolds); + + launcher.launch1D(numManifolds); + clFinish(m_queue); + } + + //solve friction + + for(int i=0; im_averageVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + //easy + for (int i=0;im_updateBodyVelocitiesKernel); + launcher.setBuffer(bodiesGPU->getBufferCL()); + launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); + launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); + launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); + launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); + launcher.setConst(numBodies); + launcher.launch1D(numBodies); + clFinish(m_queue); + } + + + //easy + for (int i=0;icopyFromHost(bodiesCPU); + + } \ No newline at end of file diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h index 636f39c93..0eeda3fba 100644 --- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h +++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h @@ -46,6 +46,7 @@ public: void solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); void solveGroup(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); + void solveGroupMixed(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); }; #endif //BT_GPU_JACOBI_SOLVER_H diff --git a/opencl/gpu_rigidbody/kernels/solveContact.cl b/opencl/gpu_rigidbody/kernels/solveContact.cl index fb9f836e5..4b7cb769b 100644 --- a/opencl/gpu_rigidbody/kernels/solveContact.cl +++ b/opencl/gpu_rigidbody/kernels/solveContact.cl @@ -237,7 +237,7 @@ void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) { - *linear = -n; + *linear = mymake_float4(-n.xyz,0.f); *angular0 = -cross3(r0, n); *angular1 = cross3(r1, n); } diff --git a/opencl/gpu_rigidbody/kernels/solveContact.h b/opencl/gpu_rigidbody/kernels/solveContact.h index 67ce0ca6f..b758f43d8 100644 --- a/opencl/gpu_rigidbody/kernels/solveContact.h +++ b/opencl/gpu_rigidbody/kernels/solveContact.h @@ -239,7 +239,7 @@ static const char* solveContactCL= \ "\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" -" *linear = -n;\n" +" *linear = mymake_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" diff --git a/opencl/gpu_rigidbody/kernels/solveFriction.cl b/opencl/gpu_rigidbody/kernels/solveFriction.cl index 602e9119b..d4276c24b 100644 --- a/opencl/gpu_rigidbody/kernels/solveFriction.cl +++ b/opencl/gpu_rigidbody/kernels/solveFriction.cl @@ -237,7 +237,7 @@ void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4 void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) { - *linear = -n; + *linear = mymake_float4(-n.xyz,0.f); *angular0 = -cross3(r0, n); *angular1 = cross3(r1, n); } diff --git a/opencl/gpu_rigidbody/kernels/solveFriction.h b/opencl/gpu_rigidbody/kernels/solveFriction.h index 4cf6cdfa3..9d6de6ccc 100644 --- a/opencl/gpu_rigidbody/kernels/solveFriction.h +++ b/opencl/gpu_rigidbody/kernels/solveFriction.h @@ -239,7 +239,7 @@ static const char* solveFrictionCL= \ "\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" -" *linear = -n;\n" +" *linear = mymake_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" diff --git a/opencl/gpu_rigidbody/kernels/solverSetup.cl b/opencl/gpu_rigidbody/kernels/solverSetup.cl index 9236b8b7d..814f55646 100644 --- a/opencl/gpu_rigidbody/kernels/solverSetup.cl +++ b/opencl/gpu_rigidbody/kernels/solverSetup.cl @@ -435,7 +435,7 @@ typedef struct void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) { - *linear = -n; + *linear = make_float4(-n.xyz,0.f); *angular0 = -cross3(r0, n); *angular1 = cross3(r1, n); } diff --git a/opencl/gpu_rigidbody/kernels/solverSetup.h b/opencl/gpu_rigidbody/kernels/solverSetup.h index 40839a8c7..83371897b 100644 --- a/opencl/gpu_rigidbody/kernels/solverSetup.h +++ b/opencl/gpu_rigidbody/kernels/solverSetup.h @@ -437,7 +437,7 @@ static const char* solverSetupCL= \ "\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" -" *linear = -n;\n" +" *linear = make_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.cl b/opencl/gpu_rigidbody/kernels/solverUtils.cl index 2722f3eb1..0c82d70ae 100644 --- a/opencl/gpu_rigidbody/kernels/solverUtils.cl +++ b/opencl/gpu_rigidbody/kernels/solverUtils.cl @@ -462,7 +462,7 @@ __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) { - *linear = -n; + *linear = make_float4(-n.xyz,0.f); *angular0 = -cross3(r0, n); *angular1 = cross3(r1, n); } @@ -537,10 +537,12 @@ void solveContact(__global Constraint4* cs, setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); + float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; rambdaDt *= cs->m_jacCoeffInv[ic]; + { float prevSum = cs->m_appliedRambdaDt[ic]; float updated = prevSum; @@ -550,12 +552,14 @@ void solveContact(__global Constraint4* cs, rambdaDt = updated - prevSum; cs->m_appliedRambdaDt[ic] = updated; } - + + float4 linImp0 = invMassA*linear*rambdaDt; float4 linImp1 = invMassB*(-linear)*rambdaDt; float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; + if (invMassA) { *dLinVelA += linImp0; diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.h b/opencl/gpu_rigidbody/kernels/solverUtils.h index a671e2b91..91726f36e 100644 --- a/opencl/gpu_rigidbody/kernels/solverUtils.h +++ b/opencl/gpu_rigidbody/kernels/solverUtils.h @@ -464,7 +464,7 @@ static const char* solverUtilsCL= \ "\n" "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" "{\n" -" *linear = -n;\n" +" *linear = make_float4(-n.xyz,0.f);\n" " *angular0 = -cross3(r0, n);\n" " *angular1 = cross3(r1, n);\n" "}\n" @@ -539,10 +539,12 @@ static const char* solverUtilsCL= \ " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" " \n" "\n" +"\n" " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" " rambdaDt *= cs->m_jacCoeffInv[ic];\n" "\n" +" \n" " {\n" " float prevSum = cs->m_appliedRambdaDt[ic];\n" " float updated = prevSum;\n" @@ -552,12 +554,14 @@ static const char* solverUtilsCL= \ " rambdaDt = updated - prevSum;\n" " cs->m_appliedRambdaDt[ic] = updated;\n" " }\n" -" \n" +"\n" +" \n" " float4 linImp0 = invMassA*linear*rambdaDt;\n" " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" "\n" +" \n" " if (invMassA)\n" " {\n" " *dLinVelA += linImp0;\n"