From 4a0d2847c44cc09927975d831b01bf98e21d0670 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Tue, 9 Jul 2013 13:07:34 -0700 Subject: [PATCH] allow to run each part of the GPU solver on CPU, for debugging purposes --- .../RigidBody/b3GpuPgsJacobiSolver.cpp | 174 +++++++++++------- 1 file changed, 112 insertions(+), 62 deletions(-) diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp index c9b024428..ec6bc83a7 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -14,6 +14,12 @@ subject to the following restrictions: */ //Originally written by Erwin Coumans +bool useGpuInitSolverBodies = true; +bool useGpuInfo1 = true; +bool useGpuInfo2= true; +bool useGpuSolveJointConstraintRows=true; +bool useGpuWriteBackVelocities = true; + #include "b3GpuPgsJacobiSolver.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" @@ -55,6 +61,19 @@ struct b3GpuPgsJacobiSolverInternalData b3OpenCLArray* m_gpuConstraintRows; b3OpenCLArray* m_gpuConstraintInfo1; +// b3AlignedObjectArray m_cpuSolverBodies; + b3AlignedObjectArray m_cpuBatchConstraints; + b3AlignedObjectArray m_cpuConstraintRows; + b3AlignedObjectArray m_cpuConstraintInfo1; + + b3AlignedObjectArray m_cpuBodies; + b3AlignedObjectArray m_cpuInertias; + + + b3AlignedObjectArray m_cpuConstraints; + + + }; @@ -183,8 +202,8 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuBodies->resize(numBodies); + + /* m_gpuData->m_gpuBodies->resize(numBodies); m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); b3OpenCLArray gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); @@ -197,8 +216,8 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue); -// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); + // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } else { - /* + gpuBodies->copyToHost(m_gpuData->m_cpuBodies); for (int i=0;im_cpuBodies[i]; b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; initSolverBody(i,&solverBody,&body); solverBody.m_originalBodyIndex = i; } - */ + m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); } } - + int totalBodies = 0; int totalNumRows = 0; //b3RigidBody* rb0=0,*rb1=0; @@ -232,25 +251,24 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArray gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); + // b3OpenCLArray gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); - bool useGpu = true; - if (useGpu) + if (useGpuInfo1) { B3_PROFILE("info1 and init batchConstraint"); - + if (1) { m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); -// gpuConstraints.resize(numConstraints); - // gpuConstraints.copyFromHostPointer(gpuConstraints,numConstraints); - // m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); + // gpuConstraints.resize(numConstraints); + // gpuConstraints.copyFromHostPointer(gpuConstraints,numConstraints); + // m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); } if (1) @@ -291,41 +309,43 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuBatchConstraints->copyToHost(batchConstraints); - + } } else { -#if 0 totalNumRows = 0; + gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); //calculate the total number of contraint rows - for (i=0;im_cpuConstraints[i].isEnabled()) { - constraints[i].getInfo1(&info1,bodies); + m_gpuData->m_cpuConstraints[i].getInfo1(&info1,&m_gpuData->m_cpuBodies[0]); } else { info1 = 0; } /*b3Assert(info1Prev==info1); - + b3Assert(batchConstraints[i].m_numConstraintRows==info1); b3Assert(batchConstraints[i].m_constraintRowOffset==totalNumRows); */ batchConstraints[i].m_numConstraintRows = info1; batchConstraints[i].m_constraintRowOffset = totalNumRows; totalNumRows += info1; -#endif } + m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); + m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); + } m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); m_gpuData->m_gpuConstraintRows->resize(totalNumRows); - bool useGpuInfo2= true; -// b3ConstraintArray verify; + + // b3ConstraintArray verify; if (useGpuInfo2) { @@ -359,22 +379,24 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraycopyToHost(m_gpuData->m_cpuInertias); + + ///setup the b3SolverConstraints + + for (int i=0;im_cpuConstraints[i]; - b3RigidBodyCL& rbA = bodies[ constraint.getRigidBodyA()]; + b3RigidBodyCL& rbA = m_gpuData->m_cpuBodies[ constraint.getRigidBodyA()]; //b3RigidBody& rbA = constraint.getRigidBodyA(); // b3RigidBody& rbB = constraint.getRigidBodyB(); - b3RigidBodyCL& rbB = bodies[ constraint.getRigidBodyB()]; + b3RigidBodyCL& rbB = m_gpuData->m_cpuBodies[ constraint.getRigidBodyB()]; @@ -471,26 +493,26 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_lowerLimit; info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; info2.m_numIterations = infoGlobal.m_numIterations; - constraints[i].getInfo2(&info2,bodies); + m_gpuData->m_cpuConstraints[i].getInfo2(&info2,&m_gpuData->m_cpuBodies[0]); ///finalize the constraint setup for ( j=0;j=constraints[i].getBreakingImpulseThreshold()) + if (solverConstraint.m_upperLimit>=m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { - solverConstraint.m_upperLimit = constraints[i].getBreakingImpulseThreshold(); + solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } - if (solverConstraint.m_lowerLimit<=-constraints[i].getBreakingImpulseThreshold()) + if (solverConstraint.m_lowerLimit<=-m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { - solverConstraint.m_lowerLimit = -constraints[i].getBreakingImpulseThreshold(); + solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } // solverConstraint.m_originalContactPoint = constraint; - b3Matrix3x3& invInertiaWorldA= inertias[constraint.getRigidBodyA()].m_invInertiaWorld; + b3Matrix3x3& invInertiaWorldA= m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; { //b3Vector3 angularFactorA(1,1,1); @@ -498,7 +520,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; { const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; @@ -545,8 +567,23 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); + m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); + + if (batches.size()==0) + m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); + else + m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); + + m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); + + + + }//end useGpuInfo2 + } @@ -670,13 +707,19 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_gpuBatchConstraints->copyFromHost(batchConstraints); } int maxIterations = infoGlobal.m_numIterations; + bool useBatching = true; - bool useGpu=true; if (useBatching ) { - + if (!useGpuSolveJointConstraintRows) + { + B3_PROFILE("copy to host"); + m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); + m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); + m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); + } for ( int iteration = 0 ; iteration< maxIterations ; iteration++) { @@ -689,7 +732,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_queue,m_gpuData->m_solveJointConstraintRowsKernels); @@ -703,12 +746,9 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_queue); - } else + } else//useGpu { - B3_PROFILE("copy from host"); - // m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - // m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - // m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); + for (int b=0;bm_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); + m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); + m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); + } + //B3_PROFILE("copy to host"); //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } @@ -961,8 +1008,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraym_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); + gpuBodies->copyToHost(m_gpuData->m_cpuBodies); + for ( int i=0;im_cpuBodies[bodyIndex]; if (body->getInvMass()) { if (infoGlobal.m_splitImpulse) @@ -1013,8 +1061,10 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraycopyFromHost(m_gpuData->m_cpuBodies); + } }