From ab125fbb6d559fc821f5e483606b76758488cdb6 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 17 Jul 2013 16:11:54 -0700 Subject: [PATCH] implement GPU breakable constraints add GPU fixed constraint fix performance issue with concave meshes (didn't clear the number of concave-convex pairs, so it increased every frame) --- .../GpuDemos/constraints/ConstraintsDemo.cpp | 14 +- Demos3/GpuDemos/main_opengl3core.cpp | 4 +- Demos3/GpuDemos/rigidbody/ConcaveScene.cpp | 2 +- .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 7 +- .../b3ConvexHullContact.cpp | 4 + .../RigidBody/b3GpuPgsJacobiSolver.cpp | 205 +++++++++++----- .../RigidBody/b3GpuPgsJacobiSolver.h | 4 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 14 +- .../RigidBody/b3GpuRigidBodyPipeline.h | 5 +- .../RigidBody/kernels/jointSolver.cl | 229 ++++++++++++------ .../RigidBody/kernels/jointSolver.h | 229 ++++++++++++------ 11 files changed, 477 insertions(+), 240 deletions(-) diff --git a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp index 1731ba8f6..0cac2dcd5 100644 --- a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp +++ b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp @@ -144,7 +144,7 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const int constraintType=0; for (int i=0;isetBreakingImpulseThreshold(14); + float breakingThreshold=14; +// c->setBreakingImpulseThreshold(breakingThreshold); b3Vector3 pivotInA(-1.1,0,0); b3Vector3 pivotInB (1.1,0,0); - int cid = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(pid,prevBody,pivotInA,pivotInB); + int cid = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(pid,prevBody,pivotInA,pivotInB,breakingThreshold); break; } case 1: @@ -209,8 +212,9 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const b3Quaternion relTargetAB = frameInA.getRotation()*frameInB.getRotation().inverse(); //c = new b3FixedConstraint(pid,prevBody,frameInA,frameInB); + float breakingThreshold = 15;//37.f; //c->setBreakingImpulseThreshold(37.1); - int cid = m_data->m_rigidBodyPipeline->createFixedConstraint(pid,prevBody,pivotInA,pivotInB,relTargetAB); + int cid = m_data->m_rigidBodyPipeline->createFixedConstraint(pid,prevBody,pivotInA,pivotInB,relTargetAB,breakingThreshold); diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index c25f9a89e..a4ee5b9b4 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -62,6 +62,7 @@ static void MyResizeCallback( float width, float height) b3gWindowInterface* window=0; GwenUserInterface* gui = 0; bool gPause = false; +bool gStep = false; bool gReset = false; enum @@ -925,7 +926,8 @@ int main(int argc, char* argv[]) } - + if (gStep) + gPause=true; } while (!window->requestedExit() && !gReset); diff --git a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp index 6d4b1957e..a2a7c2687 100644 --- a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp +++ b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp @@ -304,7 +304,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) //float camPos[4]={1,12.5,1.5,0}; m_instancingRenderer->setCameraPitch(45); m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(25); + m_instancingRenderer->setCameraDistance(155); } diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index 3af00f2af..95a669cf2 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -209,6 +209,7 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() { B3_PROFILE("stepSimulation"); m_data->m_rigidBodyPipeline->stepSimulation(1./60.f); + } if (numObjects) @@ -319,7 +320,8 @@ bool GpuRigidBodyDemo::mouseMoveCallback(float x,float y) dir *= m_data->m_pickDistance; newPivotB = rayFrom + dir; m_data->m_pickPivotInB = newPivotB; - m_data->m_pickConstraint = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(m_data->m_pickBody,m_data->m_pickFixedBody,m_data->m_pickPivotInA,m_data->m_pickPivotInB); + m_data->m_rigidBodyPipeline->copyConstraintsToHost(); + m_data->m_pickConstraint = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(m_data->m_pickBody,m_data->m_pickFixedBody,m_data->m_pickPivotInA,m_data->m_pickPivotInB,1e30); m_data->m_rigidBodyPipeline->writeAllInstancesToGpu(); return true; } @@ -398,7 +400,8 @@ bool GpuRigidBodyDemo::mouseButtonCallback(int button, int state, float x, float pivotInB.w = 0.f; m_data->m_pickPivotInA = pivotInA; m_data->m_pickPivotInB = pivotInB; - m_data->m_pickConstraint = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(hitBodyA,m_data->m_pickFixedBody,pivotInA,pivotInB);//hitResults[0].m_hitResult0 + m_data->m_rigidBodyPipeline->copyConstraintsToHost(); + m_data->m_pickConstraint = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(hitBodyA,m_data->m_pickFixedBody,pivotInA,pivotInB,1e30);//hitResults[0].m_hitResult0 m_data->m_rigidBodyPipeline->writeAllInstancesToGpu(); m_data->m_np->writeAllBodiesToGpu(); m_data->m_pickDistance = (pivotInB-camPos).length(); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index c33ff53d6..a9a128462 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -1881,6 +1881,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraysize() && treeNodesGPU->size()) { B3_PROFILE("m_bvhTraversalKernel"); + + numConcavePairs = m_numConcavePairsOut.at(0); + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel); launcher.setBuffer( pairs->getBufferCL()); launcher.setBuffer( bodyBuf->getBufferCL()); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp index 8749752fb..040c89130 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -19,6 +19,7 @@ bool useGpuInfo1 = true; bool useGpuInfo2= true; bool useGpuSolveJointConstraintRows=true; bool useGpuWriteBackVelocities = true; +bool gpuBreakConstraints = true; #include "b3GpuPgsJacobiSolver.h" @@ -55,8 +56,9 @@ struct b3GpuPgsJacobiSolverInternalData cl_kernel m_initBatchConstraintsKernel; cl_kernel m_getInfo2Kernel; cl_kernel m_writeBackVelocitiesKernel; + cl_kernel m_breakViolatedConstraintsKernel; - b3OpenCLArray* m_dst; + b3OpenCLArray* m_gpuConstraintRowOffsets; b3OpenCLArray* m_gpuSolverBodies; b3OpenCLArray* m_gpuBatchConstraints; @@ -67,6 +69,7 @@ struct b3GpuPgsJacobiSolverInternalData b3AlignedObjectArray m_cpuBatchConstraints; b3AlignedObjectArray m_cpuConstraintRows; b3AlignedObjectArray m_cpuConstraintInfo1; + b3AlignedObjectArray m_cpuConstraintRowOffsets; b3AlignedObjectArray m_cpuBodies; b3AlignedObjectArray m_cpuInertias; @@ -125,7 +128,7 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, m_gpuData->m_prefixScan = new b3PrefixScanCL(ctx,device,queue); - m_gpuData->m_dst = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); + m_gpuData->m_gpuConstraintRowOffsets = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); m_gpuData->m_gpuSolverBodies = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); m_gpuData->m_gpuBatchConstraints = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); @@ -134,7 +137,8 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_int errNum=0; { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH,true); + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH); + //cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,0,&errNum,"",B3_JOINT_SOLVER_PATH,true); b3Assert(errNum==CL_SUCCESS); m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device,solveConstraintRowsCL, "solveJointConstraintRows",&errNum,prog); b3Assert(errNum==CL_SUCCESS); @@ -148,6 +152,8 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, b3Assert(errNum==CL_SUCCESS); m_gpuData->m_writeBackVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"writeBackVelocitiesKernel",&errNum,prog); b3Assert(errNum==CL_SUCCESS); + m_gpuData->m_breakViolatedConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"breakViolatedConstraintsKernel",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); @@ -166,9 +172,10 @@ b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () clReleaseKernel(m_gpuData->m_initBatchConstraintsKernel); clReleaseKernel(m_gpuData->m_getInfo2Kernel); clReleaseKernel(m_gpuData->m_writeBackVelocitiesKernel); + clReleaseKernel(m_gpuData->m_breakViolatedConstraintsKernel); delete m_gpuData->m_prefixScan; - delete m_gpuData->m_dst; + delete m_gpuData->m_gpuConstraintRowOffsets; delete m_gpuData->m_gpuSolverBodies; delete m_gpuData->m_gpuBatchConstraints; delete m_gpuData->m_gpuConstraintRows; @@ -181,14 +188,8 @@ struct b3BatchConstraint { int m_bodyAPtrAndSignBit; int m_bodyBPtrAndSignBit; - int m_constraintRowOffset; - short int m_numConstraintRows; - short int m_batchId; - - short int& getBatchIdx() - { - return m_batchId; - } + int m_originalConstraintIndex; + int m_batchId; }; static b3AlignedObjectArray batchConstraints; @@ -234,7 +235,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraygetBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); - //clFinish(m_gpuData->m_queue); + clFinish(m_gpuData->m_queue); // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } else @@ -270,15 +271,10 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuConstraintInfo1->resize(numConstraints); - if (1) - { - m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); - // gpuConstraints.resize(numConstraints); - // gpuConstraints.copyFromHostPointer(gpuConstraints,numConstraints); - // m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - - } + if (1) { B3_PROFILE("getInfo1Kernel"); @@ -286,37 +282,35 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_getInfo1Kernel); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); + clFinish(m_gpuData->m_queue); } - //clFinish(m_gpuData->m_queue); - if (batches.size()==0) - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - if (1) + if (batches.size()==0) { - //m_gpuData->m_gpuConstraintInfo1->copyToHost(m_tmpConstraintSizesPool); - m_gpuData->m_dst->resize(numConstraints); + B3_PROFILE("initBatchConstraintsKernel"); + + m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); unsigned int total=0; - m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,*m_gpuData->m_dst,numConstraints,&total); + m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,*m_gpuData->m_gpuConstraintRowOffsets,numConstraints,&total); unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints-1); - //b3AlignedObjectArray dstHost; - //dst.copyToHost(dstHost); totalNumRows = total+lastElem; { B3_PROFILE("init batch constraints"); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel); - launcher.setBuffer(m_gpuData->m_dst->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); + launcher.setBuffer(gpuConstraints->getBufferCL()); + launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); - //clFinish(m_gpuData->m_queue); + clFinish(m_gpuData->m_queue); } - if (batches.size()==0) - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - + //assume the batching happens on CPU, so copy the data + m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); } } else @@ -336,15 +330,10 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); @@ -361,6 +350,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_getInfo2Kernel); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); @@ -373,7 +363,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue); + clFinish(m_gpuData->m_queue); if (batches.size()==0) m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); @@ -397,7 +387,10 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_cpuConstraintRowOffsets[constraintIndex]; + + b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i]; b3RigidBodyCL& rbA = m_gpuData->m_cpuBodies[ constraint.getRigidBodyA()]; @@ -685,13 +678,15 @@ void b3GpuPgsJacobiSolver::averageVelocities() } -b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) +b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal) { //only create the batches once. //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated + B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); + bool createBatches = batches.size()==0; { - B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); + if (createBatches) { @@ -711,7 +706,12 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_gpuBatchConstraints->copyFromHost(batchConstraints); + /*b3AlignedObjectArray cpuCheckBatches; + m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); + b3Assert(cpuCheckBatches.size()==batchConstraints.size()); + printf(".\n"); + */ + //>copyFromHost(batchConstraints); } int maxIterations = infoGlobal.m_numIterations; @@ -726,6 +726,10 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); + m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); + m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); + gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); + } for ( int iteration = 0 ; iteration< maxIterations ; iteration++) @@ -742,16 +746,30 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_queue,m_gpuData->m_solveJointConstraintRowsKernels); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); + launcher.setBuffer(gpuConstraints1->getBufferCL());//to detect disabled constraints launcher.setConst(batchOffset); - launcher.setConst(constraintOffset); launcher.setConst(numConstraintsInBatch); launcher.launch1D(numConstraintsInBatch); - //clFinish(m_gpuData->m_queue); + } else//useGpu { @@ -766,17 +784,19 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3OpenCLArraym_cpuConstraints[c.m_originalConstraintIndex]; + if (constraint->m_flags&B3_CONSTRAINT_FLAG_ENABLED) { -// - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[c.m_constraintRowOffset+jj]; - //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA],&m_tmpSolverBodyPool[constraint.m_solverBodyIdB],&constraint); - + int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; + int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; + + for (int jj=0;jjm_queue); return 0.f; } @@ -866,7 +887,7 @@ inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, #if defined(_DEBUG) for(int i=0; i* gpuBodie solveGroupCacheFriendlySetup( gpuBodies, gpuInertias,numBodies,gpuConstraints, numConstraints,infoGlobal); - solveGroupCacheFriendlyIterations(m_gpuData->m_gpuConstraintRows, numConstraints,infoGlobal); + solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints,infoGlobal); - solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias,numBodies, infoGlobal); + solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias,numBodies, gpuConstraints, numConstraints, infoGlobal); return 0.f; } @@ -1007,13 +1028,62 @@ void b3GpuPgsJacobiSolver::solveJoints(int numBodies, b3OpenCLArray testBodies; -b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,const b3ContactSolverInfo& infoGlobal) +b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("solveGroupCacheFriendlyFinish"); int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); // int i,j; + { + if (gpuBreakConstraints) + { + B3_PROFILE("breakViolatedConstraintsKernel"); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel); + launcher.setBuffer(gpuConstraints->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); + launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); + launcher.setConst(numConstraints); + launcher.launch1D(numConstraints); + } else + { + gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); + m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints); + m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows); + gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); + m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); + m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); + + for (int cid=0;cidm_cpuConstraintRowOffsets[originalConstraintIndex]; + int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex]; + if (numRows) + { + + // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); + for (int i=0;im_cpuConstraintRows[rowIndex].m_originalContactPoint; + float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold; + // printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse); + if (b3Fabs((m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold)) + { + + m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; + } + } + } + } + + + gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints); + } + } + { if (useGpuWriteBackVelocities) { @@ -1024,7 +1094,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraym_gpuSolverBodies->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); - //clFinish(m_gpuData->m_queue); + clFinish(m_gpuData->m_queue); // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); // m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies); //m_gpuData->m_gpuBodies->copyToHost(testBodies); @@ -1075,6 +1145,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraym_queue); m_tmpSolverContactConstraintPool.resizeNoInitialize(0); m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h index dc62fd081..ca73a48eb 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h @@ -62,9 +62,9 @@ public: b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs); virtual~b3GpuPgsJacobiSolver (); - virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); + virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal); virtual b3Scalar solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - b3Scalar solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,const b3ContactSolverInfo& infoGlobal); + b3Scalar solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias,int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); b3Scalar solveGroup(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias, int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 09cb7b21c..5af4cfb39 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -170,7 +170,7 @@ void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid) } } -int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB) +int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold) { m_data->m_gpuSolver->recomputeBatches(); b3GpuGenericConstraint c; @@ -181,12 +181,12 @@ int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, co c.m_rbB = bodyB; c.m_pivotInA.setValue(pivotInA[0],pivotInA[1],pivotInA[2]); c.m_pivotInB.setValue(pivotInB[0],pivotInB[1],pivotInB[2]); - c.m_breakingImpulseThreshold = 1e30f; + c.m_breakingImpulseThreshold = breakingThreshold; c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE; m_data->m_cpuConstraints.push_back(c); return c.m_uid; } -int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB) +int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB,float breakingThreshold) { m_data->m_gpuSolver->recomputeBatches(); b3GpuGenericConstraint c; @@ -198,8 +198,9 @@ int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const fl c.m_pivotInA.setValue(pivotInA[0],pivotInA[1],pivotInA[2]); c.m_pivotInB.setValue(pivotInB[0],pivotInB[1],pivotInB[2]); c.m_relTargetAB.setValue(relTargetAB[0],relTargetAB[1],relTargetAB[2],relTargetAB[3]); - c.m_breakingImpulseThreshold = 1e30f; + c.m_breakingImpulseThreshold = breakingThreshold; c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE; + m_data->m_cpuConstraints.push_back(c); return c.m_uid; } @@ -476,6 +477,11 @@ void b3GpuRigidBodyPipeline::setGravity(const float* grav) m_data->m_gravity.setValue(grav[0],grav[1],grav[2]); } +void b3GpuRigidBodyPipeline::copyConstraintsToHost() +{ + m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); +} + void b3GpuRigidBodyPipeline::writeAllInstancesToGpu() { m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h index 1dce3b15e..e9d9cca97 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h @@ -52,11 +52,12 @@ public: int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData, bool writeInstanceToGpu); //if you passed "writeInstanceToGpu" false in the registerPhysicsInstance method (for performance) you need to call writeAllInstancesToGpu after all instances are registered void writeAllInstancesToGpu(); + void copyConstraintsToHost(); void setGravity(const float* grav); void reset(); - int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB); - int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB); + int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold); + int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold); void removeConstraintByUid(int uid); void addConstraint(class b3TypedConstraint* constraint); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl b/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl index b88180022..1e32c1052 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl @@ -13,11 +13,12 @@ subject to the following restrictions: */ //Originally written by Erwin Coumans +#define B3_CONSTRAINT_FLAG_ENABLED 1 #define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3 #define B3_GPU_FIXED_CONSTRAINT_TYPE 4 - +#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails #define B3_INFINITY 1e30f #define mymake_float4 (float4) @@ -148,12 +149,8 @@ typedef struct float m_lowerLimit; float m_upperLimit; float m_rhsPenetration; + int m_originalConstraint; - union - { - void* m_originalContactPoint; - float m_unusedPadding4; - }; int m_overrideNumSolverIterations; int m_frictionIndex; @@ -162,20 +159,19 @@ typedef struct } b3SolverConstraint; -typedef struct +typedef struct { int m_bodyAPtrAndSignBit; int m_bodyBPtrAndSignBit; - int m_constraintRowOffset; - short int m_numConstraintRows; - short int m_batchId; - + int m_originalConstraintIndex; + int m_batchId; } b3BatchConstraint; + typedef struct { int m_constraintType; @@ -304,12 +300,13 @@ void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global } -__kernel -void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, +__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, __global b3BatchConstraint* batchConstraints, __global b3SolverConstraint* rows, + __global unsigned int* numConstraintRowsInfo1, + __global unsigned int* rowOffsets, + __global b3GpuGenericConstraint* constraints, int batchOffset, - int constraintOffset, int numConstraintsInBatch ) { @@ -318,10 +315,16 @@ void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, return; __global b3BatchConstraint* c = &batchConstraints[b+batchOffset]; - for (int jj=0;jjm_numConstraintRows;jj++) + int originalConstraintIndex = c->m_originalConstraintIndex; + if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED) { - __global b3SolverConstraint* constraint = &rows[c->m_constraintRowOffset+jj]; - resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint); + int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex]; + int rowOffset = rowOffsets[originalConstraintIndex]; + for (int jj=0;jjm_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint); + } } }; @@ -346,7 +349,31 @@ __kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b solverBody->m_angularVelocity = bodyCL->m_angVel; } -__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, __global b3BatchConstraint* batchConstraints, int numConstraints) +__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints) +{ + int cid = get_global_id(0); + if (cid>=numConstraints) + return; + int numRows = numConstraintRows[cid]; + if (numRows) + { + // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); + for (int i=0;i= breakingThreshold) + { + constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; + } + } + } +} + + + +__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints) { int i = get_global_id(0); if (i>=numConstraints) @@ -359,13 +386,11 @@ __kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGeneric case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: { infos[i] = 3; - batchConstraints[i].m_numConstraintRows = 3; break; } case B3_GPU_FIXED_CONSTRAINT_TYPE: { infos[i] = 6; - batchConstraints[i].m_numConstraintRows = 6; break; } default: @@ -374,13 +399,24 @@ __kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGeneric } } -__kernel void initBatchConstraintsKernel(__global unsigned int* rowOffsets, __global b3BatchConstraint* batchConstraints, int numConstraints) +__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, + __global b3BatchConstraint* batchConstraints, + __global b3GpuGenericConstraint* constraints, + __global b3RigidBodyCL* bodies, + int numConstraints) { int i = get_global_id(0); if (i>=numConstraints) return; - batchConstraints[i].m_constraintRowOffset = rowOffsets[i]; + int rbA = constraints[i].m_rbA; + int rbB = constraints[i].m_rbB; + + batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass? rbA : -rbA; + batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass? rbB : -rbB; + batchConstraints[i].m_batchId = -1; + batchConstraints[i].m_originalConstraintIndex = i; + } @@ -509,23 +545,51 @@ void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstr } } -/* -@todo: convert this code to OpenCL -void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0,const b3Quaternion& orn1a,b3Vector3& axis,b3Scalar& angle) +Quaternion nearest( Quaternion first, Quaternion qd) { - Quaternion orn1 = orn0.nearest(orn1a); - Quaternion dorn = orn1 * orn0.inverse(); - angle = dorn.getAngle(); - axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ()); - axis[3] = b3Scalar(0.); - //check for axis length - b3Scalar len = axis.length2(); - if (len < B3_EPSILON*B3_EPSILON) - axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.)); - else - axis /= b3Sqrt(len); + Quaternion diff,sum; + diff = first- qd; + sum = first + qd; + + if( dot(diff,diff) < dot(sum,sum) ) + return qd; + return (-qd); } -*/ + +float b3Acos(float x) +{ + if (x<-1) + x=-1; + if (x>1) + x=1; + return acos(x); +} + +float getAngle(Quaternion orn) +{ + if (orn.w>=1.f) + orn.w=1.f; + float s = 2.f * b3Acos(orn.w); + return s; +} + +void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle) +{ + Quaternion orn1 = nearest(orn0,orn1a); + + Quaternion dorn = qtMul(orn1,qtInvert(orn0)); + *angle = getAngle(dorn); + *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f); + + //check for axis length + float len = dot3F4(*axis,*axis); + if (len < FLT_EPSILON*FLT_EPSILON) + *axis = (float4)(1,0,0,0); + else + *axis /= sqrt(len); +} + + void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row) { @@ -545,21 +609,23 @@ void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuC info->m_J2angularAxis[start_index + s+1] = -1; info->m_J2angularAxis[start_index + s*2+2] = -1; } - /* - @todo + float currERP = info->erp; float k = info->fps * currERP; float4 diff; float angle; - float4 qrelCur = worldOrnA *qtInvert(worldOrnB); - - calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,diff,angle); + float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB)); + + calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle); diff*=-angle; - for (j=0; j<3; j++) + + float* resultPtr = &diff; + + for (int j=0; j<3; j++) { - info->m_constraintError[(start_row+j)*info->rowskip] = k * diff[j]; + info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j]; } - */ + } @@ -572,16 +638,21 @@ __kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global if (bodies[i].m_invMass) { -// solverBodies[i].m_linearVelocity += solverBodies[i].m_deltaLinearVelocity; -// solverBodies[i].m_angularVelocity += solverBodies[i].m_deltaAngularVelocity; - bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity; - bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity; +// if (length(solverBodies[i].m_deltaLinearVelocity)=numConstraints) return; + //for now, always initialize the batch info int info1 = infos[i]; - - if (info1) + + __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]]; + __global b3GpuGenericConstraint* constraint = &constraints[i]; + + __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA]; + __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB]; + + int solverBodyIdA = constraint->m_rbA; + int solverBodyIdB = constraint->m_rbB; + + __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA]; + __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB]; + + + if (rbA->m_invMass) + { + batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; + } else { - __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[batchConstraints[i].m_constraintRowOffset]; - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA]; - __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB]; - - int solverBodyIdA = constraint->m_rbA; - int solverBodyIdB = constraint->m_rbB; - - __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA]; - __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB]; - - if (rbA->m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } else - { // if (!solverBodyIdA) // m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } + batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; + } - if (rbB->m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } else - { + if (rbB->m_invMass) + { + batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; + } else + { // if (!solverBodyIdB) // m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } + batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; + } + if (info1) + { int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; // if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) // m_maxOverrideNumSolverIterations = overrideNumSolverIterations; @@ -656,7 +729,7 @@ __kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, currentConstraintRow[j].m_lowerLimit = 0.f; currentConstraintRow[j].m_upperLimit = 0.f; - currentConstraintRow[j].m_originalContactPoint = 0; + currentConstraintRow[j].m_originalConstraint = i; currentConstraintRow[j].m_overrideNumSolverIterations = 0; currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0); currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h b/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h index a5dc1525e..83d0fe938 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h @@ -15,11 +15,12 @@ static const char* solveConstraintRowsCL= \ "*/\n" "//Originally written by Erwin Coumans\n" "\n" +"#define B3_CONSTRAINT_FLAG_ENABLED 1\n" "\n" "#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" "#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" "\n" -"\n" +"#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" "#define B3_INFINITY 1e30f\n" "\n" "#define mymake_float4 (float4)\n" @@ -150,12 +151,8 @@ static const char* solveConstraintRowsCL= \ " float m_lowerLimit;\n" " float m_upperLimit;\n" " float m_rhsPenetration;\n" +" int m_originalConstraint;\n" "\n" -" union\n" -" {\n" -" void* m_originalContactPoint;\n" -" float m_unusedPadding4;\n" -" };\n" "\n" " int m_overrideNumSolverIterations;\n" " int m_frictionIndex;\n" @@ -164,20 +161,19 @@ static const char* solveConstraintRowsCL= \ "\n" "} b3SolverConstraint;\n" "\n" -"typedef struct\n" +"typedef struct \n" "{\n" " int m_bodyAPtrAndSignBit;\n" " int m_bodyBPtrAndSignBit;\n" -" int m_constraintRowOffset;\n" -" short int m_numConstraintRows;\n" -" short int m_batchId;\n" -"\n" +" int m_originalConstraintIndex;\n" +" int m_batchId;\n" "} b3BatchConstraint;\n" "\n" "\n" "\n" "\n" "\n" +"\n" "typedef struct \n" "{\n" " int m_constraintType;\n" @@ -306,12 +302,13 @@ static const char* solveConstraintRowsCL= \ "\n" "}\n" "\n" -"__kernel\n" -"void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" +"__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" " __global b3BatchConstraint* batchConstraints,\n" " __global b3SolverConstraint* rows,\n" +" __global unsigned int* numConstraintRowsInfo1, \n" +" __global unsigned int* rowOffsets,\n" +" __global b3GpuGenericConstraint* constraints,\n" " int batchOffset,\n" -" int constraintOffset,\n" " int numConstraintsInBatch\n" " )\n" "{\n" @@ -320,10 +317,16 @@ static const char* solveConstraintRowsCL= \ " return;\n" "\n" " __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" -" for (int jj=0;jjm_numConstraintRows;jj++)\n" +" int originalConstraintIndex = c->m_originalConstraintIndex;\n" +" if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" " {\n" -" __global b3SolverConstraint* constraint = &rows[c->m_constraintRowOffset+jj];\n" -" resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" +" int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" +" int rowOffset = rowOffsets[originalConstraintIndex];\n" +" for (int jj=0;jjm_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" +" }\n" " }\n" "};\n" "\n" @@ -348,7 +351,31 @@ static const char* solveConstraintRowsCL= \ " solverBody->m_angularVelocity = bodyCL->m_angVel;\n" "}\n" "\n" -"__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, __global b3BatchConstraint* batchConstraints, int numConstraints)\n" +"__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" +"{\n" +" int cid = get_global_id(0);\n" +" if (cid>=numConstraints)\n" +" return;\n" +" int numRows = numConstraintRows[cid];\n" +" if (numRows)\n" +" {\n" +" // printf(\"cid=%d, breakingThreshold =%f\n\",cid,breakingThreshold);\n" +" for (int i=0;i= breakingThreshold)\n" +" {\n" +" constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" +" }\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"\n" +"__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numConstraints)\n" @@ -361,13 +388,11 @@ static const char* solveConstraintRowsCL= \ " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" " {\n" " infos[i] = 3;\n" -" batchConstraints[i].m_numConstraintRows = 3;\n" " break;\n" " }\n" " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" " {\n" " infos[i] = 6;\n" -" batchConstraints[i].m_numConstraintRows = 6;\n" " break;\n" " }\n" " default:\n" @@ -376,13 +401,24 @@ static const char* solveConstraintRowsCL= \ " }\n" "}\n" "\n" -"__kernel void initBatchConstraintsKernel(__global unsigned int* rowOffsets, __global b3BatchConstraint* batchConstraints, int numConstraints)\n" +"__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" +" __global b3BatchConstraint* batchConstraints, \n" +" __global b3GpuGenericConstraint* constraints,\n" +" __global b3RigidBodyCL* bodies,\n" +" int numConstraints)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numConstraints)\n" " return;\n" "\n" -" batchConstraints[i].m_constraintRowOffset = rowOffsets[i];\n" +" int rbA = constraints[i].m_rbA;\n" +" int rbB = constraints[i].m_rbB;\n" +"\n" +" batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass? rbA : -rbA;\n" +" batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass? rbB : -rbB;\n" +" batchConstraints[i].m_batchId = -1;\n" +" batchConstraints[i].m_originalConstraintIndex = i;\n" +"\n" "}\n" "\n" "\n" @@ -511,23 +547,51 @@ static const char* solveConstraintRowsCL= \ " }\n" "}\n" "\n" -"/*\n" -"@todo: convert this code to OpenCL\n" -"void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0,const b3Quaternion& orn1a,b3Vector3& axis,b3Scalar& angle)\n" +"Quaternion nearest( Quaternion first, Quaternion qd)\n" "{\n" -" Quaternion orn1 = orn0.nearest(orn1a);\n" -" Quaternion dorn = orn1 * orn0.inverse();\n" -" angle = dorn.getAngle();\n" -" axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ());\n" -" axis[3] = b3Scalar(0.);\n" -" //check for axis length\n" -" b3Scalar len = axis.length2();\n" -" if (len < B3_EPSILON*B3_EPSILON)\n" -" axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));\n" -" else\n" -" axis /= b3Sqrt(len);\n" +" Quaternion diff,sum;\n" +" diff = first- qd;\n" +" sum = first + qd;\n" +" \n" +" if( dot(diff,diff) < dot(sum,sum) )\n" +" return qd;\n" +" return (-qd);\n" "}\n" -"*/\n" +"\n" +"float b3Acos(float x) \n" +"{ \n" +" if (x<-1) \n" +" x=-1; \n" +" if (x>1) \n" +" x=1;\n" +" return acos(x); \n" +"}\n" +"\n" +"float getAngle(Quaternion orn)\n" +"{\n" +" if (orn.w>=1.f)\n" +" orn.w=1.f;\n" +" float s = 2.f * b3Acos(orn.w);\n" +" return s;\n" +"}\n" +"\n" +"void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" +"{\n" +" Quaternion orn1 = nearest(orn0,orn1a);\n" +" \n" +" Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" +" *angle = getAngle(dorn);\n" +" *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" +" \n" +" //check for axis length\n" +" float len = dot3F4(*axis,*axis);\n" +" if (len < FLT_EPSILON*FLT_EPSILON)\n" +" *axis = (float4)(1,0,0,0);\n" +" else\n" +" *axis /= sqrt(len);\n" +"}\n" +"\n" +"\n" "\n" "void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" "{\n" @@ -547,21 +611,23 @@ static const char* solveConstraintRowsCL= \ " info->m_J2angularAxis[start_index + s+1] = -1;\n" " info->m_J2angularAxis[start_index + s*2+2] = -1;\n" " }\n" -" /*\n" -" @todo\n" +" \n" " float currERP = info->erp;\n" " float k = info->fps * currERP;\n" " float4 diff;\n" " float angle;\n" -" float4 qrelCur = worldOrnA *qtInvert(worldOrnB);\n" -"\n" -" calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,diff,angle);\n" +" float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" +" \n" +" calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" " diff*=-angle;\n" -" for (j=0; j<3; j++)\n" +" \n" +" float* resultPtr = &diff;\n" +" \n" +" for (int j=0; j<3; j++)\n" " {\n" -" info->m_constraintError[(start_row+j)*info->rowskip] = k * diff[j];\n" +" info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" " }\n" -" */\n" +" \n" "\n" "}\n" "\n" @@ -574,16 +640,21 @@ static const char* solveConstraintRowsCL= \ "\n" " if (bodies[i].m_invMass)\n" " {\n" -"// solverBodies[i].m_linearVelocity += solverBodies[i].m_deltaLinearVelocity;\n" -"// solverBodies[i].m_angularVelocity += solverBodies[i].m_deltaAngularVelocity;\n" -" bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;\n" -" bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;\n" +"// if (length(solverBodies[i].m_deltaLinearVelocity)=numConstraints)\n" " return;\n" " \n" +" //for now, always initialize the batch info\n" " int info1 = infos[i];\n" -" \n" -" if (info1)\n" +" \n" +" __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" +" __global b3GpuGenericConstraint* constraint = &constraints[i];\n" +"\n" +" __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" +" __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" +"\n" +" int solverBodyIdA = constraint->m_rbA;\n" +" int solverBodyIdB = constraint->m_rbB;\n" +"\n" +" __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" +" __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" +"\n" +"\n" +" if (rbA->m_invMass)\n" +" {\n" +" batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" +" } else\n" " {\n" -" __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[batchConstraints[i].m_constraintRowOffset];\n" -" __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -"\n" -" __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" -" __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" -"\n" -" int solverBodyIdA = constraint->m_rbA;\n" -" int solverBodyIdB = constraint->m_rbB;\n" -"\n" -" __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" -" __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" -"\n" -" if (rbA->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" -" } else\n" -" {\n" "// if (!solverBodyIdA)\n" "// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" -" }\n" +" batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" +" }\n" "\n" -" if (rbB->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" -" } else\n" -" {\n" +" if (rbB->m_invMass)\n" +" {\n" +" batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" +" } else\n" +" {\n" "// if (!solverBodyIdB)\n" "// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" -" }\n" +" batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" +" }\n" "\n" +" if (info1)\n" +" {\n" " int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" "// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" " // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" @@ -658,7 +731,7 @@ static const char* solveConstraintRowsCL= \ " currentConstraintRow[j].m_lowerLimit = 0.f;\n" " currentConstraintRow[j].m_upperLimit = 0.f;\n" "\n" -" currentConstraintRow[j].m_originalContactPoint = 0;\n" +" currentConstraintRow[j].m_originalConstraint = i;\n" " currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n" " currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n" " currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);\n"