diff --git a/Demos3/GpuDemos/GpuDemo.h b/Demos3/GpuDemos/GpuDemo.h index 0cfe70ef0..a1d053c2f 100644 --- a/Demos3/GpuDemos/GpuDemo.h +++ b/Demos3/GpuDemos/GpuDemo.h @@ -48,9 +48,9 @@ public: arraySizeZ(10), #else - arraySizeX(30), - arraySizeY(30), - arraySizeZ(30), + arraySizeX(20), + arraySizeY(20), + arraySizeZ(20), #endif m_useConcaveMesh(false), gapX(16.3), diff --git a/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp index 5ce6deabd..2488db6da 100644 --- a/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp @@ -18,6 +18,7 @@ void Bullet2FileDemo::setupScene(const ConstructionInfo& ci) b3Assert(ci.m_instancingRenderer); const char* fileName="data/testFile.bullet"; +//const char* fileName="data/testFileFracture.bullet"; FILE* f = 0; diff --git a/Demos3/bullet2/RagdollDemo/RagdollDemo.cpp b/Demos3/bullet2/RagdollDemo/RagdollDemo.cpp index b109cd531..1d545fd48 100644 --- a/Demos3/bullet2/RagdollDemo/RagdollDemo.cpp +++ b/Demos3/bullet2/RagdollDemo/RagdollDemo.cpp @@ -176,22 +176,22 @@ public: transform.setIdentity(); transform.setOrigin(btVector3(btScalar(-0.35), btScalar(1.45), btScalar(0.))); - transform.getBasis().setEulerZYX(0,0,M_PI_2); + transform.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); m_bodies[BODYPART_LEFT_UPPER_ARM] = localCreateRigidBody(btScalar(1.), offset*transform, m_shapes[BODYPART_LEFT_UPPER_ARM]); transform.setIdentity(); transform.setOrigin(btVector3(btScalar(-0.7), btScalar(1.45), btScalar(0.))); - transform.getBasis().setEulerZYX(0,0,M_PI_2); + transform.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); m_bodies[BODYPART_LEFT_LOWER_ARM] = localCreateRigidBody(btScalar(1.), offset*transform, m_shapes[BODYPART_LEFT_LOWER_ARM]); transform.setIdentity(); transform.setOrigin(btVector3(btScalar(0.35), btScalar(1.45), btScalar(0.))); - transform.getBasis().setEulerZYX(0,0,-M_PI_2); + transform.getBasis().setEulerZYX(0,0,-SIMD_HALF_PI); m_bodies[BODYPART_RIGHT_UPPER_ARM] = localCreateRigidBody(btScalar(1.), offset*transform, m_shapes[BODYPART_RIGHT_UPPER_ARM]); transform.setIdentity(); transform.setOrigin(btVector3(btScalar(0.7), btScalar(1.45), btScalar(0.))); - transform.getBasis().setEulerZYX(0,0,-M_PI_2); + transform.getBasis().setEulerZYX(0,0,-SIMD_HALF_PI); m_bodies[BODYPART_RIGHT_LOWER_ARM] = localCreateRigidBody(btScalar(1.), offset*transform, m_shapes[BODYPART_RIGHT_LOWER_ARM]); // Setup some damping on the m_bodies @@ -209,10 +209,10 @@ public: btTransform localA, localB; localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,M_PI_2,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.15), btScalar(0.))); - localB.getBasis().setEulerZYX(0,M_PI_2,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.15), btScalar(0.))); + localA.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.15), btScalar(0.))); + localB.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.15), btScalar(0.))); hingeC = new btHingeConstraint(*m_bodies[BODYPART_PELVIS], *m_bodies[BODYPART_SPINE], localA, localB); - hingeC->setLimit(btScalar(-M_PI_4), btScalar(M_PI_2)); + hingeC->setLimit(btScalar(-0.5f*SIMD_HALF_PI), btScalar(SIMD_HALF_PI)); m_joints[JOINT_PELVIS_SPINE] = hingeC; hingeC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); @@ -220,10 +220,10 @@ public: localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,0,M_PI_2); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.30), btScalar(0.))); - localB.getBasis().setEulerZYX(0,0,M_PI_2); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); + localA.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.30), btScalar(0.))); + localB.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); coneC = new btConeTwistConstraint(*m_bodies[BODYPART_SPINE], *m_bodies[BODYPART_HEAD], localA, localB); - coneC->setLimit(M_PI_4, M_PI_4, M_PI_2); + coneC->setLimit(0.5f*SIMD_HALF_PI, 0.5f*SIMD_HALF_PI, SIMD_HALF_PI); m_joints[JOINT_SPINE_HEAD] = coneC; coneC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); @@ -231,20 +231,20 @@ public: localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,0,-M_PI_4*5); localA.setOrigin(btVector3(btScalar(-0.18), btScalar(-0.10), btScalar(0.))); - localB.getBasis().setEulerZYX(0,0,-M_PI_4*5); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.225), btScalar(0.))); + localA.getBasis().setEulerZYX(0,0,-0.5f*SIMD_HALF_PI*5); localA.setOrigin(btVector3(btScalar(-0.18), btScalar(-0.10), btScalar(0.))); + localB.getBasis().setEulerZYX(0,0,-0.5f*SIMD_HALF_PI*5); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.225), btScalar(0.))); coneC = new btConeTwistConstraint(*m_bodies[BODYPART_PELVIS], *m_bodies[BODYPART_LEFT_UPPER_LEG], localA, localB); - coneC->setLimit(M_PI_4, M_PI_4, 0); + coneC->setLimit(0.5f*SIMD_HALF_PI, 0.5f*SIMD_HALF_PI, 0); m_joints[JOINT_LEFT_HIP] = coneC; coneC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); m_ownerWorld->addConstraint(m_joints[JOINT_LEFT_HIP], true); localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,M_PI_2,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(-0.225), btScalar(0.))); - localB.getBasis().setEulerZYX(0,M_PI_2,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.185), btScalar(0.))); + localA.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(-0.225), btScalar(0.))); + localB.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.185), btScalar(0.))); hingeC = new btHingeConstraint(*m_bodies[BODYPART_LEFT_UPPER_LEG], *m_bodies[BODYPART_LEFT_LOWER_LEG], localA, localB); - hingeC->setLimit(btScalar(0), btScalar(M_PI_2)); + hingeC->setLimit(btScalar(0), btScalar(SIMD_HALF_PI)); m_joints[JOINT_LEFT_KNEE] = hingeC; hingeC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); @@ -252,20 +252,20 @@ public: localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,0,M_PI_4); localA.setOrigin(btVector3(btScalar(0.18), btScalar(-0.10), btScalar(0.))); - localB.getBasis().setEulerZYX(0,0,M_PI_4); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.225), btScalar(0.))); + localA.getBasis().setEulerZYX(0,0,0.5f*SIMD_HALF_PI); localA.setOrigin(btVector3(btScalar(0.18), btScalar(-0.10), btScalar(0.))); + localB.getBasis().setEulerZYX(0,0,0.5f*SIMD_HALF_PI); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.225), btScalar(0.))); coneC = new btConeTwistConstraint(*m_bodies[BODYPART_PELVIS], *m_bodies[BODYPART_RIGHT_UPPER_LEG], localA, localB); - coneC->setLimit(M_PI_4, M_PI_4, 0); + coneC->setLimit(0.5f*SIMD_HALF_PI, 0.5f*SIMD_HALF_PI, 0); m_joints[JOINT_RIGHT_HIP] = coneC; coneC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); m_ownerWorld->addConstraint(m_joints[JOINT_RIGHT_HIP], true); localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,M_PI_2,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(-0.225), btScalar(0.))); - localB.getBasis().setEulerZYX(0,M_PI_2,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.185), btScalar(0.))); + localA.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(-0.225), btScalar(0.))); + localB.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(0.185), btScalar(0.))); hingeC = new btHingeConstraint(*m_bodies[BODYPART_RIGHT_UPPER_LEG], *m_bodies[BODYPART_RIGHT_LOWER_LEG], localA, localB); - hingeC->setLimit(btScalar(0), btScalar(M_PI_2)); + hingeC->setLimit(btScalar(0), btScalar(SIMD_HALF_PI)); m_joints[JOINT_RIGHT_KNEE] = hingeC; hingeC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); @@ -273,21 +273,21 @@ public: localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,0,M_PI); localA.setOrigin(btVector3(btScalar(-0.2), btScalar(0.15), btScalar(0.))); - localB.getBasis().setEulerZYX(0,0,M_PI_2); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.18), btScalar(0.))); + localA.getBasis().setEulerZYX(0,0,SIMD_PI); localA.setOrigin(btVector3(btScalar(-0.2), btScalar(0.15), btScalar(0.))); + localB.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.18), btScalar(0.))); coneC = new btConeTwistConstraint(*m_bodies[BODYPART_SPINE], *m_bodies[BODYPART_LEFT_UPPER_ARM], localA, localB); - coneC->setLimit(M_PI_2, M_PI_2, 0); + coneC->setLimit(SIMD_HALF_PI, SIMD_HALF_PI, 0); coneC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); m_joints[JOINT_LEFT_SHOULDER] = coneC; m_ownerWorld->addConstraint(m_joints[JOINT_LEFT_SHOULDER], true); localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,M_PI_2,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.18), btScalar(0.))); - localB.getBasis().setEulerZYX(0,M_PI_2,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); + localA.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.18), btScalar(0.))); + localB.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); hingeC = new btHingeConstraint(*m_bodies[BODYPART_LEFT_UPPER_ARM], *m_bodies[BODYPART_LEFT_LOWER_ARM], localA, localB); -// hingeC->setLimit(btScalar(-M_PI_2), btScalar(0)); - hingeC->setLimit(btScalar(0), btScalar(M_PI_2)); +// hingeC->setLimit(btScalar(-SIMD_HALF_PI), btScalar(0)); + hingeC->setLimit(btScalar(0), btScalar(SIMD_HALF_PI)); m_joints[JOINT_LEFT_ELBOW] = hingeC; hingeC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); @@ -297,20 +297,20 @@ public: localA.setIdentity(); localB.setIdentity(); localA.getBasis().setEulerZYX(0,0,0); localA.setOrigin(btVector3(btScalar(0.2), btScalar(0.15), btScalar(0.))); - localB.getBasis().setEulerZYX(0,0,M_PI_2); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.18), btScalar(0.))); + localB.getBasis().setEulerZYX(0,0,SIMD_HALF_PI); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.18), btScalar(0.))); coneC = new btConeTwistConstraint(*m_bodies[BODYPART_SPINE], *m_bodies[BODYPART_RIGHT_UPPER_ARM], localA, localB); - coneC->setLimit(M_PI_2, M_PI_2, 0); + coneC->setLimit(SIMD_HALF_PI, SIMD_HALF_PI, 0); m_joints[JOINT_RIGHT_SHOULDER] = coneC; coneC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); m_ownerWorld->addConstraint(m_joints[JOINT_RIGHT_SHOULDER], true); localA.setIdentity(); localB.setIdentity(); - localA.getBasis().setEulerZYX(0,M_PI_2,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.18), btScalar(0.))); - localB.getBasis().setEulerZYX(0,M_PI_2,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); + localA.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localA.setOrigin(btVector3(btScalar(0.), btScalar(0.18), btScalar(0.))); + localB.getBasis().setEulerZYX(0,SIMD_HALF_PI,0); localB.setOrigin(btVector3(btScalar(0.), btScalar(-0.14), btScalar(0.))); hingeC = new btHingeConstraint(*m_bodies[BODYPART_RIGHT_UPPER_ARM], *m_bodies[BODYPART_RIGHT_LOWER_ARM], localA, localB); -// hingeC->setLimit(btScalar(-M_PI_2), btScalar(0)); - hingeC->setLimit(btScalar(0), btScalar(M_PI_2)); +// hingeC->setLimit(btScalar(-SIMD_HALF_PI), btScalar(0)); + hingeC->setLimit(btScalar(0), btScalar(SIMD_HALF_PI)); m_joints[JOINT_RIGHT_ELBOW] = hingeC; hingeC->setDbgDrawSize(CONSTRAINT_DEBUG_SIZE); diff --git a/bullet.pc.cmake b/bullet.pc.cmake new file mode 100644 index 000000000..c5649d58d --- /dev/null +++ b/bullet.pc.cmake @@ -0,0 +1,6 @@ +Name: bullet +Description: Bullet Continuous Collision Detection and Physics Library +Requires: +Version: @BULLET_VERSION@ +Libs: -L@LIB_DESTINATION@ -lBulletSoftBody -lBulletDynamics -lBulletCollision -lLinearMath +Cflags: @BULLET_DOUBLE_DEF@ -I@INCLUDE_INSTALL_DIR@ diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h index 9a79168b4..b36ac4b94 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h @@ -860,8 +860,8 @@ inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB, b3MprSimplex_t portal; - if (!hasSepAxis[pairIndex]) - return -1; +// if (!hasSepAxis[pairIndex]) + // return -1; hasSepAxis[pairIndex] = 0; int res; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 3699628cb..fc6f9c880 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -17,6 +17,7 @@ bool findSeparatingAxisOnGpu = true; bool splitSearchSepAxisConcave = false; bool splitSearchSepAxisConvex = true; bool useMprGpu = false;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... +bool useUnitDirections = false; bool bvhTraversalKernelGPU = true; bool findConcaveSeparatingAxisKernelGPU = true; bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; @@ -118,7 +119,8 @@ m_gpuCompoundPairs(m_context, m_queue), m_gpuCompoundSepNormals(m_context, m_queue), m_gpuHasCompoundSepNormals(m_context, m_queue), m_numCompoundPairsOut(m_context, m_queue), -m_dmins(m_context,m_queue) +m_dmins(m_context,m_queue), +m_unitSphereDirections(m_context,m_queue) { m_totalContactsOut.push_back(0); @@ -134,8 +136,8 @@ m_dmins(m_context,m_queue) // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); //#endif m_mprPenetrationKernel = 0; - - if (useMprGpu) + m_findSeparatingAxisUnitSphereKernel = 0; + if (useMprGpu||useUnitDirections) { cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); b3Assert(errNum==CL_SUCCESS); @@ -143,6 +145,16 @@ m_dmins(m_context,m_queue) m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); b3Assert(m_mprPenetrationKernel); b3Assert(errNum==CL_SUCCESS); + + m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); + b3Assert(m_findSeparatingAxisUnitSphereKernel); + b3Assert(errNum==CL_SUCCESS); + + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + m_unitSphereDirections.resize(numDirections); + m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); + + } @@ -278,6 +290,9 @@ GpuSatCollision::~GpuSatCollision() if (m_mprPenetrationKernel) clReleaseKernel(m_mprPenetrationKernel); + if (m_findSeparatingAxisUnitSphereKernel) + clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); + if (m_findSeparatingAxisKernel) clReleaseKernel(m_findSeparatingAxisKernel); @@ -3108,9 +3123,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* m_dmins.resize(nPairs); if (splitSearchSepAxisConvex) { + + if (useMprGpu) { - nContacts = m_totalContactsOut.at(0); + nContacts = m_totalContactsOut.at(0); { B3_PROFILE("mprPenetrationKernel"); b3BufferInfoCL bInfo[] = { @@ -3142,7 +3159,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* */ nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); - //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); + // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); if (nContacts>maxContactCapacity) { @@ -3151,37 +3168,67 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* } } - } //else + } + // else + if (1) { { - B3_PROFILE("findSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()) - }; + B3_PROFILE("findSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; - b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); + b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + if (useUnitDirections) + { + B3_PROFILE("findSeparatingAxisUnitSphereKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + launcher.setConst( numDirections); + + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } else { B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); b3BufferInfoCL bInfo[] = { diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index ff75ccd91..e24c1579c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -27,6 +27,8 @@ struct GpuSatCollision cl_command_queue m_queue; cl_kernel m_findSeparatingAxisKernel; cl_kernel m_mprPenetrationKernel; + cl_kernel m_findSeparatingAxisUnitSphereKernel; + cl_kernel m_findSeparatingAxisVertexFaceKernel; cl_kernel m_findSeparatingAxisEdgeEdgeKernel; @@ -57,6 +59,7 @@ struct GpuSatCollision cl_kernel m_processCompoundPairsPrimitivesKernel; + b3OpenCLArray m_unitSphereDirections; b3OpenCLArray m_totalContactsOut; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl index f132f11a3..bb1838e31 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl @@ -87,3 +87,220 @@ __kernel void mprPenetrationKernel( __global int4* pairs, } } + +typedef float4 Quaternion; +#define make_float4 (float4) + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + + + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + + +inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, +const float4* dir, __global const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;im_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + + +bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* unitSphereDirections, + int numUnitSphereDirections, + float4* sep, + float* dmin) +{ + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test unit sphere directions + for (int i=0;i0) + crossje *= -1.f; + { + float dist; + bool result = true; + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); + + if(Max00.0f) + { + *sep = -(*sep); + } + return true; +} + + + +__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* unitSphereDirections, + __global float4* separatingNormals, + __global int* hasSeparatingAxis, + __global float* dmins, + int numUnitSphereDirections, + int numPairs + ) +{ + + int i = get_global_id(0); + + if (i=0)\n" +" b3Assert(ptIndex>=0);\n" " if (ptIndex<0)\n" " {\n" -" //ptIndex = 0;\n" +" ptIndex = 0;\n" " }\n" " *dotOut = maxDot;\n" " return ptIndex;\n" @@ -1082,8 +1082,8 @@ static const char* mprKernelsCL= \ " \n" " b3MprSimplex_t portal;\n" " \n" -" if (!hasSepAxis[pairIndex])\n" -" return -1;\n" +"// if (!hasSepAxis[pairIndex])\n" +" // return -1;\n" " \n" " hasSepAxis[pairIndex] = 0;\n" " int res;\n" @@ -1241,4 +1241,195 @@ static const char* mprKernelsCL= \ " }\n" " }\n" "}\n" +"typedef float4 Quaternion;\n" +"#define make_float4 (float4)\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;im_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices,\n" +" __global const float4* unitSphereDirections,\n" +" int numUnitSphereDirections,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test unit sphere directions\n" +" for (int i=0;i0)\n" +" crossje *= -1.f;\n" +" {\n" +" float dist;\n" +" bool result = true;\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +" \n" +" if(Max00.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" +" __global const b3RigidBodyData_t* rigidBodies, \n" +" __global const b3Collidable_t* collidables,\n" +" __global const b3ConvexPolyhedronData_t* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* unitSphereDirections,\n" +" __global float4* separatingNormals,\n" +" __global int* hasSeparatingAxis,\n" +" __global float* dmins,\n" +" int numUnitSphereDirections,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" \n" +" if (i maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" @@ -137,6 +165,7 @@ static const char* satClipKernelsCL= \ "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "{\n" " b3Quat ans;\n" @@ -171,6 +200,10 @@ static const char* satClipKernelsCL= \ " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" " return out;\n" "}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" "{\n" " return (b3Quat)(-q.xyz, q.w);\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index b4a96188c..dd468b153 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -153,6 +153,11 @@ static const char* satKernelsCL= \ "};\n" "#ifdef __cplusplus\n" "#else\n" +"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" "#define b3AtomicInc atomic_inc\n" "#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" @@ -189,6 +194,29 @@ static const char* satKernelsCL= \ " return false;\n" " return true;\n" "}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "#ifndef B3_MAT3x3_H\n" "#define B3_MAT3x3_H\n" @@ -220,6 +248,7 @@ static const char* satKernelsCL= \ "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "{\n" " b3Quat ans;\n" @@ -254,6 +283,10 @@ static const char* satKernelsCL= \ " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" " return out;\n" "}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" "{\n" " return (b3Quat)(-q.xyz, q.w);\n"