From 677722bba31a181e1f46cced6f776f6d144603dc Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Tue, 20 Aug 2013 03:19:59 -0700 Subject: [PATCH] support compound versus compound collision shape acceleration on GPU, using aabb tree versus aabb tree. Remove constructor from b3Vector3, to make it a POD type, so it can go into a union (and more compatible with OpenCL float4). Use b3MakeVector3 instead of constructor. Share some code between C++ and GPU in a shared file: see b3TransformAabb2 in src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h. Improve PairBench a bit, show timings and #overlapping pairs. Increase shadowmap default size to 8192x8192 (hope the GPU supports it). --- Demos3/GpuDemos/GpuDemo.h | 8 +- Demos3/GpuDemos/ParticleDemo.cpp | 6 +- Demos3/GpuDemos/broadphase/PairBench.cpp | 76 ++- .../GpuDemos/constraints/ConstraintsDemo.cpp | 28 +- Demos3/GpuDemos/main_opengl3core.cpp | 13 +- .../GpuDemos/raytrace/RaytracedShadowDemo.cpp | 16 +- Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp | 2 +- .../rigidbody/BulletDataExtractor.cpp | 4 +- Demos3/GpuDemos/rigidbody/ConcaveScene.cpp | 100 ++-- .../GpuDemos/rigidbody/GpuCompoundScene.cpp | 72 ++- Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp | 39 +- .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 8 +- Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp | 34 +- btgui/OpenGLWindow/GLInstancingRenderer.cpp | 54 +- .../BroadPhaseCollision/b3DynamicBvh.cpp | 10 +- .../BroadPhaseCollision/b3DynamicBvh.h | 36 +- .../b3DynamicBvhBroadphase.cpp | 2 +- .../b3DynamicBvhBroadphase.h | 2 +- .../BroadPhaseCollision/shared/b3Aabb.h | 42 +- .../NarrowPhaseCollision/b3Contact4.h | 4 +- src/Bullet3Common/b3Logging.cpp | 22 + src/Bullet3Common/b3Matrix3x3.h | 8 +- src/Bullet3Common/b3Quaternion.h | 6 +- src/Bullet3Common/b3Transform.h | 4 +- src/Bullet3Common/b3TransformUtil.h | 10 +- src/Bullet3Common/b3Vector3.h | 227 +++++---- src/Bullet3Common/premake4.lua | 2 + src/Bullet3Common/shared/b3Float4.h | 18 +
src/Bullet3Common/shared/b3Int2.h | 8 + src/Bullet3Common/shared/b3Mat3x3.h | 75 +++ src/Bullet3Common/shared/b3Quat.h | 64 +++ .../ConstraintSolver/b3JacobianEntry.h | 6 +- .../ConstraintSolver/b3PgsJacobiSolver.cpp | 62 +-- src/Bullet3Geometry/b3AabbUtil.h | 6 +- src/Bullet3Geometry/b3ConvexHullComputer.cpp | 10 +- .../b3GpuSapBroadphase.cpp | 10 +- .../b3ConvexHullContact.cpp | 239 ++++++--- .../NarrowphaseCollision/b3ConvexUtility.cpp | 8 +- .../NarrowphaseCollision/b3GjkEpa.cpp | 22 +- .../b3GjkPairDetector.cpp | 8 +- .../NarrowphaseCollision/b3OptimizedBvh.cpp | 8 +- .../NarrowphaseCollision/b3QuantizedBvh.cpp | 12 +- .../b3StridingMeshInterface.h | 2 +- .../NarrowphaseCollision/b3SupportMappings.h | 2 +- .../b3TriangleCallback.cpp | 2 +- .../b3VoronoiSimplexSolver.cpp | 8 +- .../kernels/primitiveContacts.h | 14 + .../NarrowphaseCollision/kernels/sat.cl | 314 +++++++++++- .../kernels/satClipHullContacts.h | 14 + .../NarrowphaseCollision/kernels/satKernels.h | 461 +++++++++++++++++- .../b3PrefixScanFloat4CL.cpp | 2 +- src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 2 +- src/Bullet3OpenCL/RigidBody/b3Config.h | 2 +- .../RigidBody/b3GpuNarrowPhase.cpp | 83 ++-- .../RigidBody/b3GpuPgsJacobiSolver.cpp | 6 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 14 +- .../RigidBody/kernels/batchingKernels.h | 14 + .../RigidBody/kernels/batchingKernelsNew.h | 14 + .../RigidBody/kernels/solverSetup.h | 14 + .../RigidBody/kernels/solverSetup2.h | 14 + .../RigidBody/kernels/solverUtils.h | 14 + test/OpenCL/KernelLaunch/main.cpp | 4 +- 62 files changed, 1827 insertions(+), 564 deletions(-) create mode 100644 src/Bullet3Common/shared/b3Mat3x3.h diff --git a/Demos3/GpuDemos/GpuDemo.h b/Demos3/GpuDemos/GpuDemo.h index 87d18145b..0cfe70ef0 100644 --- a/Demos3/GpuDemos/GpuDemo.h +++ b/Demos3/GpuDemos/GpuDemo.h @@ -48,14 +48,14 @@ public: arraySizeZ(10), #else - arraySizeX(30), + arraySizeX(30), arraySizeY(30), arraySizeZ(30), #endif m_useConcaveMesh(false), - gapX(14.3), - gapY(14.0), - 
gapZ(14.3), + gapX(16.3), + gapY(6.3), + gapZ(16.3), m_useInstancedCollisionShapes(true), m_instancingRenderer(0), m_window(0), diff --git a/Demos3/GpuDemos/ParticleDemo.cpp b/Demos3/GpuDemos/ParticleDemo.cpp index a8234a9eb..a12071128 100644 --- a/Demos3/GpuDemos/ParticleDemo.cpp +++ b/Demos3/GpuDemos/ParticleDemo.cpp @@ -243,10 +243,10 @@ void ParticleDemo::setupScene(const ConstructionInfo& ci) void* userPtr = (void*)userIndex; int collidableIndex = userIndex; b3Vector3 aabbMin,aabbMax; - b3Vector3 particleRadius(rad,rad,rad); + b3Vector3 particleRadius=b3MakeVector3(rad,rad,rad); - aabbMin = b3Vector3(position[0],position[1],position[2])-particleRadius; - aabbMax = b3Vector3(position[0],position[1],position[2])+particleRadius; + aabbMin = b3MakeVector3(position[0],position[1],position[2])-particleRadius; + aabbMax = b3MakeVector3(position[0],position[1],position[2])+particleRadius; m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,collidableIndex,1,1); userIndex++; diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index a250b74d6..934047d05 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -10,6 +10,7 @@ #include "OpenGLWindow/GLInstanceRendererInternalData.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "../../../btgui/Timing/b3Quickprof.h" +#include "../gwenUserInterface.h" static b3KeyboardCallback oldCallback = 0; extern bool gReset; @@ -49,7 +50,7 @@ __kernel void { int nodeID = get_global_id(0); float timeStepPos = 0.000166666; - float mAmplitude = 86.f; + float mAmplitude = 51.f; if( nodeID < numNodes ) { pBodyTimes[nodeID] += timeStepPos; @@ -102,6 +103,8 @@ struct PairBenchInternalData cl_kernel m_colorPairsKernel; cl_kernel m_updateAabbSimple; + GwenUserInterface* m_gui; + b3OpenCLArray* m_instancePosOrnColor; b3OpenCLArray* m_bodyTimes; PairBenchInternalData() @@ -149,6 +152,9 @@ static void PairKeyboardCallback(int key, int 
state) void PairBench::initPhysics(const ConstructionInfo& ci) { + m_data->m_gui = ci.m_gui; + + initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex); if (m_clData->m_clContext) { @@ -190,13 +196,13 @@ void PairBench::initPhysics(const ConstructionInfo& ci) { for (int k=0;kregisterGraphicsInstance(shapeId,position,orn,color,scaling); - b3Vector3 aabbHalfExtents(1,1,1); + b3Vector3 aabbHalfExtents=b3MakeVector3(1,1,1); b3Vector3 aabbMin = position-aabbHalfExtents; b3Vector3 aabbMax = position+aabbHalfExtents; @@ -210,7 +216,7 @@ void PairBench::initPhysics(const ConstructionInfo& ci) float camPos[4]={15.5,12.5,15.5,0}; m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(60); + m_instancingRenderer->setCameraDistance(130); m_instancingRenderer->writeTransforms(); m_data->m_broadphaseGPU->writeAabbsToGpu(); @@ -300,7 +306,11 @@ void PairBench::clientMoveAndDisplay() } } + bool updateOnGpu=true; + + if (updateOnGpu) { + B3_PROFILE("updateOnGpu"); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); @@ -308,16 +318,66 @@ void PairBench::clientMoveAndDisplay() launcher.launch1D( numObjects); clFinish(m_clData->m_clQueue); - } + } else { + B3_PROFILE("updateOnCpu"); + int allAabbs = m_data->m_broadphaseGPU->m_allAabbsCPU.size(); + + + + b3AlignedObjectArray posOrnColorsCpu; + m_data->m_instancePosOrnColor->copyToHost(posOrnColorsCpu); + + + + for (int nodeId=0;nodeIdm_broadphaseGPU->m_allAabbsCPU[nodeId].m_minVec = position-halfExtents; + m_data->m_broadphaseGPU->m_allAabbsCPU[nodeId].m_minIndices[3] = nodeId; + m_data->m_broadphaseGPU->m_allAabbsCPU[nodeId].m_maxVec = position+halfExtents; + m_data->m_broadphaseGPU->m_allAabbsCPU[nodeId].m_signedMaxIndices[3]= nodeId; + } + } + m_data->m_broadphaseGPU->writeAabbsToGpu(); + + + } + + unsigned long dt = 0; + { + b3Clock cl; + dt = 
cl.getTimeMicroseconds(); B3_PROFILE("calculateOverlappingPairs"); - m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numObjects); + int sz = sizeof(b3Int4)*64*numObjects; + + m_data->m_broadphaseGPU->calculateOverlappingPairs(16*numObjects); //int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); //printf("numPairs = %d\n", numPairs); + dt = cl.getTimeMicroseconds()-dt; } + + if (m_data->m_gui) + { + int allAabbs = m_data->m_broadphaseGPU->m_allAabbsCPU.size(); + int numOverlap = m_data->m_broadphaseGPU->getNumOverlap(); + + float time = dt/1000.f; + //printf("time = %f\n", time); + + char msg[1024]; + sprintf(msg,"#objects = %d, #overlapping pairs = %d, time = %f ms", allAabbs,numOverlap,time ); + //printf("msg=%s\n",msg); + m_data->m_gui->setStatusBarMessage(msg,true); + } + + if (animate) { + B3_PROFILE("animate"); GLint err = glGetError(); assert(err==GL_NO_ERROR); //color overlapping objects in red diff --git a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp index 192c5bbef..75a7c3e70 100644 --- a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp +++ b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp @@ -108,10 +108,10 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const { b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; int curColor = 0; @@ -127,7 +127,7 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -193,16 +193,16 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const //c = new b3Point2PointConstraint(pid,prevBody,b3Vector3(-1.1,0,0),b3Vector3(1.1,0,0)); float 
breakingThreshold=44; // c->setBreakingImpulseThreshold(breakingThreshold); - b3Vector3 pivotInA(-1.1,0,0); - b3Vector3 pivotInB (1.1,0,0); + b3Vector3 pivotInA=b3MakeVector3(-1.1,0,0); + b3Vector3 pivotInB=b3MakeVector3(1.1,0,0); int cid = m_data->m_rigidBodyPipeline->createPoint2PointConstraint(pid,prevBody,pivotInA,pivotInB,breakingThreshold); break; } case 1: { - b3Vector3 pivotInA(-1.05,0,0); - b3Vector3 pivotInB (1.05,0,0); + b3Vector3 pivotInA=b3MakeVector3(-1.05,0,0); + b3Vector3 pivotInB=b3MakeVector3(1.05,0,0); b3Transform frameInA,frameInB; frameInA.setIdentity(); @@ -276,12 +276,12 @@ void GpuConstraintsDemo::createStaticEnvironment(const ConstructionInfo& ci) { - b3Vector4 scaling(400,400,400,1); + b3Vector4 scaling=b3MakeVector4(400,400,400,1); int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,-405,0); + b3Vector3 position=b3MakeVector3(0,-405,0); b3Quaternion orn(0,0,0,1); - b3Vector4 color(0,0,1,1); + b3Vector4 color=b3MakeVector4(0,0,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0.f,position,orn,colIndex,index,false); diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index ca5608cef..60fa8954b 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -85,11 +85,11 @@ GpuDemo::CreateFunc* allDemos[]= { //ConcaveCompound2Scene::MyCreateFunc, - + //ConcaveSphereScene::MyCreateFunc, - + // ConcaveSphereScene::MyCreateFunc, @@ -102,6 +102,7 @@ GpuDemo::CreateFunc* allDemos[]= GpuConvexScene::MyCreateFunc, GpuCompoundScene::MyCreateFunc, + GpuCompoundPlaneScene::MyCreateFunc, GpuSphereScene::MyCreateFunc, @@ -112,11 +113,11 @@ GpuDemo::CreateFunc* allDemos[]= ConcaveCompoundScene::MyCreateFunc, - GpuCompoundPlaneScene::MyCreateFunc, + - GpuTetraScene::MyCreateFunc, + 
//GpuTetraScene::MyCreateFunc, - GpuSoftClothDemo::MyCreateFunc, + //GpuSoftClothDemo::MyCreateFunc, Bullet2FileDemo::MyCreateFunc, @@ -546,7 +547,7 @@ int main(int argc, char* argv[]) b3SetCustomPrintfFunc(myprintf); - b3Vector3 test(1,2,3); + b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; diff --git a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp index d30c14d9c..a21b84baf 100644 --- a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp +++ b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp @@ -116,10 +116,10 @@ int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci2) { b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; int curColor = 0; @@ -143,7 +143,7 @@ int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci2) { //mass=0.f; } - b3Vector3 position((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2); + b3Vector3 position=b3MakeVector3((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2); //b3Vector3 position(i*2.2,10+j*1.9,k*2.2); b3Quaternion orn(0,0,0,1); @@ -151,7 +151,7 @@ int GpuRaytraceScene::createDynamicsObjects(const ConstructionInfo& ci2) b3Vector4 color = colors[curColor]; curColor++; curColor&=3; - b3Vector4 scaling(1,1,1,1); + b3Vector4 scaling=b3MakeVector4(1,1,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -228,7 +228,7 @@ void GpuRaytraceScene::renderScene2() rayForward*= farPlane; b3Vector3 rightOffset; - b3Vector3 vertical(0.f,1.f,0.f); + b3Vector3 vertical=b3MakeVector3(0.f,1.f,0.f); b3Vector3 hor; hor = rayForward.cross(vertical); hor.normalize(); @@ -303,7 +303,7 @@ void GpuRaytraceScene::renderScene2() } } - b3Vector3 lightPos(1000,1000,100); + b3Vector3 
lightPos=b3MakeVector3(1000,1000,100); { B3_PROFILE("cast primary rays"); diff --git a/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp index ec64bf269..5ce6deabd 100644 --- a/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp @@ -38,7 +38,7 @@ void Bullet2FileDemo::setupScene(const ConstructionInfo& ci) // m_loader = new b3BulletDataExtractor(*ci.m_instancingRenderer,*m_data->m_np,*m_data->m_rigidBodyPipeline); // m_loader->convertAllObjects(bulletFile); - b3Vector3 pos(-20,10,0); + b3Vector3 pos=b3MakeVector3(-20,10,0); ci.m_instancingRenderer->setCameraTargetPosition(pos); ci.m_instancingRenderer->setCameraDistance(10); } diff --git a/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp index 7e81f009e..a28dd8d65 100644 --- a/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp +++ b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp @@ -328,7 +328,7 @@ int b3BulletDataExtractor::convertCollisionShape( Bullet3SerializeBullet2::b3Co { for ( i=0;im_unscaledPointsFloatPtr[i].m_floats[0], + b3Vector3 pt = b3MakeVector3(convexData->m_unscaledPointsFloatPtr[i].m_floats[0], convexData->m_unscaledPointsFloatPtr[i].m_floats[1], convexData->m_unscaledPointsFloatPtr[i].m_floats[2]);//convexData->m_unscaledPointsFloatPtr[i].m_floats[3]); @@ -591,7 +591,7 @@ GraphicsShape* b3BulletDataExtractor::createGraphicsShapeFromConvexHull(const b3 for (int f=0;fm_faces.size();f++) { const b3MyFace& face = utilPtr->m_faces[f]; - b3Vector3 normal(face.m_plane[0],face.m_plane[1],face.m_plane[2]); + b3Vector3 normal=b3MakeVector3(face.m_plane[0],face.m_plane[1],face.m_plane[2]); if (face.m_indices.size()>2) { diff --git a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp index 413542095..30f3dfe49 100644 --- a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp +++ b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp @@ -47,7 +47,7 
@@ GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(std::vectorsize(); @@ -81,9 +81,9 @@ GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(std::vectorm_vertices->at(i).xyzw[j] += shift[j]; - b3Vector3 vtx(shape->m_vertices->at(i).xyzw[0], + b3Vector3 vtx=b3MakeVector3(shape->m_vertices->at(i).xyzw[0], shape->m_vertices->at(i).xyzw[1], shape->m_vertices->at(i).xyzw[2]); verts.push_back(vtx*scaling); } - int colIndex = m_data->m_np->registerConcaveMesh(&verts,shape->m_indices,b3Vector3(1,1,1)); + int colIndex = m_data->m_np->registerConcaveMesh(&verts,shape->m_indices,b3MakeVector3(1,1,1)); { int strideInBytes = 9*sizeof(float); @@ -182,12 +182,12 @@ void ConcaveScene::createConcaveMesh(const ConstructionInfo& ci, const char* fil int shapeId = ci.m_instancingRenderer->registerShape(&shape->m_vertices->at(0).xyzw[0], shape->m_numvertices, &shape->m_indices->at(0), shape->m_numIndices); b3Quaternion orn(0,0,0,1); - b3Vector4 color(0.3,0.3,1,1.f);//0.5);//1.f + b3Vector4 color=b3MakeVector4(0.3,0.3,1,1.f);//0.5);//1.f { float mass = 0.f; - b3Vector3 position(0,0,0); + b3Vector3 position=b3MakeVector3(0,0,0); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); index++; @@ -222,9 +222,9 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) char* fileName = "samurai_monastry.obj"; // char* fileName = "teddy2_VHACD_CHs.obj"; - b3Vector3 shift1(0,0,0);//0,230,80);//150,-100,-120); + b3Vector3 shift1=b3MakeVector3(0,0,0);//0,230,80);//150,-100,-120); - b3Vector4 scaling(10,10,10,1); + b3Vector4 scaling=b3MakeVector4(10,10,10,1); // createConcaveMesh(ci,"plane100.obj",shift1,scaling); //createConcaveMesh(ci,"plane100.obj",shift,scaling); @@ -247,12 +247,12 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) int mask=1; int index=0; { - b3Vector4 scaling(400,1.,400,1); + b3Vector4 
scaling=b3MakeVector4(400,1.,400,1); int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,-2,0); + b3Vector3 position=b3MakeVector3(0,-2,0); b3Quaternion orn(0,0,0,1); - b3Vector4 color(0,0,1,1); + b3Vector4 color=b3MakeVector4(0,0,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0.f,position,orn,colIndex,index,false); @@ -268,7 +268,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci) //float camPos[4]={1,12.5,1.5,0}; m_instancingRenderer->setCameraPitch(45); m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(155); + m_instancingRenderer->setCameraDistance(355); char msg[1024]; int numInstances = m_data->m_rigidBodyPipeline->getNumBodies(); sprintf(msg,"Num objects = %d",numInstances); @@ -296,15 +296,15 @@ void ConcaveScene::createDynamicObjects(const ConstructionInfo& ci) int curColor = 0; b3Vector4 colors[4] = { - b3Vector4(1,1,1,1), - b3Vector4(1,1,0.3,1), - b3Vector4(0.3,1,1,1), - b3Vector4(0.3,0.3,1,1), + b3MakeVector4(1,1,1,1), + b3MakeVector4(1,1,0.3,1), + b3MakeVector4(0.3,1,1,1), + b3MakeVector4(0.3,0.3,1,1), }; b3ConvexUtility* utilPtr = new b3ConvexUtility(); - b3Vector4 scaling(1,1,1,1); + b3Vector4 scaling=b3MakeVector4(1,1,1,1); { b3AlignedObjectArray verts; @@ -313,7 +313,7 @@ void ConcaveScene::createDynamicObjects(const ConstructionInfo& ci) for (int i=0;i tmpVertices; //add transformed graphics vertices and indices - b3Vector3 myScaling(50,50,50);//300,300,300); + b3Vector3 myScaling=b3MakeVector3(50,50,50);//300,300,300); for (int v=0;vregisterGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -612,9 +612,9 @@ void ConcaveCompoundScene::createDynamicObjects(const ConstructionInfo& ci) 
b3Vector3 childPositions[3] = { - b3Vector3(0,-2,0), - b3Vector3(0,0,0), - b3Vector3(0,0,2) + b3MakeVector3(0,-2,0), + b3MakeVector3(0,0,0), + b3MakeVector3(0,0,2) }; b3AlignedObjectArray childShapes; @@ -645,7 +645,7 @@ b3Vector3 childPositions[3] = { for (int v=0;vregisterGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -715,10 +715,10 @@ void ConcaveSphereScene::createDynamicObjects(const ConstructionInfo& ci) { b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; int index=0; @@ -737,7 +737,7 @@ void ConcaveSphereScene::createDynamicObjects(const ConstructionInfo& ci) float mass = 1.f; - b3Vector3 position(-(ci.arraySizeX/2)*8+i*8,50+j*8,-(ci.arraySizeZ/2)*8+k*8); + b3Vector3 position=b3MakeVector3(-(ci.arraySizeX/2)*8+i*8,50+j*8,-(ci.arraySizeZ/2)*8+k*8); //b3Vector3 position(0,-41,0);//0,0,0);//i*radius*3,-41+j*radius*3,k*radius*3); @@ -746,7 +746,7 @@ void ConcaveSphereScene::createDynamicObjects(const ConstructionInfo& ci) b3Vector4 color = colors[curColor]; curColor++; curColor&=3; - b3Vector4 scaling(radius,radius,radius,1); + b3Vector4 scaling=b3MakeVector4(radius,radius,radius,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(prevGraphicsShapeIndex,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); diff --git a/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp index f9977811d..d1c556ed1 100644 --- a/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp @@ -19,6 +19,10 @@ #include "OpenGLWindow/GLInstanceGraphicsShape.h" +#define NUM_COMPOUND_CHILDREN_X 4 +#define NUM_COMPOUND_CHILDREN_Y 4 +#define 
NUM_COMPOUND_CHILDREN_Z 4 + void GpuCompoundScene::setupScene(const ConstructionInfo& ci) @@ -42,21 +46,33 @@ void GpuCompoundScene::setupScene(const ConstructionInfo& ci) int childColIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 childPositions[3] = { +/* b3Vector3 childPositions[3] = { b3Vector3(0,-2,0), b3Vector3(0,0,0), b3Vector3(0,0,2) }; + */ b3AlignedObjectArray childShapes; - int numChildShapes = 3; - for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -134,7 +150,7 @@ void GpuCompoundScene::setupScene(const ConstructionInfo& ci) float camPos[4]={0,0,0};//65.5,4.5,65.5,0}; //float camPos[4]={1,12.5,1.5,0}; m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(20); + m_instancingRenderer->setCameraDistance(320); } @@ -198,10 +214,10 @@ void GpuCompoundScene::createStaticEnvironment(const ConstructionInfo& ci) } b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; int curColor = 1; @@ -211,7 +227,7 @@ void GpuCompoundScene::createStaticEnvironment(const ConstructionInfo& ci) float mass = 0.f; //b3Vector3 position((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2); - b3Vector3 position(0,-41,0); + b3Vector3 position=b3MakeVector3(0,-41,0); b3Quaternion orn(0,0,0,1); @@ -219,7 +235,7 @@ void GpuCompoundScene::createStaticEnvironment(const ConstructionInfo& ci) b3Vector4 color = colors[curColor]; curColor++; curColor&=3; - b3Vector4 scaling(radius,radius,radius,1); + b3Vector4 scaling=b3MakeVector4(radius,radius,radius,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(prevGraphicsShapeIndex,position,orn,color,scaling); int pid = 
m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -234,17 +250,21 @@ void GpuCompoundPlaneScene::createStaticEnvironment(const ConstructionInfo& ci) { int index=0; - b3Vector3 normal(0,1,0); + b3Vector3 normal=b3MakeVector3(0,1,0); float constant=0.f; - int colIndex = m_data->m_np->registerPlaneShape(normal,constant);//>registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,0,0); - b3Quaternion orn(0,0,0,1); - // b3Quaternion orn(b3Vector3(1,0,0),0.3); - b3Vector4 color(0,0,1,1); - b3Vector4 scaling(100,0.01,100,1); int strideInBytes = 9*sizeof(float); int numVertices = sizeof(cube_vertices)/strideInBytes; int numIndices = sizeof(cube_indices)/sizeof(int); + + b3Vector4 scaling=b3MakeVector4(400,1.,400,1); + + //int colIndex = m_data->m_np->registerPlaneShape(normal,constant);//>registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); + b3Vector3 position=b3MakeVector3(0,0,0); + b3Quaternion orn(0,0,0,1); + // b3Quaternion orn(b3Vector3(1,0,0),0.3); + b3Vector4 color=b3MakeVector4(0,0,1,1); + int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); diff --git a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp index 41febc3fd..bac3ccf01 100644 --- a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp @@ -26,10 +26,10 @@ b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; void GpuConvexScene::setupScene(const ConstructionInfo& ci) @@ -49,7 +49,7 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci) //float camPos[4]={1,12.5,1.5,0}; 
m_instancingRenderer->setCameraTargetPosition(camPos); - m_instancingRenderer->setCameraDistance(120); + m_instancingRenderer->setCameraDistance(114); //m_instancingRenderer->setCameraYaw(85); m_instancingRenderer->setCameraYaw(30); m_instancingRenderer->setCameraPitch(225); @@ -123,7 +123,7 @@ int GpuConvexScene::createDynamicsObjects2(const ConstructionInfo& ci, const flo for (int i=0;iregisterGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index,false); @@ -200,12 +200,12 @@ void GpuConvexScene::createStaticEnvironment(const ConstructionInfo& ci) { - b3Vector4 scaling(400,400,400,1); + b3Vector4 scaling=b3MakeVector4(400,400,400,1); int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,-400,0); + b3Vector3 position=b3MakeVector3(0,-400,0); b3Quaternion orn(0,0,0,1); - b3Vector4 color(0,0,1,1); + b3Vector4 color=b3MakeVector4(0,0,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0.f,position,orn,colIndex,index,false); @@ -226,12 +226,12 @@ void GpuConvexPlaneScene::createStaticEnvironment(const ConstructionInfo& ci) { - b3Vector4 scaling(400,400,400,1); + b3Vector4 scaling=b3MakeVector4(400,400,400,1); int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,-400,0); + b3Vector3 position=b3MakeVector3(0,-400,0); b3Quaternion orn(0,0,0,1); - b3Vector4 color(0,0,1,1); + b3Vector4 color=b3MakeVector4(0,0,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0.f,position,orn,colIndex,index,false); @@ -281,8 +281,7 @@ static float mytetra_vertices[] = -1.f, 0, -1.f, 0.5f, 0, 1,0, 0,0, -1.f, 0, 1.f, 0.5f, 
0, 1,0, 1,0, 1.f, 0, 1.f, 0.5f, 0, 1,0, 1,1, - 1.f, 0, -1.f, 0.5f, 0, 1,0, 0,1, - 0, -1, 0 , 0.5f, 0, 1,0, 0,1 + 1.f, 0, -1.f, 0.5f, 0, 1,0, 0,1 }; static int mytetra_indices[]= @@ -355,7 +354,7 @@ void GpuTetraScene::createFromTetGenData(const char* ele, sscanf(ele,"%d %d %d %d %d",&index,&ni[0],&ni[1],&ni[2],&ni[3]); ele+=nextLine(ele); - b3Vector3 average(0,0,0); + b3Vector3 average=b3MakeVector3(0,0,0); for (int v=0;v<4;v++) { @@ -382,9 +381,9 @@ void GpuTetraScene::createFromTetGenData(const char* ele, { - b3Vector4 scaling(1,1,1,1); + b3Vector4 scaling=b3MakeVector4(1,1,1,1); int colIndex = m_data->m_np->registerConvexHullShape(&mytetra_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 position(0,150,0); + b3Vector3 position=b3MakeVector3(0,150,0); // position+=average;//*1.2;//*2; position+=average*1.2;//*2; //rigidBodyPositions.push_back(position); diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index dd8fe26d2..7396ce01d 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -109,7 +109,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci) m_data->m_config.m_maxConvexBodies = b3Max(m_data->m_config.m_maxConvexBodies,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ+10); m_data->m_config.m_maxConvexShapes = m_data->m_config.m_maxConvexBodies; - m_data->m_config.m_maxBroadphasePairs = 16*m_data->m_config.m_maxConvexBodies; + m_data->m_config.m_maxBroadphasePairs = 32*m_data->m_config.m_maxConvexBodies; m_data->m_config.m_maxContactCapacity = m_data->m_config.m_maxBroadphasePairs; @@ -249,7 +249,7 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() b3Vector3 GpuRigidBodyDemo::getRayTo(int x,int y) { if (!m_instancingRenderer) - return b3Vector3(0,0,0); + return b3MakeVector3(0,0,0); float top = 1.f; float bottom = -1.f; @@ -268,7 +268,7 @@ b3Vector3 GpuRigidBodyDemo::getRayTo(int x,int y) rayForward*= farPlane; b3Vector3 
rightOffset; - b3Vector3 m_cameraUp(0,1,0); + b3Vector3 m_cameraUp=b3MakeVector3(0,1,0); b3Vector3 vertical = m_cameraUp; b3Vector3 hor; @@ -390,7 +390,7 @@ bool GpuRigidBodyDemo::mouseButtonCallback(int button, int state, float x, float b3Vector3 pivotInA = tr.inverse()*pivotInB; if (m_data->m_pickFixedBody<0) { - b3Vector3 pos(0,0,0); + b3Vector3 pos=b3MakeVector3(0,0,0); b3Quaternion orn(0,0,0,1); int fixedSphere = m_data->m_np->registerConvexHullShape(0,0,0,0);//>registerSphereShape(0.1); m_data->m_pickFixedBody = m_data->m_rigidBodyPipeline->registerPhysicsInstance(0,pos,orn,fixedSphere,0,false); diff --git a/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp index d4daa77ce..1d4b93922 100644 --- a/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp @@ -71,10 +71,10 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) } b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; int curColor = 0; @@ -84,14 +84,14 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) float mass = 0.f; //b3Vector3 position((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2); - b3Vector3 position(0,0,0); + b3Vector3 position=b3MakeVector3(0,0,0); b3Quaternion orn(0,0,0,1); b3Vector4 color = colors[curColor]; curColor++; curColor&=3; - b3Vector4 scaling(radius,radius,radius,1); + b3Vector4 scaling=b3MakeVector4(radius,radius,radius,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(prevGraphicsShapeIndex,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index, writeInstanceToGpu); @@ -110,10 +110,10 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) b3Vector4 colors[4] = { - b3Vector4(1,0,0,1), - b3Vector4(0,1,0,1), - b3Vector4(0,1,1,1), - 
b3Vector4(1,1,0,1), + b3MakeVector4(1,0,0,1), + b3MakeVector4(0,1,0,1), + b3MakeVector4(0,1,1,1), + b3MakeVector4(1,1,0,1), }; @@ -139,7 +139,7 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) int i=0,j=0,k=0; float mass = 0.f; - b3Vector3 position(0,0,0); + b3Vector3 position=b3MakeVector3(0,0,0); //b3Vector3 position((j&1)+i*142.2,-51+j*142.,(j&1)+k*142.2); //b3Vector3 position(0,-41,0);//0,0,0);//i*radius*3,-41+j*radius*3,k*radius*3); @@ -148,7 +148,7 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) b3Vector4 color = colors[curColor]; curColor++; curColor&=3; - b3Vector4 scaling(radius,radius,radius,1); + b3Vector4 scaling=b3MakeVector4(radius,radius,radius,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(prevGraphicsShapeIndex,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index, writeInstanceToGpu); @@ -161,9 +161,9 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) if (1) { int shapeId = ci.m_instancingRenderer->registerShape(&cube_vertices[0],numVertices,cube_indices,numIndices); - b3Vector4 scaling(0.5,0.5,0.5,1);//1,1,1,1);//0.1,0.1,0.1,1); + b3Vector4 scaling=b3MakeVector4(0.5,0.5,0.5,1);//1,1,1,1);//0.1,0.1,0.1,1); int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector3 normal(0,-1,0); + b3Vector3 normal=b3MakeVector3(0,-1,0); float constant=2; @@ -173,11 +173,11 @@ void GpuSphereScene::setupScene(const ConstructionInfo& ci) //int i=0;int j=0; { //int colIndex = m_data->m_np->registerPlaneShape(normal,constant);//>registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling); - b3Vector4 position(2*i,70+k*2,2*j+8,0); + b3Vector4 position=b3MakeVector4(2*i,70+k*2,2*j+8,0); //b3Quaternion orn(0,0,0,1); - b3Quaternion orn(b3Vector3(1,0,0),0.3); + b3Quaternion orn(b3MakeVector3(1,0,0),0.3); - b3Vector4 color(0,0,1,1); + b3Vector4 
color=b3MakeVector4(0,0,1,1); int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling); int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(1.f,position,orn,colIndex,index,false); diff --git a/btgui/OpenGLWindow/GLInstancingRenderer.cpp b/btgui/OpenGLWindow/GLInstancingRenderer.cpp index f0539cd53..8ef1a7e06 100644 --- a/btgui/OpenGLWindow/GLInstancingRenderer.cpp +++ b/btgui/OpenGLWindow/GLInstancingRenderer.cpp @@ -16,9 +16,9 @@ subject to the following restrictions: ///todo: make this configurable in the gui bool useShadowMap=true; -float shadowMapWidth=4096;//8192, 2048 -float shadowMapHeight=4096; -float shadowMapWorldSize=200; +float shadowMapWidth=8192; +float shadowMapHeight=8192; +float shadowMapWorldSize=300; float WHEEL_MULTIPLIER=3.f; float MOUSE_MOVE_MULTIPLIER = 0.4f; @@ -143,10 +143,10 @@ struct InternalDataRenderer : public GLInstanceRendererInternalData GLuint m_shadowTexture; InternalDataRenderer() : - m_cameraPosition(b3Vector3(0,0,0)), - m_cameraTargetPosition(b3Vector3(15,2,-24)), + m_cameraPosition(b3MakeVector3(0,0,0)), + m_cameraTargetPosition(b3MakeVector3(15,2,-24)), m_cameraDistance(150), - m_cameraUp(0,1,0), + m_cameraUp(b3MakeVector3(0,1,0)), m_azi(100.f),//135.f), //m_ele(25.f), m_ele(25.f), @@ -971,17 +971,17 @@ void b3CreateLookAt(const b3Vector3& eye, const b3Vector3& center,const b3Vec b3Vector3 s = (f.cross(u)).normalized(); u = s.cross(f); - result[0*4+0] = s.getX(); - result[1*4+0] = s.getY(); - result[2*4+0] = s.getZ(); + result[0*4+0] = s.x; + result[1*4+0] = s.y; + result[2*4+0] = s.z; - result[0*4+1] = u.getX(); - result[1*4+1] = u.getY(); - result[2*4+1] = u.getZ(); + result[0*4+1] = u.x; + result[1*4+1] = u.y; + result[2*4+1] = u.z; - result[0*4+2] =-f.getX(); - result[1*4+2] =-f.getY(); - result[2*4+2] =-f.getZ(); + result[0*4+2] =-f.x; + result[1*4+2] =-f.y; + result[2*4+2] =-f.z; result[0*4+3] = 0.f; result[1*4+3] = 0.f; @@ -1021,10 +1021,10 @@ void 
GLInstancingRenderer::updateCamera() b3Quaternion rot(m_data->m_cameraUp,razi); - b3Vector3 eyePos(0,0,0); + b3Vector3 eyePos = b3MakeVector3(0,0,0); eyePos[m_forwardAxis] = -m_data->m_cameraDistance; - b3Vector3 forward(eyePos[0],eyePos[1],eyePos[2]); + b3Vector3 forward = b3MakeVector3(eyePos[0],eyePos[1],eyePos[2]); if (forward.length2() < B3_EPSILON) { forward.setValue(1.f,0.f,0.f); @@ -1034,9 +1034,9 @@ void GLInstancingRenderer::updateCamera() eyePos = b3Matrix3x3(rot) * b3Matrix3x3(roll) * eyePos; - m_data->m_cameraPosition[0] = eyePos.getX(); - m_data->m_cameraPosition[1] = eyePos.getY(); - m_data->m_cameraPosition[2] = eyePos.getZ(); + m_data->m_cameraPosition[0] = eyePos.x; + m_data->m_cameraPosition[1] = eyePos.y; + m_data->m_cameraPosition[2] = eyePos.z; m_data->m_cameraPosition += m_data->m_cameraTargetPosition; if (m_screenWidth == 0 && m_screenHeight == 0) @@ -1096,14 +1096,14 @@ float GLInstancingRenderer::getCameraPitch() const void GLInstancingRenderer::setCameraTargetPosition(float cameraPos[4]) { - m_data->m_cameraTargetPosition = b3Vector3(cameraPos[0],cameraPos[1],cameraPos[2]); + m_data->m_cameraTargetPosition = b3MakeVector3(cameraPos[0],cameraPos[1],cameraPos[2]); } void GLInstancingRenderer::getCameraTargetPosition(float cameraPos[4]) const { - cameraPos[0] = m_data->m_cameraTargetPosition.getX(); - cameraPos[1] = m_data->m_cameraTargetPosition.getY(); - cameraPos[2] = m_data->m_cameraTargetPosition.getZ(); + cameraPos[0] = m_data->m_cameraTargetPosition.x; + cameraPos[1] = m_data->m_cameraTargetPosition.y; + cameraPos[2] = m_data->m_cameraTargetPosition.z; } @@ -1328,12 +1328,12 @@ void GLInstancingRenderer::renderSceneInternal(int renderMode) GLint err = glGetError(); b3Assert(err==GL_NO_ERROR); } - static b3Vector3 lightPos(-5.f,200,-40);//20,15,10);//-13,6,2);// = b3Vector3(0.5f,2,2); + static b3Vector3 lightPos = b3MakeVector3(-5.f,200,-40);//20,15,10);//-13,6,2);// = b3Vector3(0.5f,2,2); // lightPos.y+=0.1f; 
b3CreateOrtho(-shadowMapWorldSize,shadowMapWorldSize,-shadowMapWorldSize,shadowMapWorldSize,1,300,depthProjectionMatrix);//-14,14,-14,14,1,200, depthProjectionMatrix); float depthViewMatrix[4][4]; - b3Vector3 center(0,0,0); - b3Vector3 up(0,1,0); + b3Vector3 center = b3MakeVector3(0,0,0); + b3Vector3 up =b3MakeVector3(0,1,0); b3CreateLookAt(lightPos,center,up,&depthViewMatrix[0][0]); //b3CreateLookAt(lightPos,m_data->m_cameraTargetPosition,b3Vector3(0,1,0),(float*)depthModelViewMatrix2); diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp index 1b13be75e..16991bc04 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp @@ -300,9 +300,9 @@ static b3DbvtNode* b3TopDown(b3DynamicBvh* pdbvt, b3NodeArray& leaves, int bu_treshold) { - static const b3Vector3 axis[]={b3Vector3(1,0,0), - b3Vector3(0,1,0), - b3Vector3(0,0,1)}; + static const b3Vector3 axis[]={b3MakeVector3(1,0,0), + b3MakeVector3(0,1,0), + b3MakeVector3(0,0,1)}; if(leaves.size()>1) { if(leaves.size()>bu_treshold) @@ -527,7 +527,7 @@ void b3DynamicBvh::update(b3DbvtNode* leaf,b3DbvtVolume& volume) bool b3DynamicBvh::update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector3& velocity,b3Scalar margin) { if(leaf->volume.Contain(volume)) return(false); - volume.Expand(b3Vector3(margin,margin,margin)); + volume.Expand(b3MakeVector3(margin,margin,margin)); volume.SignedExpand(velocity); update(leaf,volume); return(true); @@ -546,7 +546,7 @@ bool b3DynamicBvh::update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector bool b3DynamicBvh::update(b3DbvtNode* leaf,b3DbvtVolume& volume,b3Scalar margin) { if(leaf->volume.Contain(volume)) return(false); - volume.Expand(b3Vector3(margin,margin,margin)); + volume.Expand(b3MakeVector3(margin,margin,margin)); update(leaf,volume); return(true); } diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h 
b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h index c3012a423..78d20a97c 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h @@ -404,7 +404,7 @@ inline b3DbvtAabbMm b3DbvtAabbMm::FromCE(const b3Vector3& c,const b3Vector3& e // inline b3DbvtAabbMm b3DbvtAabbMm::FromCR(const b3Vector3& c,b3Scalar r) { - return(FromCE(c,b3Vector3(r,r,r))); + return(FromCE(c,b3MakeVector3(r,r,r))); } // @@ -472,22 +472,22 @@ B3_DBVT_INLINE int b3DbvtAabbMm::Classify(const b3Vector3& n,b3Scalar o,int s) b3Vector3 pi,px; switch(s) { - case (0+0+0): px=b3Vector3(mi.x,mi.y,mi.z); - pi=b3Vector3(mx.x,mx.y,mx.z);break; - case (1+0+0): px=b3Vector3(mx.x,mi.y,mi.z); - pi=b3Vector3(mi.x,mx.y,mx.z);break; - case (0+2+0): px=b3Vector3(mi.x,mx.y,mi.z); - pi=b3Vector3(mx.x,mi.y,mx.z);break; - case (1+2+0): px=b3Vector3(mx.x,mx.y,mi.z); - pi=b3Vector3(mi.x,mi.y,mx.z);break; - case (0+0+4): px=b3Vector3(mi.x,mi.y,mx.z); - pi=b3Vector3(mx.x,mx.y,mi.z);break; - case (1+0+4): px=b3Vector3(mx.x,mi.y,mx.z); - pi=b3Vector3(mi.x,mx.y,mi.z);break; - case (0+2+4): px=b3Vector3(mi.x,mx.y,mx.z); - pi=b3Vector3(mx.x,mi.y,mi.z);break; - case (1+2+4): px=b3Vector3(mx.x,mx.y,mx.z); - pi=b3Vector3(mi.x,mi.y,mi.z);break; + case (0+0+0): px=b3MakeVector3(mi.x,mi.y,mi.z); + pi=b3MakeVector3(mx.x,mx.y,mx.z);break; + case (1+0+0): px=b3MakeVector3(mx.x,mi.y,mi.z); + pi=b3MakeVector3(mi.x,mx.y,mx.z);break; + case (0+2+0): px=b3MakeVector3(mi.x,mx.y,mi.z); + pi=b3MakeVector3(mx.x,mi.y,mx.z);break; + case (1+2+0): px=b3MakeVector3(mx.x,mx.y,mi.z); + pi=b3MakeVector3(mi.x,mi.y,mx.z);break; + case (0+0+4): px=b3MakeVector3(mi.x,mi.y,mx.z); + pi=b3MakeVector3(mx.x,mx.y,mi.z);break; + case (1+0+4): px=b3MakeVector3(mx.x,mi.y,mx.z); + pi=b3MakeVector3(mi.x,mx.y,mi.z);break; + case (0+2+4): px=b3MakeVector3(mi.x,mx.y,mx.z); + pi=b3MakeVector3(mx.x,mi.y,mi.z);break; + case (1+2+4): px=b3MakeVector3(mx.x,mx.y,mx.z); + 
pi=b3MakeVector3(mi.x,mi.y,mi.z);break; } if((b3Dot(n,px)+o)<0) return(-1); if((b3Dot(n,pi)+o)>=0) return(+1); @@ -498,7 +498,7 @@ B3_DBVT_INLINE int b3DbvtAabbMm::Classify(const b3Vector3& n,b3Scalar o,int s) B3_DBVT_INLINE b3Scalar b3DbvtAabbMm::ProjectMinimum(const b3Vector3& v,unsigned signs) const { const b3Vector3* b[]={&mx,&mi}; - const b3Vector3 p( b[(signs>>0)&1]->x, + const b3Vector3 p = b3MakeVector3( b[(signs>>0)&1]->x, b[(signs>>1)&1]->y, b[(signs>>2)&1]->z); return(b3Dot(p,v)); diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp index 0b0833bef..dd0377a5c 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp @@ -622,7 +622,7 @@ void b3DynamicBvhBroadphase::getBroadphaseAabb(b3Vector3& aabbMin,b3Vector bounds=m_sets[0].m_root->volume; else if(!m_sets[1].empty()) bounds=m_sets[1].m_root->volume; else - bounds=b3DbvtVolume::FromCR(b3Vector3(0,0,0),0); + bounds=b3DbvtVolume::FromCR(b3MakeVector3(0,0,0),0); aabbMin=bounds.Mins(); aabbMax=bounds.Maxs(); } diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h index c06612bf6..df68f5d58 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h @@ -168,7 +168,7 @@ struct b3DynamicBvhBroadphase b3BroadphaseProxy* createProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,int shapeType,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask); virtual void destroyProxy(b3BroadphaseProxy* proxy,b3Dispatcher* dispatcher); virtual void setAabb(b3BroadphaseProxy* proxy,const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3Dispatcher* dispatcher); - virtual void rayTest(const b3Vector3& rayFrom,const b3Vector3& rayTo, 
b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin=b3Vector3(0,0,0), const b3Vector3& aabbMax = b3Vector3(0,0,0)); + virtual void rayTest(const b3Vector3& rayFrom,const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin=b3MakeVector3(0,0,0), const b3Vector3& aabbMax = b3MakeVector3(0,0,0)); virtual void aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& callback); virtual void getAabb(b3BroadphaseProxy* proxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const; diff --git a/src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h b/src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h index bcb1262ec..7f9bf990b 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h +++ b/src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h @@ -2,18 +2,58 @@ #ifndef B3_AABB_H #define B3_AABB_H + +#include "Bullet3Common/shared/b3Float4.h" +#include "Bullet3Common/shared/b3Mat3x3.h" + +typedef struct b3Aabb b3Aabb_t; + struct b3Aabb { union { float m_min[4]; + b3Float4 m_minVec; int m_minIndices[4]; }; union { - float m_max[4]; + float m_max[4]; + b3Float4 m_maxVec; int m_signedMaxIndices[4]; }; }; +inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin, + b3Float4ConstArg pos, + b3QuatConstArg orn, + b3Float4* aabbMinOut,b3Float4* aabbMaxOut) +{ + b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin); + localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f); + b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin); + b3Mat3x3 m; + m = b3QuatGetRotationMatrix(orn); + b3Mat3x3 abs_b = b3AbsoluteMat3x3(m); + b3Float4 center = b3TransformPoint(localCenter,pos,orn); + + b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)), + b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)), + b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)), + 0.f); + *aabbMinOut = center-extent; + *aabbMaxOut = center+extent; +} + +/// conservative test 
for overlap between two aabbs +inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1, + b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2) +{ + bool overlap = true; + overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap; + overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap; + overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap; + return overlap; +} + #endif //B3_AABB_H diff --git a/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h b/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h index d52db16d2..fb2516567 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h @@ -36,9 +36,9 @@ B3_ATTRIBUTE_ALIGNED16(struct) b3Contact4 : public b3Contact4Data void setFrictionCoeff( float c ) { b3Assert( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (unsigned short)(c*0xffff); } //float& getNPoints() { return m_worldNormal[3]; } - int getNPoints() const { return (int) m_worldNormalOnB[3]; } + int getNPoints() const { return (int) m_worldNormalOnB.w; } - float getPenetration(int idx) const { return m_worldPosB[idx][3]; } + float getPenetration(int idx) const { return m_worldPosB[idx].w; } bool isInvalid() const { return (getBodyA()==0 || getBodyB()==0); } }; diff --git a/src/Bullet3Common/b3Logging.cpp b/src/Bullet3Common/b3Logging.cpp index ae9353075..7586db0e7 100644 --- a/src/Bullet3Common/b3Logging.cpp +++ b/src/Bullet3Common/b3Logging.cpp @@ -18,22 +18,44 @@ subject to the following restrictions: #include #include +#ifdef _WIN32 +#include +#endif //_WIN32 + void b3PrintfFuncDefault(const char* msg) { +#ifdef _WIN32 + OutputDebugStringA(msg); +#else printf("%s",msg); +#endif + } void b3WarningMessageFuncDefault(const char* msg) { +#ifdef _WIN32 + OutputDebugStringA(msg); +#else printf("%s",msg); +#endif + } + void b3ErrorMessageFuncDefault(const char* msg) { +#ifdef _WIN32 + 
OutputDebugStringA(msg); +#else printf("%s",msg); +#endif + } + + static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault; static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault; static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault; diff --git a/src/Bullet3Common/b3Matrix3x3.h b/src/Bullet3Common/b3Matrix3x3.h index 98d699f02..63541e5c5 100644 --- a/src/Bullet3Common/b3Matrix3x3.h +++ b/src/Bullet3Common/b3Matrix3x3.h @@ -128,7 +128,7 @@ public: * @param i Column number 0 indexed */ B3_FORCE_INLINE b3Vector3 getColumn(int i) const { - return b3Vector3(m_el[0][i],m_el[1][i],m_el[2][i]); + return b3MakeVector3(m_el[0][i],m_el[1][i],m_el[2][i]); } @@ -1019,7 +1019,7 @@ b3Matrix3x3::adjoint() const B3_FORCE_INLINE b3Matrix3x3 b3Matrix3x3::inverse() const { - b3Vector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1)); + b3Vector3 co = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1)); b3Scalar det = (*this)[0].dot(co); b3FullAssert(det != b3Scalar(0.0)); b3Scalar s = b3Scalar(1.0) / det; @@ -1143,7 +1143,7 @@ operator*(const b3Matrix3x3& m, const b3Vector3& v) #if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON) return v.dot3(m[0], m[1], m[2]); #else - return b3Vector3(m[0].dot(v), m[1].dot(v), m[2].dot(v)); + return b3MakeVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v)); #endif } @@ -1184,7 +1184,7 @@ operator*(const b3Vector3& v, const b3Matrix3x3& m) return b3Vector3(c0); #else - return b3Vector3(m.tdotx(v), m.tdoty(v), m.tdotz(v)); + return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v)); #endif } diff --git a/src/Bullet3Common/b3Quaternion.h b/src/Bullet3Common/b3Quaternion.h index bde9288bd..e581cf605 100644 --- a/src/Bullet3Common/b3Quaternion.h +++ b/src/Bullet3Common/b3Quaternion.h @@ -407,9 +407,9 @@ public: b3Scalar s_squared = 1.f-m_floats[3]*m_floats[3]; if (s_squared < b3Scalar(10.) 
* B3_EPSILON) //Check for divide by zero - return b3Vector3(1.0, 0.0, 0.0); // Arbitrary + return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary b3Scalar s = 1.f/b3Sqrt(s_squared); - return b3Vector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s); + return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s); } /**@brief Return the inverse of this quaternion */ @@ -848,7 +848,7 @@ b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v) #elif defined(B3_USE_NEON) return b3Vector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask)); #else - return b3Vector3(q.getX(),q.getY(),q.getZ()); + return b3MakeVector3(q.getX(),q.getY(),q.getZ()); #endif } diff --git a/src/Bullet3Common/b3Transform.h b/src/Bullet3Common/b3Transform.h index 802dda646..fa480759b 100644 --- a/src/Bullet3Common/b3Transform.h +++ b/src/Bullet3Common/b3Transform.h @@ -45,7 +45,7 @@ public: * @param q Rotation from quaternion * @param c Translation from Vector (default 0,0,0) */ explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q, - const b3Vector3& c = b3Vector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) + const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) : m_basis(q), m_origin(c) {} @@ -54,7 +54,7 @@ public: * @param b Rotation from Matrix * @param c Translation from Vector default (0,0,0)*/ explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b, - const b3Vector3& c = b3Vector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) + const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) : m_basis(b), m_origin(c) {} diff --git a/src/Bullet3Common/b3TransformUtil.h b/src/Bullet3Common/b3TransformUtil.h index c6aeef415..6ce580c13 100644 --- a/src/Bullet3Common/b3TransformUtil.h +++ b/src/Bullet3Common/b3TransformUtil.h @@ -24,7 +24,7 @@ subject to the following restrictions: B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents,const b3Vector3& supportDir) { - return b3Vector3(supportDir.getX() < b3Scalar(0.0) ? 
-halfExtents.getX() : halfExtents.getX(), + return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(), supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(), supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ()); } @@ -99,12 +99,12 @@ public: b3Quaternion orn1 = orn0.nearest(orn1a); b3Quaternion dorn = orn1 * orn0.inverse(); angle = dorn.getAngle(); - axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ()); + axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ()); axis[3] = b3Scalar(0.); //check for axis length b3Scalar len = axis.length2(); if (len < B3_EPSILON*B3_EPSILON) - axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.)); + axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.)); else axis /= b3Sqrt(len); } @@ -128,12 +128,12 @@ public: dorn.normalize(); angle = dorn.getAngle(); - axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ()); + axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ()); axis[3] = b3Scalar(0.); //check for axis length b3Scalar len = axis.length2(); if (len < B3_EPSILON*B3_EPSILON) - axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.)); + axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.)); else axis /= b3Sqrt(len); } diff --git a/src/Bullet3Common/b3Vector3.h b/src/Bullet3Common/b3Vector3.h index aa053297b..85b7223c4 100644 --- a/src/Bullet3Common/b3Vector3.h +++ b/src/Bullet3Common/b3Vector3.h @@ -22,6 +22,8 @@ subject to the following restrictions: #include "b3MinMax.h" #include "b3AlignedAllocator.h" + + #ifdef B3_USE_DOUBLE_PRECISION #define b3Vector3Data b3Vector3DoubleData #define b3Vector3DataName "b3Vector3DoubleData" @@ -71,84 +73,78 @@ const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x #endif +class b3Vector3; +class b3Vector4; +inline b3Vector3 b3MakeVector3( b3SimdFloat4 v); +inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z); +inline b3Vector3 b3MakeVector3(b3Scalar 
x,b3Scalar y,b3Scalar z, b3Scalar w); +inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec); +inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w); + + /**@brief b3Vector3 can be used to represent 3D points and vectors. * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers */ B3_ATTRIBUTE_ALIGNED16(class) b3Vector3 { +public: +#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM + union { + b3SimdFloat4 mVec128; + float m_floats[4]; + struct {float x,y,z,w;}; + + }; +#else + union + { + float m_floats[4]; + struct {float x,y,z,w;}; + }; +#endif + + public: B3_DECLARE_ALIGNED_ALLOCATOR(); -#if defined (__SPU__) && defined (__CELLOS_LV2__) - b3Scalar m_floats[4]; -public: - B3_FORCE_INLINE const vec_float4& get128() const +#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM + + /*B3_FORCE_INLINE b3Vector3() { - return *((const vec_float4*)&m_floats[0]); } -public: -#else //__CELLOS_LV2__ __SPU__ - #if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - union { - b3SimdFloat4 mVec128; - b3Scalar m_floats[4]; - struct {b3Scalar x,y,z,w;}; - - }; - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } - #else - union - { - b3Scalar m_floats[4]; - struct {b3Scalar x,y,z,w;}; - }; - #endif -#endif //__CELLOS_LV2__ __SPU__ + */ + + B3_FORCE_INLINE b3SimdFloat4 get128() const + { + return mVec128; + } + B3_FORCE_INLINE void set128(b3SimdFloat4 v128) + { + mVec128 = v128; + } +#endif public: - /**@brief No initialization constructor */ - B3_FORCE_INLINE b3Vector3() - { - - } - - - - /**@brief Constructor from scalars - * @param x X value - * @param y Y value - * @param z Z value - */ - B3_FORCE_INLINE 
b3Vector3(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = b3Scalar(0.f); - } - + #if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE) )|| defined (B3_USE_NEON) - // Set Vector + /* + B3_FORCE_INLINE b3Vector3( b3SimdFloat4 v) { mVec128 = v; } - - // Copy constructor + B3_FORCE_INLINE b3Vector3(const b3Vector3& rhs) { mVec128 = rhs.mVec128; } + */ + + + // Assignment Operator B3_FORCE_INLINE b3Vector3& @@ -158,6 +154,12 @@ public: return *this; } + +#else + + + + #endif // #if defined (B3_USE_SSE_IN_API) || defined (B3_USE_NEON) /**@brief Add a vector to this one @@ -352,11 +354,11 @@ public: B3_FORCE_INLINE b3Vector3 absolute() const { #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - return b3Vector3(_mm_and_ps(mVec128, b3v3AbsfMask)); + return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask)); #elif defined(B3_USE_NEON) return b3Vector3(vabsq_f32(mVec128)); #else - return b3Vector3( + return b3MakeVector3( b3Fabs(m_floats[0]), b3Fabs(m_floats[1]), b3Fabs(m_floats[2])); @@ -378,7 +380,7 @@ public: V = _mm_sub_ps(V, T); V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3)); - return b3Vector3(V); + return b3MakeVector3(V); #elif defined(B3_USE_NEON) float32x4_t T, V; // form (Y, Z, X, _) of mVec128 and v.mVec128 @@ -397,7 +399,7 @@ public: return b3Vector3(V); #else - return b3Vector3( + return b3MakeVector3( m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1], m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2], m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]); @@ -517,7 +519,7 @@ public: vl = _mm_mul_ps(vl, vt); vl = _mm_add_ps(vl, mVec128); - return b3Vector3(vl); + return b3MakeVector3(vl); #elif defined(B3_USE_NEON) float32x4_t vl = vsubq_f32(v.mVec128, mVec128); vl = vmulq_n_f32(vl, t); @@ -526,7 +528,7 @@ public: return b3Vector3(vl); #else return - b3Vector3( m_floats[0] + (v.m_floats[0] - m_floats[0]) * t, + b3MakeVector3( m_floats[0] + 
(v.m_floats[0] - m_floats[0]) * t, m_floats[1] + (v.m_floats[1] - m_floats[1]) * t, m_floats[2] + (v.m_floats[2] - m_floats[2]) * t); #endif @@ -715,7 +717,7 @@ public: r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 )); a2 = _mm_and_ps( a2, b3vxyzMaskf); r = _mm_add_ps( r, b3CastdTo128f (_mm_move_sd( b3CastfTo128d(a2), b3CastfTo128d(b1) ))); - return b3Vector3(r); + return b3MakeVector3(r); #elif defined(B3_USE_NEON) static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 }; @@ -728,7 +730,7 @@ public: float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f)); return b3Vector3( vcombine_f32(b0, b1) ); #else - return b3Vector3( dot(v0), dot(v1), dot(v2)); + return b3MakeVector3( dot(v0), dot(v1), dot(v2)); #endif } }; @@ -738,11 +740,11 @@ B3_FORCE_INLINE b3Vector3 operator+(const b3Vector3& v1, const b3Vector3& v2) { #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - return b3Vector3(_mm_add_ps(v1.mVec128, v2.mVec128)); + return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128)); #elif defined(B3_USE_NEON) - return b3Vector3(vaddq_f32(v1.mVec128, v2.mVec128)); + return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128)); #else - return b3Vector3( + return b3MakeVector3( v1.m_floats[0] + v2.m_floats[0], v1.m_floats[1] + v2.m_floats[1], v1.m_floats[2] + v2.m_floats[2]); @@ -754,11 +756,11 @@ B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& v1, const b3Vector3& v2) { #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - return b3Vector3(_mm_mul_ps(v1.mVec128, v2.mVec128)); + return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128)); #elif defined(B3_USE_NEON) - return b3Vector3(vmulq_f32(v1.mVec128, v2.mVec128)); + return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128)); #else - return b3Vector3( + return b3MakeVector3( v1.m_floats[0] * v2.m_floats[0], v1.m_floats[1] * v2.m_floats[1], v1.m_floats[2] * v2.m_floats[2]); @@ -773,12 +775,12 @@ operator-(const b3Vector3& v1, const b3Vector3& v2) // without _mm_and_ps this 
code causes slowdown in Concave moving __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128); - return b3Vector3(_mm_and_ps(r, b3vFFF0fMask)); + return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); #elif defined(B3_USE_NEON) float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128); - return b3Vector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); + return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); #else - return b3Vector3( + return b3MakeVector3( v1.m_floats[0] - v2.m_floats[0], v1.m_floats[1] - v2.m_floats[1], v1.m_floats[2] - v2.m_floats[2]); @@ -791,11 +793,11 @@ operator-(const b3Vector3& v) { #if (defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)) __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask); - return b3Vector3(_mm_and_ps(r, b3vFFF0fMask)); + return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); #elif defined(B3_USE_NEON) - return b3Vector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask)); + return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask)); #else - return b3Vector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]); + return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]); #endif } @@ -806,12 +808,12 @@ operator*(const b3Vector3& v, const b3Scalar& s) #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) __m128 vs = _mm_load_ss(&s); // (S 0 0 0) vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - return b3Vector3(_mm_mul_ps(v.mVec128, vs)); + return b3MakeVector3(_mm_mul_ps(v.mVec128, vs)); #elif defined(B3_USE_NEON) float32x4_t r = vmulq_n_f32(v.mVec128, s); - return b3Vector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); + return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); #else - return b3Vector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s); + return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s); #endif } @@ -846,7 +848,7 @@ operator/(const b3Vector3& v1, const b3Vector3& v2) #if (defined(B3_USE_SSE_IN_API)&& 
defined (B3_USE_SSE)) __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128); vec = _mm_and_ps(vec, b3vFFF0fMask); - return b3Vector3(vec); + return b3MakeVector3(vec); #elif defined(B3_USE_NEON) float32x4_t x, y, v, m; @@ -862,7 +864,7 @@ operator/(const b3Vector3& v1, const b3Vector3& v2) return b3Vector3(v); #else - return b3Vector3( + return b3MakeVector3( v1.m_floats[0] / v2.m_floats[0], v1.m_floats[1] / v2.m_floats[1], v1.m_floats[2] / v2.m_floats[2]); @@ -953,7 +955,7 @@ B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Sca __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128); b3Scalar ssin = b3Sin( _angle ); - __m128 C = wAxis.cross( mVec128 ).mVec128; + __m128 C = wAxis.cross( b3MakeVector3(mVec128) ).mVec128; O = _mm_and_ps(O, b3vFFF0fMask); b3Scalar scos = b3Cos( _angle ); @@ -975,7 +977,7 @@ B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Sca vcos = vcos * X; O = O + vcos; - return b3Vector3(O); + return b3MakeVector3(O); #else b3Vector3 o = wAxis * wAxis.dot( *this ); b3Vector3 _x = *this - o; @@ -1069,25 +1071,12 @@ class b3Vector4 : public b3Vector3 { public: - B3_FORCE_INLINE b3Vector4() {} + - B3_FORCE_INLINE b3Vector4(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w) - : b3Vector3(_x,_y,_z) - { - m_floats[3] = _w; - } #if (defined (B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined (B3_USE_NEON) - B3_FORCE_INLINE b3Vector4(const b3SimdFloat4 vec) - { - mVec128 = vec; - } - - B3_FORCE_INLINE b3Vector4(const b3Vector3& rhs) - { - mVec128 = rhs.mVec128; - } + B3_FORCE_INLINE b3Vector4& operator=(const b3Vector4& v) @@ -1100,11 +1089,11 @@ public: B3_FORCE_INLINE b3Vector4 absolute4() const { #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - return b3Vector4(_mm_and_ps(mVec128, b3vAbsfMask)); + return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask)); #elif defined(B3_USE_NEON) return b3Vector4(vabsq_f32(mVec128)); #else - return b3Vector4( + return b3MakeVector4( 
b3Fabs(m_floats[0]), b3Fabs(m_floats[1]), b3Fabs(m_floats[2]), @@ -1341,4 +1330,46 @@ B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn) m_floats[i] = dataIn.m_floats[i]; } + +inline b3Vector3 b3MakeVector3( b3SimdFloat4 v) +{ + b3Vector3 tmp; + tmp.set128(v); + return tmp; +} + + +inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z) +{ + b3Vector3 tmp; + tmp.setValue(x,y,z); + return tmp; +} + +inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w) +{ + b3Vector3 tmp; + tmp.setValue(x,y,z); + tmp.w = w; + return tmp; +} + +inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w) +{ + b3Vector4 tmp; + tmp.setValue(x,y,z,w); + return tmp; +} + +#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec) +{ + b3Vector4 tmp; + tmp.set128(vec); + return tmp; +} + +#endif + + #endif //B3_VECTOR3_H diff --git a/src/Bullet3Common/premake4.lua b/src/Bullet3Common/premake4.lua index efbb26f0e..e5fbb5280 100644 --- a/src/Bullet3Common/premake4.lua +++ b/src/Bullet3Common/premake4.lua @@ -5,6 +5,8 @@ kind "StaticLib" targetdir "../../bin" + + includedirs {".."} files { "**.cpp", diff --git a/src/Bullet3Common/shared/b3Float4.h b/src/Bullet3Common/shared/b3Float4.h index ceb7c1e5a..d6ac69d39 100644 --- a/src/Bullet3Common/shared/b3Float4.h +++ b/src/Bullet3Common/shared/b3Float4.h @@ -6,8 +6,26 @@ #ifdef __cplusplus #include "Bullet3Common/b3Vector3.h" #define b3Float4 b3Vector3 + #define b3Float4ConstArg const b3Vector3& + #define b3Dot3F4 b3Dot + #define b3Cross3 b3Cross + #define b3MakeFloat4 b3MakeVector3 #else typedef float4 b3Float4; + #define b3Float4ConstArg const b3Float4 + #define b3MakeFloat4 (float4) + float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1) + { + float4 a1 = b3MakeFloat4(v0.xyz,0.f); + float4 b1 = b3MakeFloat4(v1.xyz,0.f); + return dot(a1, b1); + } + b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1) + { + float4 a1 
= b3MakeFloat4(v0.xyz,0.f); + float4 b1 = b3MakeFloat4(v1.xyz,0.f); + return cross(a1, b1); + } #endif #endif //B3_FLOAT4_H diff --git a/src/Bullet3Common/shared/b3Int2.h b/src/Bullet3Common/shared/b3Int2.h index ec886de18..f1d01f81a 100644 --- a/src/Bullet3Common/shared/b3Int2.h +++ b/src/Bullet3Common/shared/b3Int2.h @@ -16,6 +16,8 @@ subject to the following restrictions: #ifndef B3_INT2_H #define B3_INT2_H +#ifdef __cplusplus + struct b3UnsignedInt2 { union @@ -52,5 +54,11 @@ inline b3Int2 b3MakeInt2(int x, int y) v.s[0] = x; v.s[1] = y; return v; } +#else +#define b3UnsignedInt2 uint2 +#define b3Int2 int2 +#define b3MakeInt2 (int2) + +#endif //__cplusplus #endif \ No newline at end of file diff --git a/src/Bullet3Common/shared/b3Mat3x3.h b/src/Bullet3Common/shared/b3Mat3x3.h new file mode 100644 index 000000000..74f9fae49 --- /dev/null +++ b/src/Bullet3Common/shared/b3Mat3x3.h @@ -0,0 +1,75 @@ + +#ifndef B3_MAT3x3_H +#define B3_MAT3x3_H + +#include "Bullet3Common/shared/b3Quat.h" + + +#ifdef __cplusplus + +#include "Bullet3Common/b3Matrix3x3.h" + +#define b3Mat3x3 b3Matrix3x3 +#define b3Mat3x3ConstArg const b3Matrix3x3& + +inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat) +{ + return b3Mat3x3(quat); +} + +inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat) +{ + return mat.absolute(); +} + +#define b3GetRow(m,row) m.getRow(row) + +#else + +typedef struct +{ + float4 m_row[3]; +}b3Mat3x3; + +#define b3Mat3x3ConstArg const b3Mat3x3 +#define b3GetRow(m,row) (m.m_row[row]) + +inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat) +{ + float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); + b3Mat3x3 out; + + out.m_row[0].x=1-2*quat2.y-2*quat2.z; + out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z; + out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y; + out.m_row[0].w = 0.f; + + out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z; + out.m_row[1].y=1-2*quat2.x-2*quat2.z; + out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x; + out.m_row[1].w = 0.f; + + 
out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y; + out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x; + out.m_row[2].z=1-2*quat2.x-2*quat2.y; + out.m_row[2].w = 0.f; + + return out; +} + +inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn) +{ + b3Mat3x3 out; + out.m_row[0] = fabs(matIn.m_row[0]); + out.m_row[1] = fabs(matIn.m_row[1]); + out.m_row[2] = fabs(matIn.m_row[2]); + return out; +} + +#endif + + + + + +#endif //B3_MAT3x3_H diff --git a/src/Bullet3Common/shared/b3Quat.h b/src/Bullet3Common/shared/b3Quat.h index 3bfe40a46..108c719f7 100644 --- a/src/Bullet3Common/shared/b3Quat.h +++ b/src/Bullet3Common/shared/b3Quat.h @@ -2,12 +2,76 @@ #define B3_QUAT_H #include "Bullet3Common/shared/b3PlatformDefinitions.h" +#include "Bullet3Common/shared/b3Float4.h" #ifdef __cplusplus #include "Bullet3Common/b3Quaternion.h" + #include "Bullet3Common/b3Transform.h" + #define b3Quat b3Quaternion + #define b3QuatConstArg const b3Quaternion& + + inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) + { + b3Transform tr; + tr.setOrigin(translation); + tr.setRotation(orientation); + return tr(point); + } + #else typedef float4 b3Quat; + #define b3QuatConstArg const b3Quat + + +inline float4 b3FastNormalize4(float4 v) +{ + v = (float4)(v.xyz,0.f); + return fast_normalize(v); +} + +inline b3Quat b3QuatMul(b3Quat a, b3Quat b); +inline b3Quat b3QuatNormalize(b3QuatConstArg in); +inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec); +inline b3Quat b3QuatInvert(b3QuatConstArg q); +inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) +{ + b3Quat ans; + ans = b3Cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - b3Dot3F4(a, b); + return ans; +} + +inline b3Quat b3QuatNormalize(b3QuatConstArg in) +{ + return b3FastNormalize4(in); +} +inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec) +{ + b3Quat qInv = b3QuatInvert( q ); + float4 vcpy = vec; 
+ vcpy.w = 0.f; + float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv); + return out; +} + +inline b3Quat b3QuatInvert(b3QuatConstArg q) +{ + return (b3Quat)(-q.xyz, q.w); +} + +inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec) +{ + return b3QuatRotate( b3QuatInvert( q ), vec ); +} + +inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) +{ + return b3QuatRotate( orientation, point ) + (translation); +} + #endif #endif //B3_QUAT_H diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h b/src/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h index f421b4cf5..a55168eb3 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h +++ b/src/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h @@ -58,7 +58,7 @@ public: const b3Matrix3x3& world2B, const b3Vector3& inertiaInvA, const b3Vector3& inertiaInvB) - :m_linearJointAxis(b3Vector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.))) + :m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.))) { m_aJ= world2A*jointAxis; m_bJ = world2B*-jointAxis; @@ -74,7 +74,7 @@ public: const b3Vector3& axisInB, const b3Vector3& inertiaInvA, const b3Vector3& inertiaInvB) - : m_linearJointAxis(b3Vector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.))) + : m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.))) , m_aJ(axisInA) , m_bJ(-axisInB) { @@ -97,7 +97,7 @@ public: m_aJ= world2A*(rel_pos1.cross(jointAxis)); m_bJ = world2A*(rel_pos2.cross(-jointAxis)); m_0MinvJt = inertiaInvA * m_aJ; - m_1MinvJt = b3Vector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + m_1MinvJt = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); m_Adiag = massInvA + m_0MinvJt.dot(m_aJ); b3Assert(m_Adiag > b3Scalar(0.0)); diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp b/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp index 068d0e28a..d568158b2 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp +++ 
b/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp @@ -450,16 +450,16 @@ void b3PgsJacobiSolver::initSolverBody(int bodyIndex, b3SolverBody* solverBody, if (rb) { solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3Vector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())); + solverBody->internalSetInvMass(b3MakeVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())); solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3Vector3(1,1,1); - solverBody->m_linearFactor = b3Vector3(1,1,1); + solverBody->m_angularFactor = b3MakeVector3(1,1,1); + solverBody->m_linearFactor = b3MakeVector3(1,1,1); solverBody->m_linearVelocity = getLinearVelocity(rb); solverBody->m_angularVelocity = getAngularVelocity(rb); } else { solverBody->m_worldTransform.setIdentity(); - solverBody->internalSetInvMass(b3Vector3(0,0,0)); + solverBody->internalSetInvMass(b3MakeVector3(0,0,0)); solverBody->m_originalBodyIndex = bodyIndex; solverBody->m_angularFactor.setValue(1,1,1); solverBody->m_linearFactor.setValue(1,1,1); @@ -510,12 +510,12 @@ void b3PgsJacobiSolver::setupFrictionConstraint(b3RigidBodyCL* bodies,b3InertiaC { b3Vector3 ftorqueAxis1 = rel_pos1.cross(solverConstraint.m_contactNormal); solverConstraint.m_relpos1CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : b3MakeVector3(0,0,0); } { b3Vector3 ftorqueAxis1 = rel_pos2.cross(-solverConstraint.m_contactNormal); solverConstraint.m_relpos2CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentB = body1 ? 
getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : b3MakeVector3(0,0,0); } b3Scalar scaledDenom; @@ -555,10 +555,10 @@ void b3PgsJacobiSolver::setupFrictionConstraint(b3RigidBodyCL* bodies,b3InertiaC b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:b3Vector3(0,0,0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:b3Vector3(0,0,0)); + b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:b3MakeVector3(0,0,0)) + + solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:b3MakeVector3(0,0,0)); + b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:b3MakeVector3(0,0,0)) + + solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:b3MakeVector3(0,0,0)); rel_vel = vel1Dotn+vel2Dotn; @@ -590,7 +590,7 @@ void b3PgsJacobiSolver::setupRollingFrictionConstraint(b3RigidBodyCL* bodies,b3I b3Scalar desiredVelocity, b3Scalar cfmSlip) { - b3Vector3 normalAxis(0,0,0); + b3Vector3 normalAxis=b3MakeVector3(0,0,0); solverConstraint.m_contactNormal = normalAxis; @@ -612,18 +612,18 @@ void b3PgsJacobiSolver::setupRollingFrictionConstraint(b3RigidBodyCL* bodies,b3I { b3Vector3 ftorqueAxis1 = -normalAxis1; solverConstraint.m_relpos1CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentA = body0 ? 
getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : b3MakeVector3(0,0,0); } { b3Vector3 ftorqueAxis1 = normalAxis1; solverConstraint.m_relpos2CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : b3MakeVector3(0,0,0); } { - b3Vector3 iMJaA = body0?getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*solverConstraint.m_relpos1CrossNormal:b3Vector3(0,0,0); - b3Vector3 iMJaB = body1?getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*solverConstraint.m_relpos2CrossNormal:b3Vector3(0,0,0); + b3Vector3 iMJaA = body0?getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*solverConstraint.m_relpos1CrossNormal:b3MakeVector3(0,0,0); + b3Vector3 iMJaB = body1?getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*solverConstraint.m_relpos2CrossNormal:b3MakeVector3(0,0,0); b3Scalar sum = 0; sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); @@ -634,10 +634,10 @@ void b3PgsJacobiSolver::setupRollingFrictionConstraint(b3RigidBodyCL* bodies,b3I b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:b3Vector3(0,0,0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:b3Vector3(0,0,0)); + b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:b3MakeVector3(0,0,0)) + + 
solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:b3MakeVector3(0,0,0)); + b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:b3MakeVector3(0,0,0)) + + solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:b3MakeVector3(0,0,0)); rel_vel = vel1Dotn+vel2Dotn; @@ -730,9 +730,9 @@ void b3PgsJacobiSolver::setupContactConstraint(b3RigidBodyCL* bodies, b3InertiaC relaxation = 1.f; b3Vector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB); - solverConstraint.m_angularComponentA = rb0 ? getInvInertiaTensorWorld(&inertias[bodyA->m_originalBodyIndex])*torqueAxis0 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentA = rb0 ? getInvInertiaTensorWorld(&inertias[bodyA->m_originalBodyIndex])*torqueAxis0 : b3MakeVector3(0,0,0); b3Vector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB); - solverConstraint.m_angularComponentB = rb1 ? getInvInertiaTensorWorld(&inertias[bodyB->m_originalBodyIndex])*-torqueAxis1 : b3Vector3(0,0,0); + solverConstraint.m_angularComponentB = rb1 ? getInvInertiaTensorWorld(&inertias[bodyB->m_originalBodyIndex])*-torqueAxis1 : b3MakeVector3(0,0,0); b3Scalar scaledDenom; { @@ -781,8 +781,8 @@ void b3PgsJacobiSolver::setupContactConstraint(b3RigidBodyCL* bodies, b3InertiaC { b3Vector3 vel1,vel2; - vel1 = rb0? getVelocityInLocalPoint(rb0,rel_pos1) : b3Vector3(0,0,0); - vel2 = rb1? getVelocityInLocalPoint(rb1, rel_pos2) : b3Vector3(0,0,0); + vel1 = rb0? getVelocityInLocalPoint(rb0,rel_pos1) : b3MakeVector3(0,0,0); + vel2 = rb1? getVelocityInLocalPoint(rb1, rel_pos2) : b3MakeVector3(0,0,0); // b3Vector3 vel2 = rb1 ? 
rb1->getVelocityInLocalPoint(rel_pos2) : b3Vector3(0,0,0); vel = vel1 - vel2; @@ -817,10 +817,10 @@ void b3PgsJacobiSolver::setupContactConstraint(b3RigidBodyCL* bodies, b3InertiaC solverConstraint.m_appliedPushImpulse = 0.f; { - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?bodyA->m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos1CrossNormal.dot(rb0?bodyA->m_angularVelocity:b3Vector3(0,0,0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?bodyB->m_linearVelocity:b3Vector3(0,0,0)) - + solverConstraint.m_relpos2CrossNormal.dot(rb1?bodyB->m_angularVelocity:b3Vector3(0,0,0)); + b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?bodyA->m_linearVelocity:b3MakeVector3(0,0,0)) + + solverConstraint.m_relpos1CrossNormal.dot(rb0?bodyA->m_angularVelocity:b3MakeVector3(0,0,0)); + b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?bodyB->m_linearVelocity:b3MakeVector3(0,0,0)) + + solverConstraint.m_relpos2CrossNormal.dot(rb1?bodyB->m_angularVelocity:b3MakeVector3(0,0,0)); b3Scalar rel_vel = vel1Dotn+vel2Dotn; b3Scalar positionalError = 0.f; @@ -1080,9 +1080,9 @@ b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, m_bodyCountCheck.resize(numBodies,0); m_deltaLinearVelocities.resize(0); - m_deltaLinearVelocities.resize(numBodies,b3Vector3(0,0,0)); + m_deltaLinearVelocities.resize(numBodies,b3MakeVector3(0,0,0)); m_deltaAngularVelocities.resize(0); - m_deltaAngularVelocities.resize(numBodies,b3Vector3(0,0,0)); + m_deltaAngularVelocities.resize(numBodies,b3MakeVector3(0,0,0)); int totalBodies = 0; @@ -1669,9 +1669,9 @@ void b3PgsJacobiSolver::averageVelocities() int numBodies = m_bodyCount.size(); m_deltaLinearVelocities.resize(0); - m_deltaLinearVelocities.resize(numBodies,b3Vector3(0,0,0)); + m_deltaLinearVelocities.resize(numBodies,b3MakeVector3(0,0,0)); m_deltaAngularVelocities.resize(0); - m_deltaAngularVelocities.resize(numBodies,b3Vector3(0,0,0)); + 
m_deltaAngularVelocities.resize(numBodies,b3MakeVector3(0,0,0)); for (int i=0;im_plane.x,face->m_plane.y,face->m_plane.z,0.f); + float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); if (face->m_numIndices<2) return false; @@ -347,7 +347,7 @@ int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& if (nPoints >64) nPoints = 64; - float4 center = make_float4(0,0,0,0); + float4 center = b3MakeVector3(0,0,0,0); { for (int i=0;im_numFaces;face++) { - const float4 Normal = make_float4( + const float4 Normal = b3MakeVector3( facesA[hullA->m_faceOffset+face].m_plane.x, facesA[hullA->m_faceOffset+face].m_plane.y, facesA[hullA->m_faceOffset+face].m_plane.z,0.f); @@ -873,7 +873,7 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron { float4 pointInWorld = pVtxIn[i]; //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); //printf("depth=%f\n",depth); } } else @@ -934,7 +934,7 @@ static int clipHullAgainstHull(const float4& separatingNormal, #endif //BT_DEBUG_SAT_FACE //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) { - const float4 Normal = make_float4(facesB[hullB.m_faceOffset+face].m_plane.x, + const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); const float4 WorldNormal = b3QuatRotate(ornB, Normal); #ifdef BT_DEBUG_SAT_FACE @@ -1145,7 +1145,7 @@ int clipHullHullSingle( float4 worldVertsB2[MAX_VERTS]; int capacityWorldVerts = MAX_VERTS; - float4 hostNormal = make_float4(sepNormalWorldSpace.getX(),sepNormalWorldSpace.getY(),sepNormalWorldSpace.getZ(),0.f); + float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); int shapeA 
= hostCollidablesA[collidableIndexA].m_shapeIndex; int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; @@ -1158,11 +1158,11 @@ int clipHullHullSingle( { //B3_PROFILE("transform computation"); //trA.setIdentity(); - trA.setOrigin(b3Vector3(posA.x,posA.y,posA.z)); + trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); //trB.setIdentity(); - trB.setOrigin(b3Vector3(posB.x,posB.y,posB.z)); + trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); } @@ -1263,7 +1263,7 @@ void computeContactPlaneConvex(int pairIndex, int numWorldVertsB1= 0; b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal(planeEq.x,planeEq.y,planeEq.z); + b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); float planeConstant = planeEq.w; b3Transform convexWorldTransform; @@ -1358,7 +1358,7 @@ void computeContactPlaneConvex(int pairIndex, b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPosB[i] = pOnB1; } - c->m_worldNormalOnB[3] = (b3Scalar)numReducedPoints; + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } @@ -1373,9 +1373,9 @@ B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vect { b3Vector3 vecOut; vecOut.setValue( - (b3Scalar)(vecIn[0]) / (quantization.getX()), - (b3Scalar)(vecIn[1]) / (quantization.getY()), - (b3Scalar)(vecIn[2]) / (quantization.getZ())); + (b3Scalar)(vecIn[0]) / (quantization.x), + (b3Scalar)(vecIn[1]) / (quantization.y), + (b3Scalar)(vecIn[2]) / (quantization.z)); vecOut += bvhAabbMin; return vecOut; } @@ -1385,6 +1385,11 @@ void traverseTreeTree() } +#include "Bullet3Common/shared/b3Mat3x3.h" + +int numAabbChecks = 0; +int maxNumAabbChecks = 0; +int maxDepth = 0; // work-in-progress __kernel void findCompoundPairsKernel( @@ -1408,8 +1413,8 @@ __kernel void 
findCompoundPairsKernel( b3AlignedObjectArray& bvhInfoCPU ) { - - + numAabbChecks=0; + maxNumAabbChecks=0; int i = pairIndex; { @@ -1462,7 +1467,7 @@ __kernel void findCompoundPairsKernel( b3Vector3 aabbAMinOut,aabbAMaxOut; float margin=0.f; - b3TransformAabb(treeAminLocal,treeAmaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut); + b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); for (int q=0;q nodeStack; b3Int2 node0; @@ -1497,8 +1504,13 @@ __kernel void findCompoundPairsKernel( do { + if (depth > maxDepth) + { + maxDepth=depth; + printf("maxDepth=%d\n",maxDepth); + } b3Int2 node = nodeStack[--depth]; - + b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); @@ -1507,12 +1519,13 @@ __kernel void findCompoundPairsKernel( float margin=0.f; b3Vector3 aabbAMinOut,aabbAMaxOut; - b3TransformAabb(aMinLocal,aMaxLocal, margin,transA,aabbAMinOut,aabbAMaxOut); + b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb(bMinLocal,bMaxLocal, margin,transB,aabbBMinOut,aabbBMaxOut); + b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); - bool nodeOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + numAabbChecks++; + bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); if (nodeOverlap) { bool isLeafA = treeNodesCPU[node.x].isLeafNode(); @@ -1573,34 +1586,11 @@ __kernel void findCompoundPairsKernel( } } } while (depth); + maxNumAabbChecks = b3Max(numAabbChecks,maxNumAabbChecks); } - - /* - for (i=0;im_SubtreeHeaders.size();i++) - { - const b3BvhSubtreeInfo& 
subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); - if (overlap != 0) - { - walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, - subtree.m_rootNodeIndex, - subtree.m_rootNodeIndex+subtree.m_subtreeSize); - } - } - */ - - /*bvhInfoCPU[bvhA].m_numNodes; - bvhInfoCPU[bvhA].m_nodeOffset - - b3AlignedObjectArray nodeStack; - b3Int2 n;n.x = - nodeStack.push_back( - */ - } } + return; } @@ -1624,8 +1614,8 @@ __kernel void findCompoundPairsKernel( b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - b3Aabb aabbA = aabbsLocalSpace[shapeIndexA]; + + b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; b3Transform transA; @@ -1636,7 +1626,7 @@ __kernel void findCompoundPairsKernel( b3Vector3 aabbAMinOut,aabbAMaxOut; - b3TransformAabb((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA,aabbAMinOut,aabbAMaxOut); + b3TransformAabb2((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) { @@ -1654,10 +1644,7 @@ __kernel void findCompoundPairsKernel( - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - b3Aabb aabbB = aabbsLocalSpace[shapeIndexB]; + b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; b3Transform transB; transB.setIdentity(); @@ -1665,9 +1652,10 @@ __kernel void findCompoundPairsKernel( transB.setRotation(newOrnB); b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB,aabbBMinOut,aabbBMaxOut); + b3TransformAabb2((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); - bool 
aabbOverlap = b3TestAabbAgainstAabb2(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + numAabbChecks++; + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); if (aabbOverlap) { /* @@ -2083,6 +2071,7 @@ void computeContactCompoundCompound(int pairIndex, bvhInfoCPU ); + printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); if (numCompoundPairsOut>maxNumCompoundPairsCapacity) { b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); @@ -2238,7 +2227,7 @@ void computeContactPlaneCompound(int pairIndex, int numWorldVertsB1= 0; b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal(planeEq.x,planeEq.y,planeEq.z); + b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); float planeConstant = planeEq.w; b3Transform convexWorldTransform; @@ -2333,7 +2322,7 @@ void computeContactPlaneCompound(int pairIndex, b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPosB[i] = pOnB1; } - c->m_worldNormalOnB[3] = (b3Scalar)numReducedPoints; + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } @@ -2382,8 +2371,8 @@ void computeContactSphereConvex(int pairIndex, int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; int shapeIndex = collidables[collidableIndex].m_shapeIndex; int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = make_float4(0, 0, 0, 0); - float4 hitNormalWorld = make_float4(0, 0, 0, 0); + float4 closestPnt = b3MakeVector3(0, 0, 0, 0); + float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); float minDist = -1000000.f; // TODO: What is the largest/smallest float? 
bool bCollide = true; int region = -1; @@ -2392,10 +2381,10 @@ void computeContactSphereConvex(int pairIndex, { b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; float4 planeEqn; - float4 localPlaneNormal = make_float4(face.m_plane.getX(),face.m_plane.getY(),face.m_plane.getZ(),0.f); + float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); planeEqn = n1; - planeEqn[3] = face.m_plane[3]; + planeEqn[3] = face.m_plane.w; float4 pntReturn; float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); @@ -2471,7 +2460,7 @@ void computeContactSphereConvex(int pairIndex, if (actualDepth<0) { //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.getX(),normalOnSurfaceB1.getY(),normalOnSurfaceB1.getZ()); + //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); //printf("region=,%d,\n", region); pOnB1[3] = actualDepth; @@ -2493,7 +2482,7 @@ void computeContactSphereConvex(int pairIndex, c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; c->m_worldPosB[0] = pOnB1; int numPoints = 1; - c->m_worldNormalOnB[3] = (b3Scalar)numPoints; + c->m_worldNormalOnB.w = (b3Scalar)numPoints; }//if (dstIdx < numPairs) } }//if (hasCollision) @@ -2539,7 +2528,7 @@ int computeContactConvexConvex( b3AlignedObjectArray& pairs, float maximumDistanceSquared = 1e30f; b3Vector3 resultPointOnBWorld; - b3Vector3 sepAxis2(0,1,0); + b3Vector3 sepAxis2=b3MakeVector3(0,1,0); b3Scalar distance2 = 1e30f; int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; @@ -2749,7 +2738,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* const b3OpenCLArray& gpuChildShapes, const b3OpenCLArray& clAabbsWorldSpace, - const b3OpenCLArray& clAabbslocalSpace, + const b3OpenCLArray& clAabbsLocalSpace, b3OpenCLArray& 
worldVertsB1GPU, b3OpenCLArray& clippingFacesOutGPU, @@ -2788,7 +2777,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); b3AlignedObjectArray hostAabbsLocalSpace; - clAabbslocalSpace.copyToHost(hostAabbsLocalSpace); + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); @@ -3115,8 +3104,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* } numCompoundPairs = m_numCompoundPairsOut.at(0); - - if (1) + bool useGpuFindCompoundPairs=true; + if (useGpuFindCompoundPairs) { B3_PROFILE("findCompoundPairsKernel"); b3BufferInfoCL bInfo[] = @@ -3129,10 +3118,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), - b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()) + b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), + b3BufferInfoCL(subTreesGPU->getBufferCL()), + b3BufferInfoCL(treeNodesGPU->getBufferCL()), + b3BufferInfoCL(bvhInfo->getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel); @@ -3143,17 +3135,104 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* int num = nPairs; launcher.launch1D( num); clFinish(m_queue); + + numCompoundPairs = m_numCompoundPairsOut.at(0); + //printf("numCompoundPairs =%d\n",numCompoundPairs ); + if (numCompoundPairs) + { + //printf("numCompoundPairs=%d\n",numCompoundPairs); + } + + + } else + { + + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + + 
b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + int numCompoundPairsOut=0; + + b3AlignedObjectArray cpuCompoundPairsOut; + cpuCompoundPairsOut.resize(compoundPairCapacity); + + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + + + + for (int pairIndex=0;pairIndex compoundPairCapacity) { b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); numCompoundPairs = compoundPairCapacity; } + + m_gpuCompoundPairs.resize(numCompoundPairs); m_gpuHasCompoundSepNormals.resize(numCompoundPairs); m_gpuCompoundSepNormals.resize(numCompoundPairs); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp index 500775725..4f565143c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp @@ -127,12 +127,12 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices, b3MyFace& faceA = tmpFaces[refFace]; todoFaces.pop_back(); - b3Vector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]); + b3Vector3 faceNormalA = b3MakeVector3(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]); for (int j=todoFaces.size()-1;j>=0;j--) { int i = todoFaces[j]; b3MyFace& faceB = tmpFaces[i]; - b3Vector3 
faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]); + b3Vector3 faceNormalB = b3MakeVector3(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]); if (faceNormalA.dot(faceNormalB)>faceWeldThreshold) { coplanarFaceGroup.push_back(i); @@ -147,14 +147,14 @@ bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices, //do the merge: use Graham Scan 2d convex hull b3AlignedObjectArray orgpoints; - b3Vector3 averageFaceNormal(0,0,0); + b3Vector3 averageFaceNormal = b3MakeVector3(0,0,0); for (int i=0;im_faces.push_back(tmpFaces[coplanarFaceGroup[i]]); b3MyFace& face = tmpFaces[coplanarFaceGroup[i]]; - b3Vector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]); + b3Vector3 faceNormal = b3MakeVector3(face.m_plane[0],face.m_plane[1],face.m_plane[2]); averageFaceNormal+=faceNormal; for (int f=0;f0?-m_ray:b3Vector3(1,0,0)); + appendvertice(m_simplices[0],sqrl>0?-m_ray:b3MakeVector3(1,0,0)); m_simplices[0].p[0] = 1; m_ray = m_simplices[0].c[0]->w; sqdist = sqrl; @@ -242,7 +242,7 @@ namespace gjkepa2_impl if(sqdist>=0) {/* Valid */ ns.rank = 0; - m_ray = b3Vector3(0,0,0); + m_ray = b3MakeVector3(0,0,0); m_current = next; for(unsigned int i=0,ni=cs.rank;ic[1]->w-m_simplex->c[0]->w; for(unsigned int i=0;i<3;++i) { - b3Vector3 axis=b3Vector3(0,0,0); + b3Vector3 axis=b3MakeVector3(0,0,0); axis[i]=1; const b3Vector3 p=b3Cross(d,axis); if(p.length2()>0) @@ -557,7 +557,7 @@ namespace gjkepa2_impl void Initialize() { m_status = eStatus::Failed; - m_normal = b3Vector3(0,0,0); + m_normal = b3MakeVector3(0,0,0); m_depth = 0; m_nextsv = 0; for(unsigned int i=0;i0) m_normal = m_normal/nl; else - m_normal = b3Vector3(1,0,0); + m_normal = b3MakeVector3(1,0,0); m_depth = 0; m_result.rank=1; m_result.c[0]=simplex.c[0]; @@ -813,7 +813,7 @@ namespace gjkepa2_impl { /* Results */ results.witnesses[0] = - results.witnesses[1] = b3Vector3(0,0,0); + results.witnesses[1] = b3MakeVector3(0,0,0); results.status = b3GjkEpaSolver2::sResults::Separated; /* Shape */ 
shape.m_shapes[0] = hullA; @@ -851,8 +851,8 @@ bool b3GjkEpaSolver2::Distance( const b3Transform& transA, const b3Transform& t GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,guess); if(gjk_status==GJK::eStatus::Valid) { - b3Vector3 w0=b3Vector3(0,0,0); - b3Vector3 w1=b3Vector3(0,0,0); + b3Vector3 w0=b3MakeVector3(0,0,0); + b3Vector3 w1=b3MakeVector3(0,0,0); for(unsigned int i=0;irank;++i) { const b3Scalar p=gjk.m_simplex->p[i]; @@ -897,7 +897,7 @@ bool b3GjkEpaSolver2::Penetration( const b3Transform& transA, const b3Transform& EPA::eStatus::_ epa_status=epa.Evaluate(gjk,-guess); if(epa_status!=EPA::eStatus::Failed) { - b3Vector3 w0=b3Vector3(0,0,0); + b3Vector3 w0=b3MakeVector3(0,0,0); for(unsigned int i=0;id,0,verticesA,verticesB)*epa.m_result.p[i]; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.cpp index 53397e033..69149a6c3 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.cpp @@ -33,7 +33,7 @@ int gEpaSeparatingAxis=0; b3GjkPairDetector::b3GjkPairDetector(b3VoronoiSimplexSolver* simplexSolver,b3GjkEpaSolver2* penetrationDepthSolver) -:m_cachedSeparatingAxis(b3Scalar(0.),b3Scalar(-1.),b3Scalar(0.)), +:m_cachedSeparatingAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(-1.),b3Scalar(0.))), m_penetrationDepthSolver(penetrationDepthSolver), m_simplexSolver(simplexSolver), m_ignoreMargin(false), @@ -159,13 +159,13 @@ bool getClosestPoints(b3GjkPairDetector* gjkDetector, const b3Transform& transA, gjkDetector->m_cachedSeparatingDistance = 0.f; b3Scalar distance=b3Scalar(0.); - b3Vector3 normalInB(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 normalInB= b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); b3Vector3 pointOnA,pointOnB; b3Transform localTransA = transA; b3Transform localTransB = transB; - b3Vector3 positionOffset(0,0,0);// = (localTransA.getOrigin() + localTransB.getOrigin()) * 
b3Scalar(0.5); + b3Vector3 positionOffset = b3MakeVector3(0,0,0);// = (localTransA.getOrigin() + localTransB.getOrigin()) * b3Scalar(0.5); localTransA.getOrigin() -= positionOffset; localTransB.getOrigin() -= positionOffset; @@ -202,7 +202,7 @@ bool getClosestPoints(b3GjkPairDetector* gjkDetector, const b3Transform& transA, b3Scalar margin = marginA + marginB; b3Scalar bestDeltaN = -1e30f; - b3Vector3 bestSepAxis(0,0,0); + b3Vector3 bestSepAxis= b3MakeVector3(0,0,0); b3Vector3 bestPointOnA; b3Vector3 bestPointOnB; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp index 25626d07a..e9e51d5a3 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp @@ -167,8 +167,8 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized { NodeTriangleCallback callback(m_leafNodes); - b3Vector3 aabbMin(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - b3Vector3 aabbMax(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + b3Vector3 aabbMin=b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + b3Vector3 aabbMax=b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax); @@ -322,7 +322,7 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f if (type == PHY_FLOAT) { float* graphicsbase = (float*)(vertexbase+graphicsindex*stride); - triangleVerts[j] = b3Vector3( + triangleVerts[j] = b3MakeVector3( graphicsbase[0]*meshScaling.getX(), graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); @@ -330,7 +330,7 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int f else { double* graphicsbase = (double*)(vertexbase+graphicsindex*stride); - 
triangleVerts[j] = b3Vector3( b3Scalar(graphicsbase[0]*meshScaling.getX()), b3Scalar(graphicsbase[1]*meshScaling.getY()), b3Scalar(graphicsbase[2]*meshScaling.getZ())); + triangleVerts[j] = b3MakeVector3( b3Scalar(graphicsbase[0]*meshScaling.getX()), b3Scalar(graphicsbase[1]*meshScaling.getY()), b3Scalar(graphicsbase[2]*meshScaling.getZ())); } } diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp index 05fc64d6e..b6eb9c336 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp @@ -90,11 +90,11 @@ b3Vector3 color[4]= void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin) { //enlarge the AABB to avoid division by zero when initializing the quantization values - b3Vector3 clampValue(quantizationMargin,quantizationMargin,quantizationMargin); + b3Vector3 clampValue =b3MakeVector3(quantizationMargin,quantizationMargin,quantizationMargin); m_bvhAabbMin = bvhAabbMin - clampValue; m_bvhAabbMax = bvhAabbMax + clampValue; b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; - m_bvhQuantization = b3Vector3(b3Scalar(65533.0),b3Scalar(65533.0),b3Scalar(65533.0)) / aabbSize; + m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0),b3Scalar(65533.0),b3Scalar(65533.0)) / aabbSize; m_useQuantization = true; } @@ -233,7 +233,7 @@ int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int sp int numIndices = endIndex - startIndex; b3Scalar splitValue; - b3Vector3 means(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); for (i=startIndex;i& verticesA, b3Scalar margin) { - b3Vector3 supVec(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 supVec = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); // Here we take advantage of dot(a, 
b*c) = dot(a*b, c). Note: This is true mathematically, but not numerically. diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp index e74c97383..906645188 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp @@ -17,7 +17,7 @@ subject to the following restrictions: b3TriangleCallback::~b3TriangleCallback() { - + } diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp index bc9a18f22..cf3d5ef49 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp @@ -68,7 +68,7 @@ void b3VoronoiSimplexSolver::reset() m_cachedValidClosest = false; m_numVertices = 0; m_needsUpdate = true; - m_lastW = b3Vector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); m_cachedBC.reset(); } @@ -118,7 +118,7 @@ bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() const b3Vector3& to = m_simplexVectorW[1]; b3Vector3 nearest; - b3Vector3 p (b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); b3Vector3 diff = p - from; b3Vector3 v = to - from; b3Scalar t = v.dot(diff); @@ -157,7 +157,7 @@ bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() case 3: { //closest point origin from triangle - b3Vector3 p (b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); const b3Vector3& a = m_simplexVectorW[0]; const b3Vector3& b = m_simplexVectorW[1]; @@ -183,7 +183,7 @@ bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() { - b3Vector3 p (b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 
p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); const b3Vector3& a = m_simplexVectorW[0]; const b3Vector3& b = m_simplexVectorW[1]; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index eb183b82f..9d20efb93 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -18,6 +18,20 @@ static const char* primitiveContactsKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl index 638d7399f..df6938dd8 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl @@ -7,6 +7,7 @@ #define TRIANGLE_NUM_CONVEX_FACES 5 #define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 +#define B3_MAX_STACK_DEPTH 256 typedef unsigned int u32; @@ -14,13 +15,104 @@ typedef unsigned int u32; ///keep this in sync with btCollidable.h typedef struct { - int m_numChildShapes; - int blaat2; + union { + int m_numChildShapes; + int m_bvhIndex; + }; + union + { + float m_radius; + int m_compoundBvhIndex; + }; + int m_shapeType; int m_shapeIndex; } btCollidableGpu; +#define MAX_NUM_PARTS_IN_BITS 10 + +///b3QuantizedBvhNode is a compressed aabb node, 16 
bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes + int m_escapeIndexOrTriangleIndex; +} b3QuantizedBvhNode; + +typedef struct +{ + float4 m_aabbMin; + float4 m_aabbMax; + float4 m_quantization; + int m_numNodes; + int m_numSubTrees; + int m_nodeOffset; + int m_subTreeOffset; + +} b3BvhInfo; + + +int getTriangleIndex(const b3QuantizedBvhNode* rootNode) +{ + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); +} + +int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); +} + +int isLeafNode(const b3QuantizedBvhNode* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; +} + +int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0; +} + +int getEscapeIndex(const b3QuantizedBvhNode* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + +int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + + +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes, points to the root of the subtree + int m_rootNodeIndex; + //4 bytes + int m_subtreeSize; + int m_padding[3]; +} b3BvhSubtreeInfo; + + + + + + + typedef struct { float4 m_childPosition; @@ -80,6 +172,11 @@ typedef struct }; } btAabbCL; +#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" +#include "Bullet3Common/shared/b3Int2.h" + + + typedef struct { float4 m_plane; @@ -755,6 +852,34 @@ __kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPair } + +inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) +{ + b3Float4 vecOut; + vecOut = b3MakeFloat4( + (float)(vecIn[0]) / (quantization.x), + (float)(vecIn[1]) / (quantization.y), + (float)(vecIn[2]) / (quantization.z), + 0.f); + + vecOut += bvhAabbMin; + return vecOut; +} + +inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) +{ + b3Float4 vecOut; + vecOut = b3MakeFloat4( + (float)(vecIn[0]) / (quantization.x), + (float)(vecIn[1]) / (quantization.y), + (float)(vecIn[2]) / (quantization.z), + 0.f); + + vecOut += bvhAabbMin; + return vecOut; +} + + // work-in-progress __kernel void findCompoundPairsKernel( __global const int4* pairs, __global const BodyData* rigidBodies, @@ -764,10 +889,13 @@ __kernel void findCompoundPairsKernel( __global const int4* pairs, __global const float4* uniqueEdges, __global const btGpuFace* faces, __global const int* indices, - __global btAabbCL* aabbs, + __global b3Aabb_t* aabbLocalSpace, __global const btGpuChildShape* gpuChildShapes, __global volatile int4* 
gpuCompoundPairsOut, __global volatile int* numCompoundPairsOut, + __global const b3BvhSubtreeInfo* subtrees, + __global const b3QuantizedBvhNode* quantizedNodes, + __global const b3BvhInfo* bvhInfos, int numPairs, int maxNumCompoundPairsCapacity ) @@ -793,6 +921,157 @@ __kernel void findCompoundPairsKernel( __global const int4* pairs, return; } + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; + int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; + int numSubTreesA = bvhInfos[bvhA].m_numSubTrees; + int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset; + int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset; + + + int numSubTreesB = bvhInfos[bvhB].m_numSubTrees; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + + for (int p=0;pmaxStackDepth && !(isLeafA && isLeafB)) + { + //printf("Error: traversal exceeded maxStackDepth"); + continue; + } + + if(isInternalA) + { + int nodeAleftChild = node.x+1; + bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]); + int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]); + + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); + int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); + + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); + } + else + { + nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); + nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); + } + } + else + { + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); + int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); + } + else + { + int compoundPairIdx = atomic_inc(numCompoundPairsOut); + if (compoundPairIdxm_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +"}\n" +"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +"}\n" +" \n" +"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes, points to the root of the subtree\n" +" int m_rootNodeIndex;\n" +" //4 bytes\n" +" int m_subtreeSize;\n" +" int m_padding[3];\n" +"} b3BvhSubtreeInfo;\n" "typedef struct\n" "{\n" " float4 m_childPosition;\n" @@ -67,6 +141,210 @@ static const char* satKernelsCL= \ " int m_maxIndices[4];\n" " };\n" "} btAabbCL;\n" +"#ifndef B3_AABB_H\n" +"#define B3_AABB_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3AtomicInc atomic_inc\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef 
float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" +"{\n" +" return b3FastNormalize4(in);\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" 
out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3Aabb b3Aabb_t;\n" +"struct b3Aabb\n" +"{\n" +" union\n" +" {\n" +" float m_min[4];\n" +" b3Float4 m_minVec;\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float m_max[4];\n" +" b3Float4 m_maxVec;\n" +" int m_signedMaxIndices[4];\n" +" };\n" +"};\n" +"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +" b3Float4ConstArg pos,\n" +" b3QuatConstArg orn,\n" +" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +"{\n" +" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +" b3Mat3x3 m;\n" +" m = b3QuatGetRotationMatrix(orn);\n" +" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +" \n" +" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +" 0.f);\n" +" *aabbMinOut = center-extent;\n" +" *aabbMaxOut = center+extent;\n" +"}\n" +"/// conservative test for overlap between two aabbs\n" +"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg 
aabbMin1,b3Float4ConstArg aabbMax1,\n" +" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +"{\n" +" bool overlap = true;\n" +" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" +" return overlap;\n" +"}\n" +"#endif //B3_AABB_H\n" +"/*\n" +"Bullet Continuous Collision Detection and Physics Library\n" +"Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" +"This software is provided 'as-is', without any express or implied warranty.\n" +"In no event will the authors be held liable for any damages arising from the use of this software.\n" +"Permission is granted to anyone to use this software for any purpose,\n" +"including commercial applications, and to alter it and redistribute it freely,\n" +"subject to the following restrictions:\n" +"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +"3. 
This notice may not be removed or altered from any source distribution.\n" +"*/\n" +"#ifndef B3_INT2_H\n" +"#define B3_INT2_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3UnsignedInt2 uint2\n" +"#define b3Int2 int2\n" +"#define b3MakeInt2 (int2)\n" +"#endif //__cplusplus\n" +"#endif\n" "typedef struct\n" "{\n" " float4 m_plane;\n" @@ -650,6 +928,28 @@ static const char* satKernelsCL= \ " }\n" " \n" "}\n" +"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +"{\n" +" b3Float4 vecOut;\n" +" vecOut = b3MakeFloat4(\n" +" (float)(vecIn[0]) / (quantization.x),\n" +" (float)(vecIn[1]) / (quantization.y),\n" +" (float)(vecIn[2]) / (quantization.z),\n" +" 0.f);\n" +" vecOut += bvhAabbMin;\n" +" return vecOut;\n" +"}\n" +"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +"{\n" +" b3Float4 vecOut;\n" +" vecOut = b3MakeFloat4(\n" +" (float)(vecIn[0]) / (quantization.x),\n" +" (float)(vecIn[1]) / (quantization.y),\n" +" (float)(vecIn[2]) / (quantization.z),\n" +" 0.f);\n" +" vecOut += bvhAabbMin;\n" +" return vecOut;\n" +"}\n" "// work-in-progress\n" "__kernel void findCompoundPairsKernel( __global const int4* pairs, \n" " __global const BodyData* rigidBodies, \n" @@ -659,10 +959,13 @@ static const char* satKernelsCL= \ " __global const float4* uniqueEdges,\n" " __global const btGpuFace* faces,\n" " __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" +" __global b3Aabb_t* aabbLocalSpace,\n" " __global const btGpuChildShape* gpuChildShapes,\n" " __global volatile int4* gpuCompoundPairsOut,\n" " __global volatile int* numCompoundPairsOut,\n" +" __global const b3BvhSubtreeInfo* subtrees,\n" +" __global const b3QuantizedBvhNode* quantizedNodes,\n" +" __global const b3BvhInfo* bvhInfos,\n" " int numPairs,\n" " int maxNumCompoundPairsCapacity\n" " )\n" @@ -681,6 +984,131 @@ static const char* satKernelsCL= \ " {\n" " return;\n" " }\n" +" if 
((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +" {\n" +" int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" +" int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" +" int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" +" int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" +" int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" +" int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" +" b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" \n" +" for (int p=0;pmaxStackDepth && !(isLeafA && isLeafB))\n" +" {\n" +" //printf(\"Error: traversal exceeded maxStackDepth\");\n" +" continue;\n" +" }\n" +" if(isInternalA)\n" +" {\n" +" int nodeAleftChild = node.x+1;\n" +" bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" +" int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" +" if(isInternalB)\n" +" { \n" +" int nodeBleftChild = node.y+1;\n" +" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +" int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" +" }\n" +" else\n" +" {\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(isInternalB)\n" +" {\n" +" int nodeBleftChild = node.y+1;\n" +" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +" int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +" nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" +" }\n" +" else\n" +" {\n" +" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +" if (compoundPairIdx& src, b3OpenCLArray< void b3PrefixScanFloat4CL::executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, b3Vector3* sum) { - b3Vector3 s(0,0,0); + b3Vector3 s=b3MakeVector3(0,0,0); //if( data->m_option == EXCLUSIVE ) { for(int i=0; i=0) { b3SapAabb aabb; - b3Vector3 myAabbMin(-radius,-radius,-radius); - b3Vector3 myAabbMax(radius,radius,radius); + b3Vector3 myAabbMin=b3MakeVector3(-radius,-radius,-radius); + b3Vector3 myAabbMax=b3MakeVector3(radius,radius,radius); aabb.m_min[0] = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x; aabb.m_min[1] = myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y; @@ -226,10 +226,7 @@ int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConsta { int faceOffset = m_data->m_convexFaces.size(); b3GpuFace& face = m_data->m_convexFaces.expand(); - face.m_plane[0] = faceNormal.getX(); - face.m_plane[1] = faceNormal.getY(); - face.m_plane[2] = faceNormal.getZ(); - 
face.m_plane[3] = faceConstant; + face.m_plane = b3MakeVector3(faceNormal.x,faceNormal.y,faceNormal.z,faceConstant); return faceOffset; } @@ -303,10 +300,12 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Colli for (i=0;im_faces.size();i++) { - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[0] = convexPtr->m_faces[i].m_plane[0]; - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[1] = convexPtr->m_faces[i].m_plane[1]; - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[2] = convexPtr->m_faces[i].m_plane[2]; - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[3] = convexPtr->m_faces[i].m_plane[3]; + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane = b3MakeVector3(convexPtr->m_faces[i].m_plane[0], + convexPtr->m_faces[i].m_plane[1], + convexPtr->m_faces[i].m_plane[2], + convexPtr->m_faces[i].m_plane[3]); + + int indexOffset = m_data->m_convexIndices.size(); int numIndices = convexPtr->m_faces[i].m_indices.size(); m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; @@ -344,7 +343,7 @@ int b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int stride for (int i=0;im_vertices.size();i++) localCenter+=utilPtr->m_vertices[i]; localCenter*= (1.f/utilPtr->m_vertices.size()); @@ -384,8 +383,8 @@ int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr) { b3SapAabb aabb; - b3Vector3 myAabbMin(1e30f,1e30f,1e30f); - b3Vector3 myAabbMax(-1e30f,-1e30f,-1e30f); + b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f); + b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f); for (int i=0;im_vertices.size();i++) { @@ -436,8 +435,8 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray childLocalAabbs; childLocalAabbs.resize(childShapes->size()); @@ -449,20 +448,15 @@ int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArraym_localShapeAABBCPU->at(childColIndex); - b3Vector3 childLocalAabbMin(aabbLoc.m_min[0],aabbLoc.m_min[1],aabbLoc.m_min[2]); - b3Vector3 
childLocalAabbMax(aabbLoc.m_max[0],aabbLoc.m_max[1],aabbLoc.m_max[2]); + b3Vector3 childLocalAabbMin=b3MakeVector3(aabbLoc.m_min[0],aabbLoc.m_min[1],aabbLoc.m_min[2]); + b3Vector3 childLocalAabbMax=b3MakeVector3(aabbLoc.m_max[0],aabbLoc.m_max[1],aabbLoc.m_max[2]); b3Vector3 aMin,aMax; b3Scalar margin(0.f); b3Transform childTr; childTr.setIdentity(); - childTr.setOrigin(b3Vector3(childShapes->at(i).m_childPosition[0], - childShapes->at(i).m_childPosition[1], - childShapes->at(i).m_childPosition[2])); - childTr.setRotation(b3Quaternion(childShapes->at(i).m_childOrientation[0], - childShapes->at(i).m_childOrientation[1], - childShapes->at(i).m_childOrientation[2], - childShapes->at(i).m_childOrientation[3])); + childTr.setOrigin(childShapes->at(i).m_childPosition); + childTr.setRotation(b3Quaternion(childShapes->at(i).m_childOrientation)); b3TransformAabb(childLocalAabbMin,childLocalAabbMax,margin,childTr,aMin,aMax); myAabbMin.setMin(aMin); myAabbMax.setMax(aMax); @@ -580,7 +574,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray* vert { - b3Vector3 scaling(scaling1[0],scaling1[1],scaling1[2]); + b3Vector3 scaling=b3MakeVector3(scaling1[0],scaling1[1],scaling1[2]); int collidableIndex = allocateCollidable(); if (collidableIndex<0) @@ -594,8 +588,8 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray* vert b3SapAabb aabb; - b3Vector3 myAabbMin(1e30f,1e30f,1e30f); - b3Vector3 myAabbMax(-1e30f,-1e30f,-1e30f); + b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f); + b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f); for (int i=0;isize();i++) { @@ -625,7 +619,7 @@ int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray* vert b3IndexedMesh mesh; mesh.m_numTriangles = indices->size()/3; mesh.m_numVertices = vertices->size(); - mesh.m_vertexBase = (const unsigned char *)&vertices->at(0).getX(); + mesh.m_vertexBase = (const unsigned char *)&vertices->at(0).x; mesh.m_vertexStride = sizeof(b3Vector3); mesh.m_triangleIndexStride = 3 * 
sizeof(int);// or sizeof(int) mesh.m_triangleIndexBase = (const unsigned char *)&indices->at(0); @@ -673,17 +667,17 @@ int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray* { - b3Vector3 scaling(scaling1[0],scaling1[1],scaling1[2]); + b3Vector3 scaling=b3MakeVector3(scaling1[0],scaling1[1],scaling1[2]); m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1); m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1); b3ConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1); - convex.mC = b3Vector3(0,0,0); - convex.mE = b3Vector3(0,0,0); - convex.m_extents= b3Vector3(0,0,0); - convex.m_localCenter = b3Vector3(0,0,0); + convex.mC = b3MakeVector3(0,0,0); + convex.mE = b3MakeVector3(0,0,0); + convex.m_extents= b3MakeVector3(0,0,0); + convex.m_localCenter = b3MakeVector3(0,0,0); convex.m_radius = 0.f; convex.m_numUniqueEdges = 0; @@ -709,10 +703,7 @@ int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray* b3Vector3 normal = ((vert1-vert0).cross(vert2-vert0)).normalize(); b3Scalar c = -(normal.dot(vert0)); - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[0] = normal.getX(); - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[1] = normal.getY(); - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[2] = normal.getZ(); - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane[3] = c; + m_data->m_convexFaces[convex.m_faceOffset+i].m_plane = b3MakeVector4(normal.x,normal.y,normal.z,c); int indexOffset = m_data->m_convexIndices.size(); int numIndices = 3; m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; @@ -898,8 +889,8 @@ const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu) { - b3Vector3 aabbMin(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]); - b3Vector3 
aabbMax (aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]); + b3Vector3 aabbMin=b3MakeVector3(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]); + b3Vector3 aabbMax=b3MakeVector3(aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]); if (m_data->m_numAcceleratedRigidBodies >= (m_data->m_config.m_maxConvexBodies)) @@ -917,9 +908,9 @@ int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f body.m_frictionCoeff = friction; body.m_restituitionCoeff = restitution; - body.m_angVel.setZero(); - body.m_linVel.setValue(0,0,0);//.setZero(); - body.m_pos.setValue(position[0],position[1],position[2]); + body.m_angVel = b3MakeVector3(0,0,0); + body.m_linVel=b3MakeVector3(0,0,0);//.setZero(); + body.m_pos =b3MakeVector3(position[0],position[1],position[2]); body.m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]); body.m_collidableIdx = collidableIndex; if (collidableIndex>=0) @@ -1066,7 +1057,7 @@ void b3GpuNarrowPhase::setObjectTransformCpu(float* position, float* orientation { if (bodyIndex>=0 && bodyIndexm_bodyBufferCPU->size()) { - m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.setValue(position[0],position[1],position[2]); + m_data->m_bodyBufferCPU->at(bodyIndex).m_pos=b3MakeVector3(position[0],position[1],position[2]); m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]); } else @@ -1078,8 +1069,8 @@ void b3GpuNarrowPhase::setObjectVelocityCpu(float* linVel, float* angVel, int bo { if (bodyIndex>=0 && bodyIndexm_bodyBufferCPU->size()) { - m_data->m_bodyBufferCPU->at(bodyIndex).m_linVel.setValue(linVel[0],linVel[1],linVel[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_angVel.setValue(angVel[0],angVel[1],angVel[2]); + m_data->m_bodyBufferCPU->at(bodyIndex).m_linVel=b3MakeVector3(linVel[0],linVel[1],linVel[2]); + m_data->m_bodyBufferCPU->at(bodyIndex).m_angVel=b3MakeVector3(angVel[0],angVel[1],angVel[2]); } else { b3Warning("setObjectVelocityCpu out of range.\n"); diff --git 
a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp index c7026cfb8..e33bc455c 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -664,10 +664,10 @@ void b3GpuPgsJacobiSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solver b3Assert(rb); // solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3Vector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())); + solverBody->internalSetInvMass(b3MakeVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())); solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3Vector3(1,1,1); - solverBody->m_linearFactor = b3Vector3(1,1,1); + solverBody->m_angularFactor = b3MakeVector3(1,1,1); + solverBody->m_linearFactor = b3MakeVector3(1,1,1); solverBody->m_linearVelocity = getLinearVelocity(rb); solverBody->m_angularVelocity = getAngularVelocity(rb); } diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 814493410..e241f63a9 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -33,7 +33,7 @@ subject to the following restrictions: #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" -bool useDbvt = true;//false;//true; +bool useDbvt = false;//true; bool useBullet2CpuSolver = true; bool dumpContactStats = false; @@ -228,8 +228,8 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) for (int i=0;im_allAabbsCPU.size();i++) { b3BroadphaseProxy* proxy = &m_data->m_broadphaseDbvt->m_proxies[i]; - b3Vector3 aabbMin(m_data->m_allAabbsCPU[i].m_min[0],m_data->m_allAabbsCPU[i].m_min[1],m_data->m_allAabbsCPU[i].m_min[2]); - b3Vector3 
aabbMax(m_data->m_allAabbsCPU[i].m_max[0],m_data->m_allAabbsCPU[i].m_max[1],m_data->m_allAabbsCPU[i].m_max[2]); + b3Vector3 aabbMin=b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0],m_data->m_allAabbsCPU[i].m_min[1],m_data->m_allAabbsCPU[i].m_min[2]); + b3Vector3 aabbMax=b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0],m_data->m_allAabbsCPU[i].m_max[1],m_data->m_allAabbsCPU[i].m_max[2]); m_data->m_broadphaseDbvt->setAabb(proxy,aabbMin,aabbMax,0); } } @@ -498,19 +498,19 @@ void b3GpuRigidBodyPipeline::writeAllInstancesToGpu() int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu) { - b3Vector3 aabbMin(0,0,0),aabbMax(0,0,0); + b3Vector3 aabbMin=b3MakeVector3(0,0,0),aabbMax=b3MakeVector3(0,0,0); if (collidableIndex>=0) { b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex); - b3Vector3 localAabbMin(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]); - b3Vector3 localAabbMax(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]); + b3Vector3 localAabbMin=b3MakeVector3(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]); + b3Vector3 localAabbMax=b3MakeVector3(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]); b3Scalar margin = 0.01f; b3Transform t; t.setIdentity(); - t.setOrigin(b3Vector3(position[0],position[1],position[2])); + t.setOrigin(b3MakeVector3(position[0],position[1],position[2])); t.setRotation(b3Quaternion(orientation[0],orientation[1],orientation[2],orientation[3])); b3TransformAabb(localAabbMin,localAabbMax, margin,t,aabbMin,aabbMax); } else diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index 69c9469f4..229cf0553 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -30,6 +30,20 @@ static const char* batchingKernelsCL= \ "#ifdef 
__cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 14111699a..0922b1131 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -30,6 +30,20 @@ static const char* batchingKernelsNewCL= \ "#ifdef __cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index d6758e4dc..464417938 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -30,6 +30,20 @@ static const char* solverSetupCL= \ "#ifdef __cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg 
const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index 4efa7a1dd..ed30ccf11 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -30,6 +30,20 @@ static const char* solverSetup2CL= \ "#ifdef __cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index 4f8787bcc..cf3f9a992 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -30,6 +30,20 @@ static const char* solverUtilsCL= \ "#ifdef __cplusplus\n" "#else\n" " typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" 
+" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" "#endif \n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" diff --git a/test/OpenCL/KernelLaunch/main.cpp b/test/OpenCL/KernelLaunch/main.cpp index a22ee3b77..fe44a4d49 100644 --- a/test/OpenCL/KernelLaunch/main.cpp +++ b/test/OpenCL/KernelLaunch/main.cpp @@ -21,9 +21,9 @@ subject to the following restrictions: #include #include -#include "Bullet3Common/b3Vector3.h" +#include "Bullet3Common/shared/b3Float4.h" -typedef b3Vector3 b3Float4; +//typedef b3Vector3 b3Float4; typedef struct b3Contact4Data b3Contact4Data_t; struct b3Contact4Data {