From a9a758dd543c9fd985a2f407a86153c36ce93734 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Wed, 6 Nov 2013 19:57:36 -0800 Subject: [PATCH] share more code between OpenCL kernels and C++ by moving kernel data structures/code to the 'shared' folders --- .../shared/b3Collidable.h | 3 + .../shared/b3RigidBodyData.h | 4 + .../shared/b3UpdateAabbs.h | 19 +- .../shared/b3PlatformDefinitions.h | 3 +- .../b3GpuSapBroadphase.cpp | 4 +- .../b3ConvexHullContact.cpp | 10 +- .../kernels/primitiveContacts.h | 8 + .../kernels/satClipHullContacts.h | 8 + .../NarrowphaseCollision/kernels/satKernels.h | 8 + .../ParallelPrimitives/b3PrefixScanCL.h | 2 +- .../RigidBody/b3GpuBatchingPgsSolver.cpp | 105 ++++- .../RigidBody/b3GpuNarrowPhase.cpp | 9 + .../RigidBody/b3GpuNarrowPhase.h | 1 + .../RigidBody/b3GpuRigidBodyPipeline.cpp | 99 ++++- .../RigidBody/kernels/batchingKernels.h | 8 + .../RigidBody/kernels/batchingKernelsNew.h | 8 + .../RigidBody/kernels/solverSetup.h | 8 + .../RigidBody/kernels/solverSetup2.h | 8 + .../RigidBody/kernels/solverUtils.h | 8 + .../RigidBody/kernels/updateAabbsKernel.cl | 190 +-------- .../RigidBody/kernels/updateAabbsKernel.h | 400 ++++++++++++------ 21 files changed, 549 insertions(+), 364 deletions(-) diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h index c3407a769..7fd654a53 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h @@ -18,6 +18,9 @@ enum b3ShapeTypes MAX_NUM_SHAPE_TYPES, }; +typedef struct b3Collidable b3Collidable_t; + + struct b3Collidable { union { diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h index 473e5cc44..bf5608a12 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h @@ -4,6 +4,9 @@ #include "Bullet3Common/shared/b3Float4.h" #include "Bullet3Common/shared/b3Quat.h" +typedef struct b3RigidBodyData b3RigidBodyData_t; + + struct b3RigidBodyData { b3Float4 m_pos; @@ -17,5 +20,6 @@ struct b3RigidBodyData float m_frictionCoeff; }; + #endif //B3_RIGIDBODY_DATA_H \ No newline at end of file diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h index 54686b35d..8d40d19a0 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h @@ -4,13 +4,15 @@ #include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3CollidableData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -void b3ComputeWorldAabb( int bodyId, b3RigidBodyData* body, b3CollidableData* collidables, b3Aabb* localShapeAABB, b3Aabb* worldAabbs) +void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs) { + __global const b3RigidBodyData_t* body = &bodies[bodyId]; + b3Float4 position = body->m_pos; b3Quat orientation = body->m_quat; @@ -20,10 +22,17 @@ void b3ComputeWorldAabb( int bodyId, b3RigidBodyData* body, b3CollidableData* c if (shapeIndex>=0) { - b3Aabb localAabb = localShapeAABB[shapeIndex]; - b3Aabb worldAabb; + b3Aabb_t localAabb = localShapeAABB[collidableIndex]; + b3Aabb_t worldAabb; - b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&worldAabb.m_minVec,&worldAabb.m_maxVec); + b3Float4 aabbAMinOut,aabbAMaxOut; + float margin = 0.f; + b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut); + + worldAabb.m_minVec =aabbAMinOut; + worldAabb.m_minIndices[3] = bodyId; + worldAabb.m_maxVec = aabbAMaxOut; + worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1; worldAabbs[bodyId] = worldAabb; } } diff --git a/src/Bullet3Common/shared/b3PlatformDefinitions.h b/src/Bullet3Common/shared/b3PlatformDefinitions.h index 64f784e8a..2c62236f0 100644 --- a/src/Bullet3Common/shared/b3PlatformDefinitions.h +++ b/src/Bullet3Common/shared/b3PlatformDefinitions.h @@ -8,9 +8,10 @@ struct MyTest #ifdef __cplusplus #define b3AtomicInc(a) ((*a)++) +#define __global #else #define b3AtomicInc atomic_inc - +#define b3Fabs fabs #endif #endif diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 93d0b6bcd..f779358ed 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -842,8 +842,8 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) { //test - if (m_currentBuffer>=0) - return calculateOverlappingPairsHostIncremental3Sap(); +// if (m_currentBuffer>=0) + // return calculateOverlappingPairsHostIncremental3Sap(); b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); m_allAabbsGPU.copyToHost(m_allAabbsCPU); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 7aeb3a49d..f293d23f9 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -2897,11 +2897,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - //int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, - oldHostContacts); + int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, + // oldHostContacts); if (contactIndex>=0) diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index 9d20efb93..b21de63f9 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -13,6 +13,7 @@ static const char* primitiveContactsKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -33,6 +34,13 @@ static const char* primitiveContactsKernelsCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index ce263e8e9..65d36410c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -45,6 +45,7 @@ static const char* satClipKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -65,6 +66,13 @@ static const char* satClipKernelsCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index 7ef1d0c48..fbea20f96 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -154,6 +154,7 @@ static const char* satKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -174,6 +175,13 @@ static const char* satKernelsCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "#ifndef B3_MAT3x3_H\n" "#define B3_MAT3x3_H\n" diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h index 665e46e90..a9a2e61b9 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h @@ -31,7 +31,7 @@ class b3PrefixScanCL virtual ~b3PrefixScanCL(); void execute(b3OpenCLArray& src, b3OpenCLArray& dst, int n, unsigned int* sum = 0); - void executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, unsigned int* sum); + void executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, unsigned int* sum=0); }; #endif //B3_PREFIX_SCAN_CL_H diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 72ba25012..3ab20da6b 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -6,6 +6,10 @@ bool gpuSetSortData = true; bool optionalSortContactsDeterminism = true; bool gpuSortContactsDeterminism = true; +bool useCpuCopyConstraints = false; + +bool useScanHost = false; +bool reorderContactsOnCpu = false; #include "b3GpuBatchingPgsSolver.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" @@ -815,19 +819,42 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } + if (useScanHost) { // 4. find entries - B3_PROFILE("gpuBoundSearch"); + B3_PROFILE("cpuBoundSearch"); + b3AlignedObjectArray countsHost; + countsNative->copyToHost(countsHost); + + b3AlignedObjectArray sortDataHost; + m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); + - m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - + //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); + m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); + + countsNative->copyFromHost(countsHost); + //adl::BoundSearch::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); //unsigned int sum; - m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); - //printf("sum = %d\n",sum); + //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); + b3AlignedObjectArray offsetsHost; + offsetsHost.resize(offsetsNative->size()); + + + m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); + offsetsNative->copyFromHost(offsetsHost); + + //printf("sum = %d\n",sum); + } else + { + // 4. find entries + B3_PROFILE("gpuBoundSearch"); + m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); + m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); } @@ -835,14 +862,45 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (nContacts) { // 5. sort constraints by cellIdx + if (reorderContactsOnCpu) + { + B3_PROFILE("cpu m_reorderContactKernel"); + b3AlignedObjectArray sortDataHost; + m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); + b3AlignedObjectArray inContacts; + b3AlignedObjectArray outContacts; + m_data->m_pBufContactOutGPU->copyToHost(inContacts); + outContacts.resize(inContacts.size()); + for (int i=0;im_solverGPU->m_contactBuffer2->copyFromHost(outContacts); + +/* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" + "{\n" + " int nContacts = cb.x;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int srcIdx = sortData[gIdx].y;\n" + " out[gIdx] = in[srcIdx];\n" + " }\n" + "}\n" + */ + } else { B3_PROFILE("gpu m_reorderContactKernel"); b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), + b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; + b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); @@ -868,15 +926,32 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (nContacts) { - B3_PROFILE("gpu m_copyConstraintKernel"); - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - //we use the clFinish for proper benchmark/profile - clFinish(m_data->m_queue); + + if (useCpuCopyConstraints) + { + for (int i=0;im_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); +// m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); + // m_data->m_pBufContactOutGPU->getBufferCL() + } + + } else + { + B3_PROFILE("gpu m_copyConstraintKernel"); + b3Int4 cdata; cdata.x = nContacts; + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), + b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) + }; + + b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( cdata ); + launcher.launch1D( nContacts, 64 ); + //we use the clFinish for proper benchmark/profile + clFinish(m_data->m_queue); + } } diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp index 0aa1daebb..d52ea4a97 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp @@ -784,6 +784,15 @@ const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const return 0; } +const struct b3SapAabb* b3GpuNarrowPhase::getLocalSpaceAabbsCpu() const +{ + if (m_data->m_localShapeAABBCPU->size()) + { + return &m_data->m_localShapeAABBCPU->at(0); + } + return 0; +} + cl_mem b3GpuNarrowPhase::getAabbLocalSpaceBufferGpu() { diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h index 5d0f9f5ff..e29e362ce 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h @@ -72,6 +72,7 @@ public: const struct b3Collidable* getCollidablesCpu() const; int getNumCollidablesGpu() const; + const struct b3SapAabb* getLocalSpaceAabbsCpu() const; const struct b3Contact4* getContactsCPU() const; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 0fe30dd0c..24f68a7d9 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -25,7 +25,7 @@ subject to the following restrictions: #include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" - +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" //#define TEST_OTHER_GPU_SOLVER @@ -36,6 +36,8 @@ subject to the following restrictions: bool useDbvt = false;//true; bool useBullet2CpuSolver = true; bool dumpContactStats = false; +bool calcWorldSpaceAabbOnCpu = false;//true; +bool useCalculateOverlappingPairsHost = false; #ifdef TEST_OTHER_GPU_SOLVER #include "b3GpuJacobiSolver.h" @@ -240,15 +242,19 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); } else { - m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); - //m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); - + if (useCalculateOverlappingPairsHost) + { + m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); + } else + { + m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); + } numPairs = m_data->m_broadphaseSap->getNumOverlap(); } } //compute contact points - +// printf("numPairs=%d\n",numPairs); int numContacts = 0; @@ -434,6 +440,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep) + void b3GpuRigidBodyPipeline::setupGpuAabbsFull() { cl_int ciErrNum=0; @@ -442,27 +449,75 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull() if (!numBodies) return; - //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) - b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel); - launcher.setConst(numBodies); - cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); - launcher.setBuffer(bodies); - cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu(); - launcher.setBuffer(collidables); - cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu(); - launcher.setBuffer(localAabbs); - - cl_mem worldAabbs =0; - if (useDbvt) + if (calcWorldSpaceAabbOnCpu) { - worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); + + if (numBodies) + { + if (useDbvt) + { + m_data->m_allAabbsCPU.resize(numBodies); + m_data->m_narrowphase->readbackAllBodiesToCpu(); + for (int i=0;im_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(),&m_data->m_allAabbsCPU[0]); + } + m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); + } else + { + m_data->m_broadphaseSap->m_allAabbsCPU.resize(numBodies); + m_data->m_narrowphase->readbackAllBodiesToCpu(); + for (int i=0;im_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(),&m_data->m_broadphaseSap->m_allAabbsCPU[0]); + } + m_data->m_broadphaseSap->m_allAabbsGPU.copyFromHost(m_data->m_broadphaseSap->m_allAabbsCPU); + } + } } else { - worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); + //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) + b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel); + launcher.setConst(numBodies); + cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); + launcher.setBuffer(bodies); + cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu(); + launcher.setBuffer(collidables); + cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu(); + launcher.setBuffer(localAabbs); + + cl_mem worldAabbs =0; + if (useDbvt) + { + worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); + } else + { + worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); + } + launcher.setBuffer(worldAabbs); + launcher.launch1D(numBodies); + + oclCHECKERROR(ciErrNum, CL_SUCCESS); } - launcher.setBuffer(worldAabbs); - launcher.launch1D(numBodies); - oclCHECKERROR(ciErrNum, CL_SUCCESS); + + /* + b3AlignedObjectArray aabbs; + m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs); + + printf("numAabbs = %d\n", aabbs.size()); + + for (int i=0;i1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 0922b1131..175e16d1a 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -25,6 +25,7 @@ static const char* batchingKernelsNewCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -45,6 +46,13 @@ static const char* batchingKernelsNewCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index 464417938..0f70191b0 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -25,6 +25,7 @@ static const char* solverSetupCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -45,6 +46,13 @@ static const char* solverSetupCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index ed30ccf11..d3df012c1 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -25,6 +25,7 @@ static const char* solverSetup2CL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -45,6 +46,13 @@ static const char* solverSetup2CL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index cf3f9a992..e2a907e63 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -25,6 +25,7 @@ static const char* solverUtilsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" "#endif\n" "#endif\n" "#ifdef __cplusplus\n" @@ -45,6 +46,13 @@ static const char* solverUtilsCL= \ " return cross(a1, b1);\n" " }\n" "#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" "#endif //B3_FLOAT4_H\n" "typedef struct b3Contact4Data b3Contact4Data_t;\n" "struct b3Contact4Data\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl index 8c005bf01..9672bdb08 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl @@ -1,195 +1,13 @@ -#define SHAPE_CONVEX_HULL 3 - -typedef float4 Quaternion; - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = (float4)(a.xyz,0.f); - float4 b1 = (float4)(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - -typedef struct -{ - float4 m_row[3]; -} Matrix3x3; - -typedef unsigned int u32; +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct Collidable -{ - int m_unused1; - int m_unused2; - int m_shapeType; - int m_shapeIndex; -} Collidable; - - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - - -__inline -Matrix3x3 qtGetRotationMatrix(float4 quat) -{ - float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f); - Matrix3x3 out; - - out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z); - out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z); - out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y); - out.m_row[0].w = 0.f; - - out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z); - out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z); - out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x); - out.m_row[1].w = 0.f; - - out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y); - out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x); - out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y); - out.m_row[2].w = 0.f; - - return out; -} - - -typedef struct -{ - float fx; - float fy; - float fz; - int uw; -} btAABBCL; - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - - - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - - -__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB) +__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB) { int nodeID = get_global_id(0); - if( nodeID < numNodes ) { - float4 position = gBodies[nodeID].m_pos; - float4 orientation = gBodies[nodeID].m_quat; - - - int collidableIndex = gBodies[nodeID].m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - - if (shapeIndex>=0) - { - btAABBCL minAabb = plocalShapeAABB[collidableIndex*2]; - btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1]; - - float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f; - float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f; - - float4 worldCenter = transform(&localCenter,&position,&orientation); - - Matrix3x3 abs_b = qtGetRotationMatrix(orientation); - float4 extent = (float4) ( dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f); - - - pAABB[nodeID*2].fx = worldCenter.x-extent.x; - pAABB[nodeID*2].fy = worldCenter.y-extent.y; - pAABB[nodeID*2].fz = worldCenter.z-extent.z; - pAABB[nodeID*2].uw = nodeID; - - pAABB[nodeID*2+1].fx = worldCenter.x+extent.x; - pAABB[nodeID*2+1].fy = worldCenter.y+extent.y; - pAABB[nodeID*2+1].fz = worldCenter.z+extent.z; - pAABB[nodeID*2+1].uw = gBodies[nodeID].m_invMass==0.f? 0 : 1; - } - } + b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB); + } } diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index eef8e4659..6964e9e2a 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -1,167 +1,313 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project static const char* updateAabbsKernelCL= \ -"#define SHAPE_CONVEX_HULL 3\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" +"#ifndef B3_UPDATE_AABBS_H\n" +"#define B3_UPDATE_AABBS_H\n" +"#ifndef B3_AABB_H\n" +"#define B3_AABB_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" "{\n" -" return cross(a,b);\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3Fabs fabs\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" "}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" "{\n" -" float4 a1 = (float4)(a.xyz,0.f);\n" -" float4 b1 = (float4)(b.xyz,0.f);\n" -" return dot(a1, b1);\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" "}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" "{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" " ans += a.w*b+b.w*a;\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" " return ans;\n" "}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" +"inline b3Quat b3QuatNormalize(b3QuatConstArg in)\n" "{\n" -" return (Quaternion)(-q.xyz, q.w);\n" +" return b3FastNormalize4(in);\n" "}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" "{\n" -" Quaternion qInv = qtInvert( q );\n" +" b3Quat qInv = b3QuatInvert( q );\n" " float4 vcpy = vec;\n" " vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" " return out;\n" "}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" "{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" +" return (b3Quat)(-q.xyz, q.w);\n" "}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" "typedef struct\n" "{\n" -" float4 m_row[3];\n" -"} Matrix3x3;\n" -"typedef unsigned int u32;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct Collidable\n" -"{\n" -" int m_unused1;\n" -" int m_unused2;\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -"} Collidable;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"__inline\n" -"Matrix3x3 qtGetRotationMatrix(float4 quat)\n" +" float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" "{\n" " float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" Matrix3x3 out;\n" -" out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);\n" -" out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);\n" -" out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" " out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);\n" -" out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);\n" -" out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" " out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);\n" -" out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);\n" -" out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" " out.m_row[2].w = 0.f;\n" " return out;\n" "}\n" -"typedef struct \n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" "{\n" -" float fx;\n" -" float fy;\n" -" float fz;\n" -" int uw;\n" -"} btAABBCL;\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" " return out;\n" "}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3Aabb b3Aabb_t;\n" +"struct b3Aabb\n" "{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" +" union\n" " {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" +" float m_min[4];\n" +" b3Float4 m_minVec;\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float m_max[4];\n" +" b3Float4 m_maxVec;\n" +" int m_signedMaxIndices[4];\n" +" };\n" +"};\n" +"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +" b3Float4ConstArg pos,\n" +" b3QuatConstArg orn,\n" +" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +"{\n" +" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +" b3Mat3x3 m;\n" +" m = b3QuatGetRotationMatrix(orn);\n" +" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +" \n" +" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +" 0.f);\n" +" *aabbMinOut = center-extent;\n" +" *aabbMaxOut = center+extent;\n" "}\n" -"__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n" +"/// conservative test for overlap between two aabbs\n" +"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +"{\n" +" bool overlap = true;\n" +" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" +" return overlap;\n" +"}\n" +"#endif //B3_AABB_H\n" +"#ifndef B3_COLLIDABLE_H\n" +"#define B3_COLLIDABLE_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"enum b3ShapeTypes\n" +"{\n" +" SHAPE_HEIGHT_FIELD=1,\n" +" SHAPE_CONVEX_HULL=3,\n" +" SHAPE_PLANE=4,\n" +" SHAPE_CONCAVE_TRIMESH=5,\n" +" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +" SHAPE_SPHERE=7,\n" +" MAX_NUM_SHAPE_TYPES,\n" +"};\n" +"typedef struct b3Collidable b3Collidable_t;\n" +"struct b3Collidable\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" union\n" +" {\n" +" float m_radius;\n" +" int m_compoundBvhIndex;\n" +" };\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +"};\n" +"struct b3GpuChildShape\n" +"{\n" +" b3Float4 m_childPosition;\n" +" b3Quat m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"struct b3CompoundOverlappingPair\n" +"{\n" +" int m_bodyIndexA;\n" +" int m_bodyIndexB;\n" +"// int m_pairType;\n" +" int m_childShapeIndexA;\n" +" int m_childShapeIndexB;\n" +"};\n" +"#endif //B3_COLLIDABLE_H\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#define B3_RIGIDBODY_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +"struct b3RigidBodyData\n" +"{\n" +" b3Float4 m_pos;\n" +" b3Quat m_quat;\n" +" b3Float4 m_linVel;\n" +" b3Float4 m_angVel;\n" +" int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"};\n" +" \n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" +"{\n" +" __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" +" b3Float4 position = body->m_pos;\n" +" b3Quat orientation = body->m_quat;\n" +" \n" +" int collidableIndex = body->m_collidableIdx;\n" +" int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" +" \n" +" if (shapeIndex>=0)\n" +" {\n" +" \n" +" b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" +" b3Aabb_t worldAabb;\n" +" \n" +" b3Float4 aabbAMinOut,aabbAMaxOut; \n" +" float margin = 0.f;\n" +" b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" +" \n" +" worldAabb.m_minVec =aabbAMinOut;\n" +" worldAabb.m_minIndices[3] = bodyId;\n" +" worldAabb.m_maxVec = aabbAMaxOut;\n" +" worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1;\n" +" worldAabbs[bodyId] = worldAabb;\n" +" }\n" +"}\n" +"#endif //B3_UPDATE_AABBS_H\n" +"__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" "{\n" " int nodeID = get_global_id(0);\n" -" \n" " if( nodeID < numNodes )\n" " {\n" -" float4 position = gBodies[nodeID].m_pos;\n" -" float4 orientation = gBodies[nodeID].m_quat;\n" -" \n" -" \n" -" int collidableIndex = gBodies[nodeID].m_collidableIdx;\n" -" int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" -" \n" -" if (shapeIndex>=0)\n" -" {\n" -" btAABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n" -" btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n" -" \n" -" float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;\n" -" float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;\n" -" \n" -" float4 worldCenter = transform(&localCenter,&position,&orientation);\n" -" \n" -" Matrix3x3 abs_b = qtGetRotationMatrix(orientation);\n" -" float4 extent = (float4) ( dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);\n" -" \n" -" \n" -" pAABB[nodeID*2].fx = worldCenter.x-extent.x;\n" -" pAABB[nodeID*2].fy = worldCenter.y-extent.y;\n" -" pAABB[nodeID*2].fz = worldCenter.z-extent.z;\n" -" pAABB[nodeID*2].uw = nodeID;\n" -" \n" -" pAABB[nodeID*2+1].fx = worldCenter.x+extent.x;\n" -" pAABB[nodeID*2+1].fy = worldCenter.y+extent.y;\n" -" pAABB[nodeID*2+1].fz = worldCenter.z+extent.z;\n" -" pAABB[nodeID*2+1].uw = gBodies[nodeID].m_invMass==0.f? 0 : 1;\n" -" }\n" -" } \n" +" b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" +" }\n" "}\n" ;