From 4dcd52c0904be63695c28a429ace46c8f50ed927 Mon Sep 17 00:00:00 2001 From: Jackson Lee Date: Wed, 26 Feb 2014 15:38:59 -0800 Subject: [PATCH] Add only small AABBs to PLBVH, add large AABB support. --- .../b3GpuBroadphaseInterface.h | 3 + .../b3GpuGridBroadphase.cpp | 12 +- .../BroadphaseCollision/b3GpuGridBroadphase.h | 3 + .../b3GpuParallelLinearBvh.cpp | 179 ++++++++++++++---- .../b3GpuParallelLinearBvh.h | 16 +- .../b3GpuParallelLinearBvhBroadphase.cpp | 40 +++- .../b3GpuParallelLinearBvhBroadphase.h | 11 +- .../b3GpuSapBroadphase.cpp | 9 + .../BroadphaseCollision/b3GpuSapBroadphase.h | 2 + .../kernels/parallelLinearBvh.cl | 61 ++++++ .../kernels/parallelLinearBvhKernels.h | 57 ++++++ src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 3 +- 12 files changed, 350 insertions(+), 46 deletions(-) diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h index 3598ffb56..bcbf09a4f 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h @@ -34,6 +34,9 @@ public: virtual b3OpenCLArray& getAllAabbsGPU()=0; virtual b3AlignedObjectArray& getAllAabbsCPU()=0; + + virtual b3OpenCLArray& getSmallAabbIndicesGPU() = 0; + virtual b3OpenCLArray& getLargeAabbIndicesGPU() = 0; }; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp index f5308ecf4..1e707f364 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp @@ -366,4 +366,14 @@ b3OpenCLArray& b3GpuGridBroadphase::getAllAabbsGPU() b3AlignedObjectArray& b3GpuGridBroadphase::getAllAabbsCPU() { return m_allAabbsCPU1; -} \ No newline at end of file +} + +b3OpenCLArray& b3GpuGridBroadphase::getSmallAabbIndicesGPU() +{ + return m_smallAabbsMappingGPU; +} +b3OpenCLArray& b3GpuGridBroadphase::getLargeAabbIndicesGPU() +{ + return m_largeAabbsMappingGPU; +} + diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h index 4dd5c3a3c..c2c66ce7c 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h @@ -78,6 +78,9 @@ public: virtual b3OpenCLArray& getAllAabbsGPU(); virtual b3AlignedObjectArray& getAllAabbsCPU(); + + virtual b3OpenCLArray& getSmallAabbIndicesGPU(); + virtual b3OpenCLArray& getLargeAabbIndicesGPU(); }; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp index c6169ee8e..dd5557360 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp @@ -34,7 +34,9 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_leafNodeParentNodes(context, queue), m_mortonCodesAndAabbIndicies(context, queue), m_mergedAabb(context, queue), - m_leafNodeAabbs(context, queue) + m_leafNodeAabbs(context, queue), + + m_largeAabbs(context, queue) { m_rootNodeIndex.resize(1); @@ -47,6 +49,8 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH); b3Assert(m_parallelLinearBvhProgram); + m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "separateAabbs", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_separateAabbsKernel); m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_findAllNodesMergedAabbKernel); m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros ); @@ -61,10 +65,15 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id b3Assert(m_plbvhCalculateOverlappingPairsKernel); m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_plbvhRayTraverseKernel); + m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbAabbTest", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_plbvhLargeAabbAabbTestKernel); + m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbRayTest", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_plbvhLargeAabbRayTestKernel); } b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() { + clReleaseKernel(m_separateAabbsKernel); clReleaseKernel(m_findAllNodesMergedAabbKernel); clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel); clReleaseKernel(m_constructBinaryTreeKernel); @@ -72,18 +81,68 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); clReleaseKernel(m_plbvhRayTraverseKernel); + clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); + clReleaseKernel(m_plbvhLargeAabbRayTestKernel); clReleaseProgram(m_parallelLinearBvhProgram); } -void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAabbs) +void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAabbs, const b3OpenCLArray& smallAabbIndices, + const b3OpenCLArray& largeAabbIndices) { B3_PROFILE("b3ParallelLinearBvh::build()"); - m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs); + int numLargeAabbs = largeAabbIndices.size(); + int numSmallAabbs = smallAabbIndices.size(); + + //Since all AABBs(both large and small) are input as a contiguous array, + //with 2 additional arrays used to indicate the indices of large and small AABBs, + //it is necessary to separate the AABBs so that the large AABBs will not degrade the quality of the BVH. + { + B3_PROFILE("Separate large and small AABBs"); + + m_largeAabbs.resize(numLargeAabbs); + m_leafNodeAabbs.resize(numSmallAabbs); + + //Write large AABBs into m_largeAabbs + { + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ), + b3BufferInfoCL( largeAabbIndices.getBufferCL() ), + + b3BufferInfoCL( m_largeAabbs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numLargeAabbs); + + launcher.launch1D(numLargeAabbs); + } + + //Write small AABBs into m_leafNodeAabbs + { + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ), + b3BufferInfoCL( smallAabbIndices.getBufferCL() ), + + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numSmallAabbs); + + launcher.launch1D(numSmallAabbs); + } + + clFinish(m_queue); + } // - int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid body AABBs + int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs int numInternalNodes = numLeaves - 1; if(numLeaves < 2) @@ -105,12 +164,14 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab m_mergedAabb.resize(numLeaves); } + + //Find the AABB of all input AABBs; this is used to define the size of //each cell in the virtual grid(2^10 cells in each dimension). { B3_PROFILE("Find AABB of merged nodes"); - m_mergedAabb.copyFromOpenCLArray(worldSpaceAabbs); //Need to make a copy since the kernel modifies the array + m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array for(int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2; numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2) @@ -172,6 +233,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAab clFinish(m_queue); } + // constructSimpleBinaryTree(); } @@ -184,10 +246,10 @@ void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray& out_n int reset = 0; out_numPairs.copyFromHostPointer(&reset, 1); - if( m_leafNodeAabbs.size() < 2 ) return; - + // + if( m_leafNodeAabbs.size() > 1 ) { - B3_PROFILE("PLBVH calculateOverlappingPairs"); + B3_PROFILE("PLBVH small-small AABB test"); int numQueryAabbs = m_leafNodeAabbs.size(); @@ -214,6 +276,32 @@ void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray& out_n clFinish(m_queue); } + int numLargeAabbRigids = m_largeAabbs.size(); + if( numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0 ) + { + B3_PROFILE("PLBVH large-small AABB test"); + + int numQueryAabbs = m_leafNodeAabbs.size(); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_largeAabbs.getBufferCL() ), + + b3BufferInfoCL( out_numPairs.getBufferCL() ), + b3BufferInfoCL( out_overlappingPairs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(maxPairs); + launcher.setConst(numLargeAabbRigids); + launcher.setConst(numQueryAabbs); + + launcher.launch1D(numQueryAabbs); + clFinish(m_queue); + } + // int numPairs = -1; @@ -240,32 +328,59 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray 0 ) { - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - - b3BufferInfoCL( rays.getBufferCL() ), - - b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), - b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) - }; + B3_PROFILE("PLBVH ray test small AABB"); - b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), + b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), + + b3BufferInfoCL( rays.getBufferCL() ), + + b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), + b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(maxRayRigidPairs); + launcher.setConst(numRays); + + launcher.launch1D(numRays); + clFinish(m_queue); + } + int numLargeAabbRigids = m_largeAabbs.size(); + if(numLargeAabbRigids > 0) + { + B3_PROFILE("PLBVH ray test large AABB"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_largeAabbs.getBufferCL() ), + b3BufferInfoCL( rays.getBufferCL() ), + + b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), + b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numLargeAabbRigids); + launcher.setConst(maxRayRigidPairs); + launcher.setConst(numRays); + + launcher.launch1D(numRays); + clFinish(m_queue); + } // int numRayRigidPairs = -1; @@ -278,7 +393,7 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray m_numNodesPerLevelGpu; b3OpenCLArray m_firstIndexOffsetPerLevelGpu; - //1 element per internal node (number_of_internal_nodes = number_of_leaves - 1); index 0 is the root node + //1 element per internal node (number_of_internal_nodes = number_of_leaves - 1) b3OpenCLArray m_internalNodeAabbs; b3OpenCLArray m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child b3OpenCLArray m_internalNodeParentNodes; - //1 element per leaf node + //1 element per leaf node (leaf nodes only include small AABBs) b3OpenCLArray m_leafNodeParentNodes; b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index b3OpenCLArray m_mergedAabb; - b3OpenCLArray m_leafNodeAabbs; + b3OpenCLArray m_leafNodeAabbs; //Contains only small AABBs + + //1 element per large AABB + b3OpenCLArray m_largeAabbs; //Not stored in the BVH public: b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue); virtual ~b3GpuParallelLinearBvh(); - void build(const b3OpenCLArray& worldSpaceAabbs); + void build(const b3OpenCLArray& worldSpaceAabbs, const b3OpenCLArray& smallAabbIndices, + const b3OpenCLArray& largeAabbIndices); ///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses ///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp index 6d7293e2c..b48c2c1de 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp @@ -16,37 +16,67 @@ subject to the following restrictions: b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) : m_plbvh(context, device, queue), + m_numOverlappingPairs(context, queue), m_overlappingPairsGpu(context, queue), + m_aabbsGpu(context, queue), - m_tempNumPairs(context, queue) + m_smallAabbsMappingGpu(context, queue), + m_largeAabbsMappingGpu(context, queue) { - m_tempNumPairs.resize(1); + m_numOverlappingPairs.resize(1); } void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask) { + int newAabbIndex = m_aabbsCpu.size(); + b3SapAabb aabb; aabb.m_minVec = aabbMin; aabb.m_maxVec = aabbMax; + aabb.m_minIndices[3] = userPtr; + aabb.m_signedMaxIndices[3] = newAabbIndex; + + m_smallAabbsMappingCpu.push_back(newAabbIndex); m_aabbsCpu.push_back(aabb); } void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask) { - b3Assert(0); //Not implemented + int newAabbIndex = m_aabbsCpu.size(); + + b3SapAabb aabb; + aabb.m_minVec = aabbMin; + aabb.m_maxVec = aabbMax; + + aabb.m_minIndices[3] = userPtr; + aabb.m_signedMaxIndices[3] = newAabbIndex; + + m_largeAabbsMappingCpu.push_back(newAabbIndex); + + m_aabbsCpu.push_back(aabb); } void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs) { //Reconstruct BVH - m_plbvh.build(m_aabbsGpu); + m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu); // m_overlappingPairsGpu.resize(maxPairs); - m_plbvh.calculateOverlappingPairs(m_tempNumPairs, m_overlappingPairsGpu); + m_plbvh.calculateOverlappingPairs(m_numOverlappingPairs, m_overlappingPairsGpu); } void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs) { b3Assert(0); //CPU version not implemented } + +void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu() +{ + m_aabbsGpu.copyFromHost(m_aabbsCpu); + m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu); + m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu); +} + + + diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h index 2280b48ce..9155bc1a7 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h @@ -22,11 +22,16 @@ class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface { b3GpuParallelLinearBvh m_plbvh; + b3OpenCLArray m_numOverlappingPairs; b3OpenCLArray m_overlappingPairsGpu; + b3OpenCLArray m_aabbsGpu; - b3OpenCLArray m_tempNumPairs; + b3OpenCLArray m_smallAabbsMappingGpu; + b3OpenCLArray m_largeAabbsMappingGpu; b3AlignedObjectArray m_aabbsCpu; + b3AlignedObjectArray m_smallAabbsMappingCpu; + b3AlignedObjectArray m_largeAabbsMappingCpu; public: b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue); @@ -39,13 +44,15 @@ public: virtual void calculateOverlappingPairsHost(int maxPairs); //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu() { m_aabbsGpu.copyFromHost(m_aabbsCpu); } + virtual void writeAabbsToGpu(); virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); } virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); } virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); } virtual b3OpenCLArray& getAllAabbsGPU() { return m_aabbsGpu; } + virtual b3OpenCLArray& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; } + virtual b3OpenCLArray& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; } virtual b3AlignedObjectArray& getAllAabbsCPU() { return m_aabbsCpu; } diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 529951af0..5ab7bf0db 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -1307,3 +1307,12 @@ cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer() { return m_overlappingPairs.getBufferCL(); } + +b3OpenCLArray& b3GpuSapBroadphase::getSmallAabbIndicesGPU() +{ + return m_smallAabbsMappingGPU; +} +b3OpenCLArray& b3GpuSapBroadphase::getLargeAabbIndicesGPU() +{ + return m_largeAabbsMappingGPU; +} diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h index 2d3d39367..7cbf6c7fc 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h @@ -143,6 +143,8 @@ public: virtual int getNumOverlap(); virtual cl_mem getOverlappingPairBuffer(); + virtual b3OpenCLArray& getSmallAabbIndicesGPU(); + virtual b3OpenCLArray& getLargeAabbIndicesGPU(); }; #endif //B3_GPU_SAP_BROADPHASE_H \ No newline at end of file diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index 3b634328a..e2380145e 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -81,6 +81,15 @@ unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z) return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2; } +__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate) +{ + int separatedAabbIndex = get_global_id(0); + if(separatedAabbIndex >= numAabbsToSeparate) return; + + int unseparatedAabbIndex = aabbIndices[separatedAabbIndex]; + out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex]; +} + //Should replace with an optimized parallel reduction __kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge) { @@ -506,3 +515,55 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, } } +__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, + __global int* out_numPairs, __global int4* out_overlappingPairs, + int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids) +{ + int smallAabbIndex = get_global_id(0); + if(smallAabbIndex >= numSmallAabbRigids) return; + + b3AabbCL smallAabb = smallAabbs[smallAabbIndex]; + for(int i = 0; i < numLargeAabbRigids; ++i) + { + b3AabbCL largeAabb = largeAabbs[i]; + if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) ) + { + int4 pair; + pair.x = smallAabb.m_minIndices[3]; + pair.y = largeAabb.m_minIndices[3]; + pair.z = NEW_PAIR_MARKER; + pair.w = NEW_PAIR_MARKER; + + int pairIndex = atomic_inc(out_numPairs); + if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; + } + } +} +__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays, + __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs, + int numLargeAabbRigids, int maxRayRigidPairs, int numRays) +{ + int rayIndex = get_global_id(0); + if(rayIndex >= numRays) return; + + b3Vector3 rayFrom = rays[rayIndex].m_from; + b3Vector3 rayTo = rays[rayIndex].m_to; + b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); + b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); + + for(int i = 0; i < numLargeAabbRigids; ++i) + { + b3AabbCL rigidAabb = largeRigidAabbs[i]; + if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) ) + { + int2 rayRigidPair; + rayRigidPair.x = rayIndex; + rayRigidPair.y = rigidAabb.m_minIndices[3]; + + int pairIndex = atomic_inc(out_numRayRigidPairs); + if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; + } + } +} + + diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index b894e2a11..cfb477d03 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -76,6 +76,13 @@ static const char* parallelLinearBvhCL= \ "{\n" " return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n" "}\n" +"__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n" +"{\n" +" int separatedAabbIndex = get_global_id(0);\n" +" if(separatedAabbIndex >= numAabbsToSeparate) return;\n" +" int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n" +" out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n" +"}\n" "//Should replace with an optimized parallel reduction\n" "__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n" "{\n" @@ -481,4 +488,54 @@ static const char* parallelLinearBvhCL= \ " }\n" " }\n" "}\n" +"__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n" +" __global int* out_numPairs, __global int4* out_overlappingPairs, \n" +" int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n" +"{\n" +" int smallAabbIndex = get_global_id(0);\n" +" if(smallAabbIndex >= numSmallAabbRigids) return;\n" +" \n" +" b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n" +" for(int i = 0; i < numLargeAabbRigids; ++i)\n" +" {\n" +" b3AabbCL largeAabb = largeAabbs[i];\n" +" if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n" +" {\n" +" int4 pair;\n" +" pair.x = smallAabb.m_minIndices[3];\n" +" pair.y = largeAabb.m_minIndices[3];\n" +" pair.z = NEW_PAIR_MARKER;\n" +" pair.w = NEW_PAIR_MARKER;\n" +" \n" +" int pairIndex = atomic_inc(out_numPairs);\n" +" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" +" }\n" +" }\n" +"}\n" +"__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n" +" __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n" +" int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n" +"{\n" +" int rayIndex = get_global_id(0);\n" +" if(rayIndex >= numRays) return;\n" +" \n" +" b3Vector3 rayFrom = rays[rayIndex].m_from;\n" +" b3Vector3 rayTo = rays[rayIndex].m_to;\n" +" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" +" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" \n" +" for(int i = 0; i < numLargeAabbRigids; ++i)\n" +" {\n" +" b3AabbCL rigidAabb = largeRigidAabbs[i];\n" +" if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n" +" {\n" +" int2 rayRigidPair;\n" +" rayRigidPair.x = rayIndex;\n" +" rayRigidPair.y = rigidAabb.m_minIndices[3];\n" +" \n" +" int pairIndex = atomic_inc(out_numRayRigidPairs);\n" +" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" +" }\n" +" }\n" +"}\n" ; diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index 686a7f835..294a20f74 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -305,8 +305,7 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align } else { - //printf("broadphase->getAllAabbsGPU().size(): %d \n", broadphase->getAllAabbsGPU().size()); - m_data->m_plbvh->build( broadphase->getAllAabbsGPU() ); + m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);