Add only small AABBs to PLBVH, add large AABB support.
This commit is contained in:
@@ -35,6 +35,9 @@ public:
|
|||||||
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()=0;
|
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()=0;
|
||||||
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()=0;
|
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()=0;
|
||||||
|
|
||||||
|
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0;
|
||||||
|
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //B3_GPU_BROADPHASE_INTERFACE_H
|
#endif //B3_GPU_BROADPHASE_INTERFACE_H
|
||||||
|
|||||||
@@ -367,3 +367,13 @@ b3AlignedObjectArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsCPU()
|
|||||||
{
|
{
|
||||||
return m_allAabbsCPU1;
|
return m_allAabbsCPU1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b3OpenCLArray<int>& b3GpuGridBroadphase::getSmallAabbIndicesGPU()
|
||||||
|
{
|
||||||
|
return m_smallAabbsMappingGPU;
|
||||||
|
}
|
||||||
|
b3OpenCLArray<int>& b3GpuGridBroadphase::getLargeAabbIndicesGPU()
|
||||||
|
{
|
||||||
|
return m_largeAabbsMappingGPU;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,9 @@ public:
|
|||||||
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU();
|
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU();
|
||||||
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU();
|
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU();
|
||||||
|
|
||||||
|
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
|
||||||
|
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //B3_GPU_GRID_BROADPHASE_H
|
#endif //B3_GPU_GRID_BROADPHASE_H
|
||||||
@@ -34,7 +34,9 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
|||||||
m_leafNodeParentNodes(context, queue),
|
m_leafNodeParentNodes(context, queue),
|
||||||
m_mortonCodesAndAabbIndicies(context, queue),
|
m_mortonCodesAndAabbIndicies(context, queue),
|
||||||
m_mergedAabb(context, queue),
|
m_mergedAabb(context, queue),
|
||||||
m_leafNodeAabbs(context, queue)
|
m_leafNodeAabbs(context, queue),
|
||||||
|
|
||||||
|
m_largeAabbs(context, queue)
|
||||||
{
|
{
|
||||||
m_rootNodeIndex.resize(1);
|
m_rootNodeIndex.resize(1);
|
||||||
|
|
||||||
@@ -47,6 +49,8 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
|||||||
m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH);
|
m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH);
|
||||||
b3Assert(m_parallelLinearBvhProgram);
|
b3Assert(m_parallelLinearBvhProgram);
|
||||||
|
|
||||||
|
m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "separateAabbs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
|
b3Assert(m_separateAabbsKernel);
|
||||||
m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
b3Assert(m_findAllNodesMergedAabbKernel);
|
b3Assert(m_findAllNodesMergedAabbKernel);
|
||||||
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
@@ -61,10 +65,15 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
|||||||
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
||||||
m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
b3Assert(m_plbvhRayTraverseKernel);
|
b3Assert(m_plbvhRayTraverseKernel);
|
||||||
|
m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbAabbTest", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
|
b3Assert(m_plbvhLargeAabbAabbTestKernel);
|
||||||
|
m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbRayTest", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
|
b3Assert(m_plbvhLargeAabbRayTestKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
|
b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
|
||||||
{
|
{
|
||||||
|
clReleaseKernel(m_separateAabbsKernel);
|
||||||
clReleaseKernel(m_findAllNodesMergedAabbKernel);
|
clReleaseKernel(m_findAllNodesMergedAabbKernel);
|
||||||
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||||
clReleaseKernel(m_constructBinaryTreeKernel);
|
clReleaseKernel(m_constructBinaryTreeKernel);
|
||||||
@@ -72,18 +81,68 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
|
|||||||
|
|
||||||
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
||||||
clReleaseKernel(m_plbvhRayTraverseKernel);
|
clReleaseKernel(m_plbvhRayTraverseKernel);
|
||||||
|
clReleaseKernel(m_plbvhLargeAabbAabbTestKernel);
|
||||||
|
clReleaseKernel(m_plbvhLargeAabbRayTestKernel);
|
||||||
|
|
||||||
clReleaseProgram(m_parallelLinearBvhProgram);
|
clReleaseProgram(m_parallelLinearBvhProgram);
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs)
|
void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
|
||||||
|
const b3OpenCLArray<int>& largeAabbIndices)
|
||||||
{
|
{
|
||||||
B3_PROFILE("b3ParallelLinearBvh::build()");
|
B3_PROFILE("b3ParallelLinearBvh::build()");
|
||||||
|
|
||||||
m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs);
|
int numLargeAabbs = largeAabbIndices.size();
|
||||||
|
int numSmallAabbs = smallAabbIndices.size();
|
||||||
|
|
||||||
|
//Since all AABBs(both large and small) are input as a contiguous array,
|
||||||
|
//with 2 additional arrays used to indicate the indices of large and small AABBs,
|
||||||
|
//it is necessary to separate the AABBs so that the large AABBs will not degrade the quality of the BVH.
|
||||||
|
{
|
||||||
|
B3_PROFILE("Separate large and small AABBs");
|
||||||
|
|
||||||
|
m_largeAabbs.resize(numLargeAabbs);
|
||||||
|
m_leafNodeAabbs.resize(numSmallAabbs);
|
||||||
|
|
||||||
|
//Write large AABBs into m_largeAabbs
|
||||||
|
{
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( largeAabbIndices.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( m_largeAabbs.getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(numLargeAabbs);
|
||||||
|
|
||||||
|
launcher.launch1D(numLargeAabbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Write small AABBs into m_leafNodeAabbs
|
||||||
|
{
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( smallAabbIndices.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(numSmallAabbs);
|
||||||
|
|
||||||
|
launcher.launch1D(numSmallAabbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
clFinish(m_queue);
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid body AABBs
|
int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs
|
||||||
int numInternalNodes = numLeaves - 1;
|
int numInternalNodes = numLeaves - 1;
|
||||||
|
|
||||||
if(numLeaves < 2)
|
if(numLeaves < 2)
|
||||||
@@ -105,12 +164,14 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
|||||||
m_mergedAabb.resize(numLeaves);
|
m_mergedAabb.resize(numLeaves);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//Find the AABB of all input AABBs; this is used to define the size of
|
//Find the AABB of all input AABBs; this is used to define the size of
|
||||||
//each cell in the virtual grid(2^10 cells in each dimension).
|
//each cell in the virtual grid(2^10 cells in each dimension).
|
||||||
{
|
{
|
||||||
B3_PROFILE("Find AABB of merged nodes");
|
B3_PROFILE("Find AABB of merged nodes");
|
||||||
|
|
||||||
m_mergedAabb.copyFromOpenCLArray(worldSpaceAabbs); //Need to make a copy since the kernel modifies the array
|
m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array
|
||||||
|
|
||||||
for(int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2;
|
for(int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2;
|
||||||
numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2)
|
numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2)
|
||||||
@@ -172,6 +233,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
|||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
constructSimpleBinaryTree();
|
constructSimpleBinaryTree();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -184,10 +246,10 @@ void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<int>& out_n
|
|||||||
int reset = 0;
|
int reset = 0;
|
||||||
out_numPairs.copyFromHostPointer(&reset, 1);
|
out_numPairs.copyFromHostPointer(&reset, 1);
|
||||||
|
|
||||||
if( m_leafNodeAabbs.size() < 2 ) return;
|
//
|
||||||
|
if( m_leafNodeAabbs.size() > 1 )
|
||||||
{
|
{
|
||||||
B3_PROFILE("PLBVH calculateOverlappingPairs");
|
B3_PROFILE("PLBVH small-small AABB test");
|
||||||
|
|
||||||
int numQueryAabbs = m_leafNodeAabbs.size();
|
int numQueryAabbs = m_leafNodeAabbs.size();
|
||||||
|
|
||||||
@@ -214,6 +276,32 @@ void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<int>& out_n
|
|||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int numLargeAabbRigids = m_largeAabbs.size();
|
||||||
|
if( numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0 )
|
||||||
|
{
|
||||||
|
B3_PROFILE("PLBVH large-small AABB test");
|
||||||
|
|
||||||
|
int numQueryAabbs = m_leafNodeAabbs.size();
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_largeAabbs.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( out_numPairs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( out_overlappingPairs.getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(maxPairs);
|
||||||
|
launcher.setConst(numLargeAabbRigids);
|
||||||
|
launcher.setConst(numQueryAabbs);
|
||||||
|
|
||||||
|
launcher.launch1D(numQueryAabbs);
|
||||||
|
clFinish(m_queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
int numPairs = -1;
|
int numPairs = -1;
|
||||||
@@ -240,7 +328,10 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
|
|||||||
int reset = 0;
|
int reset = 0;
|
||||||
out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
|
out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
|
||||||
|
|
||||||
if( m_leafNodeAabbs.size() < 1 ) return;
|
//
|
||||||
|
if( m_leafNodeAabbs.size() > 0 )
|
||||||
|
{
|
||||||
|
B3_PROFILE("PLBVH ray test small AABB");
|
||||||
|
|
||||||
b3BufferInfoCL bufferInfo[] =
|
b3BufferInfoCL bufferInfo[] =
|
||||||
{
|
{
|
||||||
@@ -265,7 +356,31 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
|
|||||||
|
|
||||||
launcher.launch1D(numRays);
|
launcher.launch1D(numRays);
|
||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
int numLargeAabbRigids = m_largeAabbs.size();
|
||||||
|
if(numLargeAabbRigids > 0)
|
||||||
|
{
|
||||||
|
B3_PROFILE("PLBVH ray test large AABB");
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
|
{
|
||||||
|
b3BufferInfoCL( m_largeAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( rays.getBufferCL() ),
|
||||||
|
|
||||||
|
b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( out_rayRigidPairs.getBufferCL() )
|
||||||
|
};
|
||||||
|
|
||||||
|
b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel");
|
||||||
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
|
launcher.setConst(numLargeAabbRigids);
|
||||||
|
launcher.setConst(maxRayRigidPairs);
|
||||||
|
launcher.setConst(numRays);
|
||||||
|
|
||||||
|
launcher.launch1D(numRays);
|
||||||
|
clFinish(m_queue);
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
int numRayRigidPairs = -1;
|
int numRayRigidPairs = -1;
|
||||||
@@ -278,7 +393,7 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
|
|||||||
|
|
||||||
void b3GpuParallelLinearBvh::constructSimpleBinaryTree()
|
void b3GpuParallelLinearBvh::constructSimpleBinaryTree()
|
||||||
{
|
{
|
||||||
int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid body AABBs
|
int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid bodies with small AABBs
|
||||||
int numInternalNodes = numLeaves - 1;
|
int numInternalNodes = numLeaves - 1;
|
||||||
|
|
||||||
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ class b3GpuParallelLinearBvh
|
|||||||
|
|
||||||
cl_program m_parallelLinearBvhProgram;
|
cl_program m_parallelLinearBvhProgram;
|
||||||
|
|
||||||
|
cl_kernel m_separateAabbsKernel;
|
||||||
cl_kernel m_findAllNodesMergedAabbKernel;
|
cl_kernel m_findAllNodesMergedAabbKernel;
|
||||||
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
||||||
|
|
||||||
@@ -63,6 +64,9 @@ class b3GpuParallelLinearBvh
|
|||||||
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
||||||
cl_kernel m_plbvhRayTraverseKernel;
|
cl_kernel m_plbvhRayTraverseKernel;
|
||||||
|
|
||||||
|
cl_kernel m_plbvhLargeAabbAabbTestKernel;
|
||||||
|
cl_kernel m_plbvhLargeAabbRayTestKernel;
|
||||||
|
|
||||||
b3FillCL m_fill;
|
b3FillCL m_fill;
|
||||||
b3RadixSort32CL m_radixSorter;
|
b3RadixSort32CL m_radixSorter;
|
||||||
|
|
||||||
@@ -75,23 +79,27 @@ class b3GpuParallelLinearBvh
|
|||||||
b3OpenCLArray<int> m_numNodesPerLevelGpu;
|
b3OpenCLArray<int> m_numNodesPerLevelGpu;
|
||||||
b3OpenCLArray<int> m_firstIndexOffsetPerLevelGpu;
|
b3OpenCLArray<int> m_firstIndexOffsetPerLevelGpu;
|
||||||
|
|
||||||
//1 element per internal node (number_of_internal_nodes = number_of_leaves - 1); index 0 is the root node
|
//1 element per internal node (number_of_internal_nodes = number_of_leaves - 1)
|
||||||
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
|
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
|
||||||
b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
|
b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
|
||||||
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
|
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
|
||||||
b3OpenCLArray<int> m_internalNodeParentNodes;
|
b3OpenCLArray<int> m_internalNodeParentNodes;
|
||||||
|
|
||||||
//1 element per leaf node
|
//1 element per leaf node (leaf nodes only include small AABBs)
|
||||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
||||||
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
||||||
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs;
|
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs
|
||||||
|
|
||||||
|
//1 element per large AABB
|
||||||
|
b3OpenCLArray<b3SapAabb> m_largeAabbs; //Not stored in the BVH
|
||||||
|
|
||||||
public:
|
public:
|
||||||
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
|
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
|
||||||
virtual ~b3GpuParallelLinearBvh();
|
virtual ~b3GpuParallelLinearBvh();
|
||||||
|
|
||||||
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs);
|
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
|
||||||
|
const b3OpenCLArray<int>& largeAabbIndices);
|
||||||
|
|
||||||
///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses
|
///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses
|
||||||
///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
|
///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
|
||||||
|
|||||||
@@ -16,37 +16,67 @@ subject to the following restrictions:
|
|||||||
b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) :
|
b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||||
m_plbvh(context, device, queue),
|
m_plbvh(context, device, queue),
|
||||||
|
|
||||||
|
m_numOverlappingPairs(context, queue),
|
||||||
m_overlappingPairsGpu(context, queue),
|
m_overlappingPairsGpu(context, queue),
|
||||||
|
|
||||||
m_aabbsGpu(context, queue),
|
m_aabbsGpu(context, queue),
|
||||||
m_tempNumPairs(context, queue)
|
m_smallAabbsMappingGpu(context, queue),
|
||||||
|
m_largeAabbsMappingGpu(context, queue)
|
||||||
{
|
{
|
||||||
m_tempNumPairs.resize(1);
|
m_numOverlappingPairs.resize(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
||||||
{
|
{
|
||||||
|
int newAabbIndex = m_aabbsCpu.size();
|
||||||
|
|
||||||
b3SapAabb aabb;
|
b3SapAabb aabb;
|
||||||
aabb.m_minVec = aabbMin;
|
aabb.m_minVec = aabbMin;
|
||||||
aabb.m_maxVec = aabbMax;
|
aabb.m_maxVec = aabbMax;
|
||||||
|
|
||||||
aabb.m_minIndices[3] = userPtr;
|
aabb.m_minIndices[3] = userPtr;
|
||||||
|
aabb.m_signedMaxIndices[3] = newAabbIndex;
|
||||||
|
|
||||||
|
m_smallAabbsMappingCpu.push_back(newAabbIndex);
|
||||||
|
|
||||||
m_aabbsCpu.push_back(aabb);
|
m_aabbsCpu.push_back(aabb);
|
||||||
}
|
}
|
||||||
void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
||||||
{
|
{
|
||||||
b3Assert(0); //Not implemented
|
int newAabbIndex = m_aabbsCpu.size();
|
||||||
|
|
||||||
|
b3SapAabb aabb;
|
||||||
|
aabb.m_minVec = aabbMin;
|
||||||
|
aabb.m_maxVec = aabbMax;
|
||||||
|
|
||||||
|
aabb.m_minIndices[3] = userPtr;
|
||||||
|
aabb.m_signedMaxIndices[3] = newAabbIndex;
|
||||||
|
|
||||||
|
m_largeAabbsMappingCpu.push_back(newAabbIndex);
|
||||||
|
|
||||||
|
m_aabbsCpu.push_back(aabb);
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs)
|
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||||
{
|
{
|
||||||
//Reconstruct BVH
|
//Reconstruct BVH
|
||||||
m_plbvh.build(m_aabbsGpu);
|
m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu);
|
||||||
|
|
||||||
//
|
//
|
||||||
m_overlappingPairsGpu.resize(maxPairs);
|
m_overlappingPairsGpu.resize(maxPairs);
|
||||||
m_plbvh.calculateOverlappingPairs(m_tempNumPairs, m_overlappingPairsGpu);
|
m_plbvh.calculateOverlappingPairs(m_numOverlappingPairs, m_overlappingPairsGpu);
|
||||||
}
|
}
|
||||||
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||||
{
|
{
|
||||||
b3Assert(0); //CPU version not implemented
|
b3Assert(0); //CPU version not implemented
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu()
|
||||||
|
{
|
||||||
|
m_aabbsGpu.copyFromHost(m_aabbsCpu);
|
||||||
|
m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu);
|
||||||
|
m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,11 +22,16 @@ class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface
|
|||||||
{
|
{
|
||||||
b3GpuParallelLinearBvh m_plbvh;
|
b3GpuParallelLinearBvh m_plbvh;
|
||||||
|
|
||||||
|
b3OpenCLArray<int> m_numOverlappingPairs;
|
||||||
b3OpenCLArray<b3Int4> m_overlappingPairsGpu;
|
b3OpenCLArray<b3Int4> m_overlappingPairsGpu;
|
||||||
|
|
||||||
b3OpenCLArray<b3SapAabb> m_aabbsGpu;
|
b3OpenCLArray<b3SapAabb> m_aabbsGpu;
|
||||||
b3OpenCLArray<int> m_tempNumPairs;
|
b3OpenCLArray<int> m_smallAabbsMappingGpu;
|
||||||
|
b3OpenCLArray<int> m_largeAabbsMappingGpu;
|
||||||
|
|
||||||
b3AlignedObjectArray<b3SapAabb> m_aabbsCpu;
|
b3AlignedObjectArray<b3SapAabb> m_aabbsCpu;
|
||||||
|
b3AlignedObjectArray<int> m_smallAabbsMappingCpu;
|
||||||
|
b3AlignedObjectArray<int> m_largeAabbsMappingCpu;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue);
|
b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue);
|
||||||
@@ -39,13 +44,15 @@ public:
|
|||||||
virtual void calculateOverlappingPairsHost(int maxPairs);
|
virtual void calculateOverlappingPairsHost(int maxPairs);
|
||||||
|
|
||||||
//call writeAabbsToGpu after done making all changes (createProxy etc)
|
//call writeAabbsToGpu after done making all changes (createProxy etc)
|
||||||
virtual void writeAabbsToGpu() { m_aabbsGpu.copyFromHost(m_aabbsCpu); }
|
virtual void writeAabbsToGpu();
|
||||||
|
|
||||||
virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); }
|
virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); }
|
||||||
virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); }
|
virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); }
|
||||||
|
|
||||||
virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); }
|
virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); }
|
||||||
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; }
|
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; }
|
||||||
|
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; }
|
||||||
|
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; }
|
||||||
|
|
||||||
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; }
|
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; }
|
||||||
|
|
||||||
|
|||||||
@@ -1307,3 +1307,12 @@ cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer()
|
|||||||
{
|
{
|
||||||
return m_overlappingPairs.getBufferCL();
|
return m_overlappingPairs.getBufferCL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b3OpenCLArray<int>& b3GpuSapBroadphase::getSmallAabbIndicesGPU()
|
||||||
|
{
|
||||||
|
return m_smallAabbsMappingGPU;
|
||||||
|
}
|
||||||
|
b3OpenCLArray<int>& b3GpuSapBroadphase::getLargeAabbIndicesGPU()
|
||||||
|
{
|
||||||
|
return m_largeAabbsMappingGPU;
|
||||||
|
}
|
||||||
|
|||||||
@@ -143,6 +143,8 @@ public:
|
|||||||
virtual int getNumOverlap();
|
virtual int getNumOverlap();
|
||||||
virtual cl_mem getOverlappingPairBuffer();
|
virtual cl_mem getOverlappingPairBuffer();
|
||||||
|
|
||||||
|
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
|
||||||
|
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //B3_GPU_SAP_BROADPHASE_H
|
#endif //B3_GPU_SAP_BROADPHASE_H
|
||||||
@@ -81,6 +81,15 @@ unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
|
|||||||
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
|
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)
|
||||||
|
{
|
||||||
|
int separatedAabbIndex = get_global_id(0);
|
||||||
|
if(separatedAabbIndex >= numAabbsToSeparate) return;
|
||||||
|
|
||||||
|
int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];
|
||||||
|
out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];
|
||||||
|
}
|
||||||
|
|
||||||
//Should replace with an optimized parallel reduction
|
//Should replace with an optimized parallel reduction
|
||||||
__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)
|
__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)
|
||||||
{
|
{
|
||||||
@@ -506,3 +515,55 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs,
|
||||||
|
__global int* out_numPairs, __global int4* out_overlappingPairs,
|
||||||
|
int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)
|
||||||
|
{
|
||||||
|
int smallAabbIndex = get_global_id(0);
|
||||||
|
if(smallAabbIndex >= numSmallAabbRigids) return;
|
||||||
|
|
||||||
|
b3AabbCL smallAabb = smallAabbs[smallAabbIndex];
|
||||||
|
for(int i = 0; i < numLargeAabbRigids; ++i)
|
||||||
|
{
|
||||||
|
b3AabbCL largeAabb = largeAabbs[i];
|
||||||
|
if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )
|
||||||
|
{
|
||||||
|
int4 pair;
|
||||||
|
pair.x = smallAabb.m_minIndices[3];
|
||||||
|
pair.y = largeAabb.m_minIndices[3];
|
||||||
|
pair.z = NEW_PAIR_MARKER;
|
||||||
|
pair.w = NEW_PAIR_MARKER;
|
||||||
|
|
||||||
|
int pairIndex = atomic_inc(out_numPairs);
|
||||||
|
if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,
|
||||||
|
__global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,
|
||||||
|
int numLargeAabbRigids, int maxRayRigidPairs, int numRays)
|
||||||
|
{
|
||||||
|
int rayIndex = get_global_id(0);
|
||||||
|
if(rayIndex >= numRays) return;
|
||||||
|
|
||||||
|
b3Vector3 rayFrom = rays[rayIndex].m_from;
|
||||||
|
b3Vector3 rayTo = rays[rayIndex].m_to;
|
||||||
|
b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);
|
||||||
|
b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
|
||||||
|
|
||||||
|
for(int i = 0; i < numLargeAabbRigids; ++i)
|
||||||
|
{
|
||||||
|
b3AabbCL rigidAabb = largeRigidAabbs[i];
|
||||||
|
if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )
|
||||||
|
{
|
||||||
|
int2 rayRigidPair;
|
||||||
|
rayRigidPair.x = rayIndex;
|
||||||
|
rayRigidPair.y = rigidAabb.m_minIndices[3];
|
||||||
|
|
||||||
|
int pairIndex = atomic_inc(out_numRayRigidPairs);
|
||||||
|
if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -76,6 +76,13 @@ static const char* parallelLinearBvhCL= \
|
|||||||
"{\n"
|
"{\n"
|
||||||
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n"
|
||||||
|
"{\n"
|
||||||
|
" int separatedAabbIndex = get_global_id(0);\n"
|
||||||
|
" if(separatedAabbIndex >= numAabbsToSeparate) return;\n"
|
||||||
|
" int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n"
|
||||||
|
" out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n"
|
||||||
|
"}\n"
|
||||||
"//Should replace with an optimized parallel reduction\n"
|
"//Should replace with an optimized parallel reduction\n"
|
||||||
"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n"
|
"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
@@ -481,4 +488,54 @@ static const char* parallelLinearBvhCL= \
|
|||||||
" }\n"
|
" }\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
"__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n"
|
||||||
|
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
|
||||||
|
" int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n"
|
||||||
|
"{\n"
|
||||||
|
" int smallAabbIndex = get_global_id(0);\n"
|
||||||
|
" if(smallAabbIndex >= numSmallAabbRigids) return;\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n"
|
||||||
|
" for(int i = 0; i < numLargeAabbRigids; ++i)\n"
|
||||||
|
" {\n"
|
||||||
|
" b3AabbCL largeAabb = largeAabbs[i];\n"
|
||||||
|
" if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n"
|
||||||
|
" {\n"
|
||||||
|
" int4 pair;\n"
|
||||||
|
" pair.x = smallAabb.m_minIndices[3];\n"
|
||||||
|
" pair.y = largeAabb.m_minIndices[3];\n"
|
||||||
|
" pair.z = NEW_PAIR_MARKER;\n"
|
||||||
|
" pair.w = NEW_PAIR_MARKER;\n"
|
||||||
|
" \n"
|
||||||
|
" int pairIndex = atomic_inc(out_numPairs);\n"
|
||||||
|
" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n"
|
||||||
|
" __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n"
|
||||||
|
" int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n"
|
||||||
|
"{\n"
|
||||||
|
" int rayIndex = get_global_id(0);\n"
|
||||||
|
" if(rayIndex >= numRays) return;\n"
|
||||||
|
" \n"
|
||||||
|
" b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
|
||||||
|
" b3Vector3 rayTo = rays[rayIndex].m_to;\n"
|
||||||
|
" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
|
||||||
|
" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
|
||||||
|
" \n"
|
||||||
|
" for(int i = 0; i < numLargeAabbRigids; ++i)\n"
|
||||||
|
" {\n"
|
||||||
|
" b3AabbCL rigidAabb = largeRigidAabbs[i];\n"
|
||||||
|
" if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n"
|
||||||
|
" {\n"
|
||||||
|
" int2 rayRigidPair;\n"
|
||||||
|
" rayRigidPair.x = rayIndex;\n"
|
||||||
|
" rayRigidPair.y = rigidAabb.m_minIndices[3];\n"
|
||||||
|
" \n"
|
||||||
|
" int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
|
||||||
|
" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -305,8 +305,7 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//printf("broadphase->getAllAabbsGPU().size(): %d \n", broadphase->getAllAabbsGPU().size());
|
m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );
|
||||||
m_data->m_plbvh->build( broadphase->getAllAabbsGPU() );
|
|
||||||
|
|
||||||
m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
|
m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user