Use merged AABB to calculate grid cell size for PLBVH.
This commit is contained in:
@@ -55,6 +55,7 @@ class b3GpuParallelLinearBvh
|
|||||||
|
|
||||||
cl_program m_parallelLinearBvhProgram;
|
cl_program m_parallelLinearBvhProgram;
|
||||||
|
|
||||||
|
cl_kernel m_findAllNodesMergedAabbKernel;
|
||||||
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
||||||
cl_kernel m_constructBinaryTreeKernel;
|
cl_kernel m_constructBinaryTreeKernel;
|
||||||
cl_kernel m_determineInternalNodeAabbsKernel;
|
cl_kernel m_determineInternalNodeAabbsKernel;
|
||||||
@@ -78,6 +79,7 @@ class b3GpuParallelLinearBvh
|
|||||||
//1 element per leaf node
|
//1 element per leaf node
|
||||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
||||||
|
b3OpenCLArray<b3SapAabb> m_mergedAabb;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||||
@@ -91,7 +93,8 @@ public:
|
|||||||
m_internalNodeChildNodes(context, queue),
|
m_internalNodeChildNodes(context, queue),
|
||||||
m_internalNodeParentNodes(context, queue),
|
m_internalNodeParentNodes(context, queue),
|
||||||
m_leafNodeParentNodes(context, queue),
|
m_leafNodeParentNodes(context, queue),
|
||||||
m_mortonCodesAndAabbIndicies(context, queue)
|
m_mortonCodesAndAabbIndicies(context, queue),
|
||||||
|
m_mergedAabb(context, queue)
|
||||||
{
|
{
|
||||||
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
||||||
|
|
||||||
@@ -101,6 +104,8 @@ public:
|
|||||||
m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH);
|
m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH);
|
||||||
b3Assert(m_parallelLinearBvhProgram);
|
b3Assert(m_parallelLinearBvhProgram);
|
||||||
|
|
||||||
|
m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
|
b3Assert(m_findAllNodesMergedAabbKernel);
|
||||||
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);
|
b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||||
m_constructBinaryTreeKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "constructBinaryTree", &error, m_parallelLinearBvhProgram, additionalMacros );
|
m_constructBinaryTreeKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "constructBinaryTree", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||||
@@ -114,6 +119,7 @@ public:
|
|||||||
|
|
||||||
virtual ~b3GpuParallelLinearBvh()
|
virtual ~b3GpuParallelLinearBvh()
|
||||||
{
|
{
|
||||||
|
clReleaseKernel(m_findAllNodesMergedAabbKernel);
|
||||||
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||||
clReleaseKernel(m_constructBinaryTreeKernel);
|
clReleaseKernel(m_constructBinaryTreeKernel);
|
||||||
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
||||||
@@ -125,8 +131,7 @@ public:
|
|||||||
|
|
||||||
// fix: need to handle/test case with 2 nodes
|
// fix: need to handle/test case with 2 nodes
|
||||||
|
|
||||||
///@param cellsize A virtual grid of size 2^10^3 is used in the process of creating the BVH
|
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs)
|
||||||
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, b3Scalar cellSize)
|
|
||||||
{
|
{
|
||||||
B3_PROFILE("b3ParallelLinearBvh::build()");
|
B3_PROFILE("b3ParallelLinearBvh::build()");
|
||||||
|
|
||||||
@@ -143,6 +148,7 @@ public:
|
|||||||
|
|
||||||
m_leafNodeParentNodes.resize(numLeaves);
|
m_leafNodeParentNodes.resize(numLeaves);
|
||||||
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
||||||
|
m_mergedAabb.resize(numLeaves);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
||||||
@@ -160,7 +166,7 @@ public:
|
|||||||
//If the number of nodes is not a power of 2(as in, can be expressed as 2^N where N is an integer), then there is 1 additional level
|
//If the number of nodes is not a power of 2(as in, can be expressed as 2^N where N is an integer), then there is 1 additional level
|
||||||
if( ~(1 << mostSignificantBit) & numLeaves ) numLevels++;
|
if( ~(1 << mostSignificantBit) & numLeaves ) numLevels++;
|
||||||
|
|
||||||
if(1) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
|
if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
//Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
||||||
@@ -202,7 +208,7 @@ public:
|
|||||||
m_firstIndexOffsetPerLevelCpu[i] -= m_numNodesPerLevelCpu[i];
|
m_firstIndexOffsetPerLevelCpu[i] -= m_numNodesPerLevelCpu[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
if(1)
|
if(0)
|
||||||
{
|
{
|
||||||
int numInternalNodes = 0;
|
int numInternalNodes = 0;
|
||||||
for(int i = 0; i < numLevels; ++i)
|
for(int i = 0; i < numLevels; ++i)
|
||||||
@@ -225,20 +231,22 @@ public:
|
|||||||
{
|
{
|
||||||
B3_PROFILE("Find AABB of merged nodes");
|
B3_PROFILE("Find AABB of merged nodes");
|
||||||
|
|
||||||
/*b3BufferInfoCL bufferInfo[] =
|
m_mergedAabb.copyFromOpenCLArray(worldSpaceAabbs); //Need to make a copy since the kernel modifies the array
|
||||||
|
|
||||||
|
b3BufferInfoCL bufferInfo[] =
|
||||||
{
|
{
|
||||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
b3BufferInfoCL( m_mergedAabb.getBufferCL() ) //Resulting AABB is stored in m_mergedAabb[0]
|
||||||
b3BufferInfoCL( m_allNodesMergedAabb.getBufferCL() ),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabb, "m_findAllNodesMergedAabb");
|
b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel");
|
||||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst(numLeaves);
|
launcher.setConst(numLeaves);
|
||||||
|
|
||||||
launcher.launch1D(numLeaves);
|
launcher.launch1D(numLeaves);
|
||||||
clFinish(m_queue);*/
|
clFinish(m_queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//Insert the center of the AABBs into a virtual grid,
|
//Insert the center of the AABBs into a virtual grid,
|
||||||
//then convert the discrete grid coordinates into a morton code
|
//then convert the discrete grid coordinates into a morton code
|
||||||
//For each element in m_mortonCodesAndAabbIndicies, set
|
//For each element in m_mortonCodesAndAabbIndicies, set
|
||||||
@@ -250,12 +258,12 @@ public:
|
|||||||
b3BufferInfoCL bufferInfo[] =
|
b3BufferInfoCL bufferInfo[] =
|
||||||
{
|
{
|
||||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_mergedAabb.getBufferCL() ),
|
||||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() )
|
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() )
|
||||||
};
|
};
|
||||||
|
|
||||||
b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel");
|
b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel");
|
||||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst(cellSize);
|
|
||||||
launcher.setConst(numLeaves);
|
launcher.setConst(numLeaves);
|
||||||
|
|
||||||
launcher.launch1D(numLeaves);
|
launcher.launch1D(numLeaves);
|
||||||
@@ -301,7 +309,7 @@ public:
|
|||||||
launcher.launch1D(numInternalNodes);
|
launcher.launch1D(numInternalNodes);
|
||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
|
|
||||||
if(1)
|
if(0)
|
||||||
{
|
{
|
||||||
static b3AlignedObjectArray<b3Int2> internalNodeChildNodes;
|
static b3AlignedObjectArray<b3Int2> internalNodeChildNodes;
|
||||||
m_internalNodeChildNodes.copyToHost(internalNodeChildNodes, false);
|
m_internalNodeChildNodes.copyToHost(internalNodeChildNodes, false);
|
||||||
@@ -335,6 +343,12 @@ public:
|
|||||||
launcher.launch1D(numLeaves);
|
launcher.launch1D(numLeaves);
|
||||||
clFinish(m_queue);
|
clFinish(m_queue);
|
||||||
|
|
||||||
|
if(0)
|
||||||
|
{
|
||||||
|
b3SapAabb mergedAABB = m_mergedAabb.at(0);
|
||||||
|
printf("mergedAABBMin: %f, %f, %f \n", mergedAABB.m_minVec.x, mergedAABB.m_minVec.y, mergedAABB.m_minVec.z);
|
||||||
|
printf("mergedAABBMax: %f, %f, %f \n", mergedAABB.m_maxVec.x, mergedAABB.m_maxVec.y, mergedAABB.m_maxVec.z);
|
||||||
|
}
|
||||||
if(0)
|
if(0)
|
||||||
{
|
{
|
||||||
static b3AlignedObjectArray<b3SapAabb> rigidAabbs;
|
static b3AlignedObjectArray<b3SapAabb> rigidAabbs;
|
||||||
|
|||||||
@@ -56,14 +56,8 @@ public:
|
|||||||
|
|
||||||
virtual void calculateOverlappingPairs(int maxPairs)
|
virtual void calculateOverlappingPairs(int maxPairs)
|
||||||
{
|
{
|
||||||
//Detect overall min/max
|
|
||||||
{
|
|
||||||
//Not implemented
|
|
||||||
}
|
|
||||||
|
|
||||||
//Reconstruct BVH
|
//Reconstruct BVH
|
||||||
const b3Scalar CELL_SIZE(0.1);
|
m_plbvh.build(m_aabbsGpu);
|
||||||
m_plbvh.build(m_aabbsGpu, CELL_SIZE);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
m_overlappingPairsGpu.resize(maxPairs);
|
m_overlappingPairsGpu.resize(maxPairs);
|
||||||
|
|||||||
@@ -80,8 +80,8 @@ unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
|
|||||||
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
|
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
__kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* out_mergedAabb, int numAabbs)
|
__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbs)
|
||||||
{
|
{
|
||||||
int aabbIndex = get_global_id(0);
|
int aabbIndex = get_global_id(0);
|
||||||
if(aabbIndex >= numAabbs) return;
|
if(aabbIndex >= numAabbs) return;
|
||||||
@@ -89,11 +89,11 @@ __kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __globa
|
|||||||
//Find the most significant bit(msb)
|
//Find the most significant bit(msb)
|
||||||
int mostSignificantBit = 0;
|
int mostSignificantBit = 0;
|
||||||
{
|
{
|
||||||
int temp = numLeaves;
|
int temp = numAabbs;
|
||||||
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
|
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit ); // verify
|
int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit );
|
||||||
int numRemainingAabbs = (1 << mostSignificantBit);
|
int numRemainingAabbs = (1 << mostSignificantBit);
|
||||||
|
|
||||||
//Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2
|
//Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2
|
||||||
@@ -102,8 +102,8 @@ __kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __globa
|
|||||||
{
|
{
|
||||||
int otherAabbIndex = numRemainingAabbs + aabbIndex;
|
int otherAabbIndex = numRemainingAabbs + aabbIndex;
|
||||||
|
|
||||||
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
|
b3AabbCL aabb = out_mergedAabb[aabbIndex];
|
||||||
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
|
b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
|
||||||
|
|
||||||
b3AabbCL mergedAabb;
|
b3AabbCL mergedAabb;
|
||||||
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
||||||
@@ -121,8 +121,8 @@ __kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __globa
|
|||||||
{
|
{
|
||||||
int otherAabbIndex = aabbIndex + offset;
|
int otherAabbIndex = aabbIndex + offset;
|
||||||
|
|
||||||
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
|
b3AabbCL aabb = out_mergedAabb[aabbIndex];
|
||||||
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
|
b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
|
||||||
|
|
||||||
b3AabbCL mergedAabb;
|
b3AabbCL mergedAabb;
|
||||||
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
||||||
@@ -130,27 +130,29 @@ __kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __globa
|
|||||||
out_mergedAabb[aabbIndex] = mergedAabb;
|
out_mergedAabb[aabbIndex] = mergedAabb;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = offset / 2;
|
offset /= 2;
|
||||||
|
|
||||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs,
|
__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes,
|
||||||
__global SortDataCL* out_mortonCodesAndAabbIndices,
|
__global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)
|
||||||
b3Scalar cellSize, int numAabbs)
|
|
||||||
{
|
{
|
||||||
int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index
|
int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index
|
||||||
if(leafNodeIndex >= numAabbs) return;
|
if(leafNodeIndex >= numAabbs) return;
|
||||||
|
|
||||||
b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
|
b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];
|
||||||
|
b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;
|
||||||
|
b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;
|
||||||
|
|
||||||
b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;
|
b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
|
||||||
|
b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;
|
||||||
|
b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;
|
||||||
|
|
||||||
//Quantize into integer coordinates
|
//Quantize into integer coordinates
|
||||||
//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
|
//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
|
||||||
b3Vector3 gridPosition = center / cellSize;
|
b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;
|
||||||
|
|
||||||
int4 discretePosition;
|
int4 discretePosition;
|
||||||
discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
|
discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
|
||||||
|
|||||||
@@ -75,20 +75,77 @@ static const char* parallelLinearBvhCL= \
|
|||||||
"{\n"
|
"{\n"
|
||||||
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, \n"
|
"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbs)\n"
|
||||||
" __global SortDataCL* out_mortonCodesAndAabbIndices, \n"
|
"{\n"
|
||||||
" b3Scalar cellSize, int numAabbs)\n"
|
" int aabbIndex = get_global_id(0);\n"
|
||||||
|
" if(aabbIndex >= numAabbs) return;\n"
|
||||||
|
" \n"
|
||||||
|
" //Find the most significant bit(msb)\n"
|
||||||
|
" int mostSignificantBit = 0;\n"
|
||||||
|
" {\n"
|
||||||
|
" int temp = numAabbs;\n"
|
||||||
|
" while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit );\n"
|
||||||
|
" int numRemainingAabbs = (1 << mostSignificantBit);\n"
|
||||||
|
" \n"
|
||||||
|
" //Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2\n"
|
||||||
|
" //For example, if there are 159 AABBs = 128 + 31, then merge indices [0, 30] and 128 + [0, 30]\n"
|
||||||
|
" if(aabbIndex < numberOfAabbsAboveMsbSplit)\n"
|
||||||
|
" {\n"
|
||||||
|
" int otherAabbIndex = numRemainingAabbs + aabbIndex;\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
|
||||||
|
" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL mergedAabb;\n"
|
||||||
|
" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
|
||||||
|
" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
|
||||||
|
" out_mergedAabb[aabbIndex] = mergedAabb;\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||||
|
" \n"
|
||||||
|
" //\n"
|
||||||
|
" int offset = numRemainingAabbs / 2;\n"
|
||||||
|
" while(offset >= 1)\n"
|
||||||
|
" {\n"
|
||||||
|
" if(aabbIndex < offset)\n"
|
||||||
|
" {\n"
|
||||||
|
" int otherAabbIndex = aabbIndex + offset;\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
|
||||||
|
" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
|
||||||
|
" \n"
|
||||||
|
" b3AabbCL mergedAabb;\n"
|
||||||
|
" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
|
||||||
|
" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
|
||||||
|
" out_mergedAabb[aabbIndex] = mergedAabb;\n"
|
||||||
|
" }\n"
|
||||||
|
" \n"
|
||||||
|
" offset /= 2;\n"
|
||||||
|
" \n"
|
||||||
|
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n"
|
||||||
|
" __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n"
|
" int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n"
|
||||||
" if(leafNodeIndex >= numAabbs) return;\n"
|
" if(leafNodeIndex >= numAabbs) return;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
|
" b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n"
|
||||||
|
" b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n"
|
||||||
|
" b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;\n"
|
" b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
|
||||||
|
" b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n"
|
||||||
|
" b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" //Quantize into integer coordinates\n"
|
" //Quantize into integer coordinates\n"
|
||||||
" //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
|
" //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
|
||||||
" b3Vector3 gridPosition = center / cellSize;\n"
|
" b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" int4 discretePosition;\n"
|
" int4 discretePosition;\n"
|
||||||
" discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
|
" discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
|
||||||
@@ -110,7 +167,7 @@ static const char* parallelLinearBvhCL= \
|
|||||||
" out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
|
" out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
|
"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
|
||||||
"#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the node has no parent \n"
|
"#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the (root) node has no parent \n"
|
||||||
"#define B3_PLBVH_ROOT_NODE_INDEX 0\n"
|
"#define B3_PLBVH_ROOT_NODE_INDEX 0\n"
|
||||||
"//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.\n"
|
"//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.\n"
|
||||||
"//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).\n"
|
"//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).\n"
|
||||||
@@ -264,13 +321,16 @@ static const char* parallelLinearBvhCL= \
|
|||||||
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
|
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
|
||||||
" int maxPairs, int numQueryAabbs)\n"
|
" int maxPairs, int numQueryAabbs)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
|
"#define USE_SPATIALLY_COHERENT_INDICIES //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve\n"
|
||||||
|
"#ifdef USE_SPATIALLY_COHERENT_INDICIES\n"
|
||||||
" int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
" int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
||||||
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;\n"
|
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;\n"
|
||||||
" //int queryRigidIndex = get_global_id(0);\n"
|
"#else\n"
|
||||||
" //if(queryRigidIndex >= numQueryAabbs) return;\n"
|
" int queryRigidIndex = get_global_id(0);\n"
|
||||||
" \n"
|
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||||
|
"#endif\n"
|
||||||
" b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
|
" b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
|
||||||
" \n"
|
" \n"
|
||||||
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
|
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user