diff --git a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp index 9596b34e9..5c75d12e0 100644 --- a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp +++ b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp @@ -187,6 +187,10 @@ void GpuRaytraceScene::renderScene() void GpuRaytraceScene::renderScene2() { + //If using the BVH to accelerate raycasting, the AABBs need to be updated or else they will + //not match the actual rigid body positions after integration. The result is that rigid bodies + //are not drawn or appear clipped, especially if they are moving quickly. + m_data->m_rigidBodyPipeline->setupGpuAabbsFull(); // GpuBoxPlaneScene::renderScene(); // return; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h index 84371b0e8..29c3e14bb 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h @@ -65,6 +65,9 @@ class b3GpuParallelLinearBvh b3FillCL m_fill; b3RadixSort32CL m_radixSorter; + // + b3OpenCLArray m_rootNodeIndex; + //1 element per level in the tree b3AlignedObjectArray m_numNodesPerLevelCpu; //Level 0(m_numNodesPerLevelCpu[0]) is the root, last level contains the leaf nodes b3AlignedObjectArray m_firstIndexOffsetPerLevelCpu; //Contains the index/offset of the first node in that level @@ -89,12 +92,16 @@ public: m_fill(context, device, queue), m_radixSorter(context, device, queue), + m_rootNodeIndex(context, queue), + m_numNodesPerLevelGpu(context, queue), m_firstIndexOffsetPerLevelGpu(context, queue), + m_internalNodeAabbs(context, queue), m_internalNodeLeafIndexRanges(context, queue), m_internalNodeChildNodes(context, queue), m_internalNodeParentNodes(context, queue), + m_leafNodeParentNodes(context, queue), m_mortonCodesAndAabbIndicies(context, queue), m_mergedAabb(context, queue), @@ -147,6 +154,8 @@ public: // { + m_rootNodeIndex.resize(1); + m_internalNodeAabbs.resize(numInternalNodes); m_internalNodeLeafIndexRanges.resize(numInternalNodes); m_internalNodeChildNodes.resize(numInternalNodes); @@ -309,6 +318,9 @@ public: //Construct binary tree; find the children of each internal node, and assign parent nodes { B3_PROFILE("Construct binary tree"); + + const int ROOT_NODE_INDEX = 0x80000000; //Default root index is 0, most significant bit is set to indicate internal node + m_rootNodeIndex.copyFromHostPointer(&ROOT_NODE_INDEX, 1); b3BufferInfoCL bufferInfo[] = { @@ -450,6 +462,7 @@ public: { b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), @@ -500,6 +513,9 @@ public: b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + + + b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index 3e650c7f9..40eabfd6c 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -148,17 +148,13 @@ __kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabb } #define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128 -#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the (root) node has no parent -#define B3_PLBVH_ROOT_NODE_INDEX 0 -//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node. -//Positive index == leaf node, while negative index == internal node (remove negative sign to get index). -// -//Since the root internal node is at index 0, no internal nodes should reference it as a child, -//and so index 0 is always used to indicate a leaf node. -int isLeafNode(int index) { return (index >= 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; } +//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes. +//If it is set, then the index is for an internal node; otherwise, it is a leaf node. +//In both cases, the bit should be cleared to access the index. +int isLeafNode(int index) { return (index >> 31 == 0); } +int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } +int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } __kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel, __global int* numNodesPerLevel, @@ -310,9 +306,12 @@ bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2) //From sap.cl __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, + + __global int* rootNodeIndex, __global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs, __global int2* internalNodeLeafIndexRanges, + __global SortDataCL* mortonCodesAndAabbIndices, __global int* out_numPairs, __global int4* out_overlappingPairs, int maxPairs, int numQueryAabbs) @@ -333,10 +332,8 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop) - int stackSize = 2; - stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x; - stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y; + int stackSize = 1; + stack[0] = *rootNodeIndex; while(stackSize) { @@ -396,16 +393,6 @@ typedef struct float4 m_from; float4 m_to; } b3RayInfo; - -typedef struct -{ - float m_hitFraction; - int m_hitResult0; - int m_hitResult1; - int m_hitResult2; - float4 m_hitPoint; - float4 m_hitNormal; -} b3RayHit; //From rayCastKernels.cl b3Vector3 b3Vector3_normalize(b3Vector3 v) @@ -481,6 +468,8 @@ int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalize } __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, + + __global int* rootNodeIndex, __global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs, __global int2* internalNodeLeafIndexRanges, @@ -501,10 +490,8 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop) - int stackSize = 2; - stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x; - stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y; + int stackSize = 1; + stack[0] = *rootNodeIndex; while(stackSize) { @@ -518,7 +505,6 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) ) { if(isLeaf) diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index eb45c0975..c83783901 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -141,16 +141,12 @@ static const char* parallelLinearBvhCL= \ " out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n" "}\n" "#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n" -"#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the (root) node has no parent \n" -"#define B3_PLBVH_ROOT_NODE_INDEX 0\n" -"//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.\n" -"//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).\n" -"//\n" -"//Since the root internal node is at index 0, no internal nodes should reference it as a child,\n" -"//and so index 0 is always used to indicate a leaf node.\n" -"int isLeafNode(int index) { return (index >= 0); }\n" -"int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; }\n" -"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; }\n" +"//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n" +"//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n" +"//In both cases, the bit should be cleared to access the index.\n" +"int isLeafNode(int index) { return (index >> 31 == 0); }\n" +"int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n" +"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n" "__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,\n" " __global int* numNodesPerLevel,\n" " __global int2* out_internalNodeChildIndices, \n" @@ -296,9 +292,11 @@ static const char* parallelLinearBvhCL= \ "}\n" "//From sap.cl\n" "__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n" +" __global int* rootNodeIndex, \n" " __global int2* internalNodeChildIndices, \n" " __global b3AabbCL* internalNodeAabbs,\n" " __global int2* internalNodeLeafIndexRanges,\n" +" \n" " __global SortDataCL* mortonCodesAndAabbIndices,\n" " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" " int maxPairs, int numQueryAabbs)\n" @@ -318,10 +316,8 @@ static const char* parallelLinearBvhCL= \ " \n" " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" " \n" -" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n" -" int stackSize = 2;\n" -" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n" -" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n" +" int stackSize = 1;\n" +" stack[0] = *rootNodeIndex;\n" " \n" " while(stackSize)\n" " {\n" @@ -379,15 +375,6 @@ static const char* parallelLinearBvhCL= \ " float4 m_from;\n" " float4 m_to;\n" "} b3RayInfo;\n" -"typedef struct\n" -"{\n" -" float m_hitFraction;\n" -" int m_hitResult0;\n" -" int m_hitResult1;\n" -" int m_hitResult2;\n" -" float4 m_hitPoint;\n" -" float4 m_hitNormal;\n" -"} b3RayHit;\n" "//From rayCastKernels.cl\n" "b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" "{\n" @@ -455,6 +442,7 @@ static const char* parallelLinearBvhCL= \ " return (t_min_final <= t_max_final);\n" "}\n" "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" +" __global int* rootNodeIndex, \n" " __global int2* internalNodeChildIndices, \n" " __global b3AabbCL* internalNodeAabbs,\n" " __global int2* internalNodeLeafIndexRanges,\n" @@ -475,10 +463,8 @@ static const char* parallelLinearBvhCL= \ " \n" " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" " \n" -" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n" -" int stackSize = 2;\n" -" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n" -" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n" +" int stackSize = 1;\n" +" stack[0] = *rootNodeIndex;\n" " \n" " while(stackSize)\n" " {\n" @@ -492,7 +478,6 @@ static const char* parallelLinearBvhCL= \ " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" " \n" " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" -" \n" " if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) )\n" " {\n" " if(isLeaf)\n" diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index 1ada815c7..686a7f835 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -41,7 +41,7 @@ struct b3GpuRaycastInternalData //1 element per (ray index, rigid index) pair b3OpenCLArray* m_gpuNumRayRigidPairs; - b3OpenCLArray* m_gpuRayRigidPairs; + b3OpenCLArray* m_gpuRayRigidPairs; //x == ray index, y == rigid index int m_test; };