diff --git a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp index b114b4511..9596b34e9 100644 --- a/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp +++ b/Demos3/GpuDemos/raytrace/RaytracedShadowDemo.cpp @@ -308,7 +308,7 @@ void GpuRaytraceScene::renderScene2() { B3_PROFILE("cast primary rays"); //m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(),m_data->m_np->getInternalData()); - m_raycaster->castRays(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData()); + m_raycaster->castRays(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData(), m_data->m_bp); } @@ -350,7 +350,7 @@ void GpuRaytraceScene::renderScene2() { B3_PROFILE("cast shadow rays"); //m_raycaster->castRaysHost(primaryRays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu()); - m_raycaster->castRays(shadowRays, shadowHits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData()); + m_raycaster->castRays(shadowRays, shadowHits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData(), m_data->m_bp); } { diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h index f2199a3cf..84371b0e8 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h @@ -18,6 +18,7 @@ subject to the following restrictions: #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" #include "Bullet3Common/shared/b3Int2.h" #include "Bullet3Common/shared/b3Int4.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" @@ -59,6 +60,7 @@ class b3GpuParallelLinearBvh cl_kernel m_determineInternalNodeAabbsKernel; cl_kernel m_plbvhCalculateOverlappingPairsKernel; + cl_kernel m_plbvhRayTraverseKernel; b3FillCL m_fill; b3RadixSort32CL m_radixSorter; @@ -79,6 +81,7 @@ class b3GpuParallelLinearBvh b3OpenCLArray m_leafNodeParentNodes; b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index b3OpenCLArray m_mergedAabb; + b3OpenCLArray m_leafNodeAabbs; public: b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) : @@ -94,7 +97,8 @@ public: m_internalNodeParentNodes(context, queue), m_leafNodeParentNodes(context, queue), m_mortonCodesAndAabbIndicies(context, queue), - m_mergedAabb(context, queue) + m_mergedAabb(context, queue), + m_leafNodeAabbs(context, queue) { const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; @@ -115,6 +119,8 @@ public: m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros ); b3Assert(m_plbvhCalculateOverlappingPairsKernel); + m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros ); + b3Assert(m_plbvhRayTraverseKernel); } virtual ~b3GpuParallelLinearBvh() @@ -125,6 +131,7 @@ public: clReleaseKernel(m_determineInternalNodeAabbsKernel); clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); + clReleaseKernel(m_plbvhRayTraverseKernel); clReleaseProgram(m_parallelLinearBvhProgram); } @@ -148,8 +155,12 @@ public: m_leafNodeParentNodes.resize(numLeaves); m_mortonCodesAndAabbIndicies.resize(numLeaves); m_mergedAabb.resize(numLeaves); + m_leafNodeAabbs.resize(numLeaves); } + // + m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs); + //Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) ) //The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes) int numLevels = 0; @@ -168,7 +179,7 @@ public: if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit); } - //Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU + //Determine number of internal nodes per level, use prefix sum to get offsets of each level, and send to GPU { B3_PROFILE("Determine number of nodes per level"); @@ -329,7 +340,7 @@ public: } //For each internal node, check children to get its AABB; start from the - //last level and move towards the root + //last level, which contains the leaves, and move towards the root { B3_PROFILE("Set AABBs"); @@ -416,10 +427,12 @@ public: } } - //Max number of pairs is out_overlappingPairs.size() - //If the number of overlapping pairs is < out_overlappingPairs.size(), the array is resized - void calculateOverlappingPairs(const b3OpenCLArray& worldSpaceAabbs, - b3OpenCLArray& out_numPairs, b3OpenCLArray& out_overlappingPairs) + ///b3GpuParallelLinearBvh::build() must be called before this function. calculateOverlappingPairs() uses + ///the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. + ///@param out_numPairs If number of pairs exceeds the max number of pairs, this is clamped to the max number. + ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs. + ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized. + void calculateOverlappingPairs(b3OpenCLArray& out_numPairs, b3OpenCLArray& out_overlappingPairs) { b3Assert( out_numPairs.size() == 1 ); @@ -431,11 +444,11 @@ public: { B3_PROFILE("PLBVH calculateOverlappingPairs"); - int numQueryAabbs = worldSpaceAabbs.size(); + int numQueryAabbs = m_leafNodeAabbs.size(); b3BufferInfoCL bufferInfo[] = { - b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ), + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), @@ -468,6 +481,53 @@ public: out_overlappingPairs.resize(numPairs); } + + ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; + ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough. + ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index. + ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded. + void testRaysAgainstBvhAabbs(const b3OpenCLArray& rays, + b3OpenCLArray& out_numRayRigidPairs, b3OpenCLArray& out_rayRigidPairs) + { + B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()"); + + int numRays = rays.size(); + int maxRayRigidPairs = out_rayRigidPairs.size(); + + int reset = 0; + out_numRayRigidPairs.copyFromHostPointer(&reset, 1); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), + b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), + b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), + b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), + + b3BufferInfoCL( rays.getBufferCL() ), + + b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), + b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) + }; + + b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(maxRayRigidPairs); + launcher.setConst(numRays); + + launcher.launch1D(numRays); + clFinish(m_queue); + + + // + int numRayRigidPairs = -1; + out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1); + + if(numRayRigidPairs > maxRayRigidPairs) + b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs); + + } }; #endif diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h index 761f4b168..c5b85f4bb 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h @@ -61,7 +61,7 @@ public: // m_overlappingPairsGpu.resize(maxPairs); - m_plbvh.calculateOverlappingPairs(m_aabbsGpu, m_tempNumPairs, m_overlappingPairsGpu); + m_plbvh.calculateOverlappingPairs(m_tempNumPairs, m_overlappingPairsGpu); } virtual void calculateOverlappingPairsHost(int maxPairs) { diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index dc283ac95..3e650c7f9 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -15,6 +15,7 @@ typedef float b3Scalar; typedef float4 b3Vector3; #define b3Max max #define b3Min min +#define b3Sqrt sqrt typedef struct { @@ -388,3 +389,161 @@ __kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, } } + +//From rayCastKernels.cl +typedef struct +{ + float4 m_from; + float4 m_to; +} b3RayInfo; + +typedef struct +{ + float m_hitFraction; + int m_hitResult0; + int m_hitResult1; + int m_hitResult2; + float4 m_hitPoint; + float4 m_hitNormal; +} b3RayHit; +//From rayCastKernels.cl + +b3Vector3 b3Vector3_normalize(b3Vector3 v) +{ + b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f}; + return normalize(normal); //OpenCL normalize == vector4 normalize +} +b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } +b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } + + +/** + +int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) +{ + // not functional -- need to fix + + //aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ) + //t_min is the first intersection, t_max is the second intersection + b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection; + int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y) + + //select(b, a, condition) == condition ? a : b + b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection; + b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection; + + b3Scalar t_min_final = 0.0f; + b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); + + //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. + //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) + //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN + t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); + t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); + + return (t_min_final <= t_max_final); +} +**/ + +void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection, + b3Scalar planeMin, b3Scalar planeMax, + b3Scalar* out_t_min, b3Scalar* out_t_max) +{ + if(rayNormalizedDirection < 0.0f) + { + //max is closer, min is farther + *out_t_min = (planeMax - rayStart) / rayNormalizedDirection; + *out_t_max = (planeMin - rayStart) / rayNormalizedDirection; + } + else + { + //min is closer, max is farther + *out_t_min = (planeMin - rayStart) / rayNormalizedDirection; + *out_t_max = (planeMax - rayStart) / rayNormalizedDirection; + } +} +int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) +{ + b3Scalar t_min_x, t_min_y, t_min_z; + b3Scalar t_max_x, t_max_y, t_max_z; + + rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x); + rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y); + rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z); + + b3Scalar t_min_final = 0.0f; + b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); + + t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) ); + t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) ); + + return (t_min_final <= t_max_final); +} + +__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, + __global int2* internalNodeChildIndices, + __global b3AabbCL* internalNodeAabbs, + __global int2* internalNodeLeafIndexRanges, + __global SortDataCL* mortonCodesAndAabbIndices, + + __global b3RayInfo* rays, + + __global int* out_numRayRigidPairs, + __global int2* out_rayRigidPairs, + int maxRayRigidPairs, int numRays) +{ + int rayIndex = get_global_id(0); + if(rayIndex >= numRays) return; + + b3Vector3 rayFrom = rays[rayIndex].m_from; + b3Vector3 rayTo = rays[rayIndex].m_to; + b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from); + + int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; + + //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop) + int stackSize = 2; + stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x; + stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y; + + while(stackSize) + { + int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; + --stackSize; + + int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false + int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); + + //bvhRigidIndex is not used if internal node + int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; + + b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; + + if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) ) + { + if(isLeaf) + { + int2 rayRigidPair; + rayRigidPair.x = rayIndex; + rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; + + int pairIndex = atomic_inc(out_numRayRigidPairs); + if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; + } + + if(!isLeaf) //Internal node + { + if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) + { + //Error + } + else + { + stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; + stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; + } + } + } + } +} + diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index 6f99d51f2..eb45c0975 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -15,6 +15,7 @@ static const char* parallelLinearBvhCL= \ "typedef float4 b3Vector3;\n" "#define b3Max max\n" "#define b3Min min\n" +"#define b3Sqrt sqrt\n" "typedef struct\n" "{\n" " unsigned int m_key;\n" @@ -372,4 +373,151 @@ static const char* parallelLinearBvhCL= \ " \n" " }\n" "}\n" +"//From rayCastKernels.cl\n" +"typedef struct\n" +"{\n" +" float4 m_from;\n" +" float4 m_to;\n" +"} b3RayInfo;\n" +"typedef struct\n" +"{\n" +" float m_hitFraction;\n" +" int m_hitResult0;\n" +" int m_hitResult1;\n" +" int m_hitResult2;\n" +" float4 m_hitPoint;\n" +" float4 m_hitNormal;\n" +"} b3RayHit;\n" +"//From rayCastKernels.cl\n" +"b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" +"{\n" +" b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n" +" return normalize(normal); //OpenCL normalize == vector4 normalize\n" +"}\n" +"b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" +"b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" +"/**\n" +"int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" +"{\n" +" // not functional -- need to fix\n" +" //aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} )\n" +" //t_min is the first intersection, t_max is the second intersection\n" +" b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection;\n" +" int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y)\n" +" \n" +" //select(b, a, condition) == condition ? a : b\n" +" b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection;\n" +" b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection;\n" +" b3Scalar t_min_final = 0.0f;\n" +" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" \n" +" //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" +" //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" +" //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN\n" +" t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" +" t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" +" \n" +" return (t_min_final <= t_max_final);\n" +"}\n" +"**/\n" +"void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection,\n" +" b3Scalar planeMin, b3Scalar planeMax, \n" +" b3Scalar* out_t_min, b3Scalar* out_t_max)\n" +"{\n" +" if(rayNormalizedDirection < 0.0f)\n" +" {\n" +" //max is closer, min is farther\n" +" *out_t_min = (planeMax - rayStart) / rayNormalizedDirection;\n" +" *out_t_max = (planeMin - rayStart) / rayNormalizedDirection;\n" +" }\n" +" else\n" +" {\n" +" //min is closer, max is farther\n" +" *out_t_min = (planeMin - rayStart) / rayNormalizedDirection;\n" +" *out_t_max = (planeMax - rayStart) / rayNormalizedDirection;\n" +" }\n" +"}\n" +"int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" +"{\n" +" b3Scalar t_min_x, t_min_y, t_min_z;\n" +" b3Scalar t_max_x, t_max_y, t_max_z;\n" +" \n" +" rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x);\n" +" rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y);\n" +" rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z);\n" +" \n" +" b3Scalar t_min_final = 0.0f;\n" +" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" \n" +" t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) );\n" +" t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) );\n" +" \n" +" return (t_min_final <= t_max_final);\n" +"}\n" +"__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" +" __global int2* internalNodeChildIndices, \n" +" __global b3AabbCL* internalNodeAabbs,\n" +" __global int2* internalNodeLeafIndexRanges,\n" +" __global SortDataCL* mortonCodesAndAabbIndices,\n" +" \n" +" __global b3RayInfo* rays,\n" +" \n" +" __global int* out_numRayRigidPairs, \n" +" __global int2* out_rayRigidPairs,\n" +" int maxRayRigidPairs, int numRays)\n" +"{\n" +" int rayIndex = get_global_id(0);\n" +" if(rayIndex >= numRays) return;\n" +" \n" +" b3Vector3 rayFrom = rays[rayIndex].m_from;\n" +" b3Vector3 rayTo = rays[rayIndex].m_to;\n" +" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from);\n" +" \n" +" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" +" \n" +" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n" +" int stackSize = 2;\n" +" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n" +" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n" +" \n" +" while(stackSize)\n" +" {\n" +" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" +" --stackSize;\n" +" \n" +" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" +" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" +" \n" +" //bvhRigidIndex is not used if internal node\n" +" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" +" \n" +" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" +" \n" +" if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) )\n" +" {\n" +" if(isLeaf)\n" +" {\n" +" int2 rayRigidPair;\n" +" rayRigidPair.x = rayIndex;\n" +" rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" +" \n" +" int pairIndex = atomic_inc(out_numRayRigidPairs);\n" +" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" +" }\n" +" \n" +" if(!isLeaf) //Internal node\n" +" {\n" +" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" +" {\n" +" //Error\n" +" }\n" +" else\n" +" {\n" +" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" +" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" +" }\n" +" }\n" +" }\n" +" }\n" +"}\n" ; diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index cb57fbd5b..1ada815c7 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -8,6 +8,11 @@ #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h" + #include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" @@ -20,7 +25,24 @@ struct b3GpuRaycastInternalData cl_context m_context; cl_device_id m_device; cl_command_queue m_q; - cl_kernel m_raytraceKernel; + cl_kernel m_raytraceKernel; + cl_kernel m_raytracePairsKernel; + cl_kernel m_findRayRigidPairIndexRanges; + + b3GpuParallelLinearBvh* m_plbvh; + b3RadixSort32CL* m_radixSorter; + b3FillCL* m_fill; + + //1 element per ray + b3OpenCLArray* m_gpuRays; + b3OpenCLArray* m_gpuHitResults; + b3OpenCLArray* m_firstRayRigidPairIndexPerRay; + b3OpenCLArray* m_numRayRigidPairsPerRay; + + //1 element per (ray index, rigid index) pair + b3OpenCLArray* m_gpuNumRayRigidPairs; + b3OpenCLArray* m_gpuRayRigidPairs; + int m_test; }; @@ -31,7 +53,19 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue m_data->m_device = device; m_data->m_q = q; m_data->m_raytraceKernel = 0; + m_data->m_raytracePairsKernel = 0; + m_data->m_findRayRigidPairIndexRanges = 0; + m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q); + m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q); + m_data->m_fill = new b3FillCL(ctx, device, q); + + m_data->m_gpuRays = new b3OpenCLArray(ctx, q); + m_data->m_gpuHitResults = new b3OpenCLArray(ctx, q); + m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray(ctx, q); + m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray(ctx, q); + m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray(ctx, q); + m_data->m_gpuRayRigidPairs = new b3OpenCLArray(ctx, q); { cl_int errNum=0; @@ -39,6 +73,10 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue b3Assert(errNum==CL_SUCCESS); m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog); b3Assert(errNum==CL_SUCCESS); + m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); + m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); clReleaseProgram(prog); } @@ -48,6 +86,20 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue b3GpuRaycast::~b3GpuRaycast() { clReleaseKernel(m_data->m_raytraceKernel); + clReleaseKernel(m_data->m_raytracePairsKernel); + clReleaseKernel(m_data->m_findRayRigidPairIndexRanges); + + delete m_data->m_plbvh; + delete m_data->m_radixSorter; + delete m_data->m_fill; + + delete m_data->m_gpuRays; + delete m_data->m_gpuHitResults; + delete m_data->m_firstRayRigidPairIndexPerRay; + delete m_data->m_numRayRigidPairsPerRay; + delete m_data->m_gpuNumRayRigidPairs; + delete m_data->m_gpuRayRigidPairs; + delete m_data; } @@ -206,27 +258,32 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3A } ///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) + int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) { - //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); - - b3OpenCLArray gpuRays(m_data->m_context,m_data->m_q); - b3OpenCLArray gpuHitResults(m_data->m_context,m_data->m_q); - + { B3_PROFILE("raycast copyFromHost"); - gpuRays.copyFromHost(rays); - - - gpuHitResults.resize(hitResults.size()); - gpuHitResults.copyFromHost(hitResults); + m_data->m_gpuRays->copyFromHost(rays); + m_data->m_gpuHitResults->copyFromHost(hitResults); + } - - + + int numRays = hitResults.size(); + { + m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); + m_data->m_numRayRigidPairsPerRay->resize(numRays); + + m_data->m_gpuNumRayRigidPairs->resize(1); + m_data->m_gpuRayRigidPairs->resize(numRays * 16); + } + //run kernel + const bool USE_BRUTE_FORCE_RAYCAST = false; + if(USE_BRUTE_FORCE_RAYCAST) { B3_PROFILE("raycast launch1D"); @@ -234,8 +291,8 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align int numRays = rays.size(); launcher.setConst(numRays); - launcher.setBuffer(gpuRays.getBufferCL()); - launcher.setBuffer(gpuHitResults.getBufferCL()); + launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); + launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); @@ -246,11 +303,90 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align launcher.launch1D(numRays); clFinish(m_data->m_q); } + else + { + //printf("broadphase->getAllAabbsGPU().size(): %d \n", broadphase->getAllAabbsGPU().size()); + m_data->m_plbvh->build( broadphase->getAllAabbsGPU() ); + + m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); + + int numRayRigidPairs = -1; + m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); + if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) + { + numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); + m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); + } + + m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct + + //Sort ray-rigid pairs by ray index + { + B3_PROFILE("sort ray-rigid pairs"); + m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray* >(m_data->m_gpuRayRigidPairs) ); + } + + //detect start,count of each ray pair + { + B3_PROFILE("detect ray-rigid pair index ranges"); + + { + B3_PROFILE("reset ray-rigid pair index ranges"); + + m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index + m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); + clFinish(m_data->m_q); + } + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), + + b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), + b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) + }; + + b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numRayRigidPairs); + + launcher.launch1D(numRayRigidPairs); + clFinish(m_data->m_q); + } + + { + B3_PROFILE("ray-rigid intersection"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), + b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), + b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), + b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), + + b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), + + b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) + }; + + b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numRays); + + launcher.launch1D(numRays); + clFinish(m_data->m_q); + } + } + + //copy results { B3_PROFILE("raycast copyToHost"); - gpuHitResults.copyToHost(hitResults); + m_data->m_gpuHitResults->copyToHost(hitResults); } } \ No newline at end of file diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h index 8b0cbd0be..3a5cf44b7 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h @@ -23,8 +23,7 @@ public: void castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData - ); + const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase); diff --git a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl index acf0bdd32..e72d96876 100644 --- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl @@ -337,3 +337,103 @@ __kernel void rayCastKernel( } } + + +__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, + __global int* out_firstRayRigidPairIndexPerRay, + __global int* out_numRayRigidPairsPerRay, + int numRayRigidPairs) +{ + int rayRigidPairIndex = get_global_id(0); + if (rayRigidPairIndex >= numRayRigidPairs) return; + + int rayIndex = rayRigidPairs[rayRigidPairIndex].x; + + atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex); + atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]); +} + +__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, + __global b3RayHit* hitResults, + __global int* firstRayRigidPairIndexPerRay, + __global int* numRayRigidPairsPerRay, + + __global Body* bodies, + __global Collidable* collidables, + __global const b3GpuFace* faces, + __global const ConvexPolyhedronCL* convexShapes, + + __global int2* rayRigidPairs, + int numRays) +{ + int i = get_global_id(0); + if (i >= numRays) return; + + float4 rayFrom = rays[i].m_from; + float4 rayTo = rays[i].m_to; + + hitResults[i].m_hitFraction = 1.f; + + float hitFraction = 1.f; + float4 hitPoint; + float4 hitNormal; + int hitBodyIndex = -1; + + // + for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair) + { + int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i]; + int b = rayRigidPairs[rayRigidPairIndex].y; + + if (hitResults[i].m_hitResult2 == b) continue; + + Body body = bodies[b]; + Collidable rigidCollidable = collidables[body.m_collidableIdx]; + + float4 pos = body.m_pos; + float4 orn = body.m_quat; + + if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL) + { + float4 invPos = (float4)(0,0,0,0); + float4 invOrn = (float4)(0,0,0,0); + float4 rayFromLocal = (float4)(0,0,0,0); + float4 rayToLocal = (float4)(0,0,0,0); + invOrn = qtInvert(orn); + invPos = qtRotate(invOrn, -pos); + rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; + rayToLocal = qtRotate( invOrn, rayTo) + invPos; + rayFromLocal.w = 0.f; + rayToLocal.w = 0.f; + int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces; + int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset; + + if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) + { + hitBodyIndex = b; + hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); + } + } + + if (rigidCollidable.m_shapeType == SHAPE_SPHERE) + { + float radius = rigidCollidable.m_radius; + + if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) + { + hitBodyIndex = b; + hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); + hitNormal = (float4) (hitPoint - bodies[b].m_pos); + } + } + } + + if (hitBodyIndex >= 0) + { + hitResults[i].m_hitFraction = hitFraction; + hitResults[i].m_hitPoint = hitPoint; + hitResults[i].m_hitNormal = normalize(hitNormal); + hitResults[i].m_hitResult0 = hitBodyIndex; + } + +} diff --git a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h index 159da7c17..6257909a4 100644 --- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h +++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h @@ -281,4 +281,101 @@ static const char* rayCastKernelCL= \ " hitResults[i].m_hitResult0 = hitBodyIndex;\n" " }\n" "}\n" +"__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n" +" __global int* out_firstRayRigidPairIndexPerRay,\n" +" __global int* out_numRayRigidPairsPerRay,\n" +" int numRayRigidPairs)\n" +"{\n" +" int rayRigidPairIndex = get_global_id(0);\n" +" if (rayRigidPairIndex >= numRayRigidPairs) return;\n" +" \n" +" int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n" +" \n" +" atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n" +" atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n" +"}\n" +"__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n" +" __global b3RayHit* hitResults, \n" +" __global int* firstRayRigidPairIndexPerRay,\n" +" __global int* numRayRigidPairsPerRay,\n" +" \n" +" __global Body* bodies,\n" +" __global Collidable* collidables,\n" +" __global const b3GpuFace* faces,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" \n" +" __global int2* rayRigidPairs,\n" +" int numRays)\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i >= numRays) return;\n" +" \n" +" float4 rayFrom = rays[i].m_from;\n" +" float4 rayTo = rays[i].m_to;\n" +" \n" +" hitResults[i].m_hitFraction = 1.f;\n" +" \n" +" float hitFraction = 1.f;\n" +" float4 hitPoint;\n" +" float4 hitNormal;\n" +" int hitBodyIndex = -1;\n" +" \n" +" //\n" +" for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n" +" {\n" +" int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n" +" int b = rayRigidPairs[rayRigidPairIndex].y;\n" +" \n" +" if (hitResults[i].m_hitResult2 == b) continue;\n" +" \n" +" Body body = bodies[b];\n" +" Collidable rigidCollidable = collidables[body.m_collidableIdx];\n" +" \n" +" float4 pos = body.m_pos;\n" +" float4 orn = body.m_quat;\n" +" \n" +" if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" +" {\n" +" float4 invPos = (float4)(0,0,0,0);\n" +" float4 invOrn = (float4)(0,0,0,0);\n" +" float4 rayFromLocal = (float4)(0,0,0,0);\n" +" float4 rayToLocal = (float4)(0,0,0,0);\n" +" invOrn = qtInvert(orn);\n" +" invPos = qtRotate(invOrn, -pos);\n" +" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" +" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" +" rayFromLocal.w = 0.f;\n" +" rayToLocal.w = 0.f;\n" +" int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n" +" int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n" +" \n" +" if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" +" {\n" +" hitBodyIndex = b;\n" +" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" +" }\n" +" }\n" +" \n" +" if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" float radius = rigidCollidable.m_radius;\n" +" \n" +" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" +" {\n" +" hitBodyIndex = b;\n" +" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" +" hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n" +" }\n" +" }\n" +" }\n" +" \n" +" if (hitBodyIndex >= 0)\n" +" {\n" +" hitResults[i].m_hitFraction = hitFraction;\n" +" hitResults[i].m_hitPoint = hitPoint;\n" +" hitResults[i].m_hitNormal = normalize(hitNormal);\n" +" hitResults[i].m_hitResult0 = hitBodyIndex;\n" +" }\n" +" \n" +"}\n" ; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index e1665f406..783e44306 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -703,6 +703,6 @@ void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray& ray { this->m_data->m_raycaster->castRays(rays,hitResults, getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(), - m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getInternalData() - ); + m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), + m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap); }