diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h index 29c3e14bb..81d455044 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h @@ -107,6 +107,9 @@ public: m_mergedAabb(context, queue), m_leafNodeAabbs(context, queue) { + m_rootNodeIndex.resize(1); + + // const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; const char* kernelSource = parallelLinearBvhCL; //parallelLinearBvhCL.h @@ -150,12 +153,18 @@ public: int numLeaves = worldSpaceAabbs.size(); //Number of leaves in the BVH == Number of rigid body AABBs int numInternalNodes = numLeaves - 1; - if(numLeaves < 2) return; - + // + m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs); + + if(numLeaves < 2) + { + int rootNodeIndex = numLeaves - 1; + m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1); + return; + } + // { - m_rootNodeIndex.resize(1); - m_internalNodeAabbs.resize(numInternalNodes); m_internalNodeLeafIndexRanges.resize(numInternalNodes); m_internalNodeChildNodes.resize(numInternalNodes); @@ -164,12 +173,8 @@ public: m_leafNodeParentNodes.resize(numLeaves); m_mortonCodesAndAabbIndicies.resize(numLeaves); m_mergedAabb.resize(numLeaves); - m_leafNodeAabbs.resize(numLeaves); } - // - m_leafNodeAabbs.copyFromOpenCLArray(worldSpaceAabbs); - //Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) ) //The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes) int numLevels = 0; @@ -184,8 +189,6 @@ public: //If the number of nodes is not a power of 2(as in, can be expressed as 2^N where N is an integer), then there is 1 additional level if( ~(1 << mostSignificantBit) & numLeaves ) numLevels++; - - if(0) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit); } //Determine number of internal nodes per level, use prefix sum to get offsets of each level, and send to GPU @@ -230,18 +233,6 @@ public: m_firstIndexOffsetPerLevelCpu[i] -= m_numNodesPerLevelCpu[i]; } - if(0) - { - int numInternalNodes = 0; - for(int i = 0; i < numLevels; ++i) - if(i < numLevels - 1) numInternalNodes += m_numNodesPerLevelCpu[i]; - printf("numInternalNodes: %d\n", numInternalNodes); - - for(int i = 0; i < numLevels; ++i) - printf("numNodes, offset[%d]: %d, %d \n", i, m_numNodesPerLevelCpu[i], m_firstIndexOffsetPerLevelCpu[i]); - printf("\n"); - } - //Copy to GPU m_numNodesPerLevelGpu.copyFromHost(m_numNodesPerLevelCpu, false); m_firstIndexOffsetPerLevelGpu.copyFromHost(m_firstIndexOffsetPerLevelCpu, false); @@ -338,17 +329,6 @@ public: launcher.launch1D(numInternalNodes); clFinish(m_queue); - - if(0) - { - static b3AlignedObjectArray internalNodeChildNodes; - m_internalNodeChildNodes.copyToHost(internalNodeChildNodes, false); - clFinish(m_queue); - - for(int i = 0; i < numInternalNodes; ++i) - printf("ch[%d]: %d, %d\n", i, internalNodeChildNodes[i].x, internalNodeChildNodes[i].y); - printf("\n"); - } } //For each internal node, check children to get its AABB; start from the @@ -395,47 +375,6 @@ public: launcher.launch1D(numLeaves); } clFinish(m_queue); - - if(0) - { - static b3AlignedObjectArray leafIndexRanges; - m_internalNodeLeafIndexRanges.copyToHost(leafIndexRanges, false); - clFinish(m_queue); - - for(int i = 0; i < numInternalNodes; ++i) - //if(leafIndexRanges[i].x == -1 || leafIndexRanges[i].y == -1) - printf("leafIndexRanges[%d]: %d, %d\n", i, leafIndexRanges[i].x, leafIndexRanges[i].y); - printf("\n"); - } - - if(0) - { - static b3AlignedObjectArray rigidAabbs; - worldSpaceAabbs.copyToHost(rigidAabbs, false); - clFinish(m_queue); - - b3SapAabb actualRootAabb; - actualRootAabb.m_minVec = b3MakeVector3(B3_LARGE_FLOAT, B3_LARGE_FLOAT, B3_LARGE_FLOAT); - actualRootAabb.m_maxVec = b3MakeVector3(-B3_LARGE_FLOAT, -B3_LARGE_FLOAT, -B3_LARGE_FLOAT); - for(int i = 0; i < rigidAabbs.size(); ++i) - { - actualRootAabb.m_minVec.setMin(rigidAabbs[i].m_minVec); - actualRootAabb.m_maxVec.setMax(rigidAabbs[i].m_maxVec); - } - - b3SapAabb rootAabb = m_internalNodeAabbs.at(0); - b3SapAabb mergedAABB = m_mergedAabb.at(0); - - printf("mergedAABBMin: %f, %f, %f \n", mergedAABB.m_minVec.x, mergedAABB.m_minVec.y, mergedAABB.m_minVec.z); - printf("actualRootMin: %f, %f, %f \n", actualRootAabb.m_minVec.x, actualRootAabb.m_minVec.y, actualRootAabb.m_minVec.z); - printf("kernelRootMin: %f, %f, %f \n", rootAabb.m_minVec.x, rootAabb.m_minVec.y, rootAabb.m_minVec.z); - - printf("mergedAABBMax: %f, %f, %f \n", mergedAABB.m_maxVec.x, mergedAABB.m_maxVec.y, mergedAABB.m_maxVec.z); - printf("actualRootMax: %f, %f, %f \n", actualRootAabb.m_maxVec.x, actualRootAabb.m_maxVec.y, actualRootAabb.m_maxVec.z); - printf("kernelRootMax: %f, %f, %f \n", rootAabb.m_maxVec.x, rootAabb.m_maxVec.y, rootAabb.m_maxVec.z); - - printf("\n"); - } } } @@ -453,6 +392,8 @@ public: int reset = 0; out_numPairs.copyFromHostPointer(&reset, 1); + if( m_leafNodeAabbs.size() < 2 ) return; + { B3_PROFILE("PLBVH calculateOverlappingPairs"); @@ -510,11 +451,12 @@ public: int reset = 0; out_numRayRigidPairs.copyFromHostPointer(&reset, 1); + if( m_leafNodeAabbs.size() < 1 ) return; + b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl index 40eabfd6c..3b634328a 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl @@ -403,69 +403,39 @@ b3Vector3 b3Vector3_normalize(b3Vector3 v) b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } - -/** - -int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) +int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) { - // not functional -- need to fix - - //aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ) - //t_min is the first intersection, t_max is the second intersection - b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection; - int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y) + //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ). + //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane. + // + //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane + //and min.x will be the far plane; otherwise, it is reversed. + // + //In order for there to be a collision, the t_min and t_max of each pair must overlap. + //This can be tested for by selecting the highest t_min and lowest t_max and comparing them. - //select(b, a, condition) == condition ? a : b - b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection; - b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection; + int4 isNegative = isless( rayNormalizedDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y) + + //When using vector types, the select() function checks the most signficant bit, + //but isless() sets the least significant bit. + isNegative <<= 31; + //select(b, a, condition) == condition ? a : b + //When using select() with vector types, (condition[i]) is true if its most significant bit is 1 + b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection; + b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection; + b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); + b3Scalar t_max_final = rayLength; //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) - //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN + //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN. t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); return (t_min_final <= t_max_final); } -**/ - -void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection, - b3Scalar planeMin, b3Scalar planeMax, - b3Scalar* out_t_min, b3Scalar* out_t_max) -{ - if(rayNormalizedDirection < 0.0f) - { - //max is closer, min is farther - *out_t_min = (planeMax - rayStart) / rayNormalizedDirection; - *out_t_max = (planeMin - rayStart) / rayNormalizedDirection; - } - else - { - //min is closer, max is farther - *out_t_min = (planeMin - rayStart) / rayNormalizedDirection; - *out_t_max = (planeMax - rayStart) / rayNormalizedDirection; - } -} -int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) -{ - b3Scalar t_min_x, t_min_y, t_min_z; - b3Scalar t_max_x, t_max_y, t_max_z; - - rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x); - rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y); - rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z); - - b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) ); - t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) ); - - return (t_min_final <= t_max_final); -} __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, @@ -484,10 +454,13 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, int rayIndex = get_global_id(0); if(rayIndex >= numRays) return; + // b3Vector3 rayFrom = rays[rayIndex].m_from; b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from); - + b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); + b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); + + // int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; int stackSize = 1; @@ -505,7 +478,7 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) ) + if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) ) { if(isLeaf) { diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h index c83783901..b894e2a11 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h @@ -383,64 +383,38 @@ static const char* parallelLinearBvhCL= \ "}\n" "b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" "b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" -"/**\n" -"int rayIntersectsAabb_optimized(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" +"int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" "{\n" -" // not functional -- need to fix\n" -" //aabb is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} )\n" -" //t_min is the first intersection, t_max is the second intersection\n" -" b3Vector3 inverseRayDirection = (b3Vector3){1.0f, 1.0f, 1.0f, 0.0f} / rayNormalizedDirection;\n" -" int4 sign = isless( inverseRayDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y)\n" +" //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n" +" //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n" +" //\n" +" //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n" +" //and min.x will be the far plane; otherwise, it is reversed.\n" +" //\n" +" //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n" +" //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n" " \n" +" int4 isNegative = isless( rayNormalizedDirection, (b3Vector3){0.0f, 0.0f, 0.0f, 0.0f} ); //isless(x,y) returns (x < y)\n" +" \n" +" //When using vector types, the select() function checks the most signficant bit, \n" +" //but isless() sets the least significant bit.\n" +" isNegative <<= 31;\n" " //select(b, a, condition) == condition ? a : b\n" -" b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, sign) - rayFrom ) * inverseRayDirection;\n" -" b3Vector3 t_max = ( select(aabb.m_min, aabb.m_max, (int4){1,1,1,1} - sign) - rayFrom ) * inverseRayDirection;\n" +" //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n" +" b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" +" b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" +" \n" " b3Scalar t_min_final = 0.0f;\n" -" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" b3Scalar t_max_final = rayLength;\n" " \n" " //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" " //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" -" //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN\n" +" //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n" " t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" " t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" " \n" " return (t_min_final <= t_max_final);\n" "}\n" -"**/\n" -"void rayPlanePairTest(b3Scalar rayStart, b3Scalar rayNormalizedDirection,\n" -" b3Scalar planeMin, b3Scalar planeMax, \n" -" b3Scalar* out_t_min, b3Scalar* out_t_max)\n" -"{\n" -" if(rayNormalizedDirection < 0.0f)\n" -" {\n" -" //max is closer, min is farther\n" -" *out_t_min = (planeMax - rayStart) / rayNormalizedDirection;\n" -" *out_t_max = (planeMin - rayStart) / rayNormalizedDirection;\n" -" }\n" -" else\n" -" {\n" -" //min is closer, max is farther\n" -" *out_t_min = (planeMin - rayStart) / rayNormalizedDirection;\n" -" *out_t_max = (planeMax - rayStart) / rayNormalizedDirection;\n" -" }\n" -"}\n" -"int rayIntersectsAabb(b3Vector3 rayFrom, b3Vector3 rayTo, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" -"{\n" -" b3Scalar t_min_x, t_min_y, t_min_z;\n" -" b3Scalar t_max_x, t_max_y, t_max_z;\n" -" \n" -" rayPlanePairTest(rayFrom.x, rayNormalizedDirection.x, aabb.m_min.x, aabb.m_max.x, &t_min_x, &t_max_x);\n" -" rayPlanePairTest(rayFrom.y, rayNormalizedDirection.y, aabb.m_min.y, aabb.m_max.y, &t_min_y, &t_max_y);\n" -" rayPlanePairTest(rayFrom.z, rayNormalizedDirection.z, aabb.m_min.z, aabb.m_max.z, &t_min_z, &t_max_z);\n" -" \n" -" b3Scalar t_min_final = 0.0f;\n" -" b3Scalar t_max_final = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" -" \n" -" t_min_final = fmax( t_min_z, fmax(t_min_y, fmax(t_min_x, t_min_final)) );\n" -" t_max_final = fmin( t_max_z, fmin(t_max_y, fmin(t_max_x, t_max_final)) );\n" -" \n" -" return (t_min_final <= t_max_final);\n" -"}\n" "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" " __global int* rootNodeIndex, \n" " __global int2* internalNodeChildIndices, \n" @@ -457,10 +431,12 @@ static const char* parallelLinearBvhCL= \ " int rayIndex = get_global_id(0);\n" " if(rayIndex >= numRays) return;\n" " \n" +" //\n" " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" " b3Vector3 rayTo = rays[rayIndex].m_to;\n" -" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rays[rayIndex].m_to - rays[rayIndex].m_from);\n" -" \n" +" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" +" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" +" //\n" " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" " \n" " int stackSize = 1;\n" @@ -478,7 +454,7 @@ static const char* parallelLinearBvhCL= \ " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" " \n" " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" -" if( rayIntersectsAabb(rayFrom, rayTo, rayNormalizedDirection, bvhNodeAabb) )\n" +" if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n" " {\n" " if(isLeaf)\n" " {\n"