diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp
index 53995fd65..67a233b84 100644
--- a/Demos3/GpuDemos/broadphase/PairBench.cpp
+++ b/Demos3/GpuDemos/broadphase/PairBench.cpp
@@ -121,6 +121,7 @@ static BroadphaseEntry allBroadphases[]=
 struct PairBenchInternalData
 {
 	b3GpuBroadphaseInterface* m_broadphaseGPU;
+	b3GpuBroadphaseInterface* m_validationBroadphase;
 
 	cl_kernel m_moveObjectsKernel;
 	cl_kernel m_sineWaveKernel;
@@ -507,7 +508,16 @@ void PairBench::initPhysics(const ConstructionInfo& ci)
 		m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"sineWaveKernel",&errNum,pairBenchProg);
 		m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"colorPairsKernel2",&errNum,pairBenchProg);
 		m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,pairsKernelsCL,"updateAabbSimple",&errNum,pairBenchProg);
-	
+
+		//Method for validating the overlapping pairs requires that the
+		//reference broadphase does not maintain internal state aside from AABB data.
+		//That is, overwriting the AABB state in the broadphase using
+		//	b3GpuBroadphaseInterface::getAllAabbsGPU(),
+		//	b3GpuBroadphaseInterface::getSmallAabbIndicesGPU(), and
+		//	b3GpuBroadphaseInterface::getLargeAabbIndicesGPU()
+		//and then calling b3GpuBroadphaseInterface::calculateOverlappingPairs() should
+		//always produce the same result regardless of the current state of the broadphase.
+		m_data->m_validationBroadphase = b3GpuParallelLinearBvhBroadphase::CreateFunc(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 	}
 
 	if (ci.m_window)
@@ -770,6 +780,17 @@ void PairBench::renderScene()
 	m_instancingRenderer->renderScene();
 }
 
+struct OverlappingPairSortPredicate
+{
+	inline bool operator() (const b3Int4& a, const b3Int4& b) const
+	{
+		if(a.x != b.x) return (a.x < b.x);
+		if(a.y != b.y) return (a.y < b.y);
+		if(a.z != b.z) return (a.z < b.z);
+		return (a.w < b.w);
+	}
+};
+
 void PairBench::clientMoveAndDisplay()
 {
 	//color all objects blue
@@ -903,7 +924,10 @@ void PairBench::clientMoveAndDisplay()
 
 		}
 	}
-	
+
+	int prealloc = 3*1024*1024;
+	int maxOverlap = b3Min(prealloc,16*numObjects);
+
 	unsigned long dt = 0;
 	if (numObjects)
 	{
@@ -912,16 +936,104 @@ void PairBench::clientMoveAndDisplay()
 		B3_PROFILE("calculateOverlappingPairs");
 		int sz = sizeof(b3Int4)*64*numObjects;
 
-		int prealloc = 3*1024*1024;
-		
-		int maxOverlap = b3Min(prealloc,16*numObjects);
 
 		m_data->m_broadphaseGPU->calculateOverlappingPairs(maxOverlap);
 
 		int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
 		//printf("numPairs = %d\n", numPairs);
 		dt = cl.getTimeMicroseconds()-dt;
+		
 	}
+	const bool VALIDATE_BROADPHASE = false; //Check that overlapping pairs of 2 broadphases are the same
+	if(numObjects && VALIDATE_BROADPHASE)
+	{
+		B3_PROFILE("validate broadphases");
+
+		{
+			B3_PROFILE("calculateOverlappingPairs m_validationBroadphase");
+			//m_data->m_validationBroadphase->getAllAabbsCPU() = m_data->m_broadphaseGPU->getAllAabbsCPU();
+
+			m_data->m_validationBroadphase->getAllAabbsGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getAllAabbsGPU() );
+			m_data->m_validationBroadphase->getSmallAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getSmallAabbIndicesGPU() );
+			m_data->m_validationBroadphase->getLargeAabbIndicesGPU().copyFromOpenCLArray( m_data->m_broadphaseGPU->getLargeAabbIndicesGPU() );
+
+			m_data->m_validationBroadphase->calculateOverlappingPairs(maxOverlap);
+		}
+
+		static b3AlignedObjectArray<b3Int4> overlappingPairs;
+		static b3AlignedObjectArray<b3Int4> overlappingPairsReference;
+		m_data->m_broadphaseGPU->getOverlappingPairsGPU().copyToHost(overlappingPairs);
+		m_data->m_validationBroadphase->getOverlappingPairsGPU().copyToHost(overlappingPairsReference);
+
+		//Reorder pairs so that (pair.x < pair.y) is always true
+		{
+			B3_PROFILE("reorder pairs");
+
+			for(int i = 0; i < overlappingPairs.size(); ++i)
+			{
+				b3Int4 pair = overlappingPairs[i];
+				if(pair.x > pair.y)
+				{
+					b3Swap(pair.x, pair.y);
+					b3Swap(pair.z, pair.w);
+					overlappingPairs[i] = pair;
+				}
+			}
+			for(int i = 0; i < overlappingPairsReference.size(); ++i)
+			{
+				b3Int4 pair = overlappingPairsReference[i];
+				if(pair.x > pair.y)
+				{
+					b3Swap(pair.x, pair.y);
+					b3Swap(pair.z, pair.w);
+					overlappingPairsReference[i] = pair;
+				}
+			}
+		}
+
+		//Sort both pair lists with the same predicate so that matching pairs end up at matching indices
+		{
+			B3_PROFILE("Sort overlapping pairs from most to least significant bit");
+
+			overlappingPairs.quickSort( OverlappingPairSortPredicate() );
+			overlappingPairsReference.quickSort( OverlappingPairSortPredicate() );
+		}
+
+		//Compare
+		{
+			B3_PROFILE("compare pairs");
+
+			int numPairs = overlappingPairs.size();
+			int numPairsReference = overlappingPairsReference.size();
+
+			bool success = true;
+
+			if(numPairs == numPairsReference)
+			{
+				for(int i = 0; i < numPairsReference; ++i)
+				{
+					const b3Int4& pairA = overlappingPairs[i];
+					const b3Int4& pairB = overlappingPairsReference[i];
+					if( pairA.x != pairB.x
+						|| pairA.y != pairB.y
+						|| pairA.z != pairB.z
+						|| pairA.w != pairB.w )
+					{
+						b3Error("Error: one or more overlappingPairs differs from reference.\n");
+						success = false;
+						break;
+					}
+				}
+			}
+			else
+			{
+				b3Error("Error: numPairs %d != numPairsReference %d \n", numPairs, numPairsReference);
+				success = false;
+			}
+
+			printf("Broadphase validation: %d \n", success);
+		}
+	}
 
 	if (m_data->m_gui)
 	{
diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
index 0c7e1db69..586bb8abb 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
@@ -422,7 +422,8 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global
 
 //Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
 //If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
-//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.
+//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.
+//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).
 #define B3_PLBVH_INVALID_COMMON_PREFIX 128
 #define B3_PLBVH_ROOT_NODE_MARKER -1
 
diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
index 37a1e8a5b..1b72803d3 100644
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
@@ -398,7 +398,8 @@ static const char* parallelLinearBvhCL= \
 "}\n"
 "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
 "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
-"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
+"//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n"
+"//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n"
 "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
 "#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
 "#define b3Int64 long\n"
diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
index 294a20f74..4ef38bd1d 100644
--- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -39,7 +39,7 @@ struct b3GpuRaycastInternalData
 	b3OpenCLArray* m_firstRayRigidPairIndexPerRay;
 	b3OpenCLArray* m_numRayRigidPairsPerRay;
 
-	//1 element per (ray index, rigid index) pair
+	//1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
 	b3OpenCLArray* m_gpuNumRayRigidPairs;
 	b3OpenCLArray* m_gpuRayRigidPairs; //x == ray index, y == rigid index
 