From 5991eef7490802b31cb1c3ef2dc2eef621896de8 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Sat, 20 Jul 2013 21:16:24 -0700 Subject: [PATCH] add GPU incremental 3d sap (not enabled by default) --- .../b3GpuSapBroadphase.cpp | 741 +++++++++++++++++- .../BroadphaseCollision/b3GpuSapBroadphase.h | 24 +- .../BroadphaseCollision/b3SapAabb.h | 4 +- .../BroadphaseCollision/kernels/sapFast.cl | 270 +++++++ .../ParallelPrimitives/b3RadixSort32CL.h | 9 +- 5 files changed, 1017 insertions(+), 31 deletions(-) diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 3dc115204..bdf9359c7 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -1,4 +1,6 @@ +bool searchIncremental3dSapOnGpu = true; + #include "b3GpuSapBroadphase.h" #include "Bullet3Common/b3Vector3.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" @@ -27,7 +29,23 @@ m_gpuSmallSortedAabbs(ctx,q), m_sum(ctx,q), m_sum2(ctx,q), m_dst(ctx,q), -m_currentBuffer(-1) +m_currentBuffer(-1), +m_objectMinMaxIndexGPUaxis0(ctx,q), +m_objectMinMaxIndexGPUaxis1(ctx,q), +m_objectMinMaxIndexGPUaxis2(ctx,q), +m_objectMinMaxIndexGPUaxis0prev(ctx,q), +m_objectMinMaxIndexGPUaxis1prev(ctx,q), +m_objectMinMaxIndexGPUaxis2prev(ctx,q), +m_sortedAxisGPU0(ctx,q), +m_sortedAxisGPU1(ctx,q), +m_sortedAxisGPU2(ctx,q), +m_sortedAxisGPU0prev(ctx,q), +m_sortedAxisGPU1prev(ctx,q), +m_sortedAxisGPU2prev(ctx,q), +m_addedHostPairsGPU(ctx,q), +m_removedHostPairsGPU(ctx,q), +m_addedCountGPU(ctx,q), +m_removedCountGPU(ctx,q) { const char* sapSrc = sapCL; const char* sapFastSrc = sapFastCL; @@ -36,7 +54,8 @@ m_currentBuffer(-1) cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH); b3Assert(errNum==CL_SUCCESS); - cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"",B3_BROADPHASE_SAPFAST_PATH); + //cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"",B3_BROADPHASE_SAPFAST_PATH); + cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,0,&errNum,"",B3_BROADPHASE_SAPFAST_PATH,true); b3Assert(errNum==CL_SUCCESS); #ifndef __APPLE__ m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context,m_device,m_queue); @@ -54,6 +73,9 @@ m_currentBuffer(-1) m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "prepareSumVarianceKernel",&errNum,sapProg ); b3Assert(errNum==CL_SUCCESS); + m_computePairsIncremental3dSapKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsIncremental3dSapKernel",&errNum,sapFastProg ); + b3Assert(errNum==CL_SUCCESS); + #if 0 @@ -89,6 +111,7 @@ b3GpuSapBroadphase::~b3GpuSapBroadphase() clReleaseKernel(m_sapKernel); clReleaseKernel(m_sap2Kernel); clReleaseKernel(m_prepareSumVarianceKernel); + clReleaseKernel(m_computePairsIncremental3dSapKernel); } @@ -125,48 +148,693 @@ void b3GpuSapBroadphase::init3dSap() for (int buf=0;buf<2;buf++) { int totalNumAabbs = m_allAabbsCPU.size(); - m_sortedAxisCPU[axis][buf].resize(totalNumAabbs); + int numEndPoints = 2*totalNumAabbs; + m_sortedAxisCPU[axis][buf].resize(numEndPoints); if (buf==m_currentBuffer) { for (int i=0;iexecuteHost(m_sortedAxisCPU[axis][m_currentBuffer]); + } + + for (int axis=0;axis<3;axis++) + { + int totalNumAabbs = m_allAabbsCPU.size(); + int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); + m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints); + for (int i=0;i(const b3Int2& a,const b3Int2& b) +{ + return a.x > b.x || (a.x == b.x && a.y > b.y); +}; + +b3AlignedObjectArray addedHostPairs; +b3AlignedObjectArray removedHostPairs; + +b3AlignedObjectArray preAabbs; + void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() { - b3Assert(m_currentBuffer>=0); - if (m_currentBuffer<0) - return; + static int framepje = 0; + //printf("framepje=%d\n",framepje++); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - for (int axis=0;axis<3;axis++) + B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap"); + + addedHostPairs.resize(0); + removedHostPairs.resize(0); + + b3Assert(m_currentBuffer>=0); + { - for (int buf=0;buf<2;buf++) + preAabbs.resize(m_allAabbsCPU.size()); + for (int i=0;i allPairs; + { + B3_PROFILE("m_overlappingPairs.copyToHost"); + m_overlappingPairs.copyToHost(allPairs); + } + if (0) + { + { + printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n", + m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1],m_allAabbsCPU[40].m_min[2], + m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1],m_allAabbsCPU[40].m_max[2]); + } + + { + printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n", + m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1],m_allAabbsCPU[53].m_min[2], + m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1],m_allAabbsCPU[53].m_max[2]); + } + + + { + b3Int2 newPair; + newPair.x = 40; + newPair.y = 53; + int index = allPairs.findBinarySearch(newPair); + printf("hasPair(40,53)=%d out of %d\n",index, allPairs.size()); + + { + int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max,(const b3Vector3&)m_allAabbsCPU[53].m_min,(const b3Vector3&)m_allAabbsCPU[53].m_max); + printf("overlap=%d\n",overlap); + } + + if (preAabbs.size()) + { + int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max,(const b3Vector3&)preAabbs[53].m_min,(const b3Vector3&)preAabbs[53].m_max); + printf("prevoverlap=%d\n",prevOverlap); + } else + { + printf("unknown prevoverlap\n"); + } + + } + } + + + if (0) + { + for (int i=0;iexecuteHost(m_sortedAxisCPU[axis][m_currentBuffer]); + } + + if (0) + { + for (int axis=0;axis<3;axis++) + { + //printf("axis %d\n",axis); + for (int i=0;i m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + + // b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + //b3Assert(overlap==overlap2); + + + + if (dmin<0) + { + if (overlap && !prevOverlap) + { + //add a pair + b3Int2 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + addedHostPairs.push_back(newPair); + } + } + else + { + if (!overlap && prevOverlap) + { + + //remove a pair + b3Int2 removedPair; + if (i<=otherIndex) + { + removedPair.x = i; + removedPair.y = otherIndex; + } else + { + removedPair.x = otherIndex; + removedPair.y = i; + } + removedHostPairs.push_back(removedPair); + } + }//otherisMax + }//if (dmin<0) + }//if (otherIndex!=i) + }//for (int j= + } + + if (dmax!=0) + { + int stepMax = dmax<0 ? -1 : 1; + for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax) + { + int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; + int otherIndex = otherIndex2/2; + if (otherIndex!=i) + { + bool otherIsMin = ((otherIndex2&1)==0); + //if (otherIsMin) + { + //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); + //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); + + bool overlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + //b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + if (dmax>0) + { + if (overlap && !prevOverlap) + { + //add a pair + b3Int2 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + addedHostPairs.push_back(newPair); + + } + } + else + { + if (!overlap && prevOverlap) + { + //if (otherIndex2&1==0) -> min? + //remove a pair + b3Int2 removedPair; + if (i<=otherIndex) + { + removedPair.x = i; + removedPair.y = otherIndex; + } else + { + removedPair.x = otherIndex; + removedPair.y = i; + } + removedHostPairs.push_back(removedPair); + + } + } + + }//if (dmin<0) + }//if (otherIndex!=i) + }//for (int j= + } + }//for (int otherbuffer + }//for (int axis=0; + }//for (int i=0;i removedPositions; + + { + B3_PROFILE("actual removing"); + for (int i=0;i actualAddedPairs; + + { + B3_PROFILE("actual adding"); + for (int i=0;i addedHostPairs; + //b3AlignedObjectArray removedHostPairs; + { + B3_PROFILE("m_overlappingPairs.copyFromHost"); + m_overlappingPairs.copyFromHost(allPairs); + } } @@ -177,8 +845,8 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) { //test - //if (m_currentBuffer>=0) - // calculateOverlappingPairsHostIncremental3Sap(); + if (m_currentBuffer>=0) + return calculateOverlappingPairsHostIncremental3Sap(); b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); m_allAabbsGPU.copyToHost(m_allAabbsCPU); @@ -203,6 +871,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) int axis=0; { + B3_PROFILE("CPU compute best variance axis"); b3Vector3 s(0,0,0),s2(0,0,0); int numRigidBodies = numSmallAabbs; @@ -250,8 +919,17 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) (b3Vector3&)m_smallAabbsCPU[j].m_min,(b3Vector3&)m_smallAabbsCPU[j].m_max)) { b3Int2 pair; - pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array - pair.y = m_smallAabbsCPU[j].m_minIndices[3]; + int a = m_smallAabbsCPU[i].m_minIndices[3]; + int b = m_smallAabbsCPU[j].m_minIndices[3]; + if (a<=b) + { + pair.x = a;//store the original index in the unsorted aabb array + pair.y = b; + } else + { + pair.x = b;//store the original index in the unsorted aabb array + pair.y = a; + } hostPairs.push_back(pair); } } @@ -272,8 +950,18 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) (b3Vector3&)m_largeAabbsCPU[j].m_min,(b3Vector3&)m_largeAabbsCPU[j].m_max)) { b3Int2 pair; - pair.x = m_largeAabbsCPU[j].m_minIndices[3]; - pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array + int a = m_largeAabbsCPU[j].m_minIndices[3]; + int b = m_smallAabbsCPU[i].m_minIndices[3]; + if (a<=b) + { + pair.x = a; + pair.y = b;//store the original index in the unsorted aabb array + } else + { + pair.x = b; + pair.y = a;//store the original index in the unsorted aabb array + } + hostPairs.push_back(pair); } } @@ -293,7 +981,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) m_overlappingPairs.resize(0); } - //init3dSap(); + init3dSap(); } @@ -312,6 +1000,9 @@ void b3GpuSapBroadphase::reset() void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { + //if (m_currentBuffer>=0) + // return calculateOverlappingPairsHostIncremental3Sap(); + B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); int axis = 0; @@ -363,7 +1054,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) if (m_prefixScanFloat4) { - B3_PROFILE("compute best variance axis"); + B3_PROFILE("GPU compute best variance axis"); int numSmallAabbs = m_smallAabbsGPU.size(); if (m_dst.size()!=(numSmallAabbs+1)) { @@ -609,7 +1300,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) }//B3_PROFILE("GPU_RADIX SORT"); - + //init3dSap(); } void b3GpuSapBroadphase::writeAabbsToGpu() diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h index 262ad7ce8..92c8d6169 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h @@ -7,7 +7,7 @@ class b3Vector3; #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "b3SapAabb.h" - +#include "Bullet3Common/b3Int2.h" class b3GpuSapBroadphase @@ -22,11 +22,33 @@ class b3GpuSapBroadphase cl_kernel m_sapKernel; cl_kernel m_sap2Kernel; cl_kernel m_prepareSumVarianceKernel; + cl_kernel m_computePairsIncremental3dSapKernel; class b3RadixSort32CL* m_sorter; ///test for 3d SAP b3AlignedObjectArray m_sortedAxisCPU[3][2]; + b3AlignedObjectArray m_objectMinMaxIndexCPU[3][2]; + b3OpenCLArray m_objectMinMaxIndexGPUaxis0; + b3OpenCLArray m_objectMinMaxIndexGPUaxis1; + b3OpenCLArray m_objectMinMaxIndexGPUaxis2; + b3OpenCLArray m_objectMinMaxIndexGPUaxis0prev; + b3OpenCLArray m_objectMinMaxIndexGPUaxis1prev; + b3OpenCLArray m_objectMinMaxIndexGPUaxis2prev; + + b3OpenCLArray m_sortedAxisGPU0; + b3OpenCLArray m_sortedAxisGPU1; + b3OpenCLArray m_sortedAxisGPU2; + b3OpenCLArray m_sortedAxisGPU0prev; + b3OpenCLArray m_sortedAxisGPU1prev; + b3OpenCLArray m_sortedAxisGPU2prev; + + + b3OpenCLArray m_addedHostPairsGPU; + b3OpenCLArray m_removedHostPairsGPU; + b3OpenCLArray m_addedCountGPU; + b3OpenCLArray m_removedCountGPU; + int m_currentBuffer; public: diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h b/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h index 81661db6f..7f971ba54 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h @@ -1,7 +1,9 @@ #ifndef B3_SAP_AABB_H #define B3_SAP_AABB_H -struct b3SapAabb +#include "Bullet3Common/b3Scalar.h" + +B3_ATTRIBUTE_ALIGNED16(struct) b3SapAabb { union { diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl index 4bf018384..93fd929e3 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl @@ -30,6 +30,22 @@ typedef struct }; } btAabbCL; +typedef struct +{ + union + { + unsigned int m_key; + unsigned int x; + }; + + union + { + unsigned int m_value; + unsigned int y; + + }; +}b3SortData; + /// conservative test for overlap between two aabbs bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2); @@ -46,6 +62,260 @@ bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2) return overlap; } +__kernel void computePairsIncremental3dSapKernel( __global const uint2* objectMinMaxIndexGPUaxis0, + __global const uint2* objectMinMaxIndexGPUaxis1, + __global const uint2* objectMinMaxIndexGPUaxis2, + __global const uint2* objectMinMaxIndexGPUaxis0prev, + __global const uint2* objectMinMaxIndexGPUaxis1prev, + __global const uint2* objectMinMaxIndexGPUaxis2prev, + __global const b3SortData* sortedAxisGPU0, + __global const b3SortData* sortedAxisGPU1, + __global const b3SortData* sortedAxisGPU2, + __global const b3SortData* sortedAxisGPU0prev, + __global const b3SortData* sortedAxisGPU1prev, + __global const b3SortData* sortedAxisGPU2prev, + __global int2* addedHostPairsGPU, + __global int2* removedHostPairsGPU, + volatile __global int* addedHostPairsCount, + volatile __global int* removedHostPairsCount, + int maxCapacity, + int numObjects) +{ + int i = get_global_id(0); + if (i>=numObjects) + return; + + __global const uint2* objectMinMaxIndexGPU[3][2]; + objectMinMaxIndexGPU[0][0]=objectMinMaxIndexGPUaxis0; + objectMinMaxIndexGPU[1][0]=objectMinMaxIndexGPUaxis1; + objectMinMaxIndexGPU[2][0]=objectMinMaxIndexGPUaxis2; + objectMinMaxIndexGPU[0][1]=objectMinMaxIndexGPUaxis0prev; + objectMinMaxIndexGPU[1][1]=objectMinMaxIndexGPUaxis1prev; + objectMinMaxIndexGPU[2][1]=objectMinMaxIndexGPUaxis2prev; + + __global const b3SortData* sortedAxisGPU[3][2]; + sortedAxisGPU[0][0] = sortedAxisGPU0; + sortedAxisGPU[1][0] = sortedAxisGPU1; + sortedAxisGPU[2][0] = sortedAxisGPU2; + sortedAxisGPU[0][1] = sortedAxisGPU0prev; + sortedAxisGPU[1][1] = sortedAxisGPU1prev; + sortedAxisGPU[2][1] = sortedAxisGPU2prev; + + int m_currentBuffer = 0; + + for (int axis=0;axis<3;axis++) + { + //int i = checkObjects[a]; + + unsigned int curMinIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].x; + unsigned int curMaxIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].y; + unsigned int prevMinIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].x; + int dmin = curMinIndex - prevMinIndex; + + unsigned int prevMaxIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].y; + + int dmax = curMaxIndex - prevMaxIndex; + + for (int otherbuffer = 0;otherbuffer<2;otherbuffer++) + { + if (dmin!=0) + { + int stepMin = dmin<0 ? -1 : 1; + for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin) + { + int otherIndex2 = sortedAxisGPU[axis][otherbuffer][j].y; + int otherIndex = otherIndex2/2; + if (otherIndex!=i) + { + bool otherIsMax = ((otherIndex2&1)!=0); + + if (otherIsMax) + { + + bool overlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) || + (objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + + // b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) || + (objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + //b3Assert(overlap==overlap2); + + + + if (dmin<0) + { + if (overlap && !prevOverlap) + { + //add a pair + int2 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + + { + int curPair = atomic_inc(addedHostPairsCount); + if (curPair objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) || + (objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + //b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) || + (objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + if (dmax>0) + { + if (overlap && !prevOverlap) + { + //add a pair + int2 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + { + int curPair = atomic_inc(addedHostPairsCount); + if (curPair min? + //remove a pair + int2 removedPair; + if (i<=otherIndex) + { + removedPair.x = i; + removedPair.y = otherIndex; + } else + { + removedPair.x = otherIndex; + removedPair.y = i; + } + { + int curPair = atomic_inc(removedHostPairsCount); + if (curPair