From 3e8b183587e20fb8675e9c6591044ebcbc5b52db Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 29 Jan 2014 15:20:20 -0800 Subject: [PATCH] Avoid breaking up the clipHullHull kernel, it ruins performance. Unfortunately, Mac OSX still requires it. Use indices instead of copies for small/large aabbs in broadphase (grid / sap) --- Demos3/GpuDemos/broadphase/PairBench.cpp | 2 +- .../b3GpuGridBroadphase.cpp | 97 ++++--------------- .../BroadphaseCollision/b3GpuGridBroadphase.h | 9 +- .../b3GpuSapBroadphase.cpp | 2 +- .../kernels/gridBroadphase.cl | 26 ++--- .../kernels/gridBroadphaseKernels.h | 26 ++--- .../BroadphaseCollision/kernels/sap.cl | 11 ++- .../BroadphaseCollision/kernels/sapKernels.h | 11 ++- .../b3ConvexHullContact.cpp | 10 +- .../kernels/satClipHullContacts.cl | 17 +++- .../kernels/satClipHullContacts.h | 15 ++- 11 files changed, 100 insertions(+), 126 deletions(-) diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index 3ca624e05..3a908a7eb 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -104,7 +104,7 @@ static PairBench* sPairDemo = 0; #define BP_COMBO_INDEX 123 -static int curSelectedBroadphase = 5; +static int curSelectedBroadphase = 0; static BroadphaseEntry allBroadphases[]= { {"Gpu Grid",b3GpuGridBroadphase::CreateFunc}, diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp index 276d4630f..bedd8d4d6 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp @@ -33,8 +33,8 @@ b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_ m_device(device), m_queue(q), m_allAabbsGPU1(ctx,q), -m_largeAabbsGPU(ctx,q), -m_smallAabbsGPU(ctx,q), +m_smallAabbsMappingGPU(ctx,q), +m_largeAabbsMappingGPU(ctx,q), m_gpuPairs(ctx,q), m_hashGpu(ctx,q), @@ -117,8 +117,10 @@ void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3 aabb.m_maxVec = aabbMax; aabb.m_minIndices[3] = userPtr; aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; + m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); + m_allAabbsCPU1.push_back(aabb); - m_smallAabbsCPU.push_back(aabb); + } void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask) { @@ -127,8 +129,9 @@ void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Ve aabb.m_maxVec = aabbMax; aabb.m_minIndices[3] = userPtr; aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; + m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); + m_allAabbsCPU1.push_back(aabb); - m_largeAabbsCPU.push_back(aabb); } void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) @@ -151,88 +154,25 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) return; } - //sync small AABBs - { - - - bool syncOnHost = false; - if (syncOnHost) - { - m_allAabbsGPU1.copyToHost(this->m_allAabbsCPU1); - b3AlignedObjectArray hostSmallAabbs; - m_smallAabbsGPU.copyToHost(hostSmallAabbs); - int numSmallAabbs = hostSmallAabbs.size(); - for (int i=0;i=numObjects) - // return; - int src = hostSmallAabbs[i].m_signedMaxIndices[3]; - hostSmallAabbs[i] = m_allAabbsCPU1[src]; - hostSmallAabbs[i].m_signedMaxIndices[3] = src; - } - } - m_smallAabbsGPU.copyFromHost(hostSmallAabbs); - } else - { - int numSmallAabbs = m_smallAabbsGPU.size(); - if (numSmallAabbs) - { - B3_PROFILE("copyAabbsKernelSmall"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ), - b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), - }; - - b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numSmallAabbs ); - int num = numSmallAabbs; - launcher.launch1D( num); - } - } - - } - - //sync large AABBs - { - int numLargeAabbs = m_largeAabbsGPU.size(); - - if (numLargeAabbs) - { - B3_PROFILE("copyAabbsKernelLarge"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ), - b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()), - }; - - b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numLargeAabbs ); - int num = numLargeAabbs; - launcher.launch1D( num); - clFinish(m_queue); - } - } - int numSmallAabbs = m_smallAabbsGPU.size(); + int numSmallAabbs = m_smallAabbsMappingGPU.size(); b3OpenCLArray pairCount(m_context,m_queue); pairCount.push_back(0); m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody); { - int numLargeAabbs = m_largeAabbsGPU.size(); + int numLargeAabbs = m_largeAabbsMappingGPU.size(); if (numLargeAabbs && numSmallAabbs) { B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ), - b3BufferInfoCL( m_smallAabbsGPU.getBufferCL() ), + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ), + b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), + b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), b3BufferInfoCL( m_gpuPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())}; b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); @@ -245,6 +185,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); int numPairs = pairCount.at(0); + if (numPairs >maxPairs) { b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); @@ -264,7 +205,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) B3_PROFILE("kCalcHashAABB"); b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB"); launch.setConst(numSmallAabbs); - launch.setBuffer(m_smallAabbsGPU.getBufferCL()); + launch.setBuffer(m_allAabbsGPU1.getBufferCL()); + launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(this->m_paramsGPU.getBufferCL()); launch.launch1D(numSmallAabbs); @@ -307,7 +249,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs"); launch.setConst(numSmallAabbs); - launch.setBuffer(m_smallAabbsGPU.getBufferCL()); + launch.setBuffer(m_allAabbsGPU1.getBufferCL()); + launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL()); @@ -397,8 +340,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs) void b3GpuGridBroadphase::writeAabbsToGpu() { m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1); - m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU); - m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU); + m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); + m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); } diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h index 752a8fa81..4dd5c3a3c 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h @@ -32,10 +32,11 @@ protected: b3OpenCLArray m_allAabbsGPU1; b3AlignedObjectArray m_allAabbsCPU1; - b3OpenCLArray m_smallAabbsGPU; - b3AlignedObjectArray m_smallAabbsCPU; - b3OpenCLArray m_largeAabbsGPU; - b3AlignedObjectArray m_largeAabbsCPU; + b3OpenCLArray m_smallAabbsMappingGPU; + b3AlignedObjectArray m_smallAabbsMappingCPU; + + b3OpenCLArray m_largeAabbsMappingGPU; + b3AlignedObjectArray m_largeAabbsMappingCPU; b3AlignedObjectArray m_hostPairs; b3OpenCLArray m_gpuPairs; diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 8ee378e6d..6942719d2 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -1149,7 +1149,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ), b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), - b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), + b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl index d7d71250e..ded4796d3 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl @@ -22,15 +22,15 @@ int4 getGridPos(float4 worldPos, __global float4* pParams) // calculate grid hash value for each body using its AABB -__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams ) +__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams ) { int index = get_global_id(0); if(index >= numObjects) { return; } - float4 bbMin = pAABB[index*2]; - float4 bbMax = pAABB[index*2 + 1]; + float4 bbMin = allpAABB[smallAabbMapping[index]*2]; + float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1]; float4 pos; pos.x = (bbMin.x + bbMax.x) * 0.5f; pos.y = (bbMin.y + bbMax.y) * 0.5f; @@ -102,7 +102,8 @@ void findPairsInCell( int numObjects, int index, __global int2* pHash, __global int* pCellStart, - __global float4* pAABB, + __global float4* allpAABB, + __global const int* smallAabbMapping, __global float4* pParams, volatile __global int* pairCount, __global int4* pPairBuff2, @@ -121,8 +122,8 @@ void findPairsInCell( int numObjects, // iterate over bodies in this cell int2 sortedData = pHash[index]; int unsorted_indx = sortedData.y; - float4 min0 = pAABB[unsorted_indx*2 + 0]; - float4 max0 = pAABB[unsorted_indx*2 + 1]; + float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; + float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; int handleIndex = as_int(min0.w); int bucketEnd = bucketStart + maxBodiesPerCell; @@ -138,8 +139,8 @@ void findPairsInCell( int numObjects, //if (unsorted_indx2 < unsorted_indx) // check not colliding with self if (unsorted_indx2 != unsorted_indx) // check not colliding with self { - float4 min1 = pAABB[unsorted_indx2*2 + 0]; - float4 max1 = pAABB[unsorted_indx2*2 + 1]; + float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0]; + float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1]; if(testAABBOverlap(min0, max0, min1, max1)) { if (pairCount) @@ -166,7 +167,8 @@ void findPairsInCell( int numObjects, } __kernel void kFindOverlappingPairs( int numObjects, - __global float4* pAABB, + __global float4* allpAABB, + __global const int* smallAabbMapping, __global int2* pHash, __global int* pCellStart, __global float4* pParams , @@ -183,8 +185,8 @@ __kernel void kFindOverlappingPairs( int numObjects, } int2 sortedData = pHash[index]; int unsorted_indx = sortedData.y; - float4 bbMin = pAABB[unsorted_indx*2 + 0]; - float4 bbMax = pAABB[unsorted_indx*2 + 1]; + float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; + float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; float4 pos; pos.x = (bbMin.x + bbMax.x) * 0.5f; pos.y = (bbMin.y + bbMax.y) * 0.5f; @@ -202,7 +204,7 @@ __kernel void kFindOverlappingPairs( int numObjects, for(int x=-1; x<=1; x++) { gridPosB.x = gridPosA.x + x; - findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs); + findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs); } } } diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h index b1f7db368..dad42477c 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h @@ -19,15 +19,15 @@ static const char* gridBroadphaseCL= \ " return gridPos;\n" "}\n" "// calculate grid hash value for each body using its AABB\n" -"__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )\n" +"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n" "{\n" " int index = get_global_id(0);\n" " if(index >= numObjects)\n" " {\n" " return;\n" " }\n" -" float4 bbMin = pAABB[index*2];\n" -" float4 bbMax = pAABB[index*2 + 1];\n" +" float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n" +" float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n" " float4 pos;\n" " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" @@ -91,7 +91,8 @@ static const char* gridBroadphaseCL= \ " int index,\n" " __global int2* pHash,\n" " __global int* pCellStart,\n" -" __global float4* pAABB, \n" +" __global float4* allpAABB, \n" +" __global const int* smallAabbMapping,\n" " __global float4* pParams,\n" " volatile __global int* pairCount,\n" " __global int4* pPairBuff2,\n" @@ -110,8 +111,8 @@ static const char* gridBroadphaseCL= \ " // iterate over bodies in this cell\n" " int2 sortedData = pHash[index];\n" " int unsorted_indx = sortedData.y;\n" -" float4 min0 = pAABB[unsorted_indx*2 + 0]; \n" -" float4 max0 = pAABB[unsorted_indx*2 + 1];\n" +" float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n" +" float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" " int handleIndex = as_int(min0.w);\n" " \n" " int bucketEnd = bucketStart + maxBodiesPerCell;\n" @@ -127,8 +128,8 @@ static const char* gridBroadphaseCL= \ " //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n" " if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n" " { \n" -" float4 min1 = pAABB[unsorted_indx2*2 + 0];\n" -" float4 max1 = pAABB[unsorted_indx2*2 + 1];\n" +" float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n" +" float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n" " if(testAABBOverlap(min0, max0, min1, max1))\n" " {\n" " if (pairCount)\n" @@ -154,7 +155,8 @@ static const char* gridBroadphaseCL= \ " }\n" "}\n" "__kernel void kFindOverlappingPairs( int numObjects,\n" -" __global float4* pAABB, \n" +" __global float4* allpAABB, \n" +" __global const int* smallAabbMapping,\n" " __global int2* pHash, \n" " __global int* pCellStart, \n" " __global float4* pParams ,\n" @@ -170,8 +172,8 @@ static const char* gridBroadphaseCL= \ " }\n" " int2 sortedData = pHash[index];\n" " int unsorted_indx = sortedData.y;\n" -" float4 bbMin = pAABB[unsorted_indx*2 + 0];\n" -" float4 bbMax = pAABB[unsorted_indx*2 + 1];\n" +" float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n" +" float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" " float4 pos;\n" " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" @@ -189,7 +191,7 @@ static const char* gridBroadphaseCL= \ " for(int x=-1; x<=1; x++) \n" " {\n" " gridPosB.x = gridPosA.x + x;\n" -" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs);\n" +" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n" " }\n" " }\n" " }\n" diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl index 0d7f7d6cb..1f76b0da5 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl @@ -63,25 +63,26 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* } -__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs) +__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs) { int i = get_global_id(0); if (i>=numUnsortedAabbs) return; int j = get_global_id(1); - if (j>=numSortedAabbs) + if (j>=numUnSortedAabbs2) return; __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]]; + __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]]; - if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j])) + if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2)) { int4 myPair; int xIndex = unsortedAabbPtr[0].m_minIndices[3]; - int yIndex = sortedAabbs[j].m_minIndices[3]; + int yIndex = unsortedAabbPtr2[0].m_minIndices[3]; if (xIndex>yIndex) { int tmp = xIndex; @@ -349,7 +350,7 @@ __kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btA } -__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis) +__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis) { int i = get_global_id(0); if (i>=numObjects) diff --git a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h index d1d8d4960..1c29bbaeb 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h @@ -56,21 +56,22 @@ static const char* sapCL= \ " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" " return overlap;\n" "}\n" -"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n" +"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numUnsortedAabbs)\n" " return;\n" " int j = get_global_id(1);\n" -" if (j>=numSortedAabbs)\n" +" if (j>=numUnSortedAabbs2)\n" " return;\n" " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" -" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))\n" +" __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" +" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" " {\n" " int4 myPair;\n" " \n" " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" -" int yIndex = sortedAabbs[j].m_minIndices[3];\n" +" int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" " if (xIndex>yIndex)\n" " {\n" " int tmp = xIndex;\n" @@ -306,7 +307,7 @@ static const char* sapCL= \ " destAabbs[i] = allAabbs[src];\n" " destAabbs[i].m_maxIndices[3] = src;\n" "}\n" -"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)\n" +"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numObjects)\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 5186da2d1..3ff077da0 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -3171,6 +3171,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* if (1) { + if (1) + { { B3_PROFILE("findSeparatingAxisVertexFaceKernel"); b3BufferInfoCL bInfo[] = { @@ -3228,7 +3230,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* clFinish(m_queue); } - + } + if (1) { B3_PROFILE("findSeparatingAxisUnitSphereKernel"); b3BufferInfoCL bInfo[] = { @@ -4466,8 +4469,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* //convex-convex contact clipping - B3_PROFILE("clipHullHullKernel"); - bool breakupKernel = true; + + bool breakupKernel = false; #ifdef __APPLE__ breakupKernel = true; @@ -4480,6 +4483,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* #endif//CHECK_ON_HOST if (computeConvexConvex) { + B3_PROFILE("clipHullHullKernel"); if (breakupKernel) { diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl index c5c57cab0..f43397174 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -958,8 +958,15 @@ __kernel void clipHullHullKernel( __global int4* pairs, int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); + + int mprContactIndex = pairs[pairIndex].z; + + int dstIdx = mprContactIndex; + if (dstIdx<0) + { + AppendInc( nGlobalContactsOut, dstIdx ); + } + if (dstIdxm_worldPosB[i] = pointsIn[contactIdx[i]]; + //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact + if (i>0||(mprContactIndex<0)) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } } GET_NPOINTS(*c) = nReducedContacts; } diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 5d863b487..234942ee5 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -1245,8 +1245,13 @@ static const char* satClipKernelsCL= \ " \n" " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" " \n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" int mprContactIndex = pairs[pairIndex].z;\n" +" int dstIdx = mprContactIndex;\n" +" if (dstIdx<0)\n" +" {\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" }\n" " if (dstIdxm_childIndexB = -1;\n" " for (int i=0;im_worldPosB[i] = pointsIn[contactIdx[i]];\n" +" //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" +" if (i>0||(mprContactIndex<0))\n" +" {\n" +" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +" }\n" " }\n" " GET_NPOINTS(*c) = nReducedContacts;\n" " }\n"