Avoid breaking up the clipHullHull kernel; doing so ruins performance. Unfortunately, Mac OS X still requires the split.

Use indices instead of copies for the small/large AABBs in the broadphase (grid / SAP).
This commit is contained in:
erwincoumans
2014-01-29 15:20:20 -08:00
parent ff051f87aa
commit 3e8b183587
11 changed files with 100 additions and 126 deletions

View File

@@ -22,15 +22,15 @@ int4 getGridPos(float4 worldPos, __global float4* pParams)
// calculate grid hash value for each body using its AABB
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )
__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )
{
int index = get_global_id(0);
if(index >= numObjects)
{
return;
}
float4 bbMin = pAABB[index*2];
float4 bbMax = pAABB[index*2 + 1];
float4 bbMin = allpAABB[smallAabbMapping[index]*2];
float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];
float4 pos;
pos.x = (bbMin.x + bbMax.x) * 0.5f;
pos.y = (bbMin.y + bbMax.y) * 0.5f;
@@ -102,7 +102,8 @@ void findPairsInCell( int numObjects,
int index,
__global int2* pHash,
__global int* pCellStart,
__global float4* pAABB,
__global float4* allpAABB,
__global const int* smallAabbMapping,
__global float4* pParams,
volatile __global int* pairCount,
__global int4* pPairBuff2,
@@ -121,8 +122,8 @@ void findPairsInCell( int numObjects,
// iterate over bodies in this cell
int2 sortedData = pHash[index];
int unsorted_indx = sortedData.y;
float4 min0 = pAABB[unsorted_indx*2 + 0];
float4 max0 = pAABB[unsorted_indx*2 + 1];
float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
int handleIndex = as_int(min0.w);
int bucketEnd = bucketStart + maxBodiesPerCell;
@@ -138,8 +139,8 @@ void findPairsInCell( int numObjects,
//if (unsorted_indx2 < unsorted_indx) // check not colliding with self
if (unsorted_indx2 != unsorted_indx) // check not colliding with self
{
float4 min1 = pAABB[unsorted_indx2*2 + 0];
float4 max1 = pAABB[unsorted_indx2*2 + 1];
float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];
float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];
if(testAABBOverlap(min0, max0, min1, max1))
{
if (pairCount)
@@ -166,7 +167,8 @@ void findPairsInCell( int numObjects,
}
__kernel void kFindOverlappingPairs( int numObjects,
__global float4* pAABB,
__global float4* allpAABB,
__global const int* smallAabbMapping,
__global int2* pHash,
__global int* pCellStart,
__global float4* pParams ,
@@ -183,8 +185,8 @@ __kernel void kFindOverlappingPairs( int numObjects,
}
int2 sortedData = pHash[index];
int unsorted_indx = sortedData.y;
float4 bbMin = pAABB[unsorted_indx*2 + 0];
float4 bbMax = pAABB[unsorted_indx*2 + 1];
float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
float4 pos;
pos.x = (bbMin.x + bbMax.x) * 0.5f;
pos.y = (bbMin.y + bbMax.y) * 0.5f;
@@ -202,7 +204,7 @@ __kernel void kFindOverlappingPairs( int numObjects,
for(int x=-1; x<=1; x++)
{
gridPosB.x = gridPosA.x + x;
findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs);
findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);
}
}
}

View File

@@ -19,15 +19,15 @@ static const char* gridBroadphaseCL= \
" return gridPos;\n"
"}\n"
"// calculate grid hash value for each body using its AABB\n"
"__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )\n"
"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n"
"{\n"
" int index = get_global_id(0);\n"
" if(index >= numObjects)\n"
" {\n"
" return;\n"
" }\n"
" float4 bbMin = pAABB[index*2];\n"
" float4 bbMax = pAABB[index*2 + 1];\n"
" float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n"
" float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n"
" float4 pos;\n"
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
@@ -91,7 +91,8 @@ static const char* gridBroadphaseCL= \
" int index,\n"
" __global int2* pHash,\n"
" __global int* pCellStart,\n"
" __global float4* pAABB, \n"
" __global float4* allpAABB, \n"
" __global const int* smallAabbMapping,\n"
" __global float4* pParams,\n"
" volatile __global int* pairCount,\n"
" __global int4* pPairBuff2,\n"
@@ -110,8 +111,8 @@ static const char* gridBroadphaseCL= \
" // iterate over bodies in this cell\n"
" int2 sortedData = pHash[index];\n"
" int unsorted_indx = sortedData.y;\n"
" float4 min0 = pAABB[unsorted_indx*2 + 0]; \n"
" float4 max0 = pAABB[unsorted_indx*2 + 1];\n"
" float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n"
" float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
" int handleIndex = as_int(min0.w);\n"
" \n"
" int bucketEnd = bucketStart + maxBodiesPerCell;\n"
@@ -127,8 +128,8 @@ static const char* gridBroadphaseCL= \
" //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
" if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n"
" { \n"
" float4 min1 = pAABB[unsorted_indx2*2 + 0];\n"
" float4 max1 = pAABB[unsorted_indx2*2 + 1];\n"
" float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n"
" float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n"
" if(testAABBOverlap(min0, max0, min1, max1))\n"
" {\n"
" if (pairCount)\n"
@@ -154,7 +155,8 @@ static const char* gridBroadphaseCL= \
" }\n"
"}\n"
"__kernel void kFindOverlappingPairs( int numObjects,\n"
" __global float4* pAABB, \n"
" __global float4* allpAABB, \n"
" __global const int* smallAabbMapping,\n"
" __global int2* pHash, \n"
" __global int* pCellStart, \n"
" __global float4* pParams ,\n"
@@ -170,8 +172,8 @@ static const char* gridBroadphaseCL= \
" }\n"
" int2 sortedData = pHash[index];\n"
" int unsorted_indx = sortedData.y;\n"
" float4 bbMin = pAABB[unsorted_indx*2 + 0];\n"
" float4 bbMax = pAABB[unsorted_indx*2 + 1];\n"
" float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n"
" float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
" float4 pos;\n"
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
@@ -189,7 +191,7 @@ static const char* gridBroadphaseCL= \
" for(int x=-1; x<=1; x++) \n"
" {\n"
" gridPosB.x = gridPosA.x + x;\n"
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs);\n"
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n"
" }\n"
" }\n"
" }\n"

View File

@@ -63,25 +63,26 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL*
}
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)
{
int i = get_global_id(0);
if (i>=numUnsortedAabbs)
return;
int j = get_global_id(1);
if (j>=numSortedAabbs)
if (j>=numUnSortedAabbs2)
return;
__global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];
__global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];
if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))
if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))
{
int4 myPair;
int xIndex = unsortedAabbPtr[0].m_minIndices[3];
int yIndex = sortedAabbs[j].m_minIndices[3];
int yIndex = unsortedAabbPtr2[0].m_minIndices[3];
if (xIndex>yIndex)
{
int tmp = xIndex;
@@ -349,7 +350,7 @@ __kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btA
}
__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)
__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)
{
int i = get_global_id(0);
if (i>=numObjects)

View File

@@ -56,21 +56,22 @@ static const char* sapCL= \
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
" return overlap;\n"
"}\n"
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" if (i>=numUnsortedAabbs)\n"
" return;\n"
" int j = get_global_id(1);\n"
" if (j>=numSortedAabbs)\n"
" if (j>=numUnSortedAabbs2)\n"
" return;\n"
" __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n"
" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))\n"
" __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n"
" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n"
" {\n"
" int4 myPair;\n"
" \n"
" int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n"
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
" int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n"
" if (xIndex>yIndex)\n"
" {\n"
" int tmp = xIndex;\n"
@@ -306,7 +307,7 @@ static const char* sapCL= \
" destAabbs[i] = allAabbs[src];\n"
" destAabbs[i].m_maxIndices[3] = src;\n"
"}\n"
"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)\n"
"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n"
"{\n"
" int i = get_global_id(0);\n"
" if (i>=numObjects)\n"