Use int4 for broadphase pairs; this allows storing persistent information in the future

(contact cache, applied impulse/warm-starting info, etc.)
This commit is contained in:
erwincoumans
2013-07-31 09:58:15 -07:00
parent 338118b3c6
commit 7992ff816b
19 changed files with 127 additions and 94 deletions

View File

@@ -62,7 +62,7 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL*
}
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
{
int i = get_global_id(0);
if (i>=numUnsortedAabbs)
@@ -74,10 +74,19 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))
{
int2 myPair;
int4 myPair;
myPair.x = unsortedAabbs[i].m_minIndices[3];
myPair.y = sortedAabbs[j].m_minIndices[3];
int xIndex = unsortedAabbs[i].m_minIndices[3];
int yIndex = sortedAabbs[j].m_minIndices[3];
if (xIndex>yIndex)
{
int tmp = xIndex;
xIndex=yIndex;
yIndex=tmp;
}
myPair.x = xIndex;
myPair.y = yIndex;
int curPair = atomic_inc (pairCount);
if (curPair<maxPairs)
@@ -87,7 +96,7 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
}
}
__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
{
int i = get_global_id(0);
if (i>=numObjects)
@@ -100,9 +109,10 @@ __kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, vola
}
if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
{
int2 myPair;
int4 myPair;
myPair.x = aabbs[i].m_minIndices[3];
myPair.y = aabbs[j].m_minIndices[3];
int curPair = atomic_inc (pairCount);
if (curPair<maxPairs)
{
@@ -115,7 +125,7 @@ __kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, vola
__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
{
int i = get_global_id(0);
int localId = get_local_id(0);
@@ -163,7 +173,7 @@ __kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volat
{
if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
{
int2 myPair;
int4 myPair;
myPair.x = aabbs[i].m_minIndices[3];
myPair.y = aabbs[j].m_minIndices[3];
int curPair = atomic_inc (pairCount);
@@ -179,7 +189,7 @@ __kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volat
}
__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
{
int i = get_global_id(0);
int localId = get_local_id(0);
@@ -238,7 +248,7 @@ __kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aa
{
if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))
{
int2 myPair;
int4 myPair;
myPair.x = myAabb.m_minIndices[3];
myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];
int curPair = atomic_inc (pairCount);

View File

@@ -74,8 +74,8 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
__global const b3SortData* sortedAxisGPU0prev,
__global const b3SortData* sortedAxisGPU1prev,
__global const b3SortData* sortedAxisGPU2prev,
__global int2* addedHostPairsGPU,
__global int2* removedHostPairsGPU,
__global int4* addedHostPairsGPU,
__global int4* removedHostPairsGPU,
volatile __global int* addedHostPairsCount,
volatile __global int* removedHostPairsCount,
int maxCapacity,
@@ -162,7 +162,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
if (overlap && !prevOverlap)
{
//add a pair
int2 newPair;
int4 newPair;
if (i<=otherIndex)
{
newPair.x = i;
@@ -191,7 +191,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
{
//remove a pair
int2 removedPair;
int4 removedPair;
if (i<=otherIndex)
{
removedPair.x = i;
@@ -256,7 +256,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
if (overlap && !prevOverlap)
{
//add a pair
int2 newPair;
int4 newPair;
if (i<=otherIndex)
{
newPair.x = i;
@@ -284,7 +284,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
{
//if (otherIndex2&1==0) -> min?
//remove a pair
int2 removedPair;
int4 removedPair;
if (i<=otherIndex)
{
removedPair.x = i;
@@ -318,7 +318,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
}
//computePairsKernelBatchWrite
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
{
int i = get_global_id(0);
int localId = get_local_id(0);
@@ -393,7 +393,11 @@ __kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __g
{
for (int p=0;p<curNumPairs;p++)
{
pairsOut[curPair+p] = myPairs[p]; //flush to main memory
int4 tmpPair;
tmpPair.x = myPairs[p].x;
tmpPair.y = myPairs[p].y;
pairsOut[curPair+p] = tmpPair; //flush to main memory
}
}
curNumPairs = 0;
@@ -423,7 +427,11 @@ __kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __g
{
for (int p=0;p<curNumPairs;p++)
{
pairsOut[curPair+p] = myPairs[p]; //flush to main memory
int4 tmpPair;
tmpPair.x = myPairs[p].x;
tmpPair.y = myPairs[p].y;
pairsOut[curPair+p] = tmpPair; //flush to main memory
}
}
curNumPairs = 0;

View File

@@ -76,8 +76,8 @@ static const char* sapFastCL= \
" __global const b3SortData* sortedAxisGPU0prev,\n"
" __global const b3SortData* sortedAxisGPU1prev,\n"
" __global const b3SortData* sortedAxisGPU2prev,\n"
" __global int2* addedHostPairsGPU,\n"
" __global int2* removedHostPairsGPU,\n"
" __global int4* addedHostPairsGPU,\n"
" __global int4* removedHostPairsGPU,\n"
" volatile __global int* addedHostPairsCount,\n"
" volatile __global int* removedHostPairsCount,\n"
" int maxCapacity,\n"
@@ -164,7 +164,7 @@ static const char* sapFastCL= \
" if (overlap && !prevOverlap)\n"
" {\n"
" //add a pair\n"
" int2 newPair;\n"
" int4 newPair;\n"
" if (i<=otherIndex)\n"
" {\n"
" newPair.x = i;\n"
@@ -193,7 +193,7 @@ static const char* sapFastCL= \
" {\n"
" \n"
" //remove a pair\n"
" int2 removedPair;\n"
" int4 removedPair;\n"
" if (i<=otherIndex)\n"
" {\n"
" removedPair.x = i;\n"
@@ -258,7 +258,7 @@ static const char* sapFastCL= \
" if (overlap && !prevOverlap)\n"
" {\n"
" //add a pair\n"
" int2 newPair;\n"
" int4 newPair;\n"
" if (i<=otherIndex)\n"
" {\n"
" newPair.x = i;\n"
@@ -286,7 +286,7 @@ static const char* sapFastCL= \
" {\n"
" //if (otherIndex2&1==0) -> min?\n"
" //remove a pair\n"
" int2 removedPair;\n"
" int4 removedPair;\n"
" if (i<=otherIndex)\n"
" {\n"
" removedPair.x = i;\n"
@@ -320,7 +320,7 @@ static const char* sapFastCL= \
"}\n"
"\n"
"//computePairsKernelBatchWrite\n"
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" int localId = get_local_id(0);\n"
@@ -395,7 +395,11 @@ static const char* sapFastCL= \
" {\n"
" for (int p=0;p<curNumPairs;p++)\n"
" {\n"
" pairsOut[curPair+p] = myPairs[p]; //flush to main memory\n"
" int4 tmpPair;\n"
" tmpPair.x = myPairs[p].x;\n"
" tmpPair.y = myPairs[p].y;\n"
" \n"
" pairsOut[curPair+p] = tmpPair; //flush to main memory\n"
" }\n"
" }\n"
" curNumPairs = 0;\n"
@@ -425,7 +429,11 @@ static const char* sapFastCL= \
" {\n"
" for (int p=0;p<curNumPairs;p++)\n"
" {\n"
" pairsOut[curPair+p] = myPairs[p]; //flush to main memory\n"
" int4 tmpPair;\n"
" tmpPair.x = myPairs[p].x;\n"
" tmpPair.y = myPairs[p].y;\n"
" \n"
" pairsOut[curPair+p] = tmpPair; //flush to main memory\n"
" }\n"
" }\n"
" curNumPairs = 0;\n"

View File

@@ -64,7 +64,7 @@ static const char* sapCL= \
"}\n"
"\n"
"\n"
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" if (i>=numUnsortedAabbs)\n"
@@ -76,10 +76,19 @@ static const char* sapCL= \
"\n"
" if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))\n"
" {\n"
" int2 myPair;\n"
" int4 myPair;\n"
" \n"
" myPair.x = unsortedAabbs[i].m_minIndices[3];\n"
" myPair.y = sortedAabbs[j].m_minIndices[3];\n"
" int xIndex = unsortedAabbs[i].m_minIndices[3];\n"
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
" if (xIndex>yIndex)\n"
" {\n"
" int tmp = xIndex;\n"
" xIndex=yIndex;\n"
" yIndex=tmp;\n"
" }\n"
" \n"
" myPair.x = xIndex;\n"
" myPair.y = yIndex;\n"
"\n"
" int curPair = atomic_inc (pairCount);\n"
" if (curPair<maxPairs)\n"
@@ -89,7 +98,7 @@ static const char* sapCL= \
" }\n"
"}\n"
"\n"
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" if (i>=numObjects)\n"
@@ -102,9 +111,10 @@ static const char* sapCL= \
" }\n"
" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
" {\n"
" int2 myPair;\n"
" int4 myPair;\n"
" myPair.x = aabbs[i].m_minIndices[3];\n"
" myPair.y = aabbs[j].m_minIndices[3];\n"
" \n"
" int curPair = atomic_inc (pairCount);\n"
" if (curPair<maxPairs)\n"
" {\n"
@@ -117,7 +127,7 @@ static const char* sapCL= \
"\n"
"\n"
"\n"
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" int localId = get_local_id(0);\n"
@@ -165,7 +175,7 @@ static const char* sapCL= \
" {\n"
" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
" {\n"
" int2 myPair;\n"
" int4 myPair;\n"
" myPair.x = aabbs[i].m_minIndices[3];\n"
" myPair.y = aabbs[j].m_minIndices[3];\n"
" int curPair = atomic_inc (pairCount);\n"
@@ -181,7 +191,7 @@ static const char* sapCL= \
"}\n"
"\n"
"\n"
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
"{\n"
" int i = get_global_id(0);\n"
" int localId = get_local_id(0);\n"
@@ -240,7 +250,7 @@ static const char* sapCL= \
" {\n"
" if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n"
" {\n"
" int2 myPair;\n"
" int4 myPair;\n"
" myPair.x = myAabb.m_minIndices[3];\n"
" myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n"
" int curPair = atomic_inc (pairCount);\n"