Use int4 for the broadphase pair; this makes it possible to store persistent information in the future
(contact cache, applied impulse/warm-starting info, etc.)
This commit is contained in:
@@ -31,7 +31,7 @@ __kernel void moveObjectsKernel(__global float4* posOrnColors, int numObjects)
|
||||
colors[iGID] = (float4)(0,0,1,1);
|
||||
}
|
||||
|
||||
__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int2* pairs, int numPairs)
|
||||
__kernel void colorPairsKernel(__global float4* posOrnColors, int numObjects, __global const int4* pairs, int numPairs)
|
||||
{
|
||||
int iPairId = get_global_id(0);
|
||||
if (iPairId>=numPairs)
|
||||
@@ -48,8 +48,8 @@ __kernel void
|
||||
sineWaveKernel( __global float4* posOrnColors, __global float* pBodyTimes,const int numNodes)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
float timeStepPos = 0.00166666;
|
||||
float mAmplitude = 36.f;
|
||||
float timeStepPos = 0.000166666;
|
||||
float mAmplitude = 86.f;
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
pBodyTimes[nodeID] += timeStepPos;
|
||||
|
||||
@@ -16,10 +16,10 @@ subject to the following restrictions:
|
||||
#ifndef B3_OVERLAPPING_PAIR_H
|
||||
#define B3_OVERLAPPING_PAIR_H
|
||||
|
||||
#include "Bullet3Common/b3Int2.h"
|
||||
#include "Bullet3Common/b3Int4.h"
|
||||
|
||||
//typedef b3Int2 b3BroadphasePair;
|
||||
struct b3BroadphasePair : public b3Int2
|
||||
struct b3BroadphasePair : public b3Int4
|
||||
{
|
||||
explicit b3BroadphasePair(){}
|
||||
b3BroadphasePair(int xx,int yy)
|
||||
|
||||
@@ -192,29 +192,29 @@ void b3GpuSapBroadphase::init3dSap()
|
||||
}
|
||||
|
||||
|
||||
static bool b3PairCmp(const b3Int2& p, const b3Int2& q)
|
||||
static bool b3PairCmp(const b3Int4& p, const b3Int4& q)
|
||||
{
|
||||
return ((p.x<q.x) || ((p.x==q.x) && (p.y<q.y)));
|
||||
}
|
||||
|
||||
|
||||
static bool operator==(const b3Int2& a,const b3Int2& b)
|
||||
static bool operator==(const b3Int4& a,const b3Int4& b)
|
||||
{
|
||||
return a.x == b.x && a.y == b.y;
|
||||
};
|
||||
|
||||
static bool operator<(const b3Int2& a,const b3Int2& b)
|
||||
static bool operator<(const b3Int4& a,const b3Int4& b)
|
||||
{
|
||||
return a.x < b.x || (a.x == b.x && a.y < b.y);
|
||||
};
|
||||
|
||||
static bool operator>(const b3Int2& a,const b3Int2& b)
|
||||
static bool operator>(const b3Int4& a,const b3Int4& b)
|
||||
{
|
||||
return a.x > b.x || (a.x == b.x && a.y > b.y);
|
||||
};
|
||||
|
||||
b3AlignedObjectArray<b3Int2> addedHostPairs;
|
||||
b3AlignedObjectArray<b3Int2> removedHostPairs;
|
||||
b3AlignedObjectArray<b3Int4> addedHostPairs;
|
||||
b3AlignedObjectArray<b3Int4> removedHostPairs;
|
||||
|
||||
b3AlignedObjectArray<b3SapAabb> preAabbs;
|
||||
|
||||
@@ -247,7 +247,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
}
|
||||
|
||||
b3AlignedObjectArray<b3Int2> allPairs;
|
||||
b3AlignedObjectArray<b3Int4> allPairs;
|
||||
{
|
||||
B3_PROFILE("m_overlappingPairs.copyToHost");
|
||||
m_overlappingPairs.copyToHost(allPairs);
|
||||
@@ -268,7 +268,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
|
||||
|
||||
{
|
||||
b3Int2 newPair;
|
||||
b3Int4 newPair;
|
||||
newPair.x = 40;
|
||||
newPair.y = 53;
|
||||
int index = allPairs.findBinarySearch(newPair);
|
||||
@@ -587,7 +587,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
b3Int2 newPair;
|
||||
b3Int4 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
@@ -606,7 +606,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
|
||||
//remove a pair
|
||||
b3Int2 removedPair;
|
||||
b3Int4 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
@@ -664,7 +664,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
b3Int2 newPair;
|
||||
b3Int4 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
@@ -684,7 +684,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
//if (otherIndex2&1==0) -> min?
|
||||
//remove a pair
|
||||
b3Int2 removedPair;
|
||||
b3Int4 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
@@ -727,7 +727,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
}
|
||||
}
|
||||
|
||||
b3Int2 prevPair;
|
||||
b3Int4 prevPair;
|
||||
prevPair.x = -1;
|
||||
prevPair.y = -1;
|
||||
|
||||
@@ -739,7 +739,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
B3_PROFILE("actual removing");
|
||||
for (int i=0;i<removedHostPairs.size();i++)
|
||||
{
|
||||
b3Int2 removedPair = removedHostPairs[i];
|
||||
b3Int4 removedPair = removedHostPairs[i];
|
||||
if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y))
|
||||
{
|
||||
|
||||
@@ -785,13 +785,13 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
prevPair.y = -1;
|
||||
|
||||
int uniqueAddedPairs=0;
|
||||
b3AlignedObjectArray<b3Int2> actualAddedPairs;
|
||||
b3AlignedObjectArray<b3Int4> actualAddedPairs;
|
||||
|
||||
{
|
||||
B3_PROFILE("actual adding");
|
||||
for (int i=0;i<addedHostPairs.size();i++)
|
||||
{
|
||||
b3Int2 newPair = addedHostPairs[i];
|
||||
b3Int4 newPair = addedHostPairs[i];
|
||||
if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y))
|
||||
{
|
||||
//#ifdef _DEBUG
|
||||
@@ -828,9 +828,6 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
// printf("uniqueAddedPairs=%d\n", uniqueAddedPairs);
|
||||
|
||||
|
||||
|
||||
//b3AlignedObjectArray<b3Int2> addedHostPairs;
|
||||
//b3AlignedObjectArray<b3Int2> removedHostPairs;
|
||||
{
|
||||
B3_PROFILE("m_overlappingPairs.copyFromHost");
|
||||
m_overlappingPairs.copyFromHost(allPairs);
|
||||
@@ -905,7 +902,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
}
|
||||
}
|
||||
|
||||
b3AlignedObjectArray<b3Int2> hostPairs;
|
||||
b3AlignedObjectArray<b3Int4> hostPairs;
|
||||
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
||||
@@ -918,7 +915,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
||||
(b3Vector3&)m_smallAabbsCPU[j].m_min,(b3Vector3&)m_smallAabbsCPU[j].m_max))
|
||||
{
|
||||
b3Int2 pair;
|
||||
b3Int4 pair;
|
||||
int a = m_smallAabbsCPU[i].m_minIndices[3];
|
||||
int b = m_smallAabbsCPU[j].m_minIndices[3];
|
||||
if (a<=b)
|
||||
@@ -949,7 +946,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
||||
(b3Vector3&)m_largeAabbsCPU[j].m_min,(b3Vector3&)m_largeAabbsCPU[j].m_max))
|
||||
{
|
||||
b3Int2 pair;
|
||||
b3Int4 pair;
|
||||
int a = m_largeAabbsCPU[j].m_minIndices[3];
|
||||
int b = m_smallAabbsCPU[i].m_minIndices[3];
|
||||
if (a<=b)
|
||||
@@ -1276,8 +1273,8 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
|
||||
numPairs = pairCount.at(0);
|
||||
//printf("overlapping pairs = %d\n",numPairs);
|
||||
b3AlignedObjectArray<b3Int2> hostOoverlappingPairs;
|
||||
b3OpenCLArray<b3Int2> tmpGpuPairs(m_context,m_queue);
|
||||
b3AlignedObjectArray<b3Int4> hostOoverlappingPairs;
|
||||
b3OpenCLArray<b3Int4> tmpGpuPairs(m_context,m_queue);
|
||||
tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs );
|
||||
|
||||
tmpGpuPairs.copyToHost(hostOoverlappingPairs);
|
||||
|
||||
@@ -44,8 +44,8 @@ class b3GpuSapBroadphase
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
|
||||
|
||||
|
||||
b3OpenCLArray<b3Int2> m_addedHostPairsGPU;
|
||||
b3OpenCLArray<b3Int2> m_removedHostPairsGPU;
|
||||
b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
|
||||
b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
|
||||
b3OpenCLArray<int> m_addedCountGPU;
|
||||
b3OpenCLArray<int> m_removedCountGPU;
|
||||
|
||||
@@ -68,7 +68,7 @@ class b3GpuSapBroadphase
|
||||
b3OpenCLArray<b3SapAabb> m_largeAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_largeAabbsCPU;
|
||||
|
||||
b3OpenCLArray<b3Int2> m_overlappingPairs;
|
||||
b3OpenCLArray<b3Int4> m_overlappingPairs;
|
||||
|
||||
//temporary gpu work memory
|
||||
b3OpenCLArray<b3SortData> m_gpuSmallSortData;
|
||||
|
||||
@@ -62,7 +62,7 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL*
|
||||
}
|
||||
|
||||
|
||||
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
|
||||
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numUnsortedAabbs)
|
||||
@@ -74,10 +74,19 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
|
||||
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))
|
||||
{
|
||||
int2 myPair;
|
||||
int4 myPair;
|
||||
|
||||
myPair.x = unsortedAabbs[i].m_minIndices[3];
|
||||
myPair.y = sortedAabbs[j].m_minIndices[3];
|
||||
int xIndex = unsortedAabbs[i].m_minIndices[3];
|
||||
int yIndex = sortedAabbs[j].m_minIndices[3];
|
||||
if (xIndex>yIndex)
|
||||
{
|
||||
int tmp = xIndex;
|
||||
xIndex=yIndex;
|
||||
yIndex=tmp;
|
||||
}
|
||||
|
||||
myPair.x = xIndex;
|
||||
myPair.y = yIndex;
|
||||
|
||||
int curPair = atomic_inc (pairCount);
|
||||
if (curPair<maxPairs)
|
||||
@@ -87,7 +96,7 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numObjects)
|
||||
@@ -100,9 +109,10 @@ __kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, vola
|
||||
}
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
|
||||
{
|
||||
int2 myPair;
|
||||
int4 myPair;
|
||||
myPair.x = aabbs[i].m_minIndices[3];
|
||||
myPair.y = aabbs[j].m_minIndices[3];
|
||||
|
||||
int curPair = atomic_inc (pairCount);
|
||||
if (curPair<maxPairs)
|
||||
{
|
||||
@@ -115,7 +125,7 @@ __kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, vola
|
||||
|
||||
|
||||
|
||||
__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
int localId = get_local_id(0);
|
||||
@@ -163,7 +173,7 @@ __kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volat
|
||||
{
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
|
||||
{
|
||||
int2 myPair;
|
||||
int4 myPair;
|
||||
myPair.x = aabbs[i].m_minIndices[3];
|
||||
myPair.y = aabbs[j].m_minIndices[3];
|
||||
int curPair = atomic_inc (pairCount);
|
||||
@@ -179,7 +189,7 @@ __kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volat
|
||||
}
|
||||
|
||||
|
||||
__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
int localId = get_local_id(0);
|
||||
@@ -238,7 +248,7 @@ __kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aa
|
||||
{
|
||||
if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))
|
||||
{
|
||||
int2 myPair;
|
||||
int4 myPair;
|
||||
myPair.x = myAabb.m_minIndices[3];
|
||||
myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];
|
||||
int curPair = atomic_inc (pairCount);
|
||||
|
||||
@@ -74,8 +74,8 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
__global const b3SortData* sortedAxisGPU0prev,
|
||||
__global const b3SortData* sortedAxisGPU1prev,
|
||||
__global const b3SortData* sortedAxisGPU2prev,
|
||||
__global int2* addedHostPairsGPU,
|
||||
__global int2* removedHostPairsGPU,
|
||||
__global int4* addedHostPairsGPU,
|
||||
__global int4* removedHostPairsGPU,
|
||||
volatile __global int* addedHostPairsCount,
|
||||
volatile __global int* removedHostPairsCount,
|
||||
int maxCapacity,
|
||||
@@ -162,7 +162,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
int2 newPair;
|
||||
int4 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
@@ -191,7 +191,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
{
|
||||
|
||||
//remove a pair
|
||||
int2 removedPair;
|
||||
int4 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
@@ -256,7 +256,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
int2 newPair;
|
||||
int4 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
@@ -284,7 +284,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
{
|
||||
//if (otherIndex2&1==0) -> min?
|
||||
//remove a pair
|
||||
int2 removedPair;
|
||||
int4 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
@@ -318,7 +318,7 @@ __kernel void computePairsIncremental3dSapKernel( __global const uint2* object
|
||||
}
|
||||
|
||||
//computePairsKernelBatchWrite
|
||||
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
int localId = get_local_id(0);
|
||||
@@ -393,7 +393,11 @@ __kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __g
|
||||
{
|
||||
for (int p=0;p<curNumPairs;p++)
|
||||
{
|
||||
pairsOut[curPair+p] = myPairs[p]; //flush to main memory
|
||||
int4 tmpPair;
|
||||
tmpPair.x = myPairs[p].x;
|
||||
tmpPair.y = myPairs[p].y;
|
||||
|
||||
pairsOut[curPair+p] = tmpPair; //flush to main memory
|
||||
}
|
||||
}
|
||||
curNumPairs = 0;
|
||||
@@ -423,7 +427,11 @@ __kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __g
|
||||
{
|
||||
for (int p=0;p<curNumPairs;p++)
|
||||
{
|
||||
pairsOut[curPair+p] = myPairs[p]; //flush to main memory
|
||||
int4 tmpPair;
|
||||
tmpPair.x = myPairs[p].x;
|
||||
tmpPair.y = myPairs[p].y;
|
||||
|
||||
pairsOut[curPair+p] = tmpPair; //flush to main memory
|
||||
}
|
||||
}
|
||||
curNumPairs = 0;
|
||||
|
||||
@@ -76,8 +76,8 @@ static const char* sapFastCL= \
|
||||
" __global const b3SortData* sortedAxisGPU0prev,\n"
|
||||
" __global const b3SortData* sortedAxisGPU1prev,\n"
|
||||
" __global const b3SortData* sortedAxisGPU2prev,\n"
|
||||
" __global int2* addedHostPairsGPU,\n"
|
||||
" __global int2* removedHostPairsGPU,\n"
|
||||
" __global int4* addedHostPairsGPU,\n"
|
||||
" __global int4* removedHostPairsGPU,\n"
|
||||
" volatile __global int* addedHostPairsCount,\n"
|
||||
" volatile __global int* removedHostPairsCount,\n"
|
||||
" int maxCapacity,\n"
|
||||
@@ -164,7 +164,7 @@ static const char* sapFastCL= \
|
||||
" if (overlap && !prevOverlap)\n"
|
||||
" {\n"
|
||||
" //add a pair\n"
|
||||
" int2 newPair;\n"
|
||||
" int4 newPair;\n"
|
||||
" if (i<=otherIndex)\n"
|
||||
" {\n"
|
||||
" newPair.x = i;\n"
|
||||
@@ -193,7 +193,7 @@ static const char* sapFastCL= \
|
||||
" {\n"
|
||||
" \n"
|
||||
" //remove a pair\n"
|
||||
" int2 removedPair;\n"
|
||||
" int4 removedPair;\n"
|
||||
" if (i<=otherIndex)\n"
|
||||
" {\n"
|
||||
" removedPair.x = i;\n"
|
||||
@@ -258,7 +258,7 @@ static const char* sapFastCL= \
|
||||
" if (overlap && !prevOverlap)\n"
|
||||
" {\n"
|
||||
" //add a pair\n"
|
||||
" int2 newPair;\n"
|
||||
" int4 newPair;\n"
|
||||
" if (i<=otherIndex)\n"
|
||||
" {\n"
|
||||
" newPair.x = i;\n"
|
||||
@@ -286,7 +286,7 @@ static const char* sapFastCL= \
|
||||
" {\n"
|
||||
" //if (otherIndex2&1==0) -> min?\n"
|
||||
" //remove a pair\n"
|
||||
" int2 removedPair;\n"
|
||||
" int4 removedPair;\n"
|
||||
" if (i<=otherIndex)\n"
|
||||
" {\n"
|
||||
" removedPair.x = i;\n"
|
||||
@@ -320,7 +320,7 @@ static const char* sapFastCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"//computePairsKernelBatchWrite\n"
|
||||
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
@@ -395,7 +395,11 @@ static const char* sapFastCL= \
|
||||
" {\n"
|
||||
" for (int p=0;p<curNumPairs;p++)\n"
|
||||
" {\n"
|
||||
" pairsOut[curPair+p] = myPairs[p]; //flush to main memory\n"
|
||||
" int4 tmpPair;\n"
|
||||
" tmpPair.x = myPairs[p].x;\n"
|
||||
" tmpPair.y = myPairs[p].y;\n"
|
||||
" \n"
|
||||
" pairsOut[curPair+p] = tmpPair; //flush to main memory\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" curNumPairs = 0;\n"
|
||||
@@ -425,7 +429,11 @@ static const char* sapFastCL= \
|
||||
" {\n"
|
||||
" for (int p=0;p<curNumPairs;p++)\n"
|
||||
" {\n"
|
||||
" pairsOut[curPair+p] = myPairs[p]; //flush to main memory\n"
|
||||
" int4 tmpPair;\n"
|
||||
" tmpPair.x = myPairs[p].x;\n"
|
||||
" tmpPair.y = myPairs[p].y;\n"
|
||||
" \n"
|
||||
" pairsOut[curPair+p] = tmpPair; //flush to main memory\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" curNumPairs = 0;\n"
|
||||
|
||||
@@ -64,7 +64,7 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numUnsortedAabbs)\n"
|
||||
@@ -76,10 +76,19 @@ static const char* sapCL= \
|
||||
"\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))\n"
|
||||
" {\n"
|
||||
" int2 myPair;\n"
|
||||
" int4 myPair;\n"
|
||||
" \n"
|
||||
" myPair.x = unsortedAabbs[i].m_minIndices[3];\n"
|
||||
" myPair.y = sortedAabbs[j].m_minIndices[3];\n"
|
||||
" int xIndex = unsortedAabbs[i].m_minIndices[3];\n"
|
||||
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
|
||||
" if (xIndex>yIndex)\n"
|
||||
" {\n"
|
||||
" int tmp = xIndex;\n"
|
||||
" xIndex=yIndex;\n"
|
||||
" yIndex=tmp;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" myPair.x = xIndex;\n"
|
||||
" myPair.y = yIndex;\n"
|
||||
"\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
@@ -89,7 +98,7 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
@@ -102,9 +111,10 @@ static const char* sapCL= \
|
||||
" }\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
|
||||
" {\n"
|
||||
" int2 myPair;\n"
|
||||
" int4 myPair;\n"
|
||||
" myPair.x = aabbs[i].m_minIndices[3];\n"
|
||||
" myPair.y = aabbs[j].m_minIndices[3];\n"
|
||||
" \n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
" if (curPair<maxPairs)\n"
|
||||
" {\n"
|
||||
@@ -117,7 +127,7 @@ static const char* sapCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
@@ -165,7 +175,7 @@ static const char* sapCL= \
|
||||
" {\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
|
||||
" {\n"
|
||||
" int2 myPair;\n"
|
||||
" int4 myPair;\n"
|
||||
" myPair.x = aabbs[i].m_minIndices[3];\n"
|
||||
" myPair.y = aabbs[j].m_minIndices[3];\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
@@ -181,7 +191,7 @@ static const char* sapCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" int localId = get_local_id(0);\n"
|
||||
@@ -240,7 +250,7 @@ static const char* sapCL= \
|
||||
" {\n"
|
||||
" if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n"
|
||||
" {\n"
|
||||
" int2 myPair;\n"
|
||||
" int4 myPair;\n"
|
||||
" myPair.x = myAabb.m_minIndices[3];\n"
|
||||
" myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n"
|
||||
" int curPair = atomic_inc (pairCount);\n"
|
||||
|
||||
@@ -1681,7 +1681,7 @@ void computeContactConvexConvex(
|
||||
}
|
||||
|
||||
|
||||
void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int2>* pairs, int nPairs,
|
||||
void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int4>* pairs, int nPairs,
|
||||
const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
int maxContactCapacity,
|
||||
@@ -1719,7 +1719,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3I
|
||||
#ifdef CHECK_ON_HOST
|
||||
b3AlignedObjectArray<b3YetAnotherAabb> hostAabbs;
|
||||
clAabbsWS.copyToHost(hostAabbs);
|
||||
b3AlignedObjectArray<b3Int2> hostPairs;
|
||||
b3AlignedObjectArray<b3Int4> hostPairs;
|
||||
pairs->copyToHost(hostPairs);
|
||||
|
||||
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
|
||||
|
||||
@@ -75,7 +75,7 @@ struct GpuSatCollision
|
||||
virtual ~GpuSatCollision();
|
||||
|
||||
|
||||
void computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int2>* pairs, int nPairs,
|
||||
void computeConvexConvexContactsGPUSAT( const b3OpenCLArray<b3Int4>* pairs, int nPairs,
|
||||
const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
|
||||
b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
|
||||
int maxContactCapacity,
|
||||
|
||||
@@ -192,7 +192,7 @@ void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhA
|
||||
|
||||
|
||||
// work-in-progress
|
||||
__kernel void bvhTraversalKernel( __global const int2* pairs,
|
||||
__kernel void bvhTraversalKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global btAabbCL* aabbs,
|
||||
|
||||
@@ -194,7 +194,7 @@ static const char* bvhTraversalKernelCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void bvhTraversalKernel( __global const int2* pairs, \n"
|
||||
"__kernel void bvhTraversalKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global btAabbCL* aabbs,\n"
|
||||
|
||||
@@ -802,7 +802,7 @@ void computeContactPlaneSphere(int pairIndex,
|
||||
}
|
||||
|
||||
|
||||
__kernel void primitiveContactsKernel( __global const int2* pairs,
|
||||
__kernel void primitiveContactsKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const ConvexPolyhedronCL* convexShapes,
|
||||
|
||||
@@ -804,7 +804,7 @@ static const char* primitiveContactsKernelsCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void primitiveContactsKernel( __global const int2* pairs, \n"
|
||||
"__kernel void primitiveContactsKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
|
||||
@@ -756,7 +756,7 @@ __kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPair
|
||||
}
|
||||
|
||||
// work-in-progress
|
||||
__kernel void findCompoundPairsKernel( __global const int2* pairs,
|
||||
__kernel void findCompoundPairsKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const ConvexPolyhedronCL* convexShapes,
|
||||
@@ -938,7 +938,7 @@ __kernel void findCompoundPairsKernel( __global const int2* pairs,
|
||||
}
|
||||
|
||||
// work-in-progress
|
||||
__kernel void findSeparatingAxisKernel( __global const int2* pairs,
|
||||
__kernel void findSeparatingAxisKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const ConvexPolyhedronCL* convexShapes,
|
||||
|
||||
@@ -885,7 +885,7 @@ int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, i
|
||||
|
||||
|
||||
|
||||
__kernel void extractManifoldAndAddContactKernel(__global const int2* pairs,
|
||||
__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const float4* closestPointsWorld,
|
||||
__global const float4* separatingNormalsWorld,
|
||||
@@ -960,7 +960,7 @@ void trMul(float4 translationA, Quaternion orientationA,
|
||||
|
||||
|
||||
|
||||
__kernel void clipHullHullKernel( __global const int2* pairs,
|
||||
__kernel void clipHullHullKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const ConvexPolyhedronCL* convexShapes,
|
||||
@@ -1192,7 +1192,7 @@ __kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPai
|
||||
|
||||
|
||||
|
||||
__kernel void sphereSphereCollisionKernel( __global const int2* pairs,
|
||||
__kernel void sphereSphereCollisionKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const float4* separatingNormals,
|
||||
@@ -1677,7 +1677,7 @@ int clipFaces(__global float4* worldVertsA1,
|
||||
|
||||
|
||||
|
||||
__kernel void findClippingFacesKernel( __global const int2* pairs,
|
||||
__kernel void findClippingFacesKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const btCollidableGpu* collidables,
|
||||
__global const ConvexPolyhedronCL* convexShapes,
|
||||
@@ -1740,7 +1740,7 @@ __kernel void findClippingFacesKernel( __global const int2* pairs,
|
||||
|
||||
|
||||
|
||||
__kernel void clipFacesAndContactReductionKernel( __global const int2* pairs,
|
||||
__kernel void clipFacesAndContactReductionKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
@@ -1853,7 +1853,7 @@ __kernel void clipFacesAndContactReductionKernel( __global const int2* pairs,
|
||||
|
||||
|
||||
|
||||
__kernel void newContactReductionKernel( __global const int2* pairs,
|
||||
__kernel void newContactReductionKernel( __global const int4* pairs,
|
||||
__global const BodyData* rigidBodies,
|
||||
__global const float4* separatingNormals,
|
||||
__global const int* hasSeparatingAxis,
|
||||
|
||||
@@ -887,7 +887,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void extractManifoldAndAddContactKernel(__global const int2* pairs, \n"
|
||||
"__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const float4* closestPointsWorld,\n"
|
||||
" __global const float4* separatingNormalsWorld,\n"
|
||||
@@ -962,7 +962,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void clipHullHullKernel( __global const int2* pairs, \n"
|
||||
"__kernel void clipHullHullKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
@@ -1194,7 +1194,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void sphereSphereCollisionKernel( __global const int2* pairs, \n"
|
||||
"__kernel void sphereSphereCollisionKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
@@ -1679,7 +1679,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void findClippingFacesKernel( __global const int2* pairs,\n"
|
||||
"__kernel void findClippingFacesKernel( __global const int4* pairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes,\n"
|
||||
@@ -1742,7 +1742,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void clipFacesAndContactReductionKernel( __global const int2* pairs,\n"
|
||||
"__kernel void clipFacesAndContactReductionKernel( __global const int4* pairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global const int* hasSeparatingAxis,\n"
|
||||
@@ -1855,7 +1855,7 @@ static const char* satClipKernelsCL= \
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel void newContactReductionKernel( __global const int2* pairs,\n"
|
||||
"__kernel void newContactReductionKernel( __global const int4* pairs,\n"
|
||||
" __global const BodyData* rigidBodies,\n"
|
||||
" __global const float4* separatingNormals,\n"
|
||||
" __global const int* hasSeparatingAxis,\n"
|
||||
|
||||
@@ -758,7 +758,7 @@ static const char* satKernelsCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findCompoundPairsKernel( __global const int2* pairs, \n"
|
||||
"__kernel void findCompoundPairsKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
@@ -940,7 +940,7 @@ static const char* satKernelsCL= \
|
||||
"}\n"
|
||||
"\n"
|
||||
"// work-in-progress\n"
|
||||
"__kernel void findSeparatingAxisKernel( __global const int2* pairs, \n"
|
||||
"__kernel void findSeparatingAxisKernel( __global const int4* pairs, \n"
|
||||
" __global const BodyData* rigidBodies, \n"
|
||||
" __global const btCollidableGpu* collidables,\n"
|
||||
" __global const ConvexPolyhedronCL* convexShapes, \n"
|
||||
|
||||
@@ -727,7 +727,7 @@ void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
|
||||
int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity;
|
||||
int numTriConvexPairsOut=0;
|
||||
|
||||
b3OpenCLArray<b3Int2> broadphasePairsGPU(m_context,m_queue);
|
||||
b3OpenCLArray<b3Int4> broadphasePairsGPU(m_context,m_queue);
|
||||
broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs);
|
||||
b3OpenCLArray<b3YetAnotherAabb> clAabbArray(this->m_context,this->m_queue);
|
||||
clAabbArray.setFromOpenCLBuffer(aabbsWS,numObjects);
|
||||
|
||||
Reference in New Issue
Block a user