implement 'new contact reduction' kernel on CPU , for debugging

This commit is contained in:
erwin coumans
2013-12-14 10:54:25 -08:00
parent d24b1eaae9
commit 9ebef9552b
7 changed files with 340 additions and 63 deletions

View File

@@ -48,9 +48,9 @@ public:
arraySizeZ(10), arraySizeZ(10),
#else #else
arraySizeX(1), arraySizeX(10),
arraySizeY(10), arraySizeY(10),
arraySizeZ(1), arraySizeZ(10),
#endif #endif
m_useConcaveMesh(false), m_useConcaveMesh(false),
gapX(16.3), gapX(16.3),

View File

@@ -356,7 +356,6 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs
b3Float4 localCenter = b3MakeFloat4(0.f,0.f,0.f,0.f); b3Float4 localCenter = b3MakeFloat4(0.f,0.f,0.f,0.f);
b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
b3Float4 triMinAabb, triMaxAabb;
b3Aabb triAabb; b3Aabb triAabb;
triAabb.m_minVec = b3MakeFloat4(1e30f,1e30f,1e30f,0.f); triAabb.m_minVec = b3MakeFloat4(1e30f,1e30f,1e30f,0.f);
triAabb.m_maxVec = b3MakeFloat4(-1e30f,-1e30f,-1e30f,0.f); triAabb.m_maxVec = b3MakeFloat4(-1e30f,-1e30f,-1e30f,0.f);

View File

@@ -0,0 +1,196 @@
#ifndef B3_NEW_CONTACT_REDUCTION_H
#define B3_NEW_CONTACT_REDUCTION_H
#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
#define GET_NPOINTS(x) (x).m_worldNormalOnB.w
int b3ExtractManifoldSequentialGlobal(__global const b3Float4* p, int nPoints, b3Float4ConstArg nearNormal, b3Int4* contactIdx)
{
if( nPoints == 0 )
return 0;
if (nPoints <=4)
return nPoints;
if (nPoints >64)
nPoints = 64;
b3Float4 center = b3MakeFloat4(0,0,0,0);
{
for (int i=0;i<nPoints;i++)
center += p[i];
center /= (float)nPoints;
}
// sample 4 directions
b3Float4 aVector = p[0] - center;
b3Float4 u = b3Cross( nearNormal, aVector );
b3Float4 v = b3Cross( nearNormal, u );
u = b3Normalized( u );
v = b3Normalized( v );
//keep point with deepest penetration
float minW= FLT_MAX;
int minIndex=-1;
b3Float4 maxDots;
maxDots.x = FLT_MIN;
maxDots.y = FLT_MIN;
maxDots.z = FLT_MIN;
maxDots.w = FLT_MIN;
// idx, distance
for(int ie = 0; ie<nPoints; ie++ )
{
if (p[ie].w<minW)
{
minW = p[ie].w;
minIndex=ie;
}
float f;
b3Float4 r = p[ie]-center;
f = b3Dot( u, r );
if (f<maxDots.x)
{
maxDots.x = f;
contactIdx[0].x = ie;
}
f = b3Dot( -u, r );
if (f<maxDots.y)
{
maxDots.y = f;
contactIdx[0].y = ie;
}
f = b3Dot( v, r );
if (f<maxDots.z)
{
maxDots.z = f;
contactIdx[0].z = ie;
}
f = b3Dot( -v, r );
if (f<maxDots.w)
{
maxDots.w = f;
contactIdx[0].w = ie;
}
}
if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
{
//replace the first contact with minimum (todo: replace contact with least penetration)
contactIdx[0].x = minIndex;
}
return 4;
}
__kernel void b3NewContactReductionKernel( __global b3Int4* pairs,
__global const b3RigidBodyData_t* rigidBodies,
__global const b3Float4* separatingNormals,
__global const int* hasSeparatingAxis,
__global struct b3Contact4Data* globalContactsOut,
__global b3Int4* clippingFaces,
__global b3Float4* worldVertsB2,
volatile __global int* nGlobalContactsOut,
int vertexFaceCapacity,
int contactCapacity,
int numPairs,
int pairIndex
)
{
// int i = get_global_id(0);
//int pairIndex = i;
int i = pairIndex;
b3Int4 contactIdx;
contactIdx=b3MakeInt4(0,1,2,3);
if (i<numPairs)
{
if (hasSeparatingAxis[i])
{
int nPoints = clippingFaces[pairIndex].w;
if (nPoints>0)
{
__global b3Float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];
b3Float4 normal = -separatingNormals[i];
int nReducedContacts = b3ExtractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
int dstIdx;
dstIdx = b3AtomicInc( nGlobalContactsOut);
//#if 0
b3Assert(dstIdx < contactCapacity);
if (dstIdx < contactCapacity)
{
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
c->m_worldNormalOnB = -normal;
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
c->m_batchIdx = pairIndex;
int bodyA = pairs[pairIndex].x;
int bodyB = pairs[pairIndex].y;
pairs[pairIndex].w = dstIdx;
c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
c->m_childIndexA =-1;
c->m_childIndexB =-1;
switch (nReducedContacts)
{
case 4:
c->m_worldPosB[3] = pointsIn[contactIdx.w];
case 3:
c->m_worldPosB[2] = pointsIn[contactIdx.z];
case 2:
c->m_worldPosB[1] = pointsIn[contactIdx.y];
case 1:
c->m_worldPosB[0] = pointsIn[contactIdx.x];
default:
{
}
};
GET_NPOINTS(*c) = nReducedContacts;
}
//#endif
}// if (numContactsOut>0)
}// if (hasSeparatingAxis[i])
}// if (i<numPairs)
}
#endif

View File

@@ -17,8 +17,8 @@ bool findSeparatingAxisOnGpu = true;
bool bvhTraversalKernelGPU = true; bool bvhTraversalKernelGPU = true;
bool findConcaveSeparatingAxisKernelGPU = true; bool findConcaveSeparatingAxisKernelGPU = true;
bool clipFacesAndFindContactsCPU = false; bool clipFacesAndFindContactsCPU = false;//true;
bool reduceContactsOnGPU = true;//false;
///This file was written by Erwin Coumans ///This file was written by Erwin Coumans
@@ -75,6 +75,7 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h"
@@ -156,8 +157,8 @@ m_numCompoundPairsOut(m_context, m_queue)
m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
b3Assert(errNum==CL_SUCCESS); b3Assert(errNum==CL_SUCCESS);
m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
b3Assert(errNum==CL_SUCCESS); // b3Assert(errNum==CL_SUCCESS);
m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
"newContactReductionKernel",&errNum,satClipContactsProg); "newContactReductionKernel",&errNum,satClipContactsProg);
@@ -171,7 +172,7 @@ m_numCompoundPairsOut(m_context, m_queue)
m_newContactReductionKernel=0; m_newContactReductionKernel=0;
m_clipFacesAndFindContacts = 0; m_clipFacesAndFindContacts = 0;
m_clipHullHullConcaveConvexKernel = 0; m_clipHullHullConcaveConvexKernel = 0;
m_extractManifoldAndAddContactKernel = 0; // m_extractManifoldAndAddContactKernel = 0;
} }
if (1) if (1)
@@ -244,8 +245,8 @@ GpuSatCollision::~GpuSatCollision()
if (m_clipHullHullConcaveConvexKernel) if (m_clipHullHullConcaveConvexKernel)
clReleaseKernel(m_clipHullHullConcaveConvexKernel); clReleaseKernel(m_clipHullHullConcaveConvexKernel);
if (m_extractManifoldAndAddContactKernel) // if (m_extractManifoldAndAddContactKernel)
clReleaseKernel(m_extractManifoldAndAddContactKernel); // clReleaseKernel(m_extractManifoldAndAddContactKernel);
if (m_bvhTraversalKernel) if (m_bvhTraversalKernel)
clReleaseKernel(m_bvhTraversalKernel); clReleaseKernel(m_bvhTraversalKernel);
@@ -2776,8 +2777,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
if (!nPairs) if (!nPairs)
return; return;
#ifdef CHECK_ON_HOST #ifdef CHECK_ON_HOST
@@ -2937,7 +2936,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
hostContacts.resize(nContacts); hostContacts.resize(nContacts);
if (nContacts) if (nContacts)
{ {
@@ -2948,6 +2946,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
//printf("(HOST) nContacts = %d\n",nContacts);
#else #else
@@ -3018,7 +3017,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
clFinish(m_queue); clFinish(m_queue);
if (findSeparatingAxisOnGpu) if (findSeparatingAxisOnGpu)
{ {
#ifndef CHECK_ON_HOST
{ {
B3_PROFILE("findSeparatingAxisKernel"); B3_PROFILE("findSeparatingAxisKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
@@ -3043,12 +3041,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
} }
#endif
numCompoundPairs = m_numCompoundPairsOut.at(0); numCompoundPairs = m_numCompoundPairsOut.at(0);
bool useGpuFindCompoundPairs=true; bool useGpuFindCompoundPairs=true;
#ifndef CHECK_ON_HOST
if (useGpuFindCompoundPairs) if (useGpuFindCompoundPairs)
{ {
B3_PROFILE("findCompoundPairsKernel"); B3_PROFILE("findCompoundPairsKernel");
@@ -3163,7 +3159,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
if (numCompoundPairsOut) if (numCompoundPairsOut)
{ {
printf("numCompoundPairsOut=%d\n",numCompoundPairsOut); // printf("numCompoundPairsOut=%d\n",numCompoundPairsOut);
} }
} }
@@ -3172,7 +3168,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity);
numCompoundPairs = compoundPairCapacity; numCompoundPairs = compoundPairCapacity;
} }
#endif //CHECK_ON_HOST
@@ -3183,7 +3178,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
if (numCompoundPairs) if (numCompoundPairs)
{ {
#ifndef CHECK_ON_HOST
B3_PROFILE("processCompoundPairsPrimitivesKernel"); B3_PROFILE("processCompoundPairsPrimitivesKernel");
b3BufferInfoCL bInfo[] = b3BufferInfoCL bInfo[] =
{ {
@@ -3210,19 +3204,18 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
//printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts);
if (nContacts>maxContactCapacity) if (nContacts>maxContactCapacity)
{ {
b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
nContacts = maxContactCapacity; nContacts = maxContactCapacity;
} }
#endif
} }
if (numCompoundPairs) if (numCompoundPairs)
{ {
#ifndef CHECK_ON_HOST
B3_PROFILE("processCompoundPairsKernel"); B3_PROFILE("processCompoundPairsKernel");
b3BufferInfoCL bInfo[] = b3BufferInfoCL bInfo[] =
{ {
@@ -3247,7 +3240,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
int num = numCompoundPairs; int num = numCompoundPairs;
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
#endif
} }
@@ -3516,6 +3508,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
{ {
B3_PROFILE("findConcaveSphereContactsKernel"); B3_PROFILE("findConcaveSphereContactsKernel");
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
// printf("nContacts1 = %d\n",nContacts);
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( bodyBuf->getBufferCL(),true),
@@ -3540,6 +3533,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
//printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts);
//printf("nContacts2 = %d\n",nContacts);
if (nContacts >= maxContactCapacity) if (nContacts >= maxContactCapacity)
{ {
b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
@@ -3560,6 +3557,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
if (contactClippingOnGpu) if (contactClippingOnGpu)
{ {
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
// printf("nContacts3 = %d\n",nContacts);
//B3_PROFILE("clipHullHullKernel"); //B3_PROFILE("clipHullHullKernel");
@@ -3663,32 +3662,96 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
//contactReduction //contactReduction
{ {
contactOut->reserve(nContacts+numConcavePairs); int newContactCapacity=nContacts+numConcavePairs;
contactOut->reserve(newContactCapacity);
if (reduceContactsOnGPU)
{ {
B3_PROFILE("newContactReductionKernel"); // printf("newReservation = %d\n",newReservation);
b3BufferInfoCL bInfo[] =
{ {
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), B3_PROFILE("newContactReductionKernel");
b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL bInfo[] =
b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), {
b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),
b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()),
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( contactOut->getBufferCL()),
}; b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity); launcher.setConst(vertexFaceCapacity);
launcher.setConst( numConcavePairs ); launcher.setConst(newContactCapacity);
int num = numConcavePairs; launcher.setConst( numConcavePairs );
int num = numConcavePairs;
launcher.launch1D( num); launcher.launch1D( num);
}
nContacts = m_totalContactsOut.at(0);
contactOut->resize(nContacts);
//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
}else
{
volatile int nGlobalContactsOut = nContacts;
b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
bodyBuf->copyToHost(hostBodyBuf);
b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU;
m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
b3AlignedObjectArray<b3Contact4> hostContacts;
if (nContacts)
{
contactOut->copyToHost(hostContacts);
}
hostContacts.resize(newContactCapacity);
b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
worldVertsB2GPU.copyToHost(worldVertsB2CPU);
for (int i=0;i<numConcavePairs;i++)
{
b3NewContactReductionKernel( &triangleConvexPairsOutHost.at(0),
&hostBodyBuf.at(0),
&concaveSepNormalsHost.at(0),
&concaveHasSeparatingNormalsCPU.at(0),
&hostContacts.at(0),
&clippingFacesOutCPU.at(0),
&worldVertsB2CPU.at(0),
&nGlobalContactsOut,
vertexFaceCapacity,
newContactCapacity,
numConcavePairs,
i
);
}
nContacts = nGlobalContactsOut;
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
// nContacts = m_totalContactsOut.at(0);
//contactOut->resize(nContacts);
hostContacts.resize(nContacts);
//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
contactOut->copyFromHost(hostContacts);
} }
nContacts = m_totalContactsOut.at(0);
contactOut->resize(nContacts);
} }
//re-use? //re-use?
@@ -3698,6 +3761,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
{ {
B3_PROFILE("clipHullHullConcaveConvexKernel"); B3_PROFILE("clipHullHullConcaveConvexKernel");
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
int newContactCapacity = contactOut->capacity();
//printf("contactOut5 = %d\n",nContacts);
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( bodyBuf->getBufferCL(),true),
@@ -3714,12 +3780,14 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
}; };
b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(newContactCapacity);
launcher.setConst( numConcavePairs ); launcher.setConst( numConcavePairs );
int num = numConcavePairs; int num = numConcavePairs;
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
contactOut->resize(nContacts); contactOut->resize(nContacts);
//printf("contactOut6 = %d\n",nContacts);
b3AlignedObjectArray<b3Contact4> cpuContacts; b3AlignedObjectArray<b3Contact4> cpuContacts;
contactOut->copyToHost(cpuContacts); contactOut->copyToHost(cpuContacts);
} }
@@ -3736,8 +3804,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
breakupKernel = true; breakupKernel = true;
#endif #endif
#ifndef CHECK_ON_HOST #ifdef CHECK_ON_HOST
bool computeConvexConvex = false;
#else
bool computeConvexConvex = true;
#endif//CHECK_ON_HOST
if (computeConvexConvex)
{
if (breakupKernel) if (breakupKernel)
{ {
@@ -3815,7 +3888,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
// nContacts = m_totalContactsOut.at(0); // nContacts = m_totalContactsOut.at(0);
// printf("nContacts = %d\n",nContacts); // printf("nContacts = %d\n",nContacts);
contactOut->reserve(nContacts+nPairs); int newContactCapacity = nContacts+nPairs;
contactOut->reserve(newContactCapacity);
{ {
B3_PROFILE("newContactReductionKernel"); B3_PROFILE("newContactReductionKernel");
@@ -3834,6 +3908,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity); launcher.setConst(vertexFaceCapacity);
launcher.setConst(newContactCapacity);
launcher.setConst( nPairs ); launcher.setConst( nPairs );
int num = nPairs; int num = nPairs;
@@ -3885,9 +3960,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
contactOut->resize(nContacts); contactOut->resize(nContacts);
} }
} }
#endif //#endif //CHECK_ON_HOST
#ifndef CHECK_ON_HOST
int nCompoundsPairs = m_gpuCompoundPairs.size(); int nCompoundsPairs = m_gpuCompoundPairs.size();
@@ -3926,7 +3999,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
contactOut->resize(nContacts); contactOut->resize(nContacts);
}//if nCompoundsPairs }//if nCompoundsPairs
#endif //CHECK_ON_HOST }
}//contactClippingOnGpu }//contactClippingOnGpu
//printf("nContacts end = %d\n",nContacts);
//printf("frameCount = %d\n",frameCount++);
} }

View File

@@ -37,7 +37,7 @@ struct GpuSatCollision
cl_kernel m_findClippingFacesKernel; cl_kernel m_findClippingFacesKernel;
cl_kernel m_clipHullHullConcaveConvexKernel; cl_kernel m_clipHullHullConcaveConvexKernel;
cl_kernel m_extractManifoldAndAddContactKernel; // cl_kernel m_extractManifoldAndAddContactKernel;
cl_kernel m_newContactReductionKernel; cl_kernel m_newContactReductionKernel;
cl_kernel m_bvhTraversalKernel; cl_kernel m_bvhTraversalKernel;

View File

@@ -819,6 +819,7 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
__global const int* contactOffsets, __global const int* contactOffsets,
__global struct b3Contact4Data* restrict contactsOut, __global struct b3Contact4Data* restrict contactsOut,
counter32_t nContactsOut, counter32_t nContactsOut,
int contactCapacity,
int numPairs, int numPairs,
int pairIndex int pairIndex
) )
@@ -846,7 +847,7 @@ __kernel void extractManifoldAndAddContactKernel(__global const int4* pairs,
int dstIdx; int dstIdx;
AppendInc( nContactsOut, dstIdx ); AppendInc( nContactsOut, dstIdx );
//if ((dstIdx+nContacts) < capacity) if (dstIdx<contactCapacity)
{ {
__global struct b3Contact4Data* c = contactsOut + dstIdx; __global struct b3Contact4Data* c = contactsOut + dstIdx;
c->m_worldNormalOnB = -normal; c->m_worldNormalOnB = -normal;
@@ -1128,6 +1129,7 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs,
__global const int* hasSeparatingAxis, __global const int* hasSeparatingAxis,
__global struct b3Contact4Data* restrict globalContactsOut, __global struct b3Contact4Data* restrict globalContactsOut,
counter32_t nGlobalContactsOut, counter32_t nGlobalContactsOut,
int contactCapacity,
int numPairs) int numPairs)
{ {
@@ -1168,9 +1170,8 @@ __kernel void sphereSphereCollisionKernel( __global const int4* pairs,
contactPosB.w = dist; contactPosB.w = dist;
int dstIdx; int dstIdx;
AppendInc( nGlobalContactsOut, dstIdx ); AppendInc( nGlobalContactsOut, dstIdx );
if (dstIdx < contactCapacity)
if (dstIdx < numPairs)
{ {
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
c->m_worldNormalOnB = -normalOnSurfaceB; c->m_worldNormalOnB = -normalOnSurfaceB;
@@ -1203,6 +1204,7 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
__global const float4* separatingNormals, __global const float4* separatingNormals,
__global struct b3Contact4Data* restrict globalContactsOut, __global struct b3Contact4Data* restrict globalContactsOut,
counter32_t nGlobalContactsOut, counter32_t nGlobalContactsOut,
int contactCapacity,
int numConcavePairs) int numConcavePairs)
{ {
@@ -1403,7 +1405,7 @@ __kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,
int dstIdx; int dstIdx;
AppendInc( nGlobalContactsOut, dstIdx ); AppendInc( nGlobalContactsOut, dstIdx );
//if ((dstIdx+nReducedContacts) < capacity) if (dstIdx<contactCapacity)
{ {
__global struct b3Contact4Data* c = globalContactsOut+ dstIdx; __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;
c->m_worldNormalOnB = -normal; c->m_worldNormalOnB = -normal;
@@ -1787,6 +1789,7 @@ __kernel void newContactReductionKernel( __global int4* pairs,
__global float4* worldVertsB2, __global float4* worldVertsB2,
volatile __global int* nGlobalContactsOut, volatile __global int* nGlobalContactsOut,
int vertexFaceCapacity, int vertexFaceCapacity,
int contactCapacity,
int numPairs int numPairs
) )
{ {
@@ -1820,7 +1823,7 @@ __kernel void newContactReductionKernel( __global int4* pairs,
//#if 0 //#if 0
if (dstIdx < numPairs) if (dstIdx < contactCapacity)
{ {
__global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];

View File

@@ -1092,6 +1092,7 @@ static const char* satClipKernelsCL= \
" __global const int* contactOffsets,\n" " __global const int* contactOffsets,\n"
" __global struct b3Contact4Data* restrict contactsOut,\n" " __global struct b3Contact4Data* restrict contactsOut,\n"
" counter32_t nContactsOut,\n" " counter32_t nContactsOut,\n"
" int contactCapacity,\n"
" int numPairs,\n" " int numPairs,\n"
" int pairIndex\n" " int pairIndex\n"
" )\n" " )\n"
@@ -1116,7 +1117,7 @@ static const char* satClipKernelsCL= \
" int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" " int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n"
" int dstIdx;\n" " int dstIdx;\n"
" AppendInc( nContactsOut, dstIdx );\n" " AppendInc( nContactsOut, dstIdx );\n"
" //if ((dstIdx+nContacts) < capacity)\n" " if (dstIdx<contactCapacity)\n"
" {\n" " {\n"
" __global struct b3Contact4Data* c = contactsOut + dstIdx;\n" " __global struct b3Contact4Data* c = contactsOut + dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n" " c->m_worldNormalOnB = -normal;\n"
@@ -1367,6 +1368,7 @@ static const char* satClipKernelsCL= \
" __global const int* hasSeparatingAxis,\n" " __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n" " __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n" " counter32_t nGlobalContactsOut,\n"
" int contactCapacity,\n"
" int numPairs)\n" " int numPairs)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
@@ -1404,9 +1406,8 @@ static const char* satClipKernelsCL= \
" contactPosB.w = dist;\n" " contactPosB.w = dist;\n"
" \n" " \n"
" int dstIdx;\n" " int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n" " AppendInc( nGlobalContactsOut, dstIdx );\n"
" \n" " if (dstIdx < contactCapacity)\n"
" if (dstIdx < numPairs)\n"
" {\n" " {\n"
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
" c->m_worldNormalOnB = -normalOnSurfaceB;\n" " c->m_worldNormalOnB = -normalOnSurfaceB;\n"
@@ -1437,6 +1438,7 @@ static const char* satClipKernelsCL= \
" __global const float4* separatingNormals,\n" " __global const float4* separatingNormals,\n"
" __global struct b3Contact4Data* restrict globalContactsOut,\n" " __global struct b3Contact4Data* restrict globalContactsOut,\n"
" counter32_t nGlobalContactsOut,\n" " counter32_t nGlobalContactsOut,\n"
" int contactCapacity,\n"
" int numConcavePairs)\n" " int numConcavePairs)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
@@ -1618,7 +1620,7 @@ static const char* satClipKernelsCL= \
" \n" " \n"
" int dstIdx;\n" " int dstIdx;\n"
" AppendInc( nGlobalContactsOut, dstIdx );\n" " AppendInc( nGlobalContactsOut, dstIdx );\n"
" //if ((dstIdx+nReducedContacts) < capacity)\n" " if (dstIdx<contactCapacity)\n"
" {\n" " {\n"
" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" " __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
" c->m_worldNormalOnB = -normal;\n" " c->m_worldNormalOnB = -normal;\n"
@@ -1973,6 +1975,7 @@ static const char* satClipKernelsCL= \
" __global float4* worldVertsB2,\n" " __global float4* worldVertsB2,\n"
" volatile __global int* nGlobalContactsOut,\n" " volatile __global int* nGlobalContactsOut,\n"
" int vertexFaceCapacity,\n" " int vertexFaceCapacity,\n"
" int contactCapacity,\n"
" int numPairs\n" " int numPairs\n"
" )\n" " )\n"
"{\n" "{\n"
@@ -2005,7 +2008,7 @@ static const char* satClipKernelsCL= \
" \n" " \n"
"//#if 0\n" "//#if 0\n"
" \n" " \n"
" if (dstIdx < numPairs)\n" " if (dstIdx < contactCapacity)\n"
" {\n" " {\n"
" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
" c->m_worldNormalOnB = -normal;\n" " c->m_worldNormalOnB = -normal;\n"