Some performance improvements and fixes related to btVector3 being aligned on SPU.

btQuantizedBvh has a version number; its memory layout might be different now (due to the aligned btVector3).
Reorganized some data members of some classes to reduce the memory footprint.
This commit is contained in:
erwin.coumans
2008-10-29 02:44:50 +00:00
parent ce5df7cf47
commit 55b29e2355
10 changed files with 241 additions and 110 deletions

View File

@@ -19,11 +19,7 @@ subject to the following restrictions:
#include "BulletCollision/CollisionShapes/btCollisionShape.h" #include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h" #include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
/// Default constructor. The only member explicitly initialised here is the
/// contact-manifold pointer, which starts out null.
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm()
	: m_manifoldPtr(0) {}
void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
@@ -40,6 +36,9 @@ btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollision
#ifndef __SPU__ #ifndef __SPU__
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1) SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
:btCollisionAlgorithm(ci) :btCollisionAlgorithm(ci)
#ifdef USE_SEPDISTANCE_UTIL
,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())
#endif //USE_SEPDISTANCE_UTIL
{ {
m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1); m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
m_shapeType0 = body0->getCollisionShape()->getShapeType(); m_shapeType0 = body0->getCollisionShape()->getShapeType();

View File

@@ -20,9 +20,12 @@ subject to the following restrictions:
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h" #include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
#include "BulletCollision/BroadphaseCollision/btDispatcher.h" #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
#include "LinearMath/btTransformUtil.h"
class btPersistentManifold; class btPersistentManifold;
//#define USE_SEPDISTANCE_UTIL 1
/// SpuContactManifoldCollisionAlgorithm provides contact manifold and should be processed on SPU. /// SpuContactManifoldCollisionAlgorithm provides contact manifold and should be processed on SPU.
ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm
{ {
@@ -39,15 +42,18 @@ ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btColli
public: public:
virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut); virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
SpuContactManifoldCollisionAlgorithm();
SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1); SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
#ifdef USE_SEPDISTANCE_UTIL
btConvexSeparatingDistanceUtil m_sepDistance;
#endif //USE_SEPDISTANCE_UTIL
virtual ~SpuContactManifoldCollisionAlgorithm(); virtual ~SpuContactManifoldCollisionAlgorithm();

View File

@@ -126,7 +126,11 @@ public:
if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1)) if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1))
{ {
int so = sizeof(SpuContactManifoldCollisionAlgorithm); int so = sizeof(SpuContactManifoldCollisionAlgorithm);
void* mem = btAlignedAlloc(sizeof(SpuContactManifoldCollisionAlgorithm),16);//m_dispatcher->allocateCollisionAlgorithm(so); #ifdef ALLOCATE_SEPARATELY
void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
#else
void* mem = m_dispatcher->allocateCollisionAlgorithm(so);
#endif
collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1); collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1);
collisionPair.m_userInfo = (void*) 2; collisionPair.m_userInfo = (void*) 2;
} else } else

View File

@@ -21,8 +21,13 @@ subject to the following restrictions:
///Tuning value to optimize SPU utilization ///Tuning value to optimize SPU utilization
///Too small a value means task overhead is large compared to computation (too fine granularity) ///Too small a value means task overhead is large compared to computation (too fine granularity)
///Too big a value might leave some SPUs idle, while a few other SPUs are doing all the work. ///Too big a value might leave some SPUs idle, while a few other SPUs are doing all the work.
#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
class SpuCollisionTaskProcess; class SpuCollisionTaskProcess;

View File

@@ -16,6 +16,17 @@ subject to the following restrictions:
#include "SpuCollisionShapes.h" #include "SpuCollisionShapes.h"
#ifdef __SPU__
#include <spu_intrinsics.h>
/// Three-component dot product of two SPU float vectors.
///
/// Multiplies the vectors lane by lane, then accumulates the products of both
/// inputs rotated by 4 bytes and by 8 bytes (one and two float lanes). Lane 0
/// of the result therefore holds x0*x1 + y0*y1 + z0*z1. The other lanes hold
/// differently rotated sums and should not be treated as the dot product.
static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 )
{
	// Lane 0: x0*x1
	const vec_float4 productX = spu_mul( vec0, vec1 );

	// Lane 0: x0*x1 + y0*y1 (inputs rotated by one float lane)
	const vec_float4 productXY = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), productX );

	// Lane 0: x0*x1 + y0*y1 + z0*z1 (inputs rotated by two float lanes)
	return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), productXY );
}
#endif //__SPU__
btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex) btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex)
{ {
switch (shapeType) switch (shapeType)
@@ -170,7 +181,45 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons
{ {
//spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n"); //spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n");
#if defined (__SPU__)
vec_float4 v_distMax = {-FLT_MAX,0,0,0};
vec_int4 v_idxMax = {-999,0,0,0};
int v=0;
int numverts = convexVertexData->gNumConvexPoints;
btVector3* points = convexVertexData->gConvexPoints;
for(;v<(int)numverts-4;v+=4) {
vec_float4 p0 = vec_dot3(points[v ].get128(),localDir.get128());
vec_float4 p1 = vec_dot3(points[v+1].get128(),localDir.get128());
vec_float4 p2 = vec_dot3(points[v+2].get128(),localDir.get128());
vec_float4 p3 = vec_dot3(points[v+3].get128(),localDir.get128());
const vec_int4 i0 = {v ,0,0,0};
const vec_int4 i1 = {v+1,0,0,0};
const vec_int4 i2 = {v+2,0,0,0};
const vec_int4 i3 = {v+3,0,0,0};
vec_uint4 retGt01 = spu_cmpgt(p0,p1);
vec_float4 pmax01 = spu_sel(p1,p0,retGt01);
vec_int4 imax01 = spu_sel(i1,i0,retGt01);
vec_uint4 retGt23 = spu_cmpgt(p2,p3);
vec_float4 pmax23 = spu_sel(p3,p2,retGt23);
vec_int4 imax23 = spu_sel(i3,i2,retGt23);
vec_uint4 retGt0123 = spu_cmpgt(pmax01,pmax23);
vec_float4 pmax0123 = spu_sel(pmax23,pmax01,retGt0123);
vec_int4 imax0123 = spu_sel(imax23,imax01,retGt0123);
vec_uint4 retGtMax = spu_cmpgt(v_distMax,pmax0123);
v_distMax = spu_sel(pmax0123,v_distMax,retGtMax);
v_idxMax = spu_sel(imax0123,v_idxMax,retGtMax);
}
for(;v<(int)numverts;v++) {
vec_float4 p = vec_dot3(points[v].get128(),localDir.get128());
const vec_int4 i = {v,0,0,0};
vec_uint4 retGtMax = spu_cmpgt(v_distMax,p);
v_distMax = spu_sel(p,v_distMax,retGtMax);
v_idxMax = spu_sel(i,v_idxMax,retGtMax);
}
int ptIndex = spu_extract(v_idxMax,0);
const btVector3& supVec= points[ptIndex];
#else
btVector3* points = 0; btVector3* points = 0;
int numPoints = 0; int numPoints = 0;
@@ -179,7 +228,7 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons
// spu_printf("numPoints = %d\n",numPoints); // spu_printf("numPoints = %d\n",numPoints);
btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.)); int ptIndex = 0;
btScalar newDot,maxDot = btScalar(-1e30); btScalar newDot,maxDot = btScalar(-1e30);
btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ());
@@ -197,15 +246,18 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons
for (int i=0;i<numPoints;i++) for (int i=0;i<numPoints;i++)
{ {
btVector3 vtx = points[i];// * m_localScaling; const btVector3& vtx = points[i];// * m_localScaling;
newDot = vec.dot(vtx); newDot = vec.dot(vtx);
if (newDot > maxDot) if (newDot > maxDot)
{ {
maxDot = newDot; maxDot = newDot;
supVec = vtx; ptIndex = i;
} }
} }
const btVector3& supVec= points[ptIndex];
#endif
return btVector3(supVec.getX(),supVec.getY(),supVec.getZ()); return btVector3(supVec.getX(),supVec.getY(),supVec.getZ());
break; break;
@@ -223,7 +275,7 @@ btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, cons
} }
} }
void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform) void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform)
{ {
//calculate the aabb, given the types... //calculate the aabb, given the types...
switch (shapeType) switch (shapeType)
@@ -235,7 +287,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
float margin=convexShape->getMarginNV(); float margin=convexShape->getMarginNV();
btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
halfExtents += btVector3(margin,margin,margin); halfExtents += btVector3(margin,margin,margin);
btTransform& t = xform; const btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute(); btMatrix3x3 abs_b = t.getBasis().absolute();
btVector3 center = t.getOrigin(); btVector3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
@@ -258,7 +310,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
btScalar radius = convexShape->getRadius(); btScalar radius = convexShape->getRadius();
halfExtents[capsuleUpAxis] = radius + halfHeight; halfExtents[capsuleUpAxis] = radius + halfHeight;
#endif #endif
btTransform& t = xform; const btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute(); btMatrix3x3 abs_b = t.getBasis().absolute();
btVector3 center = t.getOrigin(); btVector3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents)); btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
@@ -271,7 +323,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
{ {
float radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX(); float radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
float margin = radius + convexShape->getMarginNV(); float margin = radius + convexShape->getMarginNV();
btTransform& t = xform; const btTransform& t = xform;
const btVector3& center = t.getOrigin(); const btVector3& center = t.getOrigin();
btVector3 extent(margin,margin,margin); btVector3 extent(margin,margin,margin);
aabbMin = center - extent; aabbMin = center - extent;
@@ -284,7 +336,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0); cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1)); cellDmaWaitTagStatusAll(DMA_MASK(1));
btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0;
btTransform& t = xform; const btTransform& t = xform;
btScalar margin = convexShape->getMarginNV(); btScalar margin = convexShape->getMarginNV();
localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin); localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin);
//spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ());
@@ -450,7 +502,9 @@ void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned
int curIndex = startNodeIndex; int curIndex = startNodeIndex;
int walkIterations = 0; int walkIterations = 0;
#ifdef BT_DEBUG
int subTreeSize = endNodeIndex - startNodeIndex; int subTreeSize = endNodeIndex - startNodeIndex;
#endif
int escapeIndex; int escapeIndex;
@@ -459,7 +513,7 @@ void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned
while (curIndex < endNodeIndex) while (curIndex < endNodeIndex)
{ {
//catch bugs in tree data //catch bugs in tree data
assert (walkIterations < subTreeSize); btAssert (walkIterations < subTreeSize);
walkIterations++; walkIterations++;
aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);

View File

@@ -64,7 +64,7 @@ struct bvhMeshShape_LocalStoreMemory
btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData);//, int *featureIndex) btVector3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData);//, int *featureIndex)
void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform); void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform);
void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape); void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape);
void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag); void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag);
void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag); void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag);
@@ -76,18 +76,27 @@ void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionSha
void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag); void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
#define USE_BRANCHFREE_TEST 1 #define USE_BRANCHFREE_TEST 1
#ifdef USE_BRANCHFREE_TEST #ifdef USE_BRANCHFREE_TEST
SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{ {
#if defined(__CELLOS_LV2__) && defined (__SPU__)
vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0};
vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0};
vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax);
return spu_extract(spu_gather(isGt),0)==0;
#else
return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])), & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
1, 0); 1, 0);
#endif
} }
#else #else
unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{ {
unsigned int overlap = 1; unsigned int overlap = 1;
overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap; overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap;

View File

@@ -118,6 +118,8 @@ struct CollisionTask_LocalStoreMemory
return (btCollisionObject*) gColObj1; return (btCollisionObject*) gColObj1;
} }
bool needsDmaPutContactManifoldAlgo;
DoubleBuffer<unsigned char, MIDPHASE_WORKUNIT_PAGE_SIZE> g_workUnitTaskBuffers; DoubleBuffer<unsigned char, MIDPHASE_WORKUNIT_PAGE_SIZE> g_workUnitTaskBuffers;
ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairs[SPU_BATCHSIZE_BROADPHASE_PAIRS]); ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairs[SPU_BATCHSIZE_BROADPHASE_PAIRS]);
@@ -125,7 +127,7 @@ struct CollisionTask_LocalStoreMemory
//SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; //SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo;
//ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo[sizeof(SpuContactManifoldCollisionAlgorithm)+128]); //ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo[sizeof(SpuContactManifoldCollisionAlgorithm)+128]);
SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo [sizeof(SpuContactManifoldCollisionAlgorithm)+16]);
SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm()
{ {
@@ -250,8 +252,8 @@ class spuNodeCallback : public btNodeOverlapCallback
public: public:
spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts) spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult& spuContacts)
: m_wuInput(wuInput), : m_wuInput(wuInput),
m_lsMemPtr(lsMemPtr), m_spuContacts(spuContacts),
m_spuContacts(spuContacts) m_lsMemPtr(lsMemPtr)
{ {
} }
@@ -346,8 +348,6 @@ public:
void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
{ {
//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1]; btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1];
//need the mesh interface, for access to triangle vertices //need the mesh interface, for access to triangle vertices
@@ -559,8 +559,17 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa
lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(), lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
wuInput->m_isSwapped); wuInput->m_isSwapped);
{
SpuGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&vsSolver,penetrationSolver); SpuGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&vsSolver,penetrationSolver);
gjk.getClosestPoints(cpInput,spuContacts);//,debugDraw); gjk.getClosestPoints(cpInput,spuContacts);//,debugDraw);
#ifdef USE_SEPDISTANCE_UTIL
btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold();
lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1);
lsMemPtr->needsDmaPutContactManifoldAlgo = true;
#endif //USE_SEPDISTANCE_UTIL
}
} }
@@ -581,11 +590,11 @@ SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collis
register ppu_address_t dmaPpuAddress2; register ppu_address_t dmaPpuAddress2;
dmaSize = sizeof(btCollisionObject);//btTransform); dmaSize = sizeof(btCollisionObject);//btTransform);
dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.gSpuContactManifoldAlgo.getCollisionObject0(); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0();
cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
dmaSize = sizeof(btCollisionObject);//btTransform); dmaSize = sizeof(btCollisionObject);//btTransform);
dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.gSpuContactManifoldAlgo.getCollisionObject1(); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1();
cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
@@ -601,8 +610,6 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
ppu_address_t collisionShape0Ptr, void* collisionShape0Loc, ppu_address_t collisionShape0Ptr, void* collisionShape0Loc,
ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true) ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true)
{ {
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)
&& btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
@@ -904,31 +911,11 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
cellDmaGet(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaGet(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
//snPause();
#ifdef DEBUG_SPU_COLLISION_DETECTION
//spu_printf("SPU: manifoldPtr: %llx",collisionPairInput->m_persistentManifoldPtr);
#endif //DEBUG_SPU_COLLISION_DETECTION
/*
dmaSize = sizeof(btBroadphaseProxy);
dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy0;
//stallingUnalignedDmaSmallGet(lsMem.gProxyPtr0, dmaPpuAddress2 , dmaSize);
void* tmpPtr = cellDmaSmallGetReadOnly(&lsMem.bufferProxy0, dmaPpuAddress2 , dmaSize,DMA_TAG(1), 0, 0);
lsMem.gProxyPtr0 = (btBroadphaseProxy*) tmpPtr;
dmaSize = sizeof(btBroadphaseProxy);
dmaPpuAddress2 = (ppu_address_t)pair.m_pProxy1;
tmpPtr = cellDmaSmallGetReadOnly(&lsMem.bufferProxy1, dmaPpuAddress2 , dmaSize,DMA_TAG(1), 0, 0);
lsMem.gProxyPtr1 = (btBroadphaseProxy*)tmpPtr;
*/
cellDmaWaitTagStatusAll(DMA_MASK(1)); cellDmaWaitTagStatusAll(DMA_MASK(1));
collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.gSpuContactManifoldAlgo.getContactManifoldPtr(); lsMem.needsDmaPutContactManifoldAlgo = false;
collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr();
collisionPairInput.m_isSwapped = false; collisionPairInput.m_isSwapped = false;
if (1) if (1)
@@ -948,10 +935,10 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr; dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr;
cellDmaGet(&lsMem.gPersistentManifold, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaGet(&lsMem.gPersistentManifold, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
collisionPairInput.m_shapeType0 = lsMem.gSpuContactManifoldAlgo.getShapeType0(); collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0();
collisionPairInput.m_shapeType1 = lsMem.gSpuContactManifoldAlgo.getShapeType1(); collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1();
collisionPairInput.m_collisionMargin0 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin0(); collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
collisionPairInput.m_collisionMargin1 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin1(); collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
@@ -965,11 +952,18 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
// Get the collision objects // Get the collision objects
dmaAndSetupCollisionObjects(collisionPairInput, lsMem); dmaAndSetupCollisionObjects(collisionPairInput, lsMem);
//if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive()) if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive())
{ {
bool boxbox = ((lsMem.gSpuContactManifoldAlgo.getShapeType0()==BOX_SHAPE_PROXYTYPE)&&
(lsMem.gSpuContactManifoldAlgo.getShapeType1()==BOX_SHAPE_PROXYTYPE)); lsMem.needsDmaPutContactManifoldAlgo = true;
if (boxbox && !gUseEpa)//for now use gUseEpa for this toggle #ifdef USE_SEPDISTANCE_UTIL
lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
#endif //USE_SEPDISTANCE_UTIL
bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&&
(lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE));
if (boxbox)// && !gUseEpa)//for now use gUseEpa for this toggle
{ {
//getVmVector3 //getVmVector3
//getBtVector3 //getBtVector3
@@ -977,11 +971,33 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
//getBtQuat //getBtQuat
//getVmMatrix3 //getVmMatrix3
//spu_printf("boxbox dist = %f\n",distance);
btPersistentManifold* spuManifold=&lsMem.gPersistentManifold;
btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
ppu_address_t manifoldAddress = (ppu_address_t)manifold;
spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
lsMem.getColObj1()->getWorldTransform(),
lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
collisionPairInput.m_isSwapped);
if (//!gUseEpa &&
#ifdef USE_SEPDISTANCE_UTIL
lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
#else
1
#endif
)
{
//getCollisionMargin0 //getCollisionMargin0
btScalar margin0 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin0(); btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
btScalar margin1 = lsMem.gSpuContactManifoldAlgo.getCollisionMargin1(); btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
btVector3 shapeDim0 = lsMem.gSpuContactManifoldAlgo.getShapeDimensions0()+btVector3(margin0,margin0,margin0); btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
btVector3 shapeDim1 = lsMem.gSpuContactManifoldAlgo.getShapeDimensions1()+btVector3(margin1,margin1,margin1); btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ()); Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()); Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
@@ -995,54 +1011,77 @@ void processCollisionTask(void* userPtr, void* lsMemPtr)
BoxPoint resultClosestBoxPointA; BoxPoint resultClosestBoxPointA;
BoxPoint resultClosestBoxPointB; BoxPoint resultClosestBoxPointB;
Vector3 resultNormal; Vector3 resultNormal;
float distanceThreshold = gContactBreakingThreshold;//0.0f;//FLT_MAX;//use epsilon? float distanceThreshold = FLT_MAX;//0.0f;//FLT_MAX;//use epsilon?
float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB,
boxA, transformA, boxB,transformB,distanceThreshold);
if(distance < distanceThreshold) float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB, boxA, transformA, boxB,transformB,distanceThreshold);
btVector3 normalInB = -getBtVector3(resultNormal);
if(distance < spuManifold->getContactBreakingThreshold())
{
btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
spuContacts.addContactPoint(
normalInB,
pointOnB,
distance);
}
lsMem.needsDmaPutContactManifoldAlgo = true;
#ifdef USE_SEPDISTANCE_UTIL
btScalar sepDist = distance+spuManifold->getContactBreakingThreshold();
lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
#endif //USE_SEPDISTANCE_UTIL
}
spuContacts.flush();
} else
{
if (
#ifdef USE_SEPDISTANCE_UTIL
lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
#else
1
#endif //USE_SEPDISTANCE_UTIL
)
{
handleCollisionPair(collisionPairInput, lsMem, spuContacts,
(ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
(ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
} else
{ {
//spu_printf("boxbox dist = %f\n",distance); //spu_printf("boxbox dist = %f\n",distance);
btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; btPersistentManifold* spuManifold=&lsMem.gPersistentManifold;
btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
ppu_address_t manifoldAddress = (ppu_address_t)manifold; ppu_address_t manifoldAddress = (ppu_address_t)manifold;
//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(), spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
lsMem.getColObj1()->getWorldTransform(), lsMem.getColObj1()->getWorldTransform(),
lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(), lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(), lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
collisionPairInput.m_isSwapped); collisionPairInput.m_isSwapped);
btVector3 normalInB = -getBtVector3(resultNormal);
btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
//transform pointOnB to worldspace?
spuContacts.addContactPoint(
normalInB,
pointOnB,
distance);
//normalInB,
//pointOnB+positionOffset,
//distance);
//SET_CONTACT_POINT(cp[0],distance,-testNormal,
// boxPointA,relTransformA,primIndexA,
// boxPointB,relTransformB,primIndexB);
spuContacts.flush(); spuContacts.flush();
} }
}
} else }
}
}
#ifdef USE_SEPDISTANCE_UTIL
if (lsMem.needsDmaPutContactManifoldAlgo)
{ {
handleCollisionPair(collisionPairInput, lsMem, spuContacts, dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
(ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
(ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape); cellDmaLargePut(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
} cellDmaWaitTagStatusAll(DMA_MASK(1));
}
}
} }
#endif //#ifdef USE_SEPDISTANCE_UTIL
} }
} }

View File

@@ -38,6 +38,7 @@ int gSpuNumGjkChecks = 0;
SpuGjkPairDetector::SpuGjkPairDetector(void* objectA,void* objectB,int shapeTypeA, int shapeTypeB, float marginA,float marginB,SpuVoronoiSimplexSolver* simplexSolver, const SpuConvexPenetrationDepthSolver* penetrationDepthSolver) SpuGjkPairDetector::SpuGjkPairDetector(void* objectA,void* objectB,int shapeTypeA, int shapeTypeB, float marginA,float marginB,SpuVoronoiSimplexSolver* simplexSolver, const SpuConvexPenetrationDepthSolver* penetrationDepthSolver)
:m_cachedSeparatingAxis(float(0.),float(0.),float(1.)), :m_cachedSeparatingAxis(float(0.),float(0.),float(1.)),
m_cachedSeparatingDistance(0.f),
m_penetrationDepthSolver(penetrationDepthSolver), m_penetrationDepthSolver(penetrationDepthSolver),
m_simplexSolver(simplexSolver), m_simplexSolver(simplexSolver),
m_minkowskiA(objectA), m_minkowskiA(objectA),
@@ -54,6 +55,8 @@ m_catchDegeneracies(1)
void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuContactResult& output) void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuContactResult& output)
{ {
m_cachedSeparatingDistance = 0.f;
btScalar distance=btScalar(0.); btScalar distance=btScalar(0.);
btVector3 normalInB(btScalar(0.),btScalar(0.),btScalar(0.)); btVector3 normalInB(btScalar(0.),btScalar(0.),btScalar(0.));
btVector3 pointOnA,pointOnB; btVector3 pointOnA,pointOnB;
@@ -294,6 +297,8 @@ void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuC
//spu_printf("distance\n"); //spu_printf("distance\n");
#endif //__SPU__ #endif //__SPU__
m_cachedSeparatingDistance = distance;
m_cachedSeparatingAxis = normalInB;
output.addContactPoint( output.addContactPoint(
normalInB, normalInB,

View File

@@ -34,6 +34,7 @@ class SpuGjkPairDetector
btVector3 m_cachedSeparatingAxis; btVector3 m_cachedSeparatingAxis;
btScalar m_cachedSeparatingDistance;
const SpuConvexPenetrationDepthSolver* m_penetrationDepthSolver; const SpuConvexPenetrationDepthSolver* m_penetrationDepthSolver;
SpuVoronoiSimplexSolver* m_simplexSolver; SpuVoronoiSimplexSolver* m_simplexSolver;
void* m_minkowskiA; void* m_minkowskiA;
@@ -74,6 +75,15 @@ public:
m_cachedSeparatingAxis = seperatingAxis; m_cachedSeparatingAxis = seperatingAxis;
} }
/// Read-only access to the cached separating axis (m_cachedSeparatingAxis).
/// The value is whatever was stored last: the axis the constructor initialises,
/// an axis assigned through the setter, or the normalInB that
/// getClosestPoints() records before it adds its contact point.
const btVector3& getCachedSeparatingAxis() const { return m_cachedSeparatingAxis; }
/// Read-only access to the cached separating distance (m_cachedSeparatingDistance).
/// The constructor and the start of getClosestPoints() both set it to 0;
/// getClosestPoints() may later overwrite it with the distance it computed,
/// just before adding the contact point.
btScalar getCachedSeparatingDistance() const { return m_cachedSeparatingDistance; }
void setPenetrationDepthSolver(SpuConvexPenetrationDepthSolver* penetrationDepthSolver) void setPenetrationDepthSolver(SpuConvexPenetrationDepthSolver* penetrationDepthSolver)
{ {
m_penetrationDepthSolver = penetrationDepthSolver; m_penetrationDepthSolver = penetrationDepthSolver;

View File

@@ -338,7 +338,7 @@ bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver&
} else { } else {
// could not separate shapes // could not separate shapes
btAssert (false); //btAssert (false);
} }
return res.m_hasResult; return res.m_hasResult;
} }