diff --git a/src/BulletMultiThreaded/SpuDoubleBuffer.h b/src/BulletMultiThreaded/SpuDoubleBuffer.h index 52d518d1a..875748c55 100644 --- a/src/BulletMultiThreaded/SpuDoubleBuffer.h +++ b/src/BulletMultiThreaded/SpuDoubleBuffer.h @@ -61,7 +61,7 @@ DoubleBuffer::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsig { m_dmaPending = true; m_dmaTag = tag; - cellDmaLargeGet(m_backBuffer, ea, numBytes, tag, 0, 0); + m_backBuffer = (T*)cellDmaLargeGetReadOnly(m_backBuffer, ea, numBytes, tag, 0, 0); } template diff --git a/src/BulletMultiThreaded/SpuFakeDma.cpp b/src/BulletMultiThreaded/SpuFakeDma.cpp index 8d20b9c3e..dc1e3c411 100644 --- a/src/BulletMultiThreaded/SpuFakeDma.cpp +++ b/src/BulletMultiThreaded/SpuFakeDma.cpp @@ -11,199 +11,199 @@ subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. -*/ - -#include "SpuFakeDma.h" -#include //for btAssert -//Disabling memcpy sometimes helps debugging DMA - -#define USE_MEMCPY 1 -#ifdef USE_MEMCPY - -#endif - - -void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) -{ - -#if defined (__CELLOS_LV2__) || defined (USE_LIBSPE2) - cellDmaLargeGet(ls,ea,size,tag,tid,rid); - return ls; -#else - return (void*)(uint32_t)ea; -#endif -} - -void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) -{ -#if defined (__SPU__) || defined (USE_LIBSPE2) - mfc_get(ls,ea,size,tag,0,0); - return ls; -#else - return (void*)(uint32_t)ea; -#endif -} - - - - -void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) -{ -#if defined (__SPU__) || defined (USE_LIBSPE2) - cellDmaGet(ls,ea,size,tag,tid,rid); - return ls; -#else - return (void*)(uint32_t)ea; -#endif -} - - -///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) -int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) -{ - - btAssert(size<32); - - ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); - - char* mainMem = (char*)ea; - char* localStore = (char*)ls; - uint32_t i; - - - ///make sure last 4 bits are the same, for cellDmaSmallGet - uint32_t last4BitsOffset = ea & 0x0f; - char* tmpTarget = tmpBuffer + last4BitsOffset; - -#if defined (__SPU__) || defined (USE_LIBSPE2) - - int remainingSize = size; - -//#define FORCE_cellDmaUnalignedGet 1 -#ifdef FORCE_cellDmaUnalignedGet - cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0); -#else - char* remainingTmpTarget = tmpTarget; - uint64_t remainingEa = ea; - - while (remainingSize) - { - switch (remainingSize) - { - case 1: - case 2: - case 4: - case 8: - case 16: - { - mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0); - remainingSize=0; - break; - } - default: - { - //spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize); - int actualSize = 0; - - if (remainingSize > 16) - actualSize = 16; - else - if (remainingSize >8) - actualSize=8; - else - if (remainingSize >4) - actualSize=4; - else - if (remainingSize >2) - actualSize=2; - mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0); - remainingSize-=actualSize; - remainingTmpTarget+=actualSize; - remainingEa += actualSize; - } - } - } -#endif//FORCE_cellDmaUnalignedGet - -#else - //copy into final destination -#ifdef USE_MEMCPY - memcpy(tmpTarget,mainMem,size); -#else - for ( i=0;i //for btAssert +//Disabling memcpy sometimes helps debugging DMA + +#define USE_MEMCPY 1 +#ifdef USE_MEMCPY + +#endif + + +void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ + +#if defined (__SPU__) || defined (USE_LIBSPE2) + cellDmaLargeGet(ls,ea,size,tag,tid,rid); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + +void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ +#if defined (__SPU__) || defined (USE_LIBSPE2) + mfc_get(ls,ea,size,tag,0,0); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + + + + +void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid) +{ +#if defined (__SPU__) || defined (USE_LIBSPE2) + cellDmaGet(ls,ea,size,tag,tid,rid); + return ls; +#else + return (void*)(uint32_t)ea; +#endif +} + + +///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) +int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) +{ + + btAssert(size<32); + + ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); + + char* mainMem = (char*)ea; + char* localStore = (char*)ls; + uint32_t i; + + + ///make sure last 4 bits are the same, for cellDmaSmallGet + uint32_t last4BitsOffset = ea & 0x0f; + char* tmpTarget = tmpBuffer + last4BitsOffset; + +#if defined (__SPU__) || defined (USE_LIBSPE2) + + int remainingSize = size; + +//#define FORCE_cellDmaUnalignedGet 1 +#ifdef FORCE_cellDmaUnalignedGet + cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0); +#else + char* remainingTmpTarget = tmpTarget; + uint64_t remainingEa = ea; + + while (remainingSize) + { + switch (remainingSize) + { + case 1: + case 2: + case 4: + case 8: + case 16: + { + mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0); + remainingSize=0; + break; + } + default: + { + //spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize); + int actualSize = 0; + + if (remainingSize > 16) + actualSize = 16; + else + if (remainingSize >8) + actualSize=8; + else + if (remainingSize >4) + actualSize=4; + else + if (remainingSize >2) + actualSize=2; + mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0); + remainingSize-=actualSize; + remainingTmpTarget+=actualSize; + remainingEa += actualSize; + } + } + } +#endif//FORCE_cellDmaUnalignedGet + +#else + //copy into final destination +#ifdef USE_MEMCPY + memcpy(tmpTarget,mainMem,size); +#else + for ( i=0;i g_workUnitTaskBuffers; - ATTRIBUTE_ALIGNED16(btBroadphasePair gBroadphasePairs[SPU_BATCHSIZE_BROADPHASE_PAIRS]); - - - //SpuContactManifoldCollisionAlgorithm gSpuContactManifoldAlgo; - //ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo[sizeof(SpuContactManifoldCollisionAlgorithm)+128]); - - ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgo [sizeof(SpuContactManifoldCollisionAlgorithm)+16]); - - SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() - { - return (SpuContactManifoldCollisionAlgorithm*)&gSpuContactManifoldAlgo; - - } - btPersistentManifold gPersistentManifold; - - CollisionShape_LocalStoreMemory gCollisionShapes[2]; - + ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgoBuffer [sizeof(SpuContactManifoldCollisionAlgorithm)+16]); + ATTRIBUTE_ALIGNED16(char gColObj0Buffer [sizeof(btCollisionObject)+16]); + ATTRIBUTE_ALIGNED16(char gColObj1Buffer [sizeof(btCollisionObject)+16]); ///we reserve 32bit integer indices, even though they might be 16bit ATTRIBUTE_ALIGNED16(int spuIndices[16]); - + btPersistentManifold gPersistentManifoldBuffer; + CollisionShape_LocalStoreMemory gCollisionShapes[2]; bvhMeshShape_LocalStoreMemory bvhShapeData; SpuConvexPolyhedronVertexData convexVertexData[2]; CompoundShape_LocalStoreMemory compoundShapeData[2]; + + ///The following pointers might either point into this local store memory, or to the original/other memory locations. + ///See SpuFakeDma for implementation of cellDmaSmallGetReadOnly. + btCollisionObject* m_lsColObj0Ptr; + btCollisionObject* m_lsColObj1Ptr; + btBroadphasePair* m_pairsPointer; + btPersistentManifold* m_lsManifoldPtr; + SpuContactManifoldCollisionAlgorithm* m_lsCollisionAlgorithmPtr; + + bool needsDmaPutContactManifoldAlgo; + + btCollisionObject* getColObj0() + { + return m_lsColObj0Ptr; + } + btCollisionObject* getColObj1() + { + return m_lsColObj1Ptr; + } + + + btBroadphasePair* getBroadphasePairPtr() + { + return m_pairsPointer; + } + + SpuContactManifoldCollisionAlgorithm* getlocalCollisionAlgorithm() + { + return m_lsCollisionAlgorithmPtr; + } + + btPersistentManifold* getContactManifoldPtr() + { + return m_lsManifoldPtr; + } }; @@ -560,12 +571,12 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1]; cpInput.m_transformA = wuInput->m_worldTransform0; cpInput.m_transformB = wuInput->m_worldTransform1; - float sumMargin = (marginA+marginB+lsMemPtr->gPersistentManifold.getContactBreakingThreshold()); + float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()); cpInput.m_maximumDistanceSquared = sumMargin * sumMargin; ppu_address_t manifoldAddress = (ppu_address_t)manifold; - btPersistentManifold* spuManifold=&lsMemPtr->gPersistentManifold; + btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr(); //spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped); spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(), lsMemPtr->getColObj1()->getWorldTransform(), @@ -605,11 +616,11 @@ SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collis dmaSize = sizeof(btCollisionObject);//btTransform); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0(); - cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + lsMem.m_lsColObj0Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj0Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); dmaSize = sizeof(btCollisionObject);//btTransform); dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1(); - cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + lsMem.m_lsColObj1Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj1Buffer, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); @@ -899,7 +910,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) { dmaSize = numPairs*sizeof(btBroadphasePair); dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair); - cellDmaGet(&lsMem.gBroadphasePairs, dmaPpuAddress , dmaSize, DMA_TAG(1), 0, 0); + lsMem.m_pairsPointer = (btBroadphasePair*)cellDmaGetReadOnly(&lsMem.gBroadphasePairsBuffer, dmaPpuAddress , dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -908,7 +919,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) //for each broadphase pair, do something - btBroadphasePair& pair = lsMem.gBroadphasePairs[p]; + btBroadphasePair& pair = lsMem.getBroadphasePairPtr()[p]; #ifdef DEBUG_SPU_COLLISION_DETECTION spu_printf("pair->m_userInfo = %d\n",pair.m_userInfo); spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm); @@ -920,7 +931,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) { dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm); dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm; - cellDmaGet(&lsMem.gSpuContactManifoldAlgo, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + lsMem.m_lsCollisionAlgorithmPtr = (SpuContactManifoldCollisionAlgorithm*)cellDmaGetReadOnly(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -944,7 +955,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) dmaSize = sizeof(btPersistentManifold); dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr; - cellDmaGet(&lsMem.gPersistentManifold, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + lsMem.m_lsManifoldPtr = (btPersistentManifold*)cellDmaGetReadOnly(&lsMem.gPersistentManifoldBuffer, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0(); collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1(); @@ -977,7 +988,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) if (boxbox) { //spu_printf("boxbox dist = %f\n",distance); - btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; + btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr(); btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; ppu_address_t manifoldAddress = (ppu_address_t)manifold; @@ -1107,7 +1118,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) } else { //spu_printf("boxbox dist = %f\n",distance); - btPersistentManifold* spuManifold=&lsMem.gPersistentManifold; + btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr(); btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr; ppu_address_t manifoldAddress = (ppu_address_t)manifold;