Added cellDmaLargeGetReadOnly/cellDmaGetReadOnly, which avoid the memcpy on non-SPU platforms.
Enabled them for triangle mesh AABB DMA transfers. TODO: check whether it is worth also modifying other parts of the code to avoid the memcpy into the 256 KB Task memory.
This commit is contained in:
@@ -5,6 +5,27 @@
|
|||||||
|
|
||||||
#define USE_MEMCPY 1
|
#define USE_MEMCPY 1
|
||||||
|
|
||||||
|
void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
|
||||||
|
{
|
||||||
|
#if defined (WIN32) || defined (__PPU__)
|
||||||
|
return (void*)(uint32_t)ea;
|
||||||
|
#else
|
||||||
|
cellDmaLargeGet(ls,ea,size,tag,tid,rid);
|
||||||
|
return ls;
|
||||||
|
#endif //WIN32
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
|
||||||
|
{
|
||||||
|
#if defined (WIN32) || defined (__PPU__)
|
||||||
|
return (void*)(uint32_t)ea;
|
||||||
|
#else
|
||||||
|
cellDmaGet(ls,ea,size,tag,tid,rid);
|
||||||
|
return ls;
|
||||||
|
#endif //WIN32
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
|
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
|
||||||
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
|
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "PlatformDefinitions.h"
|
#include "PlatformDefinitions.h"
|
||||||
|
#include "LinearMath/btScalar.h"
|
||||||
|
|
||||||
|
|
||||||
#ifdef __CELLOS_LV2__
|
#ifdef __CELLOS_LV2__
|
||||||
@@ -118,4 +118,9 @@
|
|||||||
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
|
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
|
||||||
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
|
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
|
||||||
|
|
||||||
|
|
||||||
|
///read-only variant of cellDmaLargeGet: returns a pointer to the requested data.
///When WIN32 or __PPU__ is defined the effective address ea is returned as a pointer and nothing is copied into ls;
///otherwise cellDmaLargeGet is called with the same arguments and ls is returned.
///Always read through the returned pointer, not through ls.
void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
||||||
|
///read-only variant of cellDmaGet: returns a pointer to the requested data.
///When WIN32 or __PPU__ is defined the effective address ea is returned as a pointer and nothing is copied into ls;
///otherwise cellDmaGet is called with the same arguments and ls is returned.
///Always read through the returned pointer, not through ls.
void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
||||||
|
|
||||||
|
|
||||||
#endif //FAKE_DMA_H
|
#endif //FAKE_DMA_H
|
||||||
|
|||||||
@@ -149,7 +149,8 @@ struct CollisionTask_LocalStoreMemory
|
|||||||
return (btOptimizedBvh*) gOptimizedBvh;
|
return (btOptimizedBvh*) gOptimizedBvh;
|
||||||
}
|
}
|
||||||
|
|
||||||
ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterface);
|
ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterfaceStorage);
|
||||||
|
btTriangleIndexVertexArray* gTriangleMeshInterfacePtr;
|
||||||
///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment
|
///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment
|
||||||
ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh);
|
ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh);
|
||||||
|
|
||||||
@@ -388,7 +389,7 @@ public:
|
|||||||
// spu_printf("SPU index2=%d ,",spuIndices[2]);
|
// spu_printf("SPU index2=%d ,",spuIndices[2]);
|
||||||
// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
|
// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
|
||||||
|
|
||||||
const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterface.getScaling();
|
const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterfacePtr->getScaling();
|
||||||
for (int j=2;btLikely( j>=0 );j--)
|
for (int j=2;btLikely( j>=0 );j--)
|
||||||
{
|
{
|
||||||
int graphicsindex = m_lsMemPtr->spuIndices[j];
|
int graphicsindex = m_lsMemPtr->spuIndices[j];
|
||||||
@@ -482,7 +483,7 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
|
|||||||
dmaPpuAddress2 = reinterpret_cast<uint32_t>(trimeshShape->getMeshInterface());
|
dmaPpuAddress2 = reinterpret_cast<uint32_t>(trimeshShape->getMeshInterface());
|
||||||
#endif
|
#endif
|
||||||
// spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
|
// spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
|
||||||
cellDmaGet(&lsMemPtr->gTriangleMeshInterface, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
|
lsMemPtr->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&lsMemPtr->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
|
||||||
//cellDmaWaitTagStatusAll(DMA_MASK(1));
|
//cellDmaWaitTagStatusAll(DMA_MASK(1));
|
||||||
|
|
||||||
|
|
||||||
@@ -611,7 +612,7 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
|
|||||||
BvhSubtreeInfoArray& subTrees = lsMemPtr->getOptimizedBvh()->getSubtreeInfoArray();
|
BvhSubtreeInfoArray& subTrees = lsMemPtr->getOptimizedBvh()->getSubtreeInfoArray();
|
||||||
|
|
||||||
spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts);
|
spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts);
|
||||||
IndexedMeshArray& indexArray = lsMemPtr->gTriangleMeshInterface.getIndexedMeshArray();
|
IndexedMeshArray& indexArray = lsMemPtr->gTriangleMeshInterfacePtr->getIndexedMeshArray();
|
||||||
//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
|
//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user