Added cellDmaLargeGetReadOnly/cellDmaGetReadOnly, which avoid the memcpy on non-SPU platforms.

Enabled them for the triangle mesh aabb DMA transfers. TODO: check whether it is worth also modifying other parts of the code to avoid the memcpy into the 256 KB Task memory.
2007-10-16 23:58:26 +00:00
parent 239d88985f
commit 830c096e10
3 changed files with 32 additions and 5 deletions
--- a/Extras/BulletMultiThreaded/SpuFakeDma.cpp
+++ b/Extras/BulletMultiThreaded/SpuFakeDma.cpp
@@ -5,6 +5,27 @@

 #define USE_MEMCPY 1

+///Read-only variant of cellDmaLargeGet: on WIN32/__PPU__ builds nothing is copied and the effective address 'ea' itself is returned as a pointer ('ls' is left untouched).
+///On all other builds the data is fetched into 'ls' with cellDmaLargeGet and 'ls' is returned. Only read through the returned pointer, it may alias the original data.
+void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+#if defined (WIN32) || defined (__PPU__)
+	return (void*)(uintptr_t)ea;	//pointer-sized cast: going through uint32_t dropped the upper address bits on 64-bit hosts
+#else
+	cellDmaLargeGet(ls,ea,size,tag,tid,rid);
+	return ls;
+#endif //WIN32
+}
+///Read-only variant of cellDmaGet: returns 'ea' itself as a pointer on WIN32/__PPU__ builds (no copy, 'ls' untouched), otherwise fetches into 'ls' with cellDmaGet and returns 'ls'. Only read through the result.
+void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+#if defined (WIN32) || defined (__PPU__)
+	return (void*)(uintptr_t)ea;	//pointer-sized cast: going through uint32_t dropped the upper address bits on 64-bit hosts
+#else
+	cellDmaGet(ls,ea,size,tag,tid,rid);
+	return ls;
+#endif //WIN32
+}

 ///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
 int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
--- a/Extras/BulletMultiThreaded/SpuFakeDma.h
+++ b/Extras/BulletMultiThreaded/SpuFakeDma.h
@@ -4,7 +4,7 @@


 #include "PlatformDefinitions.h"
-
+#include "LinearMath/btScalar.h"


 #ifdef __CELLOS_LV2__
@@ -118,4 +118,9 @@
 ///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
 int	stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);

+
+void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+
+
 #endif //FAKE_DMA_H
--- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
+++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
@@ -149,7 +149,8 @@ struct	CollisionTask_LocalStoreMemory
 		return (btOptimizedBvh*) gOptimizedBvh;
 	}

-	ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray	gTriangleMeshInterface);
+	ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray	gTriangleMeshInterfaceStorage);
+	btTriangleIndexVertexArray*	gTriangleMeshInterfacePtr;
 	///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment 
 	ATTRIBUTE_ALIGNED16(btIndexedMesh	gIndexMesh);

@@ -388,7 +389,7 @@ public:
 		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
 		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);

-		const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterface.getScaling();
+		const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterfacePtr->getScaling();
 		for (int j=2;btLikely( j>=0 );j--)
 		{
 			int graphicsindex = m_lsMemPtr->spuIndices[j];
@@ -482,7 +483,7 @@ void	ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
 	dmaPpuAddress2 = reinterpret_cast<uint32_t>(trimeshShape->getMeshInterface());
 #endif
 	//	spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
-	cellDmaGet(&lsMemPtr->gTriangleMeshInterface, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+	lsMemPtr->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&lsMemPtr->gTriangleMeshInterfaceStorage, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
 	//cellDmaWaitTagStatusAll(DMA_MASK(1));
 	

@@ -611,7 +612,7 @@ void	ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
 	BvhSubtreeInfoArray& subTrees = lsMemPtr->getOptimizedBvh()->getSubtreeInfoArray();

 	spuNodeCallback	nodeCallback(wuInput,lsMemPtr,spuContacts);
-	IndexedMeshArray&	indexArray = lsMemPtr->gTriangleMeshInterface.getIndexedMeshArray();
+	IndexedMeshArray&	indexArray = lsMemPtr->gTriangleMeshInterfacePtr->getIndexedMeshArray();
 	//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());