Added libspe2 parallel Cell SPE support, contributed by the IBM Germany 'Extreme Blue' project:

Thanks to Benjamin Hoeferlin, Minh Cuong Tran, Martina Huellmann, Frederick Roth.
This commit is contained in:
ejcoumans
2007-09-26 23:35:47 +00:00
parent b2b2ea71c6
commit 0ff4444118
3 changed files with 334 additions and 248 deletions

View File

@@ -2,15 +2,16 @@
#define DOUBLE_BUFFER_H #define DOUBLE_BUFFER_H
#include "SpuFakeDma.h" #include "SpuFakeDma.h"
#include <LinearMath/btScalar.h>
///DoubleBuffer ///DoubleBuffer
template<class T, int size> template<class T, int size>
class DoubleBuffer class DoubleBuffer
{ {
#ifdef __CELLOS_LV2__ #if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
T m_buffer0[size] __attribute__ ((aligned (128))); ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ;
T m_buffer1[size] __attribute__ ((aligned (128))); ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
#else #else
T m_buffer0[size]; T m_buffer0[size];
T m_buffer1[size]; T m_buffer1[size];

View File

@@ -7,7 +7,7 @@
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) ///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
{ {
btAssert(size<16); btAssert(size<16);
ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); ATTRIBUTE_ALIGNED16(char tmpBuffer[32]);
@@ -23,22 +23,22 @@ int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
#ifdef WIN32 #ifdef WIN32
#ifdef USE_MEMCPY #ifdef USE_MEMCPY
memcpy(tmpTarget,mainMem,size); memcpy(tmpTarget,mainMem,size);
#else #else
for ( i=0;i<size;i++) for ( i=0;i<size;i++)
{ {
tmpTarget[i] = mainMem[i]; tmpTarget[i] = mainMem[i];
} }
#endif #endif //USE_MEMCPY
#else #else
cellDmaSmallGet(tmpTarget,ea,size,DMA_TAG(1),0,0); cellDmaSmallGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
//copy into final destination //copy into final destination
#endif #endif //WIN32
cellDmaWaitTagStatusAll(DMA_MASK(1)); cellDmaWaitTagStatusAll(DMA_MASK(1));
//this is slowish, perhaps memcpy on SPU is smarter? //this is slowish, perhaps memcpy on SPU is smarter?
for (i=0;i<size;i++) for (i=0; btLikely( i<size );i++)
{ {
localStore[i] = tmpTarget[i]; localStore[i] = tmpTarget[i];
} }

View File

@@ -15,20 +15,105 @@
#define DMA_TAG(xfer) (xfer + 1) #define DMA_TAG(xfer) (xfer + 1)
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) #define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
#elif defined (WIN32)
#else
#ifdef WIN32
#define DMA_TAG(a) (a) #define DMA_TAG(a) (a)
#define DMA_MASK(a) (a) #define DMA_MASK(a) (a)
/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) /// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) /// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) /// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
void cellDmaWaitTagStatusAll(int ignore); void cellDmaWaitTagStatusAll(int ignore);
#endif //WIN32
#elif defined(USE_LIBSPE2)
/// DMA_TAG maps a transfer index to the DMA tag used for it (index + 1).
/// The argument is parenthesized so that expression arguments such as
/// DMA_TAG(a | b) are incremented as a whole instead of re-associating with the '+'.
#define DMA_TAG(xfer) ((xfer) + 1)
/// DMA_MASK yields a mask with only the bit for DMA_TAG(xfer) set.
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
#include <spu_mfcio.h>
//#define DEBUG_DMA
#ifdef DEBUG_DMA
/// dUASSERT prints message b (passed to printf as the format string) when condition a is false.
/// It is wrapped in do/while(0) so it acts as a single statement and cannot capture a following 'else'.
#define dUASSERT(a,b) do { if (!(a)) { printf(b); } } while (0)
/// uintsize is the unsigned integer type that addresses and sizes are cast to in the checks below:
/// unsigned long long when USE_ADDR64 is defined, unsigned int otherwise.
#ifdef USE_ADDR64
#define uintsize unsigned long long
#else
#define uintsize unsigned int
#endif
/// dDMA_SUSPECT evaluates to non-zero when a transfer looks invalid: the 16-byte offsets of ls and ea differ,
/// an unaligned address does not match size modulo 16, size is neither 1, 2, 4, 8 nor a multiple of 16,
/// size is 16384 or more, or either address is zero.
/// NOTE: the arguments are evaluated several times, so they must be free of side effects.
#define dDMA_SUSPECT(ls, ea, size) ( \
	(((uintsize)(ls)%16) != ((uintsize)(ea)%16)) || \
	((((uintsize)(ea)%16) || ((uintsize)(ls)%16)) && ((((uintsize)(ls)%16) != ((uintsize)(size)%16)) || (((uintsize)(ea)%16) != ((uintsize)(size)%16)))) || \
	(((uintsize)(size)%16) && ((uintsize)(size)!=1) && ((uintsize)(size)!=2) && ((uintsize)(size)!=4) && ((uintsize)(size)!=8)) || \
	((size) >= 16384) || !(uintsize)(ls) || !(uintsize)(ea) )
/// dDMA_DIAGNOSE prints one message per individual rule that the transfer violates.
/// The natural-alignment rule is skipped for size == 0 so that the modulo never divides by zero.
#define dDMA_DIAGNOSE(ls, ea, size) do { \
	dUASSERT( (((uintsize)(ea) % 16) == 0) || ((size) < 16), "XDR Address not aligned: "); \
	dUASSERT( (((uintsize)(ls) % 16) == 0) || ((size) < 16), "LS Address not aligned: "); \
	dUASSERT( ((size) == 0) || ((size) > 16) || ((((uintsize)(ls) % (size)) == 0) && (((uintsize)(ea) % (size)) == 0)), "Not naturally aligned: "); \
	dUASSERT( ((size) == 1) || ((size) == 2) || ((size) == 4) || ((size) == 8) || (((size) % 16) == 0), "size not a multiple of 16byte: "); \
	dUASSERT( (size) < 16384, "size too big: "); \
	dUASSERT( ((uintsize)(ea)%16)==((uintsize)(ls)%16), "wrong Quadword alignment of LS and EA: "); \
	dUASSERT( (ea) != 0, "Nullpointer EA: "); \
	dUASSERT( (ls) != 0, "Nullpointer LS: "); \
} while (0)
/// cellDmaLargeGet (debug build): reports suspicious parameters, then always issues mfc_get.
/// The whole expansion is one do/while(0) statement, so it is safe as the body of an unbraced if/else.
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) do { \
	if (dDMA_SUSPECT(ls, ea, size)) { \
		dDMA_DIAGNOSE(ls, ea, size); \
		printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)(ea),(unsigned int)(ls),(unsigned int)(size)); \
	} \
	mfc_get(ls, ea, size, tag, tid, rid); \
} while (0)
/// cellDmaGet (debug build): same checks and reporting as cellDmaLargeGet, then mfc_get.
#define cellDmaGet(ls, ea, size, tag, tid, rid) do { \
	if (dDMA_SUSPECT(ls, ea, size)) { \
		dDMA_DIAGNOSE(ls, ea, size); \
		printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)(ea),(unsigned int)(ls),(unsigned int)(size)); \
	} \
	mfc_get(ls, ea, size, tag, tid, rid); \
} while (0)
/// cellDmaLargePut (debug build): same checks, reported as a PUT from ls to ea, then mfc_put.
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) do { \
	if (dDMA_SUSPECT(ls, ea, size)) { \
		dDMA_DIAGNOSE(ls, ea, size); \
		printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)(ls),(unsigned int)(ea),(unsigned int)(size)); \
	} \
	mfc_put(ls, ea, size, tag, tid, rid); \
} while (0)
/// cellDmaSmallGet (debug build): same checks and reporting as cellDmaLargeGet, then mfc_get.
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) do { \
	if (dDMA_SUSPECT(ls, ea, size)) { \
		dDMA_DIAGNOSE(ls, ea, size); \
		printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)(ea),(unsigned int)(ls),(unsigned int)(size)); \
	} \
	mfc_get(ls, ea, size, tag, tid, rid); \
} while (0)
/// cellDmaWaitTagStatusAll writes the tag mask and then reads the tag status for all tags in it.
/// Both steps live in one do/while(0) statement so a caller's unbraced 'if' guards both of them.
#define cellDmaWaitTagStatusAll(ignore) do { mfc_write_tag_mask(ignore); mfc_read_tag_status_all(); } while (0)
#else
/// Non-debug variants: each transfer macro forwards its arguments unchanged to mfc_get / mfc_put.
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
/// cellDmaWaitTagStatusAll writes the tag mask and then reads the tag status for all tags in it.
/// Both steps live in one do/while(0) statement so a caller's unbraced 'if' guards both of them.
#define cellDmaWaitTagStatusAll(ignore) do { mfc_write_tag_mask(ignore); mfc_read_tag_status_all(); } while (0)
#endif // DEBUG_DMA
#endif // WIN32
#endif //__CELLOS_LV2__
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1) ///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size); int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);