Added libspe2 parallel Cell SPE support, contributed by the IBM Germany 'Extreme Blue' project.
Thanks to Benjamin Hoeferlin, Minh Cuong Tran, Martina Huellmann, and Frederick Roth.
This commit is contained in:
@@ -2,15 +2,16 @@
|
|||||||
#define DOUBLE_BUFFER_H
|
#define DOUBLE_BUFFER_H
|
||||||
|
|
||||||
#include "SpuFakeDma.h"
|
#include "SpuFakeDma.h"
|
||||||
|
#include <LinearMath/btScalar.h>
|
||||||
|
|
||||||
|
|
||||||
///DoubleBuffer
|
///DoubleBuffer
|
||||||
template<class T, int size>
|
template<class T, int size>
|
||||||
class DoubleBuffer
|
class DoubleBuffer
|
||||||
{
|
{
|
||||||
#ifdef __CELLOS_LV2__
|
#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
|
||||||
T m_buffer0[size] __attribute__ ((aligned (128)));
|
ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ;
|
||||||
T m_buffer1[size] __attribute__ ((aligned (128)));
|
ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
|
||||||
#else
|
#else
|
||||||
T m_buffer0[size];
|
T m_buffer0[size];
|
||||||
T m_buffer1[size];
|
T m_buffer1[size];
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
|
|
||||||
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
|
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
|
||||||
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
|
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
|
||||||
{
|
{
|
||||||
btAssert(size<16);
|
btAssert(size<16);
|
||||||
ATTRIBUTE_ALIGNED16(char tmpBuffer[32]);
|
ATTRIBUTE_ALIGNED16(char tmpBuffer[32]);
|
||||||
@@ -23,22 +23,22 @@ int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
|
|||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
|
||||||
#ifdef USE_MEMCPY
|
#ifdef USE_MEMCPY
|
||||||
memcpy(tmpTarget,mainMem,size);
|
memcpy(tmpTarget,mainMem,size);
|
||||||
#else
|
#else
|
||||||
for ( i=0;i<size;i++)
|
for ( i=0;i<size;i++)
|
||||||
{
|
{
|
||||||
tmpTarget[i] = mainMem[i];
|
tmpTarget[i] = mainMem[i];
|
||||||
}
|
}
|
||||||
#endif
|
#endif //USE_MEMCPY
|
||||||
#else
|
#else
|
||||||
cellDmaSmallGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
|
cellDmaSmallGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
|
||||||
//copy into final destination
|
//copy into final destination
|
||||||
#endif
|
#endif //WIN32
|
||||||
|
|
||||||
cellDmaWaitTagStatusAll(DMA_MASK(1));
|
cellDmaWaitTagStatusAll(DMA_MASK(1));
|
||||||
|
|
||||||
//this is slowish, perhaps memcpy on SPU is smarter?
|
//this is slowish, perhaps memcpy on SPU is smarter?
|
||||||
for (i=0;i<size;i++)
|
for (i=0; btLikely( i<size );i++)
|
||||||
{
|
{
|
||||||
localStore[i] = tmpTarget[i];
|
localStore[i] = tmpTarget[i];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,20 +15,105 @@
|
|||||||
#define DMA_TAG(xfer) (xfer + 1)
|
#define DMA_TAG(xfer) (xfer + 1)
|
||||||
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
|
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
|
||||||
|
|
||||||
#elif defined (WIN32)
|
|
||||||
|
#else
|
||||||
|
#ifdef WIN32
|
||||||
|
|
||||||
#define DMA_TAG(a) (a)
|
#define DMA_TAG(a) (a)
|
||||||
#define DMA_MASK(a) (a)
|
#define DMA_MASK(a) (a)
|
||||||
|
|
||||||
|
|
||||||
/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
||||||
int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
||||||
int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
||||||
/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
||||||
int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
|
||||||
/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
|
||||||
void cellDmaWaitTagStatusAll(int ignore);
|
void cellDmaWaitTagStatusAll(int ignore);
|
||||||
#endif //WIN32
|
|
||||||
|
|
||||||
|
#elif defined(USE_LIBSPE2)
|
||||||
|
|
||||||
|
/* Maps a transfer index to its DMA tag (index + 1).
 * The argument is parenthesized so that expressions with lower precedence
 * than '+' (e.g. `a & b`, `c ? x : y`) are evaluated before the increment. */
#define DMA_TAG(xfer) ((xfer) + 1)
|
||||||
|
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
|
||||||
|
|
||||||
|
#include <spu_mfcio.h>
|
||||||
|
|
||||||
|
//#define DEBUG_DMA
|
||||||
|
#ifdef DEBUG_DMA
|
||||||
|
/* Debug-only soft assert: prints message `b` when condition `a` is false.
 * Wrapped in do { } while (0) so that `dUASSERT(...);` is exactly one
 * statement and stays correct inside an unbraced if/else at the call site.
 * The message is printed through "%s" so it is never interpreted as a
 * printf format string. */
#define dUASSERT(a,b) do { if (!(a)) { printf("%s", (b)); } } while (0)
|
||||||
|
#ifdef USE_ADDR64
|
||||||
|
#define uintsize unsigned long long
|
||||||
|
#else
|
||||||
|
#define uintsize unsigned int
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
|
||||||
|
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
|
||||||
|
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
|
||||||
|
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
|
||||||
|
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
|
||||||
|
dUASSERT(size < 16384, "size too big: "); \
|
||||||
|
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
|
||||||
|
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
|
||||||
|
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
|
||||||
|
} \
|
||||||
|
mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
|
||||||
|
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
|
||||||
|
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
|
||||||
|
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
|
||||||
|
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
|
||||||
|
dUASSERT(size < 16384, "size too big: "); \
|
||||||
|
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
|
||||||
|
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
|
||||||
|
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
|
||||||
|
} \
|
||||||
|
mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
|
||||||
|
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
|
||||||
|
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
|
||||||
|
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
|
||||||
|
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
|
||||||
|
dUASSERT(size < 16384, "size too big: "); \
|
||||||
|
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
|
||||||
|
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
|
||||||
|
printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
|
||||||
|
} \
|
||||||
|
mfc_put(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
|
||||||
|
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
|
||||||
|
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
|
||||||
|
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
|
||||||
|
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
|
||||||
|
dUASSERT(size < 16384, "size too big: "); \
|
||||||
|
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
|
||||||
|
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
|
||||||
|
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
|
||||||
|
} \
|
||||||
|
mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
/* Writes the tag mask, then reads the tag status for all selected tags.
 * Expanded as one parenthesized comma expression so that the macro is a
 * single statement (safe under an unbraced if/else) and its value is the
 * result of mfc_read_tag_status_all(). */
#define cellDmaWaitTagStatusAll(ignore) (mfc_write_tag_mask((ignore)), mfc_read_tag_status_all())
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
|
||||||
|
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
|
||||||
|
/* Writes the tag mask, then reads the tag status for all selected tags.
 * Expanded as one parenthesized comma expression so that the macro is a
 * single statement (safe under an unbraced if/else) and its value is the
 * result of mfc_read_tag_status_all(). */
#define cellDmaWaitTagStatusAll(ignore) (mfc_write_tag_mask((ignore)), mfc_read_tag_status_all())
|
||||||
|
#endif // DEBUG_DMA
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // WIN32
|
||||||
|
|
||||||
|
#endif //__CELLOS_LV2__
|
||||||
|
|
||||||
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
|
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
|
||||||
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
|
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
|
||||||
|
|||||||
Reference in New Issue
Block a user