From 0ff4444118eadf8a16f9b4619074602a2fac6d92 Mon Sep 17 00:00:00 2001 From: ejcoumans Date: Wed, 26 Sep 2007 23:35:47 +0000 Subject: [PATCH] Added libspe2 parallel Cell SPE support contribution by IBM Germany 'Extreme Blue' project: Thanks to Benjamin Hoeferlin, Minh Cuong Tran,Martina Huellmann,Frederick Roth. --- Extras/BulletMultiThreaded/SpuDoubleBuffer.h | 213 ++++++++++--------- Extras/BulletMultiThreaded/SpuFakeDma.cpp | 212 +++++++++--------- Extras/BulletMultiThreaded/SpuFakeDma.h | 157 ++++++++++---- 3 files changed, 334 insertions(+), 248 deletions(-) diff --git a/Extras/BulletMultiThreaded/SpuDoubleBuffer.h b/Extras/BulletMultiThreaded/SpuDoubleBuffer.h index c533a68a3..55740db4b 100644 --- a/Extras/BulletMultiThreaded/SpuDoubleBuffer.h +++ b/Extras/BulletMultiThreaded/SpuDoubleBuffer.h @@ -1,106 +1,107 @@ -#ifndef DOUBLE_BUFFER_H -#define DOUBLE_BUFFER_H - -#include "SpuFakeDma.h" - - -///DoubleBuffer -template -class DoubleBuffer -{ -#ifdef __CELLOS_LV2__ - T m_buffer0[size] __attribute__ ((aligned (128))); - T m_buffer1[size] __attribute__ ((aligned (128))); -#else - T m_buffer0[size]; - T m_buffer1[size]; -#endif - - T *m_frontBuffer; - T *m_backBuffer; - - unsigned int m_dmaTag; - bool m_dmaPending; -public: - bool isPending() const { return m_dmaPending;} - DoubleBuffer(); - - void init (); - - // dma get and put commands - void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag); - void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag); - - // gets pointer to a buffer - T *getFront(); - T *getBack(); - - // if back buffer dma was started, wait for it to complete - // then move back to front and vice versa - T *swapBuffers(); -}; - -template -DoubleBuffer::DoubleBuffer() -{ - init (); -} - -template -void DoubleBuffer::init() -{ - this->m_dmaPending = false; - this->m_frontBuffer = &this->m_buffer0[0]; - this->m_backBuffer = &this->m_buffer1[0]; -} - -template -void -DoubleBuffer::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag) -{ - m_dmaPending = true; - m_dmaTag = tag; - cellDmaLargeGet(m_backBuffer, ea, numBytes, tag, 0, 0); -} - -template -void -DoubleBuffer::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag) -{ - m_dmaPending = true; - m_dmaTag = tag; - cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0); -} - -template -T * -DoubleBuffer::getFront() -{ - return m_frontBuffer; -} - -template -T * -DoubleBuffer::getBack() -{ - return m_backBuffer; -} - -template -T * -DoubleBuffer::swapBuffers() -{ - if (m_dmaPending) - { - cellDmaWaitTagStatusAll(1< + + +///DoubleBuffer +template +class DoubleBuffer +{ +#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) + ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ; + ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ; +#else + T m_buffer0[size]; + T m_buffer1[size]; +#endif + + T *m_frontBuffer; + T *m_backBuffer; + + unsigned int m_dmaTag; + bool m_dmaPending; +public: + bool isPending() const { return m_dmaPending;} + DoubleBuffer(); + + void init (); + + // dma get and put commands + void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag); + void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag); + + // gets pointer to a buffer + T *getFront(); + T *getBack(); + + // if back buffer dma was started, wait for it to complete + // then move back to front and vice versa + T *swapBuffers(); +}; + +template +DoubleBuffer::DoubleBuffer() +{ + init (); +} + +template +void DoubleBuffer::init() +{ + this->m_dmaPending = false; + this->m_frontBuffer = &this->m_buffer0[0]; + this->m_backBuffer = &this->m_buffer1[0]; +} + +template +void +DoubleBuffer::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag) +{ + m_dmaPending = true; + m_dmaTag = tag; + cellDmaLargeGet(m_backBuffer, ea, numBytes, tag, 0, 0); +} + +template +void +DoubleBuffer::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag) +{ + m_dmaPending = true; + m_dmaTag = tag; + cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0); +} + +template +T * +DoubleBuffer::getFront() +{ + return m_frontBuffer; +} + +template +T * +DoubleBuffer::getBack() +{ + return m_backBuffer; +} + +template +T * +DoubleBuffer::swapBuffers() +{ + if (m_dmaPending) + { + cellDmaWaitTagStatusAll(1< //for btAssert -//Disabling memcpy sometimes helps debugging DMA - -#define USE_MEMCPY 1 - - -///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) -int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) -{ - btAssert(size<16); - ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); - - char* mainMem = (char*)ea; - char* localStore = (char*)ls; - uint32_t i; - - - ///make sure last 4 bits are the same, for cellDmaSmallGet - uint32_t last4BitsOffset = ea & 0x0f; - char* tmpTarget = tmpBuffer + last4BitsOffset; -#ifdef WIN32 - -#ifdef USE_MEMCPY - memcpy(tmpTarget,mainMem,size); -#else - for ( i=0;i //for btAssert +//Disabling memcpy sometimes helps debugging DMA + +#define USE_MEMCPY 1 + + +///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes) +int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size) +{ + btAssert(size<16); + ATTRIBUTE_ALIGNED16(char tmpBuffer[32]); + + char* mainMem = (char*)ea; + char* localStore = (char*)ls; + uint32_t i; + + + ///make sure last 4 bits are the same, for cellDmaSmallGet + uint32_t last4BitsOffset = ea & 0x0f; + char* tmpTarget = tmpBuffer + last4BitsOffset; +#ifdef WIN32 + +#ifdef USE_MEMCPY + memcpy(tmpTarget,mainMem,size); +#else + for ( i=0;i -#include - -#define DMA_TAG(xfer) (xfer + 1) -#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) - -#elif defined (WIN32) - -#define DMA_TAG(a) (a) -#define DMA_MASK(a) (a) - - -/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) -int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); -int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); -/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) -int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); -/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) -void cellDmaWaitTagStatusAll(int ignore); -#endif //WIN32 - -///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1) -int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size); - -#endif //FAKE_DMA_H + +#ifndef FAKE_DMA_H +#define FAKE_DMA_H + + +#include "PlatformDefinitions.h" + + + +#ifdef __CELLOS_LV2__ + +#include +#include + +#define DMA_TAG(xfer) (xfer + 1) +#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) + + +#else +#ifdef WIN32 + +#define DMA_TAG(a) (a) +#define DMA_MASK(a) (a) + + + /// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + /// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid); + /// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy) + void cellDmaWaitTagStatusAll(int ignore); + + +#elif defined(USE_LIBSPE2) + +#define DMA_TAG(xfer) (xfer + 1) +#define DMA_MASK(xfer) (1 << DMA_TAG(xfer)) + +#include + +//#define DEBUG_DMA +#ifdef DEBUG_DMA +#define dUASSERT(a,b) if (!(a)) { printf(b);} +#ifdef USE_ADDR64 +#define uintsize unsigned long long +#else +#define uintsize unsigned int +#endif + +#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \ + } \ + mfc_put(ls, ea, size, tag, tid, rid) +#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \ + dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \ + dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \ + dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \ + dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \ + dUASSERT(size < 16384, "size too big: "); \ + dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \ + dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\ + printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\ + } \ + mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() + +#else +#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid) +#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid) +#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() +#endif // DEBUG_DMA + + + + + + + + + + +#endif // WIN32 + +#endif //__CELLOS_LV2__ + +///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1) +int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size); + +#endif //FAKE_DMA_H