moved Extras/Gimpact to src/BulletCollision/Gimpact

moved Extras/BulletMultiThreaded to src/BulletMultiThreaded
(build systems will be updated soon)
This commit is contained in:
erwin.coumans
2008-10-10 19:48:36 +00:00
parent 6f6f88fa08
commit 512c0f167e
111 changed files with 10811 additions and 11799 deletions

View File

@@ -0,0 +1,66 @@
INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/Extras/BulletMultiThreaded/
${BULLET_PHYSICS_SOURCE_DIR}/src
)
ADD_LIBRARY(LibBulletMultiThreaded
PlatformDefinitions.h
SpuFakeDma.cpp
SpuFakeDma.h
SpuSync.h
SpuDoubleBuffer.h
SpuLibspe2Support.cpp
SpuLibspe2Support.h
btThreadSupportInterface.cpp
btThreadSupportInterface.h
Win32ThreadSupport.cpp
Win32ThreadSupport.h
PosixThreadSupport.cpp
PosixThreadSupport.h
SequentialThreadSupport.cpp
SequentialThreadSupport.h
SpuSampleTaskProcess.h
SpuSampleTaskProcess.cpp
SpuCollisionObjectWrapper.cpp
SpuCollisionObjectWrapper.h
SpuCollisionTaskProcess.h
SpuCollisionTaskProcess.cpp
SpuGatheringCollisionDispatcher.h
SpuGatheringCollisionDispatcher.cpp
SpuContactManifoldCollisionAlgorithm.cpp
SpuContactManifoldCollisionAlgorithm.h
SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
SpuNarrowPhaseCollisionTask/SpuContactResult.h
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
SpuNarrowPhaseCollisionTask/SpuEpaPenetrationDepthSolver.cpp
SpuNarrowPhaseCollisionTask/SpuEpaPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuGjkEpa2.cpp
SpuNarrowPhaseCollisionTask/SpuGjkEpa2.h
SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
SpuNarrowPhaseCollisionTask/SpuVoronoiSimplexSolver.cpp
SpuNarrowPhaseCollisionTask/SpuVoronoiSimplexSolver.h
SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp
SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
SpuParallelSolver.cpp
SpuParallelSolver.h
SpuSolverTask/SpuParallellSolverTask.cpp
SpuSolverTask/SpuParallellSolverTask.h
SpuBatchRaycaster.cpp
SpuBatchRaycaster.h
SpuRaycastTaskProcess.cpp
SpuRaycastTaskProcess.h
SpuRaycastTask/SpuRaycastTask.cpp
SpuRaycastTask/SpuRaycastTask.h
SpuRaycastTask/SpuSubSimplexConvexCast.cpp
SpuRaycastTask/SpuSubSimplexConvexCast.h
)

View File

@@ -0,0 +1,12 @@
SubDir TOP Extras BulletMultiThreaded ;
#IncludeDir Extras/BulletMultiThreaded ;
Library bulletmultithreaded : [ Wildcard . : */.h *.cpp ] [ Wildcard SpuNarrowPhaseCollisionTask : *.h *.cpp ] [ Wildcard SpuSolverTask : *.h *.cpp ] : noinstall ;
CFlags bulletmultithreaded : [ FIncludes $(TOP)/Extras/BulletMultiThreaded ] ;
LibDepends bulletmultithreaded : ;
MsvcIncDirs bulletmultithreaded :
"../../Extras/BulletMultiThreaded" ;
InstallHeader [ Wildcard *.h ] : bulletmultithreaded ;

View File

@@ -0,0 +1,177 @@
__ARCH_BITS__ := 32
# define macros
NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask
SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask
IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
CELL_TOP ?= /opt/cell/sdk
CELL_SYSROOT := /opt/cell/sysroot
else
CELL_TOP ?= /opt/ibm/cell-sdk/prototype
CELL_SYSROOT := $(CELL_TOP)/sysroot
endif
USE_CCACHE=ccache
RM=rm -f
OUTDIR=./out
DEBUGFLAG=-DNDEBUG
LIBOUTDIR=../../lib/ibmsdk
COLLISIONDIR=../../src/BulletCollision
MATHDIR=../../src/LinearMath
ARCHITECTUREFLAG=-m$(__ARCH_BITS__)
ifeq "$(__ARCH_BITS__)" "64"
SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64
else
SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__
endif
SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc
SPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR)
#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h
SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h
SPU_LFLAGS= -Wl,-N
SPU_LIBRARIES=-lstdc++
SPU_EMBED=/usr/bin/ppu-embedspu
SPU_AR=/usr/bin/ar
SYMBOLNAME=spu_program
ifeq "$(__ARCH_BITS__)" "64"
PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64
PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc
else
PPU_DEFFLAGS= -DUSE_LIBSPE2
PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc
endif
PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h
PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR)
PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc
PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib
PPU_AR=/usr/bin/ar
MakeOut :
# rm -f -R $(OUTDIR) ; mkdir $(OUTDIR)
@echo "usage: make spu, make ppu, make all, or make clean"
# SPU
SpuTaskFile : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp
SpuFakeDma : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuContactManifoldCollisionAlgorithm_spu : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp
SpuCollisionShapes : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuContactResult : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
#SpuGatheringCollisionTask : MakeOut
# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuGjkPairDetector: MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuMinkowskiPenetrationDepthSolver : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
SpuVoronoiSimplexSolver : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
#SpuLibspe2Support_spu : MakeOut
# $(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp
## SPU-Bullet
btPersistentManifold : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp
btOptimizedBvh : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btCollisionObject : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp
btTriangleCallback : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btTriangleIndexVertexArray : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btStridingMeshInterface : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
btAlignedAllocator : MakeOut
$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp
# PPU
SpuGatheringCollisionDispatcher : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuLibspe2Support: MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
btThreadSupportInterface: MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuCollisionTaskProcess : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuContactManifoldCollisionAlgorithm : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
SpuSampleTaskProcess : MakeOut
$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
spu : SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \
SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \
btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \
btStridingMeshInterface btAlignedAllocator
$(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \
$(OUTDIR)/SpuTaskFile.o \
$(OUTDIR)/SpuFakeDma.o \
$(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \
$(OUTDIR)/SpuContactResult.o \
$(OUTDIR)/SpuCollisionShapes.o \
$(OUTDIR)/SpuGjkPairDetector.o \
$(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \
$(OUTDIR)/SpuVoronoiSimplexSolver.o \
$(OUTDIR)/btPersistentManifold.o \
$(OUTDIR)/btTriangleCallback.o \
$(OUTDIR)/btTriangleIndexVertexArray.o \
$(OUTDIR)/btStridingMeshInterface.o \
$(OUTDIR)/btAlignedAllocator.o \
$(SPU_LFLAGS) $(SPU_LIBRARIES)
spu-embed : spu
$(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o
$(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o
ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \
SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess
$(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \
$(OUTDIR)/SpuCollisionTaskProcess.o \
$(OUTDIR)/SpuSampleTaskProcess.o \
$(OUTDIR)/SpuGatheringCollisionDispatcher.o \
$(OUTDIR)/SpuLibspe2Support.o \
$(OUTDIR)/btThreadSupportInterface.o \
$(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o
all : spu-embed ppu
clean:
$(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a

View File

@@ -0,0 +1,82 @@
#ifndef TYPE_DEFINITIONS_H
#define TYPE_DEFINITIONS_H
///This file provides some platform/compiler checks for common definitions
#ifdef WIN32
typedef union
{
unsigned int u;
void *p;
} addr64;
#define USE_WIN32_THREADING 1
#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
#else
#endif //__MINGW32__
typedef unsigned char uint8_t;
#ifndef __PHYSICS_COMMON_H__
typedef unsigned long int uint64_t;
typedef unsigned int uint32_t;
#endif //__PHYSICS_COMMON_H__
typedef unsigned short uint16_t;
#include <malloc.h>
#define memalign(alignment, size) malloc(size);
#include <string.h> //memcpy
#include <stdio.h>
#define spu_printf printf
#else
#include <stdint.h>
#include <stdlib.h>
#include <string.h> //for memcpy
#if defined (__CELLOS_LV2__)
// Playstation 3 Cell SDK
#include <spu_printf.h>
#else
// posix system
#define USE_PTHREADS (1)
#ifdef USE_LIBSPE2
#include <stdio.h>
#define spu_printf printf
#define DWORD unsigned int
typedef union
{
unsigned long long ull;
unsigned int ui[2];
void *p;
} addr64;
#else
#include <stdio.h>
#define spu_printf printf
#endif // USE_LIBSPE2
#endif //__CELLOS_LV2__
#endif
/* Included here because we need uint*_t typedefs */
#include "PpuAddressSpace.h"
#endif //TYPE_DEFINITIONS_H

View File

@@ -0,0 +1,211 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h>
#include "PosixThreadSupport.h"
#ifdef USE_PTHREADS
#include "SpuCollisionTaskProcess.h"
#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
#define checkPThreadFunction(returnValue) \
if(0 != returnValue) { \
printf("PThread problem at line %i in file %s: %i\n", __LINE__, __FILE__, returnValue); \
}
// The number of threads should be equal to the number of available cores
// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
// Setup and initialize SPU/CELL/Libspe2
PosixThreadSupport::PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo)
{
startThreads(threadConstructionInfo);
}
// cleanup/shutdown Libspe2
PosixThreadSupport::~PosixThreadSupport()
{
stopSPU();
}
// this semaphore will signal, if and how many threads are finished with their work
static sem_t mainSemaphore;
static void *threadFunction(void *argument)
{
PosixThreadSupport::btSpuStatus* status = (PosixThreadSupport::btSpuStatus*)argument;
while (1)
{
checkPThreadFunction(sem_wait(&status->startSemaphore));
void* userPtr = status->m_userPtr;
if (userPtr)
{
btAssert(status->m_status);
status->m_userThreadFunc(userPtr,status->m_lsMemory);
status->m_status = 2;
checkPThreadFunction(sem_post(&mainSemaphore));
status->threadUsed++;
} else {
//exit Thread
status->m_status = 3;
checkPThreadFunction(sem_post(&mainSemaphore));
printf("Thread with taskId %i exiting\n",status->m_taskId);
break;
}
}
printf("Thread TERMINATED\n");
return 0;
}
///send messages to SPUs
void PosixThreadSupport::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t taskId)
{
/// gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (uint32_t) &taskDesc);
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
switch (uiCommand)
{
case CMD_GATHER_AND_PROCESS_PAIRLIST:
{
btSpuStatus& spuStatus = m_activeSpuStatus[taskId];
btAssert(taskId >= 0);
btAssert(taskId < m_activeSpuStatus.size());
spuStatus.m_commandId = uiCommand;
spuStatus.m_status = 1;
spuStatus.m_userPtr = (void*)uiArgument0;
// fire event to start new task
checkPThreadFunction(sem_post(&spuStatus.startSemaphore));
break;
}
default:
{
///not implemented
btAssert(0);
}
};
}
///check for messages from SPUs
void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
{
///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
btAssert(m_activeSpuStatus.size());
// wait for any of the threads to finish
checkPThreadFunction(sem_wait(&mainSemaphore));
// get at least one thread which has finished
size_t last = -1;
for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
if(2 == m_activeSpuStatus[t].m_status) {
last = t;
break;
}
}
btSpuStatus& spuStatus = m_activeSpuStatus[last];
btAssert(spuStatus.m_status > 1);
spuStatus.m_status = 0;
// need to find an active spu
btAssert(last >= 0);
*puiArgument0 = spuStatus.m_taskId;
*puiArgument1 = spuStatus.m_status;
}
void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstructionInfo)
{
printf("%s creating %i threads.\n", __FUNCTION__, threadConstructionInfo.m_numThreads);
m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
checkPThreadFunction(sem_init(&mainSemaphore, 0, 0));
for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
{
printf("starting thread %d\n",i);
btSpuStatus& spuStatus = m_activeSpuStatus[i];
checkPThreadFunction(sem_init(&spuStatus.startSemaphore, 0, 0));
checkPThreadFunction(pthread_create(&spuStatus.thread, NULL, &threadFunction, (void*)&spuStatus));
spuStatus.m_userPtr=0;
spuStatus.m_taskId = i;
spuStatus.m_commandId = 0;
spuStatus.m_status = 0;
spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
spuStatus.threadUsed = 0;
printf("started thread %d \n",i);
}
}
void PosixThreadSupport::startSPU()
{
}
///tell the task scheduler we are done with the SPU tasks
void PosixThreadSupport::stopSPU()
{
for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
btSpuStatus& spuStatus = m_activeSpuStatus[t];
printf("%s: Thread %i used: %ld\n", __FUNCTION__, t, spuStatus.threadUsed);
checkPThreadFunction(sem_destroy(&spuStatus.startSemaphore));
checkPThreadFunction(pthread_cancel(spuStatus.thread));
}
checkPThreadFunction(sem_destroy(&mainSemaphore));
m_activeSpuStatus.clear();
}
#endif // USE_PTHREADS

View File

@@ -0,0 +1,118 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btScalar.h"
#include "PlatformDefinitions.h"
#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h
#include <pthread.h>
#include <semaphore.h>
#ifndef POSIX_THREAD_SUPPORT_H
#define POSIX_THREAD_SUPPORT_H
#include "LinearMath/btAlignedObjectArray.h"
#include "btThreadSupportInterface.h"
typedef void (*PosixThreadFunc)(void* userPtr,void* lsMemory);
typedef void* (*PosixlsMemorySetupFunc)();
// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class PosixThreadSupport : public btThreadSupportInterface
{
public:
typedef enum sStatus {
STATUS_BUSY,
STATUS_READY,
STATUS_FINISHED
} Status;
// placeholder, until libspe2 support is there
struct btSpuStatus
{
uint32_t m_taskId;
uint32_t m_commandId;
uint32_t m_status;
PosixThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_lsMemory; //initialized using PosixLocalStoreMemorySetupFunc
pthread_t thread;
sem_t startSemaphore;
unsigned long threadUsed;
};
private:
btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
public:
///Setup and initialize SPU/CELL/Libspe2
struct ThreadConstructionInfo
{
ThreadConstructionInfo(char* uniqueName,
PosixThreadFunc userThreadFunc,
PosixlsMemorySetupFunc lsMemoryFunc,
int numThreads=1,
int threadStackSize=65535
)
:m_uniqueName(uniqueName),
m_userThreadFunc(userThreadFunc),
m_lsMemoryFunc(lsMemoryFunc),
m_numThreads(numThreads),
m_threadStackSize(threadStackSize)
{
}
char* m_uniqueName;
PosixThreadFunc m_userThreadFunc;
PosixlsMemorySetupFunc m_lsMemoryFunc;
int m_numThreads;
int m_threadStackSize;
};
PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo);
///cleanup/shutdown Libspe2
virtual ~PosixThreadSupport();
void startThreads(ThreadConstructionInfo& threadInfo);
///send messages to SPUs
virtual void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1);
///check for messages from SPUs
virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
virtual void startSPU();
///tell the task scheduler we are done with the SPU tasks
virtual void stopSPU();
};
#endif // POSIX_THREAD_SUPPORT_H
#endif // USE_PTHREADS

View File

@@ -0,0 +1,11 @@
#ifndef __PPU_ADDRESS_SPACE_H
#define __PPU_ADDRESS_SPACE_H
#ifdef USE_ADDR64
typedef uint64_t ppu_address_t;
#else
typedef uint32_t ppu_address_t;
#endif
#endif

View File

@@ -0,0 +1,89 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SequentialThreadSupport.h"
#include "SpuCollisionTaskProcess.h"
#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
SequentialThreadSupport::SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo)
{
startThreads(threadConstructionInfo);
}
///cleanup/shutdown Libspe2
SequentialThreadSupport::~SequentialThreadSupport()
{
stopSPU();
}
#include <stdio.h>
///send messages to SPUs
void SequentialThreadSupport::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t taskId)
{
switch (uiCommand)
{
case CMD_GATHER_AND_PROCESS_PAIRLIST:
{
btSpuStatus& spuStatus = m_activeSpuStatus[0];
spuStatus.m_userPtr=(void*)uiArgument0;
spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
}
break;
default:
{
///not implemented
btAssert(0 && "Not implemented");
}
};
}
///check for messages from SPUs
void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
{
btAssert(m_activeSpuStatus.size());
btSpuStatus& spuStatus = m_activeSpuStatus[0];
*puiArgument0 = spuStatus.m_taskId;
*puiArgument1 = spuStatus.m_status;
}
void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo)
{
m_activeSpuStatus.resize(1);
printf("STS: Not starting any threads\n");
btSpuStatus& spuStatus = m_activeSpuStatus[0];
spuStatus.m_userPtr = 0;
spuStatus.m_taskId = 0;
spuStatus.m_commandId = 0;
spuStatus.m_status = 0;
spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
printf("STS: Created local store at %p for task %s\n", spuStatus.m_lsMemory, threadConstructionInfo.m_uniqueName);
}
void SequentialThreadSupport::startSPU()
{
}
void SequentialThreadSupport::stopSPU()
{
m_activeSpuStatus.clear();
}

View File

@@ -0,0 +1,84 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btScalar.h"
#include "PlatformDefinitions.h"
#ifndef SEQUENTIAL_THREAD_SUPPORT_H
#define SEQUENTIAL_THREAD_SUPPORT_H
#include "LinearMath/btAlignedObjectArray.h"
#include "btThreadSupportInterface.h"
typedef void (*SequentialThreadFunc)(void* userPtr,void* lsMemory);
typedef void* (*SequentiallsMemorySetupFunc)();
///SequentialThreadSupport is a portable non-parallel implementation of the btThreadSupportInterface
class SequentialThreadSupport : public btThreadSupportInterface
{
public:
struct btSpuStatus
{
uint32_t m_taskId;
uint32_t m_commandId;
uint32_t m_status;
SequentialThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_lsMemory; //initialized using SequentiallsMemorySetupFunc
};
private:
btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
btAlignedObjectArray<void*> m_completeHandles;
public:
struct SequentialThreadConstructionInfo
{
SequentialThreadConstructionInfo (char* uniqueName,
SequentialThreadFunc userThreadFunc,
SequentiallsMemorySetupFunc lsMemoryFunc
)
:m_uniqueName(uniqueName),
m_userThreadFunc(userThreadFunc),
m_lsMemoryFunc(lsMemoryFunc)
{
}
char* m_uniqueName;
SequentialThreadFunc m_userThreadFunc;
SequentiallsMemorySetupFunc m_lsMemoryFunc;
};
SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo);
virtual ~SequentialThreadSupport();
void startThreads(SequentialThreadConstructionInfo& threadInfo);
///send messages to SPUs
virtual void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1);
///check for messages from SPUs
virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
virtual void startSPU();
///tell the task scheduler we are done with the SPU tasks
virtual void stopSPU();
};
#endif //SEQUENTIAL_THREAD_SUPPORT_H

View File

@@ -0,0 +1,151 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <new>
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "LinearMath/btAlignedAllocator.h"
#include "SpuBatchRaycaster.h"
SpuBatchRaycaster::SpuBatchRaycaster (class btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
{
m_threadInterface = threadInterface;
castUponObjectWrappers = NULL;
numCastUponObjectWrappers = 0;
m_spuRaycastTaskProcess = new SpuRaycastTaskProcess(m_threadInterface,maxNumOutstandingTasks); // FIXME non constant
}
SpuBatchRaycaster::~SpuBatchRaycaster ()
{
if (castUponObjectWrappers)
{
btAlignedFree (castUponObjectWrappers);
castUponObjectWrappers = NULL;
}
}
void
SpuBatchRaycaster::setCollisionObjects (btCollisionObjectArray& castUponObjects, int numCastUponObjects)
{
if (castUponObjectWrappers)
{
btAlignedFree (castUponObjectWrappers);
castUponObjectWrappers = NULL;
}
castUponObjectWrappers = (SpuCollisionObjectWrapper*)btAlignedAlloc (sizeof(SpuCollisionObjectWrapper) * numCastUponObjects,16);
numCastUponObjectWrappers = numCastUponObjects;
for (int i = 0; i < numCastUponObjectWrappers; i++)
{
castUponObjectWrappers[i] = SpuCollisionObjectWrapper(castUponObjects[i]);
}
}
void
SpuBatchRaycaster::setCollisionObjectsSkipPE (btCollisionObjectArray& castUponObjects, int numCastUponObjects)
{
if (castUponObjectWrappers)
{
btAlignedFree (castUponObjectWrappers);
castUponObjectWrappers = NULL;
}
int numNonPEShapes = 0;
for (int i = 0; i < numCastUponObjects; i++)
{
const btCollisionShape* shape = castUponObjects[i]->getCollisionShape();
if (shape->getShapeType () == BOX_SHAPE_PROXYTYPE ||
shape->getShapeType () == SPHERE_SHAPE_PROXYTYPE ||
shape->getShapeType () == CAPSULE_SHAPE_PROXYTYPE)
{
continue;
}
numNonPEShapes++;
}
castUponObjectWrappers = (SpuCollisionObjectWrapper*)btAlignedAlloc (sizeof(SpuCollisionObjectWrapper) * numNonPEShapes,16);
numCastUponObjectWrappers = numNonPEShapes;
int index = 0;
for (int i = 0; i < numCastUponObjects; i++)
{
const btCollisionShape* shape = castUponObjects[i]->getCollisionShape();
if (shape->getShapeType () == BOX_SHAPE_PROXYTYPE ||
shape->getShapeType () == SPHERE_SHAPE_PROXYTYPE ||
shape->getShapeType () == CAPSULE_SHAPE_PROXYTYPE)
{
continue;
}
castUponObjectWrappers[index] = SpuCollisionObjectWrapper(castUponObjects[i]);
index++;
}
// printf("Number of shapes bullet is casting against: %d\n", numNonPEShapes);
btAssert (index == numNonPEShapes);
}
void
SpuBatchRaycaster::addRay (const btVector3& rayFrom, const btVector3& rayTo, const btScalar hitFraction)
{
SpuRaycastTaskWorkUnitOut workUnitOut;
workUnitOut.hitFraction = hitFraction;
workUnitOut.hitNormal = btVector3(0.0, 1.0, 0.0);
rayBatchOutput.push_back (workUnitOut);
SpuRaycastTaskWorkUnit workUnit;
workUnit.rayFrom = rayFrom;
workUnit.rayTo = rayTo;
rayBatch.push_back (workUnit);
}
void
SpuBatchRaycaster::clearRays ()
{
rayBatch.clear ();
rayBatchOutput.clear ();
}
void
SpuBatchRaycaster::performBatchRaycast ()
{
m_spuRaycastTaskProcess->initialize2 (castUponObjectWrappers, numCastUponObjectWrappers);
for (int i = 0; i < rayBatch.size(); i++)
{
rayBatch[i].output = &rayBatchOutput[i]; // assign output memory location
m_spuRaycastTaskProcess->addWorkToTask(rayBatch[i]);
}
m_spuRaycastTaskProcess->flush2 ();
}
const SpuRaycastTaskWorkUnitOut&
SpuBatchRaycaster::operator [] (int i) const
{
return rayBatchOutput[i];
}
int
SpuBatchRaycaster::getNumRays () const
{
return rayBatchOutput.size();
}

View File

@@ -0,0 +1,49 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_BATCH_RAYCASTER_H
#define SPU_BATCH_RAYCASTER_H
#include "LinearMath/btAlignedObjectArray.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "SpuRaycastTaskProcess.h"
#include "SpuRaycastTask/SpuRaycastTask.h"
#include "SpuCollisionObjectWrapper.h"
/* FIXME:
* Need to decide how callbacks are performed...
*/
class SpuBatchRaycaster
{
protected:
SpuCollisionObjectWrapper* castUponObjectWrappers;
int numCastUponObjectWrappers;
btAlignedObjectArray<SpuRaycastTaskWorkUnit> rayBatch;
btAlignedObjectArray<SpuRaycastTaskWorkUnitOut> rayBatchOutput;
SpuRaycastTaskProcess* m_spuRaycastTaskProcess;
class btThreadSupportInterface* m_threadInterface;
public:
SpuBatchRaycaster (class btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks);
~SpuBatchRaycaster ();
void setCollisionObjects (btCollisionObjectArray& castUponObjects, int numCastUponObjects);
void setCollisionObjectsSkipPE (btCollisionObjectArray& castUponObjects, int numCastUponObjects);
void addRay (const btVector3& rayFrom, const btVector3& rayTo, const btScalar hitFraction = 1.0);
void clearRays ();
void performBatchRaycast ();
const SpuRaycastTaskWorkUnitOut& operator [] (int i) const;
int getNumRays () const;
};
#endif

View File

@@ -0,0 +1,48 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuCollisionObjectWrapper.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
SpuCollisionObjectWrapper::SpuCollisionObjectWrapper ()
{
}
#ifndef __SPU__
SpuCollisionObjectWrapper::SpuCollisionObjectWrapper (const btCollisionObject* collisionObject)
{
m_shapeType = collisionObject->getCollisionShape()->getShapeType ();
m_collisionObjectPtr = (ppu_address_t)collisionObject;
m_margin = collisionObject->getCollisionShape()->getMargin ();
}
#endif
int
SpuCollisionObjectWrapper::getShapeType () const
{
return m_shapeType;
}
float
SpuCollisionObjectWrapper::getCollisionMargin () const
{
return m_margin;
}
ppu_address_t
SpuCollisionObjectWrapper::getCollisionObjectPtr () const
{
return m_collisionObjectPtr;
}

View File

@@ -0,0 +1,35 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "PlatformDefinitions.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper
{
protected:
int m_shapeType;
float m_margin;
ppu_address_t m_collisionObjectPtr;
public:
SpuCollisionObjectWrapper ();
SpuCollisionObjectWrapper (const btCollisionObject* collisionObject);
int getShapeType () const;
float getCollisionMargin () const;
ppu_address_t getCollisionObjectPtr () const;
};

View File

@@ -0,0 +1,304 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#define DEBUG_SPU_TASK_SCHEDULING 1
//class OptimizedBvhNode;
#include "SpuCollisionTaskProcess.h"
SpuCollisionTaskProcess::SpuCollisionTaskProcess(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks)
:m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
m_workUnitTaskBuffers = (unsigned char *)0;
m_taskBusy.resize(m_maxNumOutstandingTasks);
m_spuGatherTaskDesc.resize(m_maxNumOutstandingTasks);
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_currentPage = 0;
m_currentPageEntry = 0;
#ifdef DEBUG_SpuCollisionTaskProcess
m_initialized = false;
#endif
m_threadInterface->startSPU();
//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput));
}
SpuCollisionTaskProcess::~SpuCollisionTaskProcess()
{
if (m_workUnitTaskBuffers != 0)
{
btAlignedFree(m_workUnitTaskBuffers);
m_workUnitTaskBuffers = 0;
}
m_threadInterface->stopSPU();
}
void SpuCollisionTaskProcess::initialize2(bool useEpa)
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("SpuCollisionTaskProcess::initialize()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
if (!m_workUnitTaskBuffers)
{
m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
}
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_currentPage = 0;
m_currentPageEntry = 0;
m_useEpa = useEpa;
#ifdef DEBUG_SpuCollisionTaskProcess
m_initialized = true;
assert(MIDPHASE_NUM_WORKUNITS_PER_TASK*sizeof(SpuGatherAndProcessWorkUnitInput) <= MIDPHASE_WORKUNIT_TASK_SIZE);
#endif
}
void SpuCollisionTaskProcess::issueTask2()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("SpuCollisionTaskProcess::issueTask (m_currentTask= %d\n)", m_currentTask);
#endif //DEBUG_SPU_TASK_SCHEDULING
m_taskBusy[m_currentTask] = true;
m_numBusyTasks++;
SpuGatherAndProcessPairsTaskDesc& taskDesc = m_spuGatherTaskDesc[m_currentTask];
taskDesc.m_useEpa = m_useEpa;
{
// send task description in event message
// no error checking here...
// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
taskDesc.inPtr = reinterpret_cast<uint64_t>(MIDPHASE_TASK_PTR(m_currentTask));
taskDesc.taskId = m_currentTask;
taskDesc.numPages = m_currentPage+1;
taskDesc.numOnLastPage = m_currentPageEntry;
}
m_threadInterface->sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (uint32_t) &taskDesc,m_currentTask);
// if all tasks busy, wait for spu event to clear the task.
if (m_numBusyTasks >= m_maxNumOutstandingTasks)
{
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
btAssert(taskId>=0);
m_threadInterface->waitForResponse(&taskId, &outputSize);
// printf("issueTask taskId %d completed, numBusy=%d\n",taskId,m_numBusyTasks);
//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
//postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}
void SpuCollisionTaskProcess::addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex)
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("#");
#endif //DEBUG_SPU_TASK_SCHEDULING
#ifdef DEBUG_SpuCollisionTaskProcess
assert(m_initialized);
assert(m_workUnitTaskBuffers);
#endif
bool batch = true;
if (batch)
{
if (m_currentPageEntry == MIDPHASE_NUM_WORKUNITS_PER_PAGE)
{
if (m_currentPage == MIDPHASE_NUM_WORKUNIT_PAGES-1)
{
// task buffer is full, issue current task.
// if all task buffers busy, this waits until SPU is done.
issueTask2();
// find new task buffer
for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
{
if (!m_taskBusy[i])
{
m_currentTask = i;
//init the task data
break;
}
}
m_currentPage = 0;
}
else
{
m_currentPage++;
}
m_currentPageEntry = 0;
}
}
{
SpuGatherAndProcessWorkUnitInput &wuInput =
*(reinterpret_cast<SpuGatherAndProcessWorkUnitInput*>
(MIDPHASE_ENTRY_PTR(m_currentTask, m_currentPage, m_currentPageEntry)));
wuInput.m_pairArrayPtr = reinterpret_cast<uint64_t>(pairArrayPtr);
wuInput.m_startIndex = startIndex;
wuInput.m_endIndex = endIndex;
m_currentPageEntry++;
if (!batch)
{
issueTask2();
// find new task buffer
for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
{
if (!m_taskBusy[i])
{
m_currentTask = i;
//init the task data
break;
}
}
m_currentPage = 0;
m_currentPageEntry =0;
}
}
}
void
SpuCollisionTaskProcess::flush2()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("\nSpuCollisionTaskProcess::flush()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
// if there's a partially filled task buffer, submit that task
if (m_currentPage > 0 || m_currentPageEntry > 0)
{
issueTask2();
}
// all tasks are issued, wait for all tasks to be complete
while(m_numBusyTasks > 0)
{
// Consolidating SPU code
unsigned int taskId=-1;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
btAssert(taskId>=0);
{
// SPURS support.
m_threadInterface->waitForResponse(&taskId, &outputSize);
}
// printf("flush2 taskId %d completed, numBusy =%d \n",taskId,m_numBusyTasks);
//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
//postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}

View File

@@ -0,0 +1,154 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_COLLISION_TASK_PROCESS_H
#define SPU_COLLISION_TASK_PROCESS_H
#include <assert.h>
#include <LinearMath/btScalar.h>
#include "PlatformDefinitions.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" // for definitions processCollisionTask and createCollisionLocalStoreMemory
#include "btThreadSupportInterface.h"
//#include "SPUAssert.h"
#include <string.h>
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "BulletCollision/CollisionShapes/btConvexShape.h"
#include <LinearMath/btAlignedAllocator.h>
#include <stdio.h>
#define DEBUG_SpuCollisionTaskProcess 1
#define CMD_GATHER_AND_PROCESS_PAIRLIST 1
class btCollisionObject;
class btPersistentManifold;
class btDispatcher;
/////Task Description for SPU collision detection
//struct SpuGatherAndProcessPairsTaskDesc
//{
// uint64_t inPtr;//m_pairArrayPtr;
// //mutex variable
// uint32_t m_someMutexVariableInMainMemory;
//
// uint64_t m_dispatcher;
//
// uint32_t numOnLastPage;
//
// uint16_t numPages;
// uint16_t taskId;
//
// struct CollisionTask_LocalStoreMemory* m_lsMemory;
//}
//
//#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
//__attribute__ ((aligned (16)))
//#endif
//;
///MidphaseWorkUnitInput stores individual primitive versus mesh collision detection input, to be processed by the SPU.
struct SpuGatherAndProcessWorkUnitInput
{
uint64_t m_pairArrayPtr;
int m_startIndex;
int m_endIndex;
};
/// SpuCollisionTaskProcess handles SPU processing of collision pairs.
/// Maintains a set of task buffers.
/// When the task is full, the task is issued for SPUs to process. Contact output goes into btPersistentManifold
/// associated with each task.
/// When PPU issues a task, it will look for completed task buffers
/// PPU will do postprocessing, dependent on workunit output (not likely)
class SpuCollisionTaskProcess
{
unsigned char *m_workUnitTaskBuffers;
// track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy;
btAlignedObjectArray<SpuGatherAndProcessPairsTaskDesc> m_spuGatherTaskDesc;
class btThreadSupportInterface* m_threadInterface;
unsigned int m_maxNumOutstandingTasks;
unsigned int m_numBusyTasks;
// the current task and the current entry to insert a new work unit
unsigned int m_currentTask;
unsigned int m_currentPage;
unsigned int m_currentPageEntry;
bool m_useEpa;
#ifdef DEBUG_SpuCollisionTaskProcess
bool m_initialized;
#endif
void issueTask2();
//void postProcess(unsigned int taskId, int outputSize);
public:
SpuCollisionTaskProcess(btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks);
~SpuCollisionTaskProcess();
///call initialize in the beginning of the frame, before addCollisionPairToTask
void initialize2(bool useEpa = false);
///batch up additional work to a current task for SPU processing. When batch is full, it issues the task.
void addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex);
///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
void flush2();
};
#define MIDPHASE_TASK_PTR(task) (&m_workUnitTaskBuffers[0] + MIDPHASE_WORKUNIT_TASK_SIZE*task)
#define MIDPHASE_ENTRY_PTR(task,page,entry) (MIDPHASE_TASK_PTR(task) + MIDPHASE_WORKUNIT_PAGE_SIZE*page + sizeof(SpuGatherAndProcessWorkUnitInput)*entry)
#define MIDPHASE_OUTPUT_PTR(task) (&m_contactOutputBuffers[0] + MIDPHASE_MAX_CONTACT_BUFFER_SIZE*task)
#define MIDPHASE_TREENODES_PTR(task) (&m_complexShapeBuffers[0] + MIDPHASE_COMPLEX_SHAPE_BUFFER_SIZE*task)
#define MIDPHASE_WORKUNIT_PAGE_SIZE (16)
#define MIDPHASE_NUM_WORKUNIT_PAGES 1
#define MIDPHASE_WORKUNIT_TASK_SIZE (MIDPHASE_WORKUNIT_PAGE_SIZE*MIDPHASE_NUM_WORKUNIT_PAGES)
#define MIDPHASE_NUM_WORKUNITS_PER_PAGE (MIDPHASE_WORKUNIT_PAGE_SIZE / sizeof(SpuGatherAndProcessWorkUnitInput))
#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
#endif // SPU_COLLISION_TASK_PROCESS_H

View File

@@ -0,0 +1,58 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuContactManifoldCollisionAlgorithm.h"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm()
:m_manifoldPtr(0)
{
}
void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
{
btAssert(0);
}
btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
{
btAssert(0);
return 1.f;
}
#ifndef __SPU__
SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
:btCollisionAlgorithm(ci)
{
m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
m_shapeType0 = body0->getCollisionShape()->getShapeType();
m_shapeType1 = body1->getCollisionShape()->getShapeType();
m_collisionMargin0 = body0->getCollisionShape()->getMargin();
m_collisionMargin1 = body1->getCollisionShape()->getMargin();
}
#endif //__SPU__
SpuContactManifoldCollisionAlgorithm::~SpuContactManifoldCollisionAlgorithm()
{
if (m_manifoldPtr)
m_dispatcher->releaseManifold(m_manifoldPtr);
}

View File

@@ -0,0 +1,89 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
class btPersistentManifold;
/// SpuContactManifoldCollisionAlgorithm provides contact manifold and should be processed on SPU.
ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm
{
btPersistentManifold* m_manifoldPtr;
int m_shapeType0;
int m_shapeType1;
float m_collisionMargin0;
float m_collisionMargin1;
public:
virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
SpuContactManifoldCollisionAlgorithm();
SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
virtual ~SpuContactManifoldCollisionAlgorithm();
virtual void getAllContactManifolds(btManifoldArray& manifoldArray)
{
if (m_manifoldPtr)
manifoldArray.push_back(m_manifoldPtr);
}
btPersistentManifold* getContactManifoldPtr()
{
return m_manifoldPtr;
}
int getShapeType0() const
{
return m_shapeType0;
}
int getShapeType1() const
{
return m_shapeType1;
}
float getCollisionMargin0() const
{
return m_collisionMargin0;
}
float getCollisionMargin1() const
{
return m_collisionMargin1;
}
struct CreateFunc :public btCollisionAlgorithmCreateFunc
{
virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
{
void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm));
return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
}
};
};
#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H

View File

@@ -0,0 +1,107 @@
#ifndef DOUBLE_BUFFER_H
#define DOUBLE_BUFFER_H
#include "SpuFakeDma.h"
#include <LinearMath/btScalar.h>
///DoubleBuffer
template<class T, int size>
class DoubleBuffer
{
#if defined(__SPU__) || defined(USE_LIBSPE2)
ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ;
ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
#else
T m_buffer0[size];
T m_buffer1[size];
#endif
T *m_frontBuffer;
T *m_backBuffer;
unsigned int m_dmaTag;
bool m_dmaPending;
public:
bool isPending() const { return m_dmaPending;}
DoubleBuffer();
void init ();
// dma get and put commands
void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag);
void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag);
// gets pointer to a buffer
T *getFront();
T *getBack();
// if back buffer dma was started, wait for it to complete
// then move back to front and vice versa
T *swapBuffers();
};
template<class T, int size>
DoubleBuffer<T,size>::DoubleBuffer()
{
init ();
}
template<class T, int size>
void DoubleBuffer<T,size>::init()
{
this->m_dmaPending = false;
this->m_frontBuffer = &this->m_buffer0[0];
this->m_backBuffer = &this->m_buffer1[0];
}
template<class T, int size>
void
DoubleBuffer<T,size>::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag)
{
m_dmaPending = true;
m_dmaTag = tag;
cellDmaLargeGet(m_backBuffer, ea, numBytes, tag, 0, 0);
}
template<class T, int size>
void
DoubleBuffer<T,size>::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag)
{
m_dmaPending = true;
m_dmaTag = tag;
cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0);
}
template<class T, int size>
T *
DoubleBuffer<T,size>::getFront()
{
return m_frontBuffer;
}
template<class T, int size>
T *
DoubleBuffer<T,size>::getBack()
{
return m_backBuffer;
}
template<class T, int size>
T *
DoubleBuffer<T,size>::swapBuffers()
{
if (m_dmaPending)
{
cellDmaWaitTagStatusAll(1<<m_dmaTag);
m_dmaPending = false;
}
T *tmp = m_backBuffer;
m_backBuffer = m_frontBuffer;
m_frontBuffer = tmp;
return m_frontBuffer;
}
#endif

View File

@@ -0,0 +1,195 @@
#include "SpuFakeDma.h"
#include <LinearMath/btScalar.h> //for btAssert
//Disabling memcpy sometimes helps debugging DMA
#define USE_MEMCPY 1
#ifdef USE_MEMCPY
#endif
void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
#if defined (__CELLOS_LV2__) || defined (USE_LIBSPE2)
cellDmaLargeGet(ls,ea,size,tag,tid,rid);
return ls;
#else
return (void*)(uint32_t)ea;
#endif
}
void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
#if defined (__SPU__) || defined (USE_LIBSPE2)
mfc_get(ls,ea,size,tag,0,0);
return ls;
#else
return (void*)(uint32_t)ea;
#endif
}
void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
#if defined (__SPU__) || defined (USE_LIBSPE2)
cellDmaGet(ls,ea,size,tag,tid,rid);
return ls;
#else
return (void*)(uint32_t)ea;
#endif
}
///this unalignedDma should not be frequently used, only for small data. It handles alignment and performs check on size (<16 bytes)
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
{
btAssert(size<32);
ATTRIBUTE_ALIGNED16(char tmpBuffer[32]);
char* mainMem = (char*)ea;
char* localStore = (char*)ls;
uint32_t i;
///make sure last 4 bits are the same, for cellDmaSmallGet
uint32_t last4BitsOffset = ea & 0x0f;
char* tmpTarget = tmpBuffer + last4BitsOffset;
#if defined (__SPU__) || defined (USE_LIBSPE2)
int remainingSize = size;
//#define FORCE_cellDmaUnalignedGet 1
#ifdef FORCE_cellDmaUnalignedGet
cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
#else
char* remainingTmpTarget = tmpTarget;
uint64_t remainingEa = ea;
while (remainingSize)
{
switch (remainingSize)
{
case 1:
case 2:
case 4:
case 8:
case 16:
{
mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0);
remainingSize=0;
break;
}
default:
{
//spu_printf("unaligned DMA with non-natural size:%d\n",remainingSize);
int actualSize = 0;
if (remainingSize > 16)
actualSize = 16;
else
if (remainingSize >8)
actualSize=8;
else
if (remainingSize >4)
actualSize=4;
else
if (remainingSize >2)
actualSize=2;
mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0);
remainingSize-=actualSize;
remainingTmpTarget+=actualSize;
remainingEa += actualSize;
}
}
}
#endif//FORCE_cellDmaUnalignedGet
#else
//copy into final destination
#ifdef USE_MEMCPY
memcpy(tmpTarget,mainMem,size);
#else
for ( i=0;i<size;i++)
{
tmpTarget[i] = mainMem[i];
}
#endif //USE_MEMCPY
#endif
cellDmaWaitTagStatusAll(DMA_MASK(1));
//this is slowish, perhaps memcpy on SPU is smarter?
for (i=0; btLikely( i<size );i++)
{
localStore[i] = tmpTarget[i];
}
return 0;
}
#if defined (__SPU__) || defined (USE_LIBSPE2)
#else
int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
char* mainMem = (char*)ea;
char* localStore = (char*)ls;
#ifdef USE_MEMCPY
memcpy(localStore,mainMem,size);
#else
for (uint32_t i=0;i<size;i++)
{
localStore[i] = mainMem[i];
}
#endif
return 0;
}
int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
char* mainMem = (char*)ea;
char* localStore = (char*)ls;
#ifdef USE_MEMCPY
memcpy(localStore,mainMem,size);
#else
for (uint32_t i=0;i<size;i++)
{
localStore[i] = mainMem[i];
}
#endif //#ifdef USE_MEMCPY
return 0;
}
int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
{
char* mainMem = (char*)ea;
const char* localStore = (const char*)ls;
#ifdef USE_MEMCPY
memcpy(mainMem,localStore,size);
#else
for (uint32_t i=0;i<size;i++)
{
mainMem[i] = localStore[i];
}
#endif //#ifdef USE_MEMCPY
return 0;
}
void cellDmaWaitTagStatusAll(int ignore)
{
}
#endif

View File

@@ -0,0 +1,121 @@
#ifndef FAKE_DMA_H
#define FAKE_DMA_H
#include "PlatformDefinitions.h"
#include "LinearMath/btScalar.h"
#ifdef __SPU__
#ifndef USE_LIBSPE2
#include <cell/dma.h>
#include <stdint.h>
#define DMA_TAG(xfer) (xfer + 1)
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
#else // !USE_LIBSPE2
#define DMA_TAG(xfer) (xfer + 1)
#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
#include <spu_mfcio.h>
#define DEBUG_DMA
#ifdef DEBUG_DMA
#define dUASSERT(a,b) if (!(a)) { printf(b);}
#define uintsize ppu_address_t
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
dUASSERT(size < 16384, "size too big: "); \
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
} \
mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
dUASSERT(size < 16384, "size too big: "); \
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
} \
mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
dUASSERT(size < 16384, "size too big: "); \
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
} \
mfc_put(ls, ea, size, tag, tid, rid)
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if ( (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0)) || (size > 16), "Not naturally aligned: "); \
dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
dUASSERT(size < 16384, "size too big: "); \
dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
} \
mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
#else
#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
#endif // DEBUG_DMA
#endif // USE_LIBSPE2
#else // !__SPU__
//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
//Potential to add networked simulation using this interface
#define DMA_TAG(a) (a)
#define DMA_MASK(a) (a)
/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
int cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
int cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
void cellDmaWaitTagStatusAll(int ignore);
#endif //__CELLOS_LV2__
///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
void* cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
void* cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
void* cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
#endif //FAKE_DMA_H

View File

@@ -0,0 +1,216 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuGatheringCollisionDispatcher.h"
#include "SpuCollisionTaskProcess.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h"
#include "SpuContactManifoldCollisionAlgorithm.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
SpuGatheringCollisionDispatcher::SpuGatheringCollisionDispatcher(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration)
:btCollisionDispatcher(collisionConfiguration),
m_spuCollisionTaskProcess(0),
m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
}
bool SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,int proxyType1)
{
bool supported0 = (
(proxyType0 == BOX_SHAPE_PROXYTYPE) ||
(proxyType0 == TRIANGLE_SHAPE_PROXYTYPE) ||
(proxyType0 == SPHERE_SHAPE_PROXYTYPE) ||
(proxyType0 == CAPSULE_SHAPE_PROXYTYPE) ||
(proxyType0 == CYLINDER_SHAPE_PROXYTYPE) ||
// (proxyType0 == CONE_SHAPE_PROXYTYPE) ||
(proxyType0 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
(proxyType0 == CONVEX_HULL_SHAPE_PROXYTYPE)||
(proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
);
bool supported1 = (
(proxyType1 == BOX_SHAPE_PROXYTYPE) ||
(proxyType1 == TRIANGLE_SHAPE_PROXYTYPE) ||
(proxyType1 == SPHERE_SHAPE_PROXYTYPE) ||
(proxyType1 == CAPSULE_SHAPE_PROXYTYPE) ||
(proxyType1 == CYLINDER_SHAPE_PROXYTYPE) ||
// (proxyType1 == CONE_SHAPE_PROXYTYPE) ||
(proxyType1 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
(proxyType1 == CONVEX_HULL_SHAPE_PROXYTYPE) ||
(proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
);
return supported0 && supported1;
}
SpuGatheringCollisionDispatcher::~SpuGatheringCollisionDispatcher()
{
if (m_spuCollisionTaskProcess)
delete m_spuCollisionTaskProcess;
}
#include "stdio.h"
///interface for iterating all overlapping collision pairs, no matter how those pairs are stored (array, set, map etc)
///this is useful for the collision dispatcher.
class btSpuCollisionPairCallback : public btOverlapCallback
{
const btDispatcherInfo& m_dispatchInfo;
SpuGatheringCollisionDispatcher* m_dispatcher;
public:
btSpuCollisionPairCallback(const btDispatcherInfo& dispatchInfo, SpuGatheringCollisionDispatcher* dispatcher)
:m_dispatchInfo(dispatchInfo),
m_dispatcher(dispatcher)
{
}
virtual bool processOverlap(btBroadphasePair& collisionPair)
{
//PPU version
//(*m_dispatcher->getNearCallback())(collisionPair,*m_dispatcher,m_dispatchInfo);
//only support discrete collision detection for now, we could fallback on PPU/unoptimized version for TOI/CCD
btAssert(m_dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE);
//by default, Bullet will use this near callback
{
///userInfo is used to determine if the SPU has to handle this case or not (skip PPU tasks)
if (!collisionPair.m_userInfo)
{
collisionPair.m_userInfo = (void*) 1;
}
if (!collisionPair.m_algorithm)
{
btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
btCollisionAlgorithmConstructionInfo ci;
ci.m_dispatcher1 = m_dispatcher;
ci.m_manifold = 0;
if (m_dispatcher->needsCollision(colObj0,colObj1))
{
int proxyType0 = colObj0->getCollisionShape()->getShapeType();
int proxyType1 = colObj1->getCollisionShape()->getShapeType();
if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1))
{
int so = sizeof(SpuContactManifoldCollisionAlgorithm);
void* mem = m_dispatcher->allocateCollisionAlgorithm(so);
collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1);
collisionPair.m_userInfo = (void*) 2;
} else
{
collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
collisionPair.m_userInfo = (void*)3;
}
}
}
}
return false;
}
};
void SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo, btDispatcher* dispatcher)
{
if (dispatchInfo.m_enableSPU)
{
if (!m_spuCollisionTaskProcess)
m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
///modified version of btCollisionDispatcher::dispatchAllCollisionPairs:
{
btSpuCollisionPairCallback collisionCallback(dispatchInfo,this);
pairCache->processAllOverlappingPairs(&collisionCallback,dispatcher);
}
//send one big batch
int numTotalPairs = pairCache->getNumOverlappingPairs();
btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
int i;
for (i=0;i<numTotalPairs;)
{
//Performance Hint: tweak this number during benchmarking
static const int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS;
int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
i = endIndex;
}
//handle PPU fallback pairs
for (i=0;i<numTotalPairs;i++)
{
btBroadphasePair& collisionPair = pairPtr[i];
if (collisionPair.m_userInfo == (void*)3)
{
if (collisionPair.m_algorithm)
{
btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
if (dispatcher->needsCollision(colObj0,colObj1))
{
btManifoldResult contactPointResult(colObj0,colObj1);
if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE)
{
//discrete collision detection query
collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
} else
{
//continuous collision detection query, time of impact (toi)
btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
if (dispatchInfo.m_timeOfImpact > toi)
dispatchInfo.m_timeOfImpact = toi;
}
}
}
}
}
//make sure all SPU work is done
m_spuCollisionTaskProcess->flush2();
} else
{
///PPU fallback
///!Need to make sure to clear all 'algorithms' when switching between SPU and PPU
btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher);
}
}

View File

@@ -0,0 +1,64 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H
#define SPU_GATHERING_COLLISION__DISPATCHER_H
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
///Tuning value to optimized SPU utilization
///Too small value means Task overhead is large compared to computation (too fine granularity)
///Too big value might render some SPUs are idle, while a few other SPUs are doing all work.
#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
class SpuCollisionTaskProcess;
///SpuGatheringCollisionDispatcher can use SPU to gather and calculate collision detection
///Time of Impact, Closest Points and Penetration Depth.
class SpuGatheringCollisionDispatcher : public btCollisionDispatcher
{
SpuCollisionTaskProcess* m_spuCollisionTaskProcess;
protected:
class btThreadSupportInterface* m_threadInterface;
unsigned int m_maxNumOutstandingTasks;
public:
//can be used by SPU collision algorithms
SpuCollisionTaskProcess* getSpuCollisionTaskProcess()
{
return m_spuCollisionTaskProcess;
}
SpuGatheringCollisionDispatcher (class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration);
virtual ~SpuGatheringCollisionDispatcher();
bool supportsDispatchPairOnSpu(int proxyType0,int proxyType1);
virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
};
#endif //SPU_GATHERING_COLLISION__DISPATCHER_H

View File

@@ -0,0 +1 @@
Empty placeholder for future Libspe2 SPU task

View File

@@ -0,0 +1,257 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifdef USE_LIBSPE2
#include "SpuLibspe2Support.h"
//SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
///Setup and initialize SPU/CELL/Libspe2
SpuLibspe2Support::SpuLibspe2Support(spe_program_handle_t *speprog, int numThreads)
{
this->program = speprog;
this->numThreads = ((numThreads <= spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1)) ? numThreads : spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1));
}
///cleanup/shutdown Libspe2
SpuLibspe2Support::~SpuLibspe2Support()
{
stopSPU();
}
///send messages to SPUs
void SpuLibspe2Support::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1)
{
spe_context_ptr_t context;
switch (uiCommand)
{
case CMD_SAMPLE_TASK_COMMAND:
{
//get taskdescription
SpuSampleTaskDesc* taskDesc = (SpuSampleTaskDesc*) uiArgument0;
btAssert(taskDesc->m_taskId<m_activeSpuStatus.size());
//get status of SPU on which task should run
btSpuStatus& spuStatus = m_activeSpuStatus[taskDesc->m_taskId];
//set data for spuStatus
spuStatus.m_commandId = uiCommand;
spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
spuStatus.m_taskDesc.p = taskDesc;
//get context
context = data[taskDesc->m_taskId].context;
taskDesc->m_mainMemoryPtr = reinterpret_cast<uint64_t> (spuStatus.m_lsMemory.p);
break;
}
case CMD_GATHER_AND_PROCESS_PAIRLIST:
{
//get taskdescription
SpuGatherAndProcessPairsTaskDesc* taskDesc = (SpuGatherAndProcessPairsTaskDesc*) uiArgument0;
btAssert(taskDesc->taskId<m_activeSpuStatus.size());
//get status of SPU on which task should run
btSpuStatus& spuStatus = m_activeSpuStatus[taskDesc->taskId];
//set data for spuStatus
spuStatus.m_commandId = uiCommand;
spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
spuStatus.m_taskDesc.p = taskDesc;
//get context
context = data[taskDesc->taskId].context;
taskDesc->m_lsMemory = (CollisionTask_LocalStoreMemory*)spuStatus.m_lsMemory.p;
break;
}
default:
{
///not implemented
btAssert(0);
}
};
//write taskdescription in mailbox
unsigned int event = Spu_Mailbox_Event_Task;
spe_in_mbox_write(context, &event, 1, SPE_MBOX_ANY_NONBLOCKING);
}
///check for messages from SPUs
void SpuLibspe2Support::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
{
///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
btAssert(m_activeSpuStatus.size());
int last = -1;
//find an active spu/thread
while(last < 0)
{
for (int i=0;i<m_activeSpuStatus.size();i++)
{
if ( m_activeSpuStatus[i].m_status == Spu_Status_Free)
{
last = i;
break;
}
}
if(last < 0)
sched_yield();
}
btSpuStatus& spuStatus = m_activeSpuStatus[last];
///need to find an active spu
btAssert(last>=0);
*puiArgument0 = spuStatus.m_taskId;
*puiArgument1 = spuStatus.m_status;
}
void SpuLibspe2Support::startSPU()
{
this->internal_startSPU();
}
///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
void SpuLibspe2Support::internal_startSPU()
{
m_activeSpuStatus.resize(numThreads);
for (int i=0; i < numThreads; i++)
{
if(data[i].context == NULL)
{
/* Create context */
if ((data[i].context = spe_context_create(0, NULL)) == NULL)
{
perror ("Failed creating context");
exit(1);
}
/* Load program into context */
if(spe_program_load(data[i].context, this->program))
{
perror ("Failed loading program");
exit(1);
}
m_activeSpuStatus[i].m_status = Spu_Status_Startup;
m_activeSpuStatus[i].m_taskId = i;
m_activeSpuStatus[i].m_commandId = 0;
m_activeSpuStatus[i].m_lsMemory.p = NULL;
data[i].entry = SPE_DEFAULT_ENTRY;
data[i].flags = 0;
data[i].argp.p = &m_activeSpuStatus[i];
data[i].envp.p = NULL;
/* Create thread for each SPE context */
if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) ))
{
perror ("Failed creating thread");
exit(1);
}
/*
else
{
printf("started thread %d\n",i);
}*/
}
}
for (int i=0; i < numThreads; i++)
{
if(data[i].context != NULL)
{
while( m_activeSpuStatus[i].m_status == Spu_Status_Startup)
{
// wait for spu to set up
sched_yield();
}
printf("Spu %d is ready\n", i);
}
}
}
///tell the task scheduler we are done with the SPU tasks
void SpuLibspe2Support::stopSPU()
{
// wait for all threads to finish
int i;
for ( i = 0; i < this->numThreads; i++ )
{
unsigned int event = Spu_Mailbox_Event_Shutdown;
spe_context_ptr_t context = data[i].context;
spe_in_mbox_write(context, &event, 1, SPE_MBOX_ALL_BLOCKING);
pthread_join (data[i].pthread, NULL);
}
// close SPE program
spe_image_close(program);
// destroy SPE contexts
for ( i = 0; i < this->numThreads; i++ )
{
if(data[i].context != NULL)
{
spe_context_destroy (data[i].context);
}
}
m_activeSpuStatus.clear();
}
#endif //USE_LIBSPE2

View File

@@ -0,0 +1,173 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_LIBSPE2_SUPPORT_H
#define SPU_LIBSPE2_SUPPORT_H
#include <LinearMath/btScalar.h> //for uint32_t etc.
#ifdef USE_LIBSPE2
#include <stdlib.h>
#include <stdio.h>
//#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
#include "PlatformDefinitions.h"
//extern struct SpuGatherAndProcessPairsTaskDesc;
enum
{
Spu_Mailbox_Event_Nothing = 0,
Spu_Mailbox_Event_Task = 1,
Spu_Mailbox_Event_Shutdown = 2,
Spu_Mailbox_Event_ForceDword = 0xFFFFFFFF
};
enum
{
Spu_Status_Free = 0,
Spu_Status_Occupied = 1,
Spu_Status_Startup = 2,
Spu_Status_ForceDword = 0xFFFFFFFF
};
struct btSpuStatus
{
uint32_t m_taskId;
uint32_t m_commandId;
uint32_t m_status;
addr64 m_taskDesc;
addr64 m_lsMemory;
}
__attribute__ ((aligned (128)))
;
#ifndef __SPU__
#include "LinearMath/btAlignedObjectArray.h"
#include "SpuCollisionTaskProcess.h"
#include "SpuSampleTaskProcess.h"
#include "btThreadSupportInterface.h"
#include <libspe2.h>
#include <pthread.h>
#include <sched.h>
#define MAX_SPUS 4
typedef struct ppu_pthread_data
{
spe_context_ptr_t context;
pthread_t pthread;
unsigned int entry;
unsigned int flags;
addr64 argp;
addr64 envp;
spe_stop_info_t stopinfo;
} ppu_pthread_data_t;
static void *ppu_pthread_function(void *arg)
{
ppu_pthread_data_t * datap = (ppu_pthread_data_t *)arg;
/*
int rc;
do
{*/
spe_context_run(datap->context, &datap->entry, datap->flags, datap->argp.p, datap->envp.p, &datap->stopinfo);
if (datap->stopinfo.stop_reason == SPE_EXIT)
{
if (datap->stopinfo.result.spe_exit_code != 0)
{
perror("FAILED: SPE returned a non-zero exit status: \n");
exit(1);
}
}
else
{
perror("FAILED: SPE abnormally terminated\n");
exit(1);
}
//} while (rc > 0); // loop until exit or error, and while any stop & signal
pthread_exit(NULL);
}
///SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class SpuLibspe2Support : public btThreadSupportInterface
{
btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
public:
//Setup and initialize SPU/CELL/Libspe2
SpuLibspe2Support(spe_program_handle_t *speprog,int numThreads);
// SPE program handle ptr.
spe_program_handle_t *program;
// SPE program data
ppu_pthread_data_t data[MAX_SPUS];
//cleanup/shutdown Libspe2
~SpuLibspe2Support();
///send messages to SPUs
void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1=0);
//check for messages from SPUs
void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
//start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
virtual void startSPU();
//tell the task scheduler we are done with the SPU tasks
virtual void stopSPU();
private:
///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
void internal_startSPU();
int numThreads;
};
#endif // NOT __SPU__
#endif //USE_LIBSPE2
#endif //SPU_LIBSPE2_SUPPORT_H

View File

@@ -0,0 +1,487 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuCollisionShapes.h"
btPoint3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex)
{
switch (shapeType)
{
case SPHERE_SHAPE_PROXYTYPE:
{
return btPoint3(0,0,0);
}
case BOX_SHAPE_PROXYTYPE:
{
// spu_printf("SPU: getSupport BOX_SHAPE_PROXYTYPE\n");
btConvexInternalShape* convexShape = (btConvexInternalShape*)shape;
const btVector3& halfExtents = convexShape->getImplicitShapeDimensions();
return btPoint3(
localDir.getX() < 0.0f ? -halfExtents.x() : halfExtents.x(),
localDir.getY() < 0.0f ? -halfExtents.y() : halfExtents.y(),
localDir.getZ() < 0.0f ? -halfExtents.z() : halfExtents.z());
}
case TRIANGLE_SHAPE_PROXYTYPE:
{
btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ());
btVector3* vertices = (btVector3*)shape;
btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2]));
btVector3 sup = vertices[dots.maxAxis()];
return btPoint3(sup.getX(),sup.getY(),sup.getZ());
break;
}
case CYLINDER_SHAPE_PROXYTYPE:
{
btCylinderShape* cylShape = (btCylinderShape*)shape;
//mapping of halfextents/dimension onto radius/height depends on how cylinder local orientation is (upAxis)
btVector3 halfExtents = cylShape->getImplicitShapeDimensions();
btVector3 v(localDir.getX(),localDir.getY(),localDir.getZ());
int cylinderUpAxis = cylShape->getUpAxis();
int XX(1),YY(0),ZZ(2);
switch (cylinderUpAxis)
{
case 0:
{
XX = 1;
YY = 0;
ZZ = 2;
break;
}
case 1:
{
XX = 0;
YY = 1;
ZZ = 2;
break;
}
case 2:
{
XX = 0;
YY = 2;
ZZ = 1;
break;
}
default:
btAssert(0);
//printf("SPU:localGetSupportingVertexWithoutMargin unknown Cylinder up-axis\n");
};
btScalar radius = halfExtents[XX];
btScalar halfHeight = halfExtents[cylinderUpAxis];
btVector3 tmp;
btScalar d ;
btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]);
if (s != btScalar(0.0))
{
d = radius / s;
tmp[XX] = v[XX] * d;
tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
tmp[ZZ] = v[ZZ] * d;
return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ());
}
else
{
tmp[XX] = radius;
tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
tmp[ZZ] = btScalar(0.0);
return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ());
}
}
case CAPSULE_SHAPE_PROXYTYPE:
{
//spu_printf("SPU: todo: getSupport CAPSULE_SHAPE_PROXYTYPE\n");
btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ());
btCapsuleShape* capsuleShape = (btCapsuleShape*)shape;
btVector3 halfExtents = capsuleShape->getImplicitShapeDimensions();
btScalar halfHeight = capsuleShape->getHalfHeight();
int capsuleUpAxis = capsuleShape->getUpAxis();
btScalar radius = capsuleShape->getRadius();
btVector3 supVec(0,0,0);
btScalar maxDot(btScalar(-1e30));
btVector3 vec = vec0;
btScalar lenSqr = vec.length2();
if (lenSqr < btScalar(0.0001))
{
vec.setValue(1,0,0);
} else
{
btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
vec *= rlen;
}
btVector3 vtx;
btScalar newDot;
{
btVector3 pos(0,0,0);
pos[capsuleUpAxis] = halfHeight;
vtx = pos +vec*(radius);
newDot = vec.dot(vtx);
if (newDot > maxDot)
{
maxDot = newDot;
supVec = vtx;
}
}
{
btVector3 pos(0,0,0);
pos[capsuleUpAxis] = -halfHeight;
vtx = pos +vec*(radius);
newDot = vec.dot(vtx);
if (newDot > maxDot)
{
maxDot = newDot;
supVec = vtx;
}
}
return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ());
break;
};
case CONVEX_HULL_SHAPE_PROXYTYPE:
{
//spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n");
btPoint3* points = 0;
int numPoints = 0;
points = convexVertexData->gConvexPoints;
numPoints = convexVertexData->gNumConvexPoints;
// spu_printf("numPoints = %d\n",numPoints);
btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.));
btScalar newDot,maxDot = btScalar(-1e30);
btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ());
btVector3 vec = vec0;
btScalar lenSqr = vec.length2();
if (lenSqr < btScalar(0.0001))
{
vec.setValue(1,0,0);
} else
{
btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
vec *= rlen;
}
for (int i=0;i<numPoints;i++)
{
btPoint3 vtx = points[i];// * m_localScaling;
newDot = vec.dot(vtx);
if (newDot > maxDot)
{
maxDot = newDot;
supVec = vtx;
}
}
return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ());
break;
};
default:
//spu_printf("SPU:(type %i) missing support function\n",shapeType);
#if __ASSERT
// spu_printf("localGetSupportingVertexWithoutMargin() - Unsupported bound type: %d.\n", shapeType);
#endif // __ASSERT
return btPoint3(0.f, 0.f, 0.f);
}
}
void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform)
{
//calculate the aabb, given the types...
switch (shapeType)
{
case CYLINDER_SHAPE_PROXYTYPE:
/* fall through */
case BOX_SHAPE_PROXYTYPE:
{
float margin=convexShape->getMarginNV();
btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
halfExtents += btVector3(margin,margin,margin);
btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute();
btPoint3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
aabbMin = center - extent;
aabbMax = center + extent;
break;
}
case CAPSULE_SHAPE_PROXYTYPE:
{
float margin=convexShape->getMarginNV();
btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
//add the radius to y-axis to get full height
btScalar radius = halfExtents[0];
halfExtents[1] += radius;
halfExtents += btVector3(margin,margin,margin);
#if 0
int capsuleUpAxis = convexShape->getUpAxis();
btScalar halfHeight = convexShape->getHalfHeight();
btScalar radius = convexShape->getRadius();
halfExtents[capsuleUpAxis] = radius + halfHeight;
#endif
btTransform& t = xform;
btMatrix3x3 abs_b = t.getBasis().absolute();
btPoint3 center = t.getOrigin();
btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
aabbMin = center - extent;
aabbMax = center + extent;
break;
}
case SPHERE_SHAPE_PROXYTYPE:
{
float radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
float margin = radius + convexShape->getMarginNV();
btTransform& t = xform;
const btVector3& center = t.getOrigin();
btVector3 extent(margin,margin,margin);
aabbMin = center - extent;
aabbMax = center + extent;
break;
}
case CONVEX_HULL_SHAPE_PROXYTYPE:
{
ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
cellDmaGet(&convexHullShape0, convexShapePtr , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0;
btTransform& t = xform;
btScalar margin = convexShape->getMarginNV();
localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin);
//spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ());
//spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ());
break;
}
default:
{
// spu_printf("SPU: unsupported shapetype %d in AABB calculation\n");
}
};
}
void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape)
{
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
dmaSize = sizeof(btTriangleIndexVertexArray);
dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getMeshInterface());
// spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2);
#ifdef __SPU__
cellDmaGet(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
bvhMeshShape->gTriangleMeshInterfacePtr = &bvhMeshShape->gTriangleMeshInterfaceStorage;
#else
bvhMeshShape->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&bvhMeshShape->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0);
#endif
//cellDmaWaitTagStatusAll(DMA_MASK(1));
///now DMA over the BVH
dmaSize = sizeof(btOptimizedBvh);
dmaPpuAddress2 = reinterpret_cast<ppu_address_t>(triMeshShape->getOptimizedBvh());
//spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2);
cellDmaGet(&bvhMeshShape->gOptimizedBvh, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
//cellDmaWaitTagStatusAll(DMA_MASK(2));
cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
}
void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag)
{
cellDmaGet(IndexMesh, (ppu_address_t)&indexArray[index] , sizeof(btIndexedMesh), DMA_TAG(dmaTag), 0, 0);
}
void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag)
{
cellDmaGet(subTreeHeaders, subTreePtr, batchSize * sizeof(btBvhSubtreeInfo), DMA_TAG(dmaTag), 0, 0);
}
void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag)
{
cellDmaGet(nodes, reinterpret_cast<ppu_address_t>(&nodeArray[subtree.m_rootNodeIndex]) , subtree.m_subtreeSize* sizeof(btQuantizedBvhNode), DMA_TAG(2), 0, 0);
}
///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck
int getShapeTypeSize(int shapeType)
{
switch (shapeType)
{
case CYLINDER_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btCylinderShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case BOX_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btBoxShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case SPHERE_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btSphereShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case TRIANGLE_MESH_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btBvhTriangleMeshShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case CAPSULE_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btCapsuleShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case CONVEX_HULL_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btConvexHullShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
case COMPOUND_SHAPE_PROXYTYPE:
{
int shapeSize = sizeof(btCompoundShape);
btAssert(shapeSize < MAX_SHAPE_SIZE);
return shapeSize;
}
default:
btAssert(0);
//unsupported shapetype, please add here
return 0;
}
}
void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU)
{
convexVertexData->gNumConvexPoints = convexShapeSPU->getNumPoints();
if (convexVertexData->gNumConvexPoints>MAX_NUM_SPU_CONVEX_POINTS)
{
btAssert(0);
// spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,convexVertexData->gNumConvexPoints);
return;
}
register int dmaSize = convexVertexData->gNumConvexPoints*sizeof(btPoint3);
ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getPoints();
cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU , dmaSize, DMA_TAG(2), 0, 0);
}
void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType)
{
register int dmaSize = getShapeTypeSize(shapeType);
cellDmaGet(collisionShapeLocation, collisionShapePtr , dmaSize, DMA_TAG(dmaTag), 0, 0);
//cellDmaWaitTagStatusAll(DMA_MASK(dmaTag));
}
void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
{
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
int childShapeCount = spuCompoundShape->getNumChildShapes();
dmaSize = childShapeCount * sizeof(btCompoundShapeChild);
dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList();
cellDmaGet(&compoundShapeLocation->gSubshapes[0], dmaPpuAddress2, dmaSize, DMA_TAG(dmaTag), 0, 0);
}
void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
{
int childShapeCount = spuCompoundShape->getNumChildShapes();
int i;
// DMA all the subshapes
for ( i = 0; i < childShapeCount; ++i)
{
btCompoundShapeChild& childShape = compoundShapeLocation->gSubshapes[i];
dmaCollisionShape (&compoundShapeLocation->gSubshapeShape[i],(ppu_address_t)childShape.m_childShape, dmaTag, childShape.m_childShapeType);
}
}
void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex)
{
int curIndex = startNodeIndex;
int walkIterations = 0;
int subTreeSize = endNodeIndex - startNodeIndex;
int escapeIndex;
unsigned int aabbOverlap, isLeafNode;
while (curIndex < endNodeIndex)
{
//catch bugs in tree data
assert (walkIterations < subTreeSize);
walkIterations++;
aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
isLeafNode = rootNode->isLeafNode();
if (isLeafNode && aabbOverlap)
{
//printf("overlap with node %d\n",rootNode->getTriangleIndex());
nodeCallback->processNode(0,rootNode->getTriangleIndex());
// spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex());
}
if (aabbOverlap || isLeafNode)
{
rootNode++;
curIndex++;
} else
{
escapeIndex = rootNode->getEscapeIndex();
rootNode += escapeIndex;
curIndex += escapeIndex;
}
}
}

View File

@@ -0,0 +1,102 @@
#ifndef __SPU_COLLISION_SHAPES_H
#define __SPU_COLLISION_SHAPES_H
#include "../SpuDoubleBuffer.h"
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
#include "BulletCollision/CollisionShapes/btCylinderShape.h"
#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
#include "BulletCollision/CollisionShapes/btSphereShape.h"
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
#include "BulletCollision/CollisionShapes/btConvexShape.h"
#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
#include "BulletCollision/CollisionShapes/btCompoundShape.h"
#define MAX_NUM_SPU_CONVEX_POINTS 128
struct SpuConvexPolyhedronVertexData
{
void* gSpuConvexShapePtr;
btPoint3* gConvexPoints;
int gNumConvexPoints;
ATTRIBUTE_ALIGNED16(btPoint3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]);
};
#define MAX_SHAPE_SIZE 256
struct CollisionShape_LocalStoreMemory
{
ATTRIBUTE_ALIGNED16(char collisionShape[MAX_SHAPE_SIZE]);
};
struct CompoundShape_LocalStoreMemory
{
// Compound data
#define MAX_SPU_COMPOUND_SUBSHAPES 16
ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]);
ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]);
};
struct bvhMeshShape_LocalStoreMemory
{
//ATTRIBUTE_ALIGNED16(btOptimizedBvh gOptimizedBvh);
ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]);
btOptimizedBvh* getOptimizedBvh()
{
return (btOptimizedBvh*) gOptimizedBvh;
}
ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterfaceStorage);
btTriangleIndexVertexArray* gTriangleMeshInterfacePtr;
///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment
ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh);
#define MAX_SPU_SUBTREE_HEADERS 32
//1024
ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]);
ATTRIBUTE_ALIGNED16(btQuantizedBvhNode gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]);
};
btPoint3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, const btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData);//, int *featureIndex)
void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform);
void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape);
void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag);
void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag);
void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray& nodeArray, int dmaTag);
int getShapeTypeSize(int shapeType);
void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU);
void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType);
void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
#define USE_BRANCHFREE_TEST 1
#ifdef USE_BRANCHFREE_TEST
SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{
return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
1, 0);
}
#else
unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{
unsigned int overlap = 1;
overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap;
overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap;
overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap;
return overlap;
}
#endif
void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex);
#endif

View File

@@ -0,0 +1,227 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuContactResult.h"
//#define DEBUG_SPU_COLLISION_DETECTION 1
SpuContactResult::SpuContactResult()
{
m_manifoldAddress = 0;
m_spuManifold = NULL;
m_RequiresWriteBack = false;
}
SpuContactResult::~SpuContactResult()
{
g_manifoldDmaExport.swapBuffers();
}
///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
inline btScalar calculateCombinedFriction(btScalar friction0,btScalar friction1)
{
btScalar friction = friction0*friction1;
const btScalar MAX_FRICTION = btScalar(10.);
if (friction < -MAX_FRICTION)
friction = -MAX_FRICTION;
if (friction > MAX_FRICTION)
friction = MAX_FRICTION;
return friction;
}
inline btScalar calculateCombinedRestitution(btScalar restitution0,btScalar restitution1)
{
return restitution0*restitution1;
}
void SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped)
{
//spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress);
m_rootWorldTransform0 = worldTrans0;
m_rootWorldTransform1 = worldTrans1;
m_manifoldAddress = manifoldAddress;
m_spuManifold = spuManifold;
m_combinedFriction = calculateCombinedFriction(friction0,friction1);
m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1);
m_isSwapped = isSwapped;
}
void SpuContactResult::setShapeIdentifiers(int partId0,int index0, int partId1,int index1)
{
}
///return true if it requires a dma transfer back
bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
const btVector3& pointInWorld,
float depth,
btPersistentManifold* manifoldPtr,
btTransform& transA,
btTransform& transB,
btScalar combinedFriction,
btScalar combinedRestitution,
bool isSwapped)
{
float contactTreshold = manifoldPtr->getContactBreakingThreshold();
//spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr);
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("SPU: contactTreshold %f\n",contactTreshold);
#endif //DEBUG_SPU_COLLISION_DETECTION
if (depth > manifoldPtr->getContactBreakingThreshold())
return false;
//provide inverses or just calculate?
btTransform transAInv = transA.inverse();//m_body0->m_cachedInvertedWorldTransform;
btTransform transBInv= transB.inverse();//m_body1->m_cachedInvertedWorldTransform;
btVector3 pointA;
btVector3 localA;
btVector3 localB;
btVector3 normal;
if (isSwapped)
{
normal = normalOnBInWorld * -1;
pointA = pointInWorld + normal * depth;
localA = transAInv(pointA );
localB = transBInv(pointInWorld);
/*localA = transBInv(pointA );
localB = transAInv(pointInWorld);*/
}
else
{
normal = normalOnBInWorld;
pointA = pointInWorld + normal * depth;
localA = transAInv(pointA );
localB = transBInv(pointInWorld);
}
btManifoldPoint newPt(localA,localB,normal,depth);
int insertIndex = manifoldPtr->getCacheEntry(newPt);
if (insertIndex >= 0)
{
// manifoldPtr->replaceContactPoint(newPt,insertIndex);
// return true;
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("SPU: same contact detected, nothing done\n");
#endif //DEBUG_SPU_COLLISION_DETECTION
// This is not needed, just use the old info! saves a DMA transfer as well
} else
{
newPt.m_combinedFriction = combinedFriction;
newPt.m_combinedRestitution = combinedRestitution;
/*
//potential TODO: SPU callbacks, either immediate (local on the SPU), or deferred
//User can override friction and/or restitution
if (gContactAddedCallback &&
//and if either of the two bodies requires custom material
((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) ||
(m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback)))
{
//experimental feature info, for per-triangle material etc.
(*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1);
}
*/
manifoldPtr->addManifoldPoint(newPt);
return true;
}
return false;
}
void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold)
{
memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold));
g_manifoldDmaExport.swapBuffers();
uint64_t mmAddr = (uint32_t)mmManifold;
g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9));
// Should there be any kind of wait here? What if somebody tries to use this tag again? What if we call this function again really soon?
//no, the swapBuffers does the wait
}
void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btPoint3& pointInWorld,float depth)
{
//spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
#ifdef DEBUG_SPU_COLLISION_DETECTION
// int sman = sizeof(rage::phManifold);
// spu_printf("sizeof_manifold = %i\n",sman);
#endif //DEBUG_SPU_COLLISION_DETECTION
btPersistentManifold* localManifold = m_spuManifold;
btVector3 normalB(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
btVector3 pointWrld(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
//process the contact point
const bool retVal = ManifoldResultAddContactPoint(normalB,
pointWrld,
depth,
localManifold,
m_rootWorldTransform0,
m_rootWorldTransform1,
m_combinedFriction,
m_combinedRestitution,
m_isSwapped);
m_RequiresWriteBack = m_RequiresWriteBack || retVal;
}
void SpuContactResult::flush()
{
if (m_spuManifold && m_spuManifold->getNumContacts())
{
m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1);
m_RequiresWriteBack = true;
}
if (m_RequiresWriteBack)
{
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n");
spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts());
spu_printf("Manifold address: %llu\n", m_manifoldAddress);
#endif //DEBUG_SPU_COLLISION_DETECTION
// spu_printf("writeDoubleBufferedManifold\n");
writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress);
#ifdef DEBUG_SPU_COLLISION_DETECTION
spu_printf("SPU: Finished (Put) DMA\n");
#endif //DEBUG_SPU_COLLISION_DETECTION
}
m_spuManifold = NULL;
m_RequiresWriteBack = false;
}

View File

@@ -0,0 +1,113 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_CONTACT_RESULT2_H
#define SPU_CONTACT_RESULT2_H
#ifndef WIN32
#include <stdint.h>
#endif
#include "../SpuDoubleBuffer.h"
#include "LinearMath/btTransform.h"
#include "LinearMath/btPoint3.h"
#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
struct SpuCollisionPairInput
{
ppu_address_t m_collisionShapes[2];
void* m_spuCollisionShapes[2];
ppu_address_t m_persistentManifoldPtr;
btVector3 m_primitiveDimensions0;
btVector3 m_primitiveDimensions1;
int m_shapeType0;
int m_shapeType1;
float m_collisionMargin0;
float m_collisionMargin1;
btTransform m_worldTransform0;
btTransform m_worldTransform1;
bool m_isSwapped;
bool m_useEpa;
};
struct SpuClosestPointInput
{
SpuClosestPointInput()
:m_maximumDistanceSquared(float(1e30)),
m_stackAlloc(0)
{
}
btTransform m_transformA;
btTransform m_transformB;
float m_maximumDistanceSquared;
class btStackAlloc* m_stackAlloc;
struct SpuConvexPolyhedronVertexData* m_convexVertexData[2];
};
///SpuContactResult exports the contact points using double-buffered DMA transfers, only when needed
///So when an existing contact point is duplicated, no transfer/refresh is performed.
class SpuContactResult
{
btTransform m_rootWorldTransform0;
btTransform m_rootWorldTransform1;
ppu_address_t m_manifoldAddress;
btPersistentManifold* m_spuManifold;
bool m_RequiresWriteBack;
btScalar m_combinedFriction;
btScalar m_combinedRestitution;
bool m_isSwapped;
DoubleBuffer<btPersistentManifold, 1> g_manifoldDmaExport;
public:
SpuContactResult();
virtual ~SpuContactResult();
btPersistentManifold* GetSpuManifold() const
{
return m_spuManifold;
}
virtual void setShapeIdentifiers(int partId0,int index0, int partId1,int index1);
void setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction01, bool isSwapped);
void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold);
virtual void addContactPoint(const btVector3& normalOnBInWorld,const btPoint3& pointInWorld,float depth);
void flush();
};
#endif //SPU_CONTACT_RESULT2_H

View File

@@ -0,0 +1,52 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef CONVEX_PENETRATION_DEPTH_H
#define CONVEX_PENETRATION_DEPTH_H
class btStackAlloc;
class btIDebugDraw;
class SpuVoronoiSimplexSolver;
#include <LinearMath/btTransform.h>
#include <LinearMath/btPoint3.h>
///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.
class SpuConvexPenetrationDepthSolver
{
public:
virtual ~SpuConvexPenetrationDepthSolver() {};
virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
btTransform& transA,const btTransform& transB,
btVector3& v, btPoint3& pa, btPoint3& pb,
class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
struct SpuConvexPolyhedronVertexData* convexVertexDataA,
struct SpuConvexPolyhedronVertexData* convexVertexDataB
) const = 0;
};
#endif //CONVEX_PENETRATION_DEPTH_H

View File

@@ -0,0 +1,37 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuEpaPenetrationDepthSolver.h"
#include "SpuVoronoiSimplexSolver.h"
#include "SpuGjkPairDetector.h"
#include "SpuContactResult.h"
#include "SpuGjkEpa2.h"
bool SpuEpaPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
btTransform& transA,const btTransform& transB,
btVector3& v, btPoint3& pa, btPoint3& pb,
class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
struct SpuConvexPolyhedronVertexData* convexVertexDataA,
struct SpuConvexPolyhedronVertexData* convexVertexDataB
) const
{
bool r;
SpuGjkEpaSolver2::sResults results;
r = SpuGjkEpaSolver2::Penetration (convexA, convexVertexDataA, shapeTypeA, marginA, transA, convexB, convexVertexDataB, shapeTypeB, marginB, transB, btVector3(1.0f, 0.0f, 0.0f), results);
pa = results.witnesses[0];
pb = results.witnesses[1];
return r;
}

View File

@@ -0,0 +1,47 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_EPA_PENETRATION_DEPTH_SOLVER_H
#define SPU_EPA_PENETRATION_DEPTH_SOLVER_H
#include "SpuConvexPenetrationDepthSolver.h"
class btStackAlloc;
class btIDebugDraw;
class SpuVoronoiSimplexSolver;
///MinkowskiPenetrationDepthSolver implements bruteforce penetration depth estimation.
///Implementation is based on sampling the depth using support mapping, and using GJK step to get the witness points.
class SpuEpaPenetrationDepthSolver : public SpuConvexPenetrationDepthSolver
{
public:
virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
btTransform& transA,const btTransform& transB,
btVector3& v, btPoint3& pa, btPoint3& pb,
class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
struct SpuConvexPolyhedronVertexData* convexVertexDataA,
struct SpuConvexPolyhedronVertexData* convexVertexDataB
) const;
};
#endif //SPU_EPA_PENETRATION_DEPTH_SOLVER_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_GATHERING_COLLISION_TASK_H
#define SPU_GATHERING_COLLISION_TASK_H
#include "../PlatformDefinitions.h"
//#define DEBUG_SPU_COLLISION_DETECTION 1
///Task Description for SPU collision detection
struct SpuGatherAndProcessPairsTaskDesc
{
ppu_address_t inPtr;//m_pairArrayPtr;
//mutex variable
uint32_t m_someMutexVariableInMainMemory;
ppu_address_t m_dispatcher;
uint32_t numOnLastPage;
uint16_t numPages;
uint16_t taskId;
bool m_useEpa;
struct CollisionTask_LocalStoreMemory* m_lsMemory;
}
#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
__attribute__ ((aligned (128)))
#endif
;
void processCollisionTask(void* userPtr, void* lsMemory);
void* createCollisionLocalStoreMemory();
#if defined(USE_LIBSPE2) && defined(__SPU__)
#include "../SpuLibspe2Support.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <SpuFakeDma.h>
//#define DEBUG_LIBSPE2_SPU_TASK
int main(unsigned long long speid, addr64 argp, addr64 envp)
{
printf("SPU: hello \n");
ATTRIBUTE_ALIGNED128(btSpuStatus status);
ATTRIBUTE_ALIGNED16( SpuGatherAndProcessPairsTaskDesc taskDesc ) ;
unsigned int received_message = Spu_Mailbox_Event_Nothing;
bool shutdown = false;
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
status.m_status = Spu_Status_Free;
status.m_lsMemory.p = createCollisionLocalStoreMemory();
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
while ( btLikely( !shutdown ) )
{
received_message = spu_read_in_mbox();
if( btLikely( received_message == Spu_Mailbox_Event_Task ))
{
#ifdef DEBUG_LIBSPE2_SPU_TASK
printf("SPU: received Spu_Mailbox_Event_Task\n");
#endif //DEBUG_LIBSPE2_SPU_TASK
// refresh the status
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
btAssert(status.m_status==Spu_Status_Occupied);
cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuGatherAndProcessPairsTaskDesc), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
#ifdef DEBUG_LIBSPE2_SPU_TASK
printf("SPU:processCollisionTask\n");
#endif //DEBUG_LIBSPE2_SPU_TASK
processCollisionTask((void*)&taskDesc, taskDesc.m_lsMemory);
#ifdef DEBUG_LIBSPE2_SPU_TASK
printf("SPU:finished processCollisionTask\n");
#endif //DEBUG_LIBSPE2_SPU_TASK
}
else
{
#ifdef DEBUG_LIBSPE2_SPU_TASK
printf("SPU: received ShutDown\n");
#endif //DEBUG_LIBSPE2_SPU_TASK
if( btLikely( received_message == Spu_Mailbox_Event_Shutdown ) )
{
shutdown = true;
}
else
{
//printf("SPU - Sth. recieved\n");
}
}
// set to status free and wait for next task
status.m_status = Spu_Status_Free;
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
}
printf("SPU: shutdown\n");
return 0;
}
#endif // USE_LIBSPE2
#endif //SPU_GATHERING_COLLISION_TASK_H

View File

@@ -0,0 +1,850 @@
#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
#include "BulletCollision/CollisionShapes/btSphereShape.h"
#include "SpuCollisionShapes.h"
#include "SpuGjkEpa2.h"
#if defined(DEBUG) || defined (_DEBUG)
#include <stdio.h> //for debug printf
#ifdef __SPU__
#include <spu_printf.h>
#define printf spu_printf
#endif //__SPU__
#endif
namespace gjkepa2_spu_impl
{
// Config
/* GJK */
#define GJK_MAX_ITERATIONS 128
#define GJK_ACCURARY ((btScalar)0.0001)
#define GJK_MIN_DISTANCE ((btScalar)0.0001)
#define GJK_DUPLICATED_EPS ((btScalar)0.0001)
#define GJK_SIMPLEX2_EPS ((btScalar)0.0)
#define GJK_SIMPLEX3_EPS ((btScalar)0.0)
#define GJK_SIMPLEX4_EPS ((btScalar)0.0)
/* EPA */
#define EPA_MAX_VERTICES 64
#define EPA_MAX_FACES (EPA_MAX_VERTICES*2)
#define EPA_MAX_ITERATIONS 255
#define EPA_ACCURACY ((btScalar)0.0001)
#define EPA_FALLBACK (10*EPA_ACCURACY)
#define EPA_PLANE_EPS ((btScalar)0.00001)
#define EPA_INSIDE_EPS ((btScalar)0.01)
// Shorthands
typedef unsigned int U;
typedef unsigned char U1;
struct convexShape
{
void* shape;
SpuConvexPolyhedronVertexData* convexData;
int shapeType;
float margin;
};
// MinkowskiDiff
struct MinkowskiDiff
{
convexShape m_shapes[2];
btMatrix3x3 m_toshape1;
btTransform m_toshape0;
btVector3 (btConvexShape::*Ls)(const btVector3&) const;
void EnableMargin(bool enable)
{
#if 0
if(enable)
Ls=&btConvexShape::localGetSupportingVertex;
else
Ls=&btConvexShape::localGetSupportingVertexWithoutMargin;
#endif
}
inline btVector3 Support0(const btVector3& d) const
{
btVector3 sp = localGetSupportingVertexWithoutMargin (m_shapes[0].shapeType, m_shapes[0].shape, d, m_shapes[0].convexData);
btVector3 ud = d;
ud.normalize();
sp += ud * m_shapes[0].margin;
return sp;
// return(((m_shapes[0])->*(Ls))(d));
}
inline btVector3 Support1(const btVector3& d) const
{
btVector3 nd = m_toshape1*d;
btVector3 ud = nd;
ud.normalize ();
btVector3 sp = localGetSupportingVertexWithoutMargin (m_shapes[1].shapeType, m_shapes[1].shape, nd, m_shapes[1].convexData);
sp += ud * m_shapes[1].margin;
return m_toshape0 * sp;
// return(m_toshape0*((m_shapes[1])->*(Ls))(m_toshape1*d));
}
inline btVector3 Support(const btVector3& d) const
{
return(Support0(d)-Support1(-d));
}
btVector3 Support(const btVector3& d,U index) const
{
if(index)
return(Support1(d));
else
return(Support0(d));
}
};
typedef MinkowskiDiff tShape;
// GJK
struct GJK
{
/* Types */
struct sSV
{
btVector3 d,w;
};
struct sSimplex
{
sSV* c[4];
btScalar p[4];
U rank;
};
struct eStatus { enum _ {
Valid,
Inside,
Failed };};
/* Fields */
tShape m_shape;
btVector3 m_ray;
btScalar m_distance;
sSimplex m_simplices[2];
sSV m_store[4];
sSV* m_free[4];
U m_nfree;
U m_current;
sSimplex* m_simplex;
eStatus::_ m_status;
/* Methods */
GJK()
{
Initialize();
}
void Initialize()
{
m_ray = btVector3(0,0,0);
m_nfree = 0;
m_status = eStatus::Failed;
m_current = 0;
m_distance = 0;
}
eStatus::_ Evaluate(const tShape& shapearg,const btVector3& guess)
{
U iterations=0;
btScalar sqdist=0;
btScalar alpha=0;
btVector3 lastw[4];
U clastw=0;
/* Initialize solver */
m_free[0] = &m_store[0];
m_free[1] = &m_store[1];
m_free[2] = &m_store[2];
m_free[3] = &m_store[3];
m_nfree = 4;
m_current = 0;
m_status = eStatus::Valid;
m_shape = shapearg;
m_distance = 0;
/* Initialize simplex */
m_simplices[0].rank = 0;
m_ray = guess;
const btScalar sqrl= m_ray.length2();
appendvertice(m_simplices[0],sqrl>0?-m_ray:btVector3(1,0,0));
m_simplices[0].p[0] = 1;
m_ray = m_simplices[0].c[0]->w;
sqdist = sqrl;
lastw[0] =
lastw[1] =
lastw[2] =
lastw[3] = m_ray;
/* Loop */
do {
const U next=1-m_current;
sSimplex& cs=m_simplices[m_current];
sSimplex& ns=m_simplices[next];
/* Check zero */
const btScalar rl=m_ray.length();
if(rl<GJK_MIN_DISTANCE)
{/* Touching or inside */
m_status=eStatus::Inside;
break;
}
/* Append new vertice in -'v' direction */
appendvertice(cs,-m_ray);
const btVector3& w=cs.c[cs.rank-1]->w;
bool found=false;
for(U i=0;i<4;++i)
{
if((w-lastw[i]).length2()<GJK_DUPLICATED_EPS)
{ found=true;break; }
}
if(found)
{/* Return old simplex */
removevertice(m_simplices[m_current]);
break;
}
else
{/* Update lastw */
lastw[clastw=(clastw+1)&3]=w;
}
/* Check for termination */
const btScalar omega=dot(m_ray,w)/rl;
alpha=btMax(omega,alpha);
if(((rl-alpha)-(GJK_ACCURARY*rl))<=0)
{/* Return old simplex */
removevertice(m_simplices[m_current]);
break;
}
/* Reduce simplex */
btScalar weights[4];
U mask=0;
switch(cs.rank)
{
case 2: sqdist=projectorigin( cs.c[0]->w,
cs.c[1]->w,
weights,mask);break;
case 3: sqdist=projectorigin( cs.c[0]->w,
cs.c[1]->w,
cs.c[2]->w,
weights,mask);break;
case 4: sqdist=projectorigin( cs.c[0]->w,
cs.c[1]->w,
cs.c[2]->w,
cs.c[3]->w,
weights,mask);break;
}
if(sqdist>=0)
{/* Valid */
ns.rank = 0;
m_ray = btVector3(0,0,0);
m_current = next;
for(U i=0,ni=cs.rank;i<ni;++i)
{
if(mask&(1<<i))
{
ns.c[ns.rank] = cs.c[i];
ns.p[ns.rank++] = weights[i];
m_ray += cs.c[i]->w*weights[i];
}
else
{
m_free[m_nfree++] = cs.c[i];
}
}
if(mask==15) m_status=eStatus::Inside;
}
else
{/* Return old simplex */
removevertice(m_simplices[m_current]);
break;
}
m_status=((++iterations)<GJK_MAX_ITERATIONS)?m_status:eStatus::Failed;
} while(m_status==eStatus::Valid);
m_simplex=&m_simplices[m_current];
switch(m_status)
{
case eStatus::Valid: m_distance=m_ray.length();break;
case eStatus::Inside: m_distance=0;break;
}
return(m_status);
}
bool EncloseOrigin()
{
switch(m_simplex->rank)
{
case 1:
{
for(U i=0;i<3;++i)
{
btVector3 axis=btVector3(0,0,0);
axis[i]=1;
appendvertice(*m_simplex, axis);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
appendvertice(*m_simplex,-axis);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
}
}
break;
case 2:
{
const btVector3 d=m_simplex->c[1]->w-m_simplex->c[0]->w;
for(U i=0;i<3;++i)
{
btVector3 axis=btVector3(0,0,0);
axis[i]=1;
if(btFabs(dot(axis,d))>0)
{
const btVector3 p=cross(d,axis);
appendvertice(*m_simplex, p);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
appendvertice(*m_simplex,-p);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
}
}
}
break;
case 3:
{
const btVector3 n=cross(m_simplex->c[1]->w-m_simplex->c[0]->w,
m_simplex->c[2]->w-m_simplex->c[0]->w);
const btScalar l=n.length();
if(l>0)
{
appendvertice(*m_simplex,n);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
appendvertice(*m_simplex,-n);
if(EncloseOrigin()) return(true);
removevertice(*m_simplex);
}
}
break;
case 4:
{
if(btFabs(det( m_simplex->c[0]->w-m_simplex->c[3]->w,
m_simplex->c[1]->w-m_simplex->c[3]->w,
m_simplex->c[2]->w-m_simplex->c[3]->w))>0)
return(true);
}
break;
}
return(false);
}
/* Internals */
void getsupport(const btVector3& d,sSV& sv) const
{
sv.d = d/d.length();
sv.w = m_shape.Support(sv.d);
}
void removevertice(sSimplex& simplex)
{
m_free[m_nfree++]=simplex.c[--simplex.rank];
}
void appendvertice(sSimplex& simplex,const btVector3& v)
{
simplex.p[simplex.rank]=0;
simplex.c[simplex.rank]=m_free[--m_nfree];
getsupport(v,*simplex.c[simplex.rank++]);
}
static btScalar det(const btVector3& a,const btVector3& b,const btVector3& c)
{
return( a.y()*b.z()*c.x()+a.z()*b.x()*c.y()-
a.x()*b.z()*c.y()-a.y()*b.x()*c.z()+
a.x()*b.y()*c.z()-a.z()*b.y()*c.x());
}
static btScalar projectorigin( const btVector3& a,
const btVector3& b,
btScalar* w,U& m)
{
const btVector3 d=b-a;
const btScalar l=d.length2();
if(l>GJK_SIMPLEX2_EPS)
{
const btScalar t(l>0?-dot(a,d)/l:0);
if(t>=1) { w[0]=0;w[1]=1;m=2;return(b.length2()); }
else if(t<=0) { w[0]=1;w[1]=0;m=1;return(a.length2()); }
else { w[0]=1-(w[1]=t);m=3;return((a+d*t).length2()); }
}
return(-1);
}
static btScalar projectorigin( const btVector3& a,
const btVector3& b,
const btVector3& c,
btScalar* w,U& m)
{
static const U imd3[]={1,2,0};
const btVector3* vt[]={&a,&b,&c};
const btVector3 dl[]={a-b,b-c,c-a};
const btVector3 n=cross(dl[0],dl[1]);
const btScalar l=n.length2();
if(l>GJK_SIMPLEX3_EPS)
{
btScalar mindist=-1;
btScalar subw[2] = { btScalar(0.0f), btScalar(0.0f) };
U subm;
for(U i=0;i<3;++i)
{
if(dot(*vt[i],cross(dl[i],n))>0)
{
const U j=imd3[i];
const btScalar subd(projectorigin(*vt[i],*vt[j],subw,subm));
if((mindist<0)||(subd<mindist))
{
mindist = subd;
m = ((subm&1)?1<<i:0)+((subm&2)?1<<j:0);
w[i] = subw[0];
w[j] = subw[1];
w[imd3[j]] = 0;
}
}
}
if(mindist<0)
{
const btScalar d=dot(a,n);
const btScalar s=btSqrt(l);
const btVector3 p=n*(d/l);
mindist = p.length2();
m = 7;
w[0] = (cross(dl[1],b-p)).length()/s;
w[1] = (cross(dl[2],c-p)).length()/s;
w[2] = 1-(w[0]+w[1]);
}
return(mindist);
}
return(-1);
}
static btScalar projectorigin( const btVector3& a,
const btVector3& b,
const btVector3& c,
const btVector3& d,
btScalar* w,U& m)
{
static const U imd3[]={1,2,0};
const btVector3* vt[]={&a,&b,&c,&d};
const btVector3 dl[]={a-d,b-d,c-d};
const btScalar vl=det(dl[0],dl[1],dl[2]);
const bool ng=(vl*dot(a,cross(b-c,a-b)))<=0;
if(ng&&(btFabs(vl)>GJK_SIMPLEX4_EPS))
{
btScalar mindist=-1;
btScalar subw[3];
U subm;
for(U i=0;i<3;++i)
{
const U j=imd3[i];
const btScalar s=vl*dot(d,cross(dl[i],dl[j]));
if(s>0)
{
const btScalar subd=projectorigin(*vt[i],*vt[j],d,subw,subm);
if((mindist<0)||(subd<mindist))
{
mindist = subd;
m = (subm&1?1<<i:0)+
(subm&2?1<<j:0)+
(subm&4?8:0);
w[i] = subw[0];
w[j] = subw[1];
w[imd3[j]] = 0;
w[3] = subw[2];
}
}
}
if(mindist<0)
{
mindist = 0;
m = 15;
w[0] = det(c,b,d)/vl;
w[1] = det(a,c,d)/vl;
w[2] = det(b,a,d)/vl;
w[3] = 1-(w[0]+w[1]+w[2]);
}
return(mindist);
}
return(-1);
}
};
// EPA
struct EPA
{
/* Types */
typedef GJK::sSV sSV;
struct sFace
{
btVector3 n;
btScalar d;
btScalar p;
sSV* c[3];
sFace* f[3];
sFace* l[2];
U1 e[3];
U1 pass;
};
struct sList
{
sFace* root;
U count;
sList() : root(0),count(0) {}
};
struct sHorizon
{
sFace* cf;
sFace* ff;
U nf;
sHorizon() : cf(0),ff(0),nf(0) {}
};
struct eStatus { enum _ {
Valid,
Touching,
Degenerated,
NonConvex,
InvalidHull,
OutOfFaces,
OutOfVertices,
AccuraryReached,
FallBack,
Failed, };};
/* Fields */
eStatus::_ m_status;
GJK::sSimplex m_result;
btVector3 m_normal;
btScalar m_depth;
sSV m_sv_store[EPA_MAX_VERTICES];
sFace m_fc_store[EPA_MAX_FACES];
U m_nextsv;
sList m_hull;
sList m_stock;
/* Methods */
EPA()
{
Initialize();
}
void Initialize()
{
m_status = eStatus::Failed;
m_normal = btVector3(0,0,0);
m_depth = 0;
m_nextsv = 0;
for(U i=0;i<EPA_MAX_FACES;++i)
{
append(m_stock,&m_fc_store[EPA_MAX_FACES-i-1]);
}
}
eStatus::_ Evaluate(GJK& gjk,const btVector3& guess)
{
GJK::sSimplex& simplex=*gjk.m_simplex;
if((simplex.rank>1)&&gjk.EncloseOrigin())
{
/* Clean up */
while(m_hull.root)
{
sFace* f(m_hull.root);
remove(m_hull,f);
append(m_stock,f);
}
m_status = eStatus::Valid;
m_nextsv = 0;
/* Orient simplex */
if(gjk.det( simplex.c[0]->w-simplex.c[3]->w,
simplex.c[1]->w-simplex.c[3]->w,
simplex.c[2]->w-simplex.c[3]->w)<0)
{
btSwap(simplex.c[0],simplex.c[1]);
btSwap(simplex.p[0],simplex.p[1]);
}
/* Build initial hull */
sFace* tetra[]={newface(simplex.c[0],simplex.c[1],simplex.c[2],true),
newface(simplex.c[1],simplex.c[0],simplex.c[3],true),
newface(simplex.c[2],simplex.c[1],simplex.c[3],true),
newface(simplex.c[0],simplex.c[2],simplex.c[3],true)};
if(m_hull.count==4)
{
sFace* best=findbest();
sFace outer=*best;
U pass=0;
U iterations=0;
bind(tetra[0],0,tetra[1],0);
bind(tetra[0],1,tetra[2],0);
bind(tetra[0],2,tetra[3],0);
bind(tetra[1],1,tetra[3],2);
bind(tetra[1],2,tetra[2],1);
bind(tetra[2],2,tetra[3],1);
m_status=eStatus::Valid;
for(;iterations<EPA_MAX_ITERATIONS;++iterations)
{
if(m_nextsv<EPA_MAX_VERTICES)
{
sHorizon horizon;
sSV* w=&m_sv_store[m_nextsv++];
bool valid=true;
best->pass = (U1)(++pass);
gjk.getsupport(best->n,*w);
const btScalar wdist=dot(best->n,w->w)-best->d;
if(wdist>EPA_ACCURACY)
{
for(U j=0;(j<3)&&valid;++j)
{
valid&=expand( pass,w,
best->f[j],best->e[j],
horizon);
}
if(valid&&(horizon.nf>=3))
{
bind(horizon.cf,1,horizon.ff,2);
remove(m_hull,best);
append(m_stock,best);
best=findbest();
if(best->p>=outer.p) outer=*best;
} else { m_status=eStatus::InvalidHull;break; }
} else { m_status=eStatus::AccuraryReached;break; }
} else { m_status=eStatus::OutOfVertices;break; }
}
const btVector3 projection=outer.n*outer.d;
m_normal = outer.n;
m_depth = outer.d;
m_result.rank = 3;
m_result.c[0] = outer.c[0];
m_result.c[1] = outer.c[1];
m_result.c[2] = outer.c[2];
m_result.p[0] = cross( outer.c[1]->w-projection,
outer.c[2]->w-projection).length();
m_result.p[1] = cross( outer.c[2]->w-projection,
outer.c[0]->w-projection).length();
m_result.p[2] = cross( outer.c[0]->w-projection,
outer.c[1]->w-projection).length();
const btScalar sum=m_result.p[0]+m_result.p[1]+m_result.p[2];
m_result.p[0] /= sum;
m_result.p[1] /= sum;
m_result.p[2] /= sum;
return(m_status);
}
}
/* Fallback */
m_status = eStatus::FallBack;
m_normal = -guess;
const btScalar nl=m_normal.length();
if(nl>0)
m_normal = m_normal/nl;
else
m_normal = btVector3(1,0,0);
m_depth = 0;
m_result.rank=1;
m_result.c[0]=simplex.c[0];
m_result.p[0]=1;
return(m_status);
}
sFace* newface(sSV* a,sSV* b,sSV* c,bool forced)
{
if(m_stock.root)
{
sFace* face=m_stock.root;
remove(m_stock,face);
append(m_hull,face);
face->pass = 0;
face->c[0] = a;
face->c[1] = b;
face->c[2] = c;
face->n = cross(b->w-a->w,c->w-a->w);
const btScalar l=face->n.length();
const bool v=l>EPA_ACCURACY;
face->p = btMin(btMin(
dot(a->w,cross(face->n,a->w-b->w)),
dot(b->w,cross(face->n,b->w-c->w))),
dot(c->w,cross(face->n,c->w-a->w))) /
(v?l:1);
face->p = face->p>=-EPA_INSIDE_EPS?0:face->p;
if(v)
{
face->d = dot(a->w,face->n)/l;
face->n /= l;
if(forced||(face->d>=-EPA_PLANE_EPS))
{
return(face);
} else m_status=eStatus::NonConvex;
} else m_status=eStatus::Degenerated;
remove(m_hull,face);
append(m_stock,face);
return(0);
}
m_status=m_stock.root?eStatus::OutOfVertices:eStatus::OutOfFaces;
return(0);
}
sFace* findbest()
{
sFace* minf=m_hull.root;
btScalar mind=minf->d*minf->d;
btScalar maxp=minf->p;
for(sFace* f=minf->l[1];f;f=f->l[1])
{
const btScalar sqd=f->d*f->d;
if((f->p>=maxp)&&(sqd<mind))
{
minf=f;
mind=sqd;
maxp=f->p;
}
}
return(minf);
}
bool expand(U pass,sSV* w,sFace* f,U e,sHorizon& horizon)
{
static const U i1m3[]={1,2,0};
static const U i2m3[]={2,0,1};
if(f->pass!=pass)
{
const U e1=i1m3[e];
if((dot(f->n,w->w)-f->d)<-EPA_PLANE_EPS)
{
sFace* nf=newface(f->c[e1],f->c[e],w,false);
if(nf)
{
bind(nf,0,f,e);
if(horizon.cf) bind(horizon.cf,1,nf,2); else horizon.ff=nf;
horizon.cf=nf;
++horizon.nf;
return(true);
}
}
else
{
const U e2=i2m3[e];
f->pass = (U1)pass;
if( expand(pass,w,f->f[e1],f->e[e1],horizon)&&
expand(pass,w,f->f[e2],f->e[e2],horizon))
{
remove(m_hull,f);
append(m_stock,f);
return(true);
}
}
}
return(false);
}
static inline void bind(sFace* fa,U ea,sFace* fb,U eb)
{
fa->e[ea]=(U1)eb;fa->f[ea]=fb;
fb->e[eb]=(U1)ea;fb->f[eb]=fa;
}
static inline void append(sList& list,sFace* face)
{
face->l[0] = 0;
face->l[1] = list.root;
if(list.root) list.root->l[0]=face;
list.root = face;
++list.count;
}
static inline void remove(sList& list,sFace* face)
{
if(face->l[1]) face->l[1]->l[0]=face->l[0];
if(face->l[0]) face->l[0]->l[1]=face->l[1];
if(face==list.root) list.root=face->l[1];
--list.count;
}
};
//
static void Initialize(void* shapeA,
SpuConvexPolyhedronVertexData* convexDataA,
int shapeTypeA,
float marginA,
const btTransform& wtrs0,
void* shapeB,
SpuConvexPolyhedronVertexData* convexDataB,
int shapeTypeB,
float marginB,
const btTransform& wtrs1,
SpuGjkEpaSolver2::sResults& results,
tShape& shape,
bool withmargins)
{
/* Results */
results.witnesses[0] =
results.witnesses[1] = btVector3(0,0,0);
results.status = SpuGjkEpaSolver2::sResults::Separated;
/* Shape */
shape.m_shapes[0].margin = marginA;
shape.m_shapes[0].shape = shapeA;
shape.m_shapes[0].shapeType = shapeTypeA;
shape.m_shapes[0].convexData = convexDataA;
shape.m_shapes[1].margin = marginB;
shape.m_shapes[1].shape = shapeB;
shape.m_shapes[1].shapeType = shapeTypeB;
shape.m_shapes[1].convexData = convexDataB;
shape.m_toshape1 = wtrs1.getBasis().transposeTimes(wtrs0.getBasis());
shape.m_toshape0 = wtrs0.inverseTimes(wtrs1);
shape.EnableMargin(withmargins);
}
}
//
// Api
//
using namespace gjkepa2_spu_impl;
//
int SpuGjkEpaSolver2::StackSizeRequirement()
{
return(sizeof(GJK)+sizeof(EPA));
}
//
bool SpuGjkEpaSolver2::Penetration(void* shapeA,
SpuConvexPolyhedronVertexData* convexDataA,
int shapeTypeA,
float marginA,
const btTransform& wtrs0,
void* shapeB,
SpuConvexPolyhedronVertexData* convexDataB,
int shapeTypeB,
float marginB,
const btTransform& wtrs1,
const btVector3& guess,
sResults& results)
{
tShape shape;
Initialize(shapeA, convexDataA, shapeTypeA, marginA, wtrs0, shapeB, convexDataB, shapeTypeB, marginB, wtrs1, results,shape,true);
GJK gjk;
GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,-guess);
switch(gjk_status)
{
case GJK::eStatus::Inside:
{
EPA epa;
EPA::eStatus::_ epa_status=epa.Evaluate(gjk,-guess);
if(epa_status!=EPA::eStatus::Failed)
{
btVector3 w0=btVector3(0,0,0);
for(U i=0;i<epa.m_result.rank;++i)
{
w0+=shape.Support(epa.m_result.c[i]->d,0)*epa.m_result.p[i];
}
results.status = sResults::Penetrating;
results.witnesses[0] = wtrs0*w0;
results.witnesses[1] = wtrs0*(w0-epa.m_normal*epa.m_depth);
return(true);
} else results.status=sResults::EPA_Failed;
}
break;
case GJK::eStatus::Failed:
results.status=sResults::GJK_Failed;
break;
}
return(false);
}
/* Symbols cleanup */
#undef GJK_MAX_ITERATIONS
#undef GJK_ACCURARY
#undef GJK_MIN_DISTANCE
#undef GJK_DUPLICATED_EPS
#undef GJK_SIMPLEX2_EPS
#undef GJK_SIMPLEX3_EPS
#undef GJK_SIMPLEX4_EPS
#undef EPA_MAX_VERTICES
#undef EPA_MAX_FACES
#undef EPA_MAX_ITERATIONS
#undef EPA_ACCURACY
#undef EPA_FALLBACK
#undef EPA_PLANE_EPS
#undef EPA_INSIDE_EPS

View File

@@ -0,0 +1,38 @@
#ifndef _68DA1F85_90B7_4bb0_A705_83B4040A75C6_
#define _68DA1F85_90B7_4bb0_A705_83B4040A75C6_
#include "BulletCollision/CollisionShapes/btConvexShape.h"
///btGjkEpaSolver contributed under zlib by Nathanael Presson
struct SpuGjkEpaSolver2
{
struct sResults
{
enum eStatus
{
Separated, /* Shapes doesnt penetrate */
Penetrating, /* Shapes are penetrating */
GJK_Failed, /* GJK phase fail, no big issue, shapes are probably just 'touching' */
EPA_Failed, /* EPA phase fail, bigger problem, need to save parameters, and debug */
} status;
btVector3 witnesses[2];
btVector3 normal;
};
static int StackSizeRequirement();
static bool Penetration(void* shapeA,
SpuConvexPolyhedronVertexData* convexDataA,
int shapeTypeA,
float marginA,
const btTransform& xformA,
void* shapeB,
SpuConvexPolyhedronVertexData* convexDataB,
int shapeTypeB,
float marginB,
const btTransform& xformB,
const btVector3& guess,
sResults& results);
};
#endif

View File

@@ -0,0 +1,311 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuGjkPairDetector.h"
#include "SpuConvexPenetrationDepthSolver.h"
#include "SpuCollisionShapes.h"
#if defined(DEBUG) || defined (_DEBUG)
#include <stdio.h> //for debug printf
#ifdef __SPU__
#include <spu_printf.h>
#define printf spu_printf
#endif //__SPU__
#endif
//must be above the machine epsilon
#define REL_ERROR2 btScalar(1.0e-6)
//temp globals, to improve GJK/EPA/penetration calculations
int gSpuNumDeepPenetrationChecks = 0;
int gSpuNumGjkChecks = 0;
SpuGjkPairDetector::SpuGjkPairDetector(void* objectA,void* objectB,int shapeTypeA, int shapeTypeB, float marginA,float marginB,SpuVoronoiSimplexSolver* simplexSolver, const SpuConvexPenetrationDepthSolver* penetrationDepthSolver)
:m_cachedSeparatingAxis(float(0.),float(0.),float(1.)),
m_penetrationDepthSolver(penetrationDepthSolver),
m_simplexSolver(simplexSolver),
m_minkowskiA(objectA),
m_minkowskiB(objectB),
m_shapeTypeA(shapeTypeA),
m_shapeTypeB(shapeTypeB),
m_marginA(marginA),
m_marginB(marginB),
m_ignoreMargin(false),
m_lastUsedMethod(-1),
m_catchDegeneracies(1)
{
}
void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuContactResult& output)
{
btScalar distance=btScalar(0.);
btVector3 normalInB(btScalar(0.),btScalar(0.),btScalar(0.));
btVector3 pointOnA,pointOnB;
btTransform localTransA = input.m_transformA;
btTransform localTransB = input.m_transformB;
btVector3 positionOffset = (localTransA.getOrigin() + localTransB.getOrigin()) * btScalar(0.5);
localTransA.getOrigin() -= positionOffset;
localTransB.getOrigin() -= positionOffset;
btScalar marginA = m_marginA;
btScalar marginB = m_marginB;
gSpuNumGjkChecks++;
//for CCD we don't use margins
if (m_ignoreMargin)
{
marginA = btScalar(0.);
marginB = btScalar(0.);
}
m_curIter = 0;
int gGjkMaxIter = 1000;//this is to catch invalid input, perhaps check for #NaN?
m_cachedSeparatingAxis.setValue(0,1,0);
bool isValid = false;
bool checkSimplex = false;
bool checkPenetration = true;
m_degenerateSimplex = 0;
m_lastUsedMethod = -1;
{
btScalar squaredDistance = SIMD_INFINITY;
btScalar delta = btScalar(0.);
btScalar margin = marginA + marginB;
m_simplexSolver->reset();
for ( ; ; )
//while (true)
{
btVector3 seperatingAxisInA = (-m_cachedSeparatingAxis)* input.m_transformA.getBasis();
btVector3 seperatingAxisInB = m_cachedSeparatingAxis* input.m_transformB.getBasis();
// btVector3 pInA = m_minkowskiA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
// btVector3 qInB = m_minkowskiB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
btVector3 pInA = localGetSupportingVertexWithoutMargin(m_shapeTypeA, m_minkowskiA, seperatingAxisInA,input.m_convexVertexData[0]);//, &featureIndexA);
btVector3 qInB = localGetSupportingVertexWithoutMargin(m_shapeTypeB, m_minkowskiB, seperatingAxisInB,input.m_convexVertexData[1]);//, &featureIndexB);
btPoint3 pWorld = localTransA(pInA);
btPoint3 qWorld = localTransB(qInB);
btVector3 w = pWorld - qWorld;
delta = m_cachedSeparatingAxis.dot(w);
// potential exit, they don't overlap
if ((delta > btScalar(0.0)) && (delta * delta > squaredDistance * input.m_maximumDistanceSquared))
{
checkPenetration = false;
break;
}
//exit 0: the new point is already in the simplex, or we didn't come any closer
if (m_simplexSolver->inSimplex(w))
{
m_degenerateSimplex = 1;
checkSimplex = true;
break;
}
// are we getting any closer ?
btScalar f0 = squaredDistance - delta;
btScalar f1 = squaredDistance * REL_ERROR2;
if (f0 <= f1)
{
if (f0 <= btScalar(0.))
{
m_degenerateSimplex = 2;
}
checkSimplex = true;
break;
}
//add current vertex to simplex
m_simplexSolver->addVertex(w, pWorld, qWorld);
//calculate the closest point to the origin (update vector v)
if (!m_simplexSolver->closest(m_cachedSeparatingAxis))
{
m_degenerateSimplex = 3;
checkSimplex = true;
break;
}
btScalar previousSquaredDistance = squaredDistance;
squaredDistance = m_cachedSeparatingAxis.length2();
//redundant m_simplexSolver->compute_points(pointOnA, pointOnB);
//are we getting any closer ?
if (previousSquaredDistance - squaredDistance <= SIMD_EPSILON * previousSquaredDistance)
{
m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
checkSimplex = true;
break;
}
//degeneracy, this is typically due to invalid/uninitialized worldtransforms for a btCollisionObject
if (m_curIter++ > gGjkMaxIter)
{
#if defined(DEBUG) || defined (_DEBUG)
printf("SpuGjkPairDetector maxIter exceeded:%i\n",m_curIter);
printf("sepAxis=(%f,%f,%f), squaredDistance = %f, shapeTypeA=%i,shapeTypeB=%i\n",
m_cachedSeparatingAxis.getX(),
m_cachedSeparatingAxis.getY(),
m_cachedSeparatingAxis.getZ(),
squaredDistance,
m_shapeTypeA,
m_shapeTypeB);
#endif
break;
}
bool check = (!m_simplexSolver->fullSimplex());
//bool check = (!m_simplexSolver->fullSimplex() && squaredDistance > SIMD_EPSILON * m_simplexSolver->maxVertex());
if (!check)
{
//do we need this backup_closest here ?
m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
break;
}
}
if (checkSimplex)
{
m_simplexSolver->compute_points(pointOnA, pointOnB);
normalInB = pointOnA-pointOnB;
btScalar lenSqr = m_cachedSeparatingAxis.length2();
//valid normal
if (lenSqr < 0.0001)
{
m_degenerateSimplex = 5;
}
if (lenSqr > SIMD_EPSILON*SIMD_EPSILON)
{
btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
normalInB *= rlen; //normalize
btScalar s = btSqrt(squaredDistance);
btAssert(s > btScalar(0.0));
pointOnA -= m_cachedSeparatingAxis * (marginA / s);
pointOnB += m_cachedSeparatingAxis * (marginB / s);
distance = ((btScalar(1.)/rlen) - margin);
isValid = true;
m_lastUsedMethod = 1;
} else
{
m_lastUsedMethod = 2;
}
}
bool catchDegeneratePenetrationCase =
(m_catchDegeneracies && m_penetrationDepthSolver && m_degenerateSimplex && ((distance+margin) < 0.01));
//if (checkPenetration && !isValid)
if (checkPenetration && (!isValid || catchDegeneratePenetrationCase ))
{
//penetration case
//if there is no way to handle penetrations, bail out
if (m_penetrationDepthSolver)
{
// Penetration depth case.
btVector3 tmpPointOnA,tmpPointOnB;
gSpuNumDeepPenetrationChecks++;
bool isValid2 = m_penetrationDepthSolver->calcPenDepth(
*m_simplexSolver,
m_minkowskiA,m_minkowskiB,
m_shapeTypeA, m_shapeTypeB,
marginA, marginB,
localTransA,localTransB,
m_cachedSeparatingAxis, tmpPointOnA, tmpPointOnB,
0,input.m_stackAlloc,input.m_convexVertexData[0], input.m_convexVertexData[1]
);
if (isValid2)
{
btVector3 tmpNormalInB = tmpPointOnB-tmpPointOnA;
btScalar lenSqr = tmpNormalInB.length2();
if (lenSqr > (SIMD_EPSILON*SIMD_EPSILON))
{
tmpNormalInB /= btSqrt(lenSqr);
btScalar distance2 = -(tmpPointOnA-tmpPointOnB).length();
//only replace valid penetrations when the result is deeper (check)
if (!isValid || (distance2 < distance))
{
distance = distance2;
pointOnA = tmpPointOnA;
pointOnB = tmpPointOnB;
normalInB = tmpNormalInB;
isValid = true;
m_lastUsedMethod = 3;
} else
{
}
} else
{
//isValid = false;
m_lastUsedMethod = 4;
}
} else
{
m_lastUsedMethod = 5;
}
}
}
}
if (isValid)
{
#ifdef __SPU__
//spu_printf("distance\n");
#endif //__SPU__
output.addContactPoint(
normalInB,
pointOnB+positionOffset,
distance);
//printf("gjk add:%f",distance);
}
}

View File

@@ -0,0 +1,93 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_GJK_PAIR_DETECTOR_H
#define SPU_GJK_PAIR_DETECTOR_H
#include "SpuContactResult.h"
#include "SpuVoronoiSimplexSolver.h"
class SpuConvexPenetrationDepthSolver;
/// btGjkPairDetector uses GJK to implement the btDiscreteCollisionDetectorInterface
class SpuGjkPairDetector
{
btVector3 m_cachedSeparatingAxis;
const SpuConvexPenetrationDepthSolver* m_penetrationDepthSolver;
SpuVoronoiSimplexSolver* m_simplexSolver;
void* m_minkowskiA;
void* m_minkowskiB;
int m_shapeTypeA;
int m_shapeTypeB;
float m_marginA;
float m_marginB;
bool m_ignoreMargin;
public:
//some debugging to fix degeneracy problems
int m_lastUsedMethod;
int m_curIter;
int m_degenerateSimplex;
int m_catchDegeneracies;
SpuGjkPairDetector(void* objectA,void* objectB,int m_shapeTypeA, int m_shapeTypeB, float marginA, float marginB, SpuVoronoiSimplexSolver* simplexSolver, const SpuConvexPenetrationDepthSolver* penetrationDepthSolver);
virtual ~SpuGjkPairDetector() {};
virtual void getClosestPoints(const SpuClosestPointInput& input,SpuContactResult& output);
void setMinkowskiA(void* minkA)
{
m_minkowskiA = minkA;
}
void setMinkowskiB(void* minkB)
{
m_minkowskiB = minkB;
}
void setCachedSeperatingAxis(const btVector3& seperatingAxis)
{
m_cachedSeparatingAxis = seperatingAxis;
}
void setPenetrationDepthSolver(SpuConvexPenetrationDepthSolver* penetrationDepthSolver)
{
m_penetrationDepthSolver = penetrationDepthSolver;
}
///don't use setIgnoreMargin, it's for Bullet's internal use
void setIgnoreMargin(bool ignoreMargin)
{
m_ignoreMargin = ignoreMargin;
}
};
#endif //SPU_GJK_PAIR_DETECTOR_H

View File

@@ -0,0 +1,19 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

View File

@@ -0,0 +1,347 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuMinkowskiPenetrationDepthSolver.h"
#include "SpuVoronoiSimplexSolver.h"
#include "SpuGjkPairDetector.h"
#include "SpuContactResult.h"
#include "SpuPreferredPenetrationDirections.h"
#include "SpuCollisionShapes.h"
#define NUM_UNITSPHERE_POINTS 42
static btVector3 sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] =
{
btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
};
bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
btTransform& transA,const btTransform& transB,
btVector3& v, btPoint3& pa, btPoint3& pb,
class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
struct SpuConvexPolyhedronVertexData* convexVertexDataA,
struct SpuConvexPolyhedronVertexData* convexVertexDataB
) const
{
(void)stackAlloc;
(void)v;
struct btIntermediateResult : public SpuContactResult
{
btIntermediateResult():m_hasResult(false)
{
}
btVector3 m_normalOnBInWorld;
btVector3 m_pointInWorld;
btScalar m_depth;
bool m_hasResult;
virtual void setShapeIdentifiers(int partId0,int index0, int partId1,int index1)
{
(void)partId0;
(void)index0;
(void)partId1;
(void)index1;
}
void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
{
m_normalOnBInWorld = normalOnBInWorld;
m_pointInWorld = pointInWorld;
m_depth = depth;
m_hasResult = true;
}
};
//just take fixed number of orientation, and sample the penetration depth in that direction
btScalar minProj = btScalar(1e30);
btVector3 minNorm;
btVector3 minVertex;
btVector3 minA,minB;
btVector3 seperatingAxisInA,seperatingAxisInB;
btVector3 pInA,qInB,pWorld,qWorld,w;
//#define USE_BATCHED_SUPPORT 1
#ifdef USE_BATCHED_SUPPORT
btVector3 supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
btVector3 supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
btVector3 seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
btVector3 seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
int i;
int numSampleDirections = NUM_UNITSPHERE_POINTS;
for (i=0;i<numSampleDirections;i++)
{
const btVector3& norm = sPenetrationDirections[i];
seperatingAxisInABatch[i] = (-norm) * transA.getBasis() ;
seperatingAxisInBBatch[i] = norm * transB.getBasis() ;
}
{
int numPDA = convexA->getNumPreferredPenetrationDirections();
if (numPDA)
{
for (int i=0;i<numPDA;i++)
{
btVector3 norm;
convexA->getPreferredPenetrationDirection(i,norm);
norm = transA.getBasis() * norm;
sPenetrationDirections[numSampleDirections] = norm;
seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
numSampleDirections++;
}
}
}
{
int numPDB = convexB->getNumPreferredPenetrationDirections();
if (numPDB)
{
for (int i=0;i<numPDB;i++)
{
btVector3 norm;
convexB->getPreferredPenetrationDirection(i,norm);
norm = transB.getBasis() * norm;
sPenetrationDirections[numSampleDirections] = norm;
seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
numSampleDirections++;
}
}
}
convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections);
convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections);
for (i=0;i<numSampleDirections;i++)
{
const btVector3& norm = sPenetrationDirections[i];
seperatingAxisInA = seperatingAxisInABatch[i];
seperatingAxisInB = seperatingAxisInBBatch[i];
pInA = supportVerticesABatch[i];
qInB = supportVerticesBBatch[i];
pWorld = transA(pInA);
qWorld = transB(qInB);
w = qWorld - pWorld;
btScalar delta = norm.dot(w);
//find smallest delta
if (delta < minProj)
{
minProj = delta;
minNorm = norm;
minA = pWorld;
minB = qWorld;
}
}
#else
int numSampleDirections = NUM_UNITSPHERE_POINTS;
///this is necessary, otherwise the normal is not correct, and sphere will rotate forever on a sloped triangle mesh
#define DO_PREFERRED_DIRECTIONS 1
#ifdef DO_PREFERRED_DIRECTIONS
{
int numPDA = spuGetNumPreferredPenetrationDirections(shapeTypeA,convexA);
if (numPDA)
{
for (int i=0;i<numPDA;i++)
{
btVector3 norm;
spuGetPreferredPenetrationDirection(shapeTypeA,convexA,i,norm);
norm = transA.getBasis() * norm;
sPenetrationDirections[numSampleDirections] = norm;
numSampleDirections++;
}
}
}
{
int numPDB = spuGetNumPreferredPenetrationDirections(shapeTypeB,convexB);
if (numPDB)
{
for (int i=0;i<numPDB;i++)
{
btVector3 norm;
spuGetPreferredPenetrationDirection(shapeTypeB,convexB,i,norm);
norm = transB.getBasis() * norm;
sPenetrationDirections[numSampleDirections] = norm;
numSampleDirections++;
}
}
}
#endif //DO_PREFERRED_DIRECTIONS
for (int i=0;i<numSampleDirections;i++)
{
const btVector3& norm = sPenetrationDirections[i];
seperatingAxisInA = (-norm)* transA.getBasis();
seperatingAxisInB = norm* transB.getBasis();
pInA = localGetSupportingVertexWithoutMargin(shapeTypeA, convexA, seperatingAxisInA,convexVertexDataA);//, NULL);
qInB = localGetSupportingVertexWithoutMargin(shapeTypeB, convexB, seperatingAxisInB,convexVertexDataB);//, NULL);
// pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
// qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
pWorld = transA(pInA);
qWorld = transB(qInB);
w = qWorld - pWorld;
btScalar delta = norm.dot(w);
//find smallest delta
if (delta < minProj)
{
minProj = delta;
minNorm = norm;
minA = pWorld;
minB = qWorld;
}
}
#endif //USE_BATCHED_SUPPORT
//add the margins
minA += minNorm*marginA;
minB -= minNorm*marginB;
//no penetration
if (minProj < btScalar(0.))
return false;
minProj += (marginA + marginB) + btScalar(1.00);
//#define DEBUG_DRAW 1
#ifdef DEBUG_DRAW
if (debugDraw)
{
btVector3 color(0,1,0);
debugDraw->drawLine(minA,minB,color);
color = btVector3 (1,1,1);
btVector3 vec = minB-minA;
btScalar prj2 = minNorm.dot(vec);
debugDraw->drawLine(minA,minA+(minNorm*minProj),color);
}
#endif //DEBUG_DRAW
SpuGjkPairDetector gjkdet(convexA,convexB,shapeTypeA,shapeTypeB,marginA,marginB,&simplexSolver,0);
btScalar offsetDist = minProj;
btVector3 offset = minNorm * offsetDist;
SpuClosestPointInput input;
input.m_convexVertexData[0] = convexVertexDataA;
input.m_convexVertexData[1] = convexVertexDataB;
btVector3 newOrg = transA.getOrigin() + offset;
btTransform displacedTrans = transA;
displacedTrans.setOrigin(newOrg);
input.m_transformA = displacedTrans;
input.m_transformB = transB;
input.m_maximumDistanceSquared = btScalar(1e30);//minProj;
btIntermediateResult res;
gjkdet.getClosestPoints(input,res);
btScalar correctedMinNorm = minProj - res.m_depth;
//the penetration depth is over-estimated, relax it
btScalar penetration_relaxation= btScalar(1.);
minNorm*=penetration_relaxation;
if (res.m_hasResult)
{
pa = res.m_pointInWorld - minNorm * correctedMinNorm;
pb = res.m_pointInWorld;
#ifdef DEBUG_DRAW
if (debugDraw)
{
btVector3 color(1,0,0);
debugDraw->drawLine(pa,pb,color);
}
#endif//DEBUG_DRAW
} else {
// could not seperate shapes
btAssert (false);
}
return res.m_hasResult;
}

View File

@@ -0,0 +1,47 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
#include "SpuConvexPenetrationDepthSolver.h"
class btStackAlloc;
class btIDebugDraw;
class SpuVoronoiSimplexSolver;
///MinkowskiPenetrationDepthSolver implements bruteforce penetration depth estimation.
///Implementation is based on sampling the depth using support mapping, and using GJK step to get the witness points.
class SpuMinkowskiPenetrationDepthSolver : public SpuConvexPenetrationDepthSolver
{
public:
virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
btTransform& transA,const btTransform& transB,
btVector3& v, btPoint3& pa, btPoint3& pb,
class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
struct SpuConvexPolyhedronVertexData* convexVertexDataA,
struct SpuConvexPolyhedronVertexData* convexVertexDataB
) const;
};
#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H

View File

@@ -0,0 +1,70 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
#define _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
int spuGetNumPreferredPenetrationDirections(int shapeType, void* shape)
{
switch (shapeType)
{
case TRIANGLE_SHAPE_PROXYTYPE:
{
return 2;
//spu_printf("2\n");
break;
}
default:
{
#if __ASSERT
spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType);
#endif // __ASSERT
}
}
return 0;
}
void spuGetPreferredPenetrationDirection(int shapeType, void* shape, int index, btVector3& penetrationVector)
{
switch (shapeType)
{
case TRIANGLE_SHAPE_PROXYTYPE:
{
btVector3* vertices = (btVector3*)shape;
///calcNormal
penetrationVector = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
penetrationVector.normalize();
if (index)
penetrationVector *= btScalar(-1.);
break;
}
default:
{
#if __ASSERT
spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType);
#endif // __ASSERT
}
}
}
#endif //_SPU_PREFERRED_PENETRATION_DIRECTIONS_H

View File

@@ -0,0 +1,606 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Elsevier CDROM license agreements grants nonexclusive license to use the software
for any purpose, commercial or non-commercial as long as the following credit is included
identifying the original source of the software:
Parts of the source are "from the book Real-Time Collision Detection by
Christer Ericson, published by Morgan Kaufmann Publishers,
(c) 2005 Elsevier Inc."
*/
#include "SpuVoronoiSimplexSolver.h"
#include <assert.h>
#include <stdio.h>
#define VERTA 0
#define VERTB 1
#define VERTC 2
#define VERTD 3
#define CATCH_DEGENERATE_TETRAHEDRON 1
void SpuVoronoiSimplexSolver::removeVertex(int index)
{
assert(m_numVertices>0);
m_numVertices--;
m_simplexVectorW[index] = m_simplexVectorW[m_numVertices];
m_simplexPointsP[index] = m_simplexPointsP[m_numVertices];
m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices];
}
void SpuVoronoiSimplexSolver::reduceVertices (const SpuUsageBitfield& usedVerts)
{
if ((numVertices() >= 4) && (!usedVerts.usedVertexD))
removeVertex(3);
if ((numVertices() >= 3) && (!usedVerts.usedVertexC))
removeVertex(2);
if ((numVertices() >= 2) && (!usedVerts.usedVertexB))
removeVertex(1);
if ((numVertices() >= 1) && (!usedVerts.usedVertexA))
removeVertex(0);
}
//clear the simplex, remove all the vertices
void SpuVoronoiSimplexSolver::reset()
{
m_cachedValidClosest = false;
m_numVertices = 0;
m_needsUpdate = true;
m_lastW = btVector3(btScalar(1e30),btScalar(1e30),btScalar(1e30));
m_cachedBC.reset();
}
//add a vertex
void SpuVoronoiSimplexSolver::addVertex(const btVector3& w, const btPoint3& p, const btPoint3& q)
{
m_lastW = w;
m_needsUpdate = true;
m_simplexVectorW[m_numVertices] = w;
m_simplexPointsP[m_numVertices] = p;
m_simplexPointsQ[m_numVertices] = q;
m_numVertices++;
}
bool SpuVoronoiSimplexSolver::updateClosestVectorAndPoints()
{
if (m_needsUpdate)
{
m_cachedBC.reset();
m_needsUpdate = false;
switch (numVertices())
{
case 0:
m_cachedValidClosest = false;
break;
case 1:
{
m_cachedP1 = m_simplexPointsP[0];
m_cachedP2 = m_simplexPointsQ[0];
m_cachedV = m_cachedP1-m_cachedP2; //== m_simplexVectorW[0]
m_cachedBC.reset();
m_cachedBC.setBarycentricCoordinates(btScalar(1.),btScalar(0.),btScalar(0.),btScalar(0.));
m_cachedValidClosest = m_cachedBC.isValid();
break;
};
case 2:
{
//closest point origin from line segment
const btVector3& from = m_simplexVectorW[0];
const btVector3& to = m_simplexVectorW[1];
btVector3 nearest;
btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.));
btVector3 diff = p - from;
btVector3 v = to - from;
btScalar t = v.dot(diff);
if (t > 0) {
btScalar dotVV = v.dot(v);
if (t < dotVV) {
t /= dotVV;
diff -= t*v;
m_cachedBC.m_usedVertices.usedVertexA = true;
m_cachedBC.m_usedVertices.usedVertexB = true;
} else {
t = 1;
diff -= v;
//reduce to 1 point
m_cachedBC.m_usedVertices.usedVertexB = true;
}
} else
{
t = 0;
//reduce to 1 point
m_cachedBC.m_usedVertices.usedVertexA = true;
}
m_cachedBC.setBarycentricCoordinates(1-t,t);
nearest = from + t*v;
m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]);
m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]);
m_cachedV = m_cachedP1 - m_cachedP2;
reduceVertices(m_cachedBC.m_usedVertices);
m_cachedValidClosest = m_cachedBC.isValid();
break;
}
case 3:
{
//closest point origin from triangle
btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.));
const btVector3& a = m_simplexVectorW[0];
const btVector3& b = m_simplexVectorW[1];
const btVector3& c = m_simplexVectorW[2];
closestPtPointTriangle(p,a,b,c,m_cachedBC);
m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] +
m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] +
m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] +
m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3];
m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] +
m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] +
m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] +
m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3];
m_cachedV = m_cachedP1-m_cachedP2;
reduceVertices (m_cachedBC.m_usedVertices);
m_cachedValidClosest = m_cachedBC.isValid();
break;
}
case 4:
{
btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.));
const btVector3& a = m_simplexVectorW[0];
const btVector3& b = m_simplexVectorW[1];
const btVector3& c = m_simplexVectorW[2];
const btVector3& d = m_simplexVectorW[3];
bool hasSeperation = closestPtPointTetrahedron(p,a,b,c,d,m_cachedBC);
if (hasSeperation)
{
m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] +
m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] +
m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] +
m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3];
m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] +
m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] +
m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] +
m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3];
m_cachedV = m_cachedP1-m_cachedP2;
reduceVertices (m_cachedBC.m_usedVertices);
} else
{
// printf("sub distance got penetration\n");
if (m_cachedBC.m_degenerate)
{
m_cachedValidClosest = false;
} else
{
m_cachedValidClosest = true;
//degenerate case == false, penetration = true + zero
m_cachedV.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
}
break;
}
m_cachedValidClosest = m_cachedBC.isValid();
//closest point origin from tetrahedron
break;
}
default:
{
m_cachedValidClosest = false;
}
};
}
return m_cachedValidClosest;
}
//return/calculate the closest vertex
bool SpuVoronoiSimplexSolver::closest(btVector3& v)
{
bool succes = updateClosestVectorAndPoints();
v = m_cachedV;
return succes;
}
btScalar SpuVoronoiSimplexSolver::maxVertex()
{
int i, numverts = numVertices();
btScalar maxV = btScalar(0.);
for (i=0;i<numverts;i++)
{
btScalar curLen2 = m_simplexVectorW[i].length2();
if (maxV < curLen2)
maxV = curLen2;
}
return maxV;
}
//return the current simplex
int SpuVoronoiSimplexSolver::getSimplex(btPoint3 *pBuf, btPoint3 *qBuf, btVector3 *yBuf) const
{
int i;
for (i=0;i<numVertices();i++)
{
yBuf[i] = m_simplexVectorW[i];
pBuf[i] = m_simplexPointsP[i];
qBuf[i] = m_simplexPointsQ[i];
}
return numVertices();
}
bool SpuVoronoiSimplexSolver::inSimplex(const btVector3& w)
{
bool found = false;
int i, numverts = numVertices();
//btScalar maxV = btScalar(0.);
//w is in the current (reduced) simplex
for (i=0;i<numverts;i++)
{
if (m_simplexVectorW[i] == w)
found = true;
}
//check in case lastW is already removed
if (w == m_lastW)
return true;
return found;
}
void SpuVoronoiSimplexSolver::backup_closest(btVector3& v)
{
v = m_cachedV;
}
bool SpuVoronoiSimplexSolver::emptySimplex() const
{
return (numVertices() == 0);
}
void SpuVoronoiSimplexSolver::compute_points(btPoint3& p1, btPoint3& p2)
{
updateClosestVectorAndPoints();
p1 = m_cachedP1;
p2 = m_cachedP2;
}
bool SpuVoronoiSimplexSolver::closestPtPointTriangle(const btPoint3& p, const btPoint3& a, const btPoint3& b, const btPoint3& c,SpuSubSimplexClosestResult& result)
{
result.m_usedVertices.reset();
// Check if P in vertex region outside A
btVector3 ab = b - a;
btVector3 ac = c - a;
btVector3 ap = p - a;
btScalar d1 = ab.dot(ap);
btScalar d2 = ac.dot(ap);
if (d1 <= btScalar(0.0) && d2 <= btScalar(0.0))
{
result.m_closestPointOnSimplex = a;
result.m_usedVertices.usedVertexA = true;
result.setBarycentricCoordinates(1,0,0);
return true;// a; // barycentric coordinates (1,0,0)
}
// Check if P in vertex region outside B
btVector3 bp = p - b;
btScalar d3 = ab.dot(bp);
btScalar d4 = ac.dot(bp);
if (d3 >= btScalar(0.0) && d4 <= d3)
{
result.m_closestPointOnSimplex = b;
result.m_usedVertices.usedVertexB = true;
result.setBarycentricCoordinates(0,1,0);
return true; // b; // barycentric coordinates (0,1,0)
}
// Check if P in edge region of AB, if so return projection of P onto AB
btScalar vc = d1*d4 - d3*d2;
if (vc <= btScalar(0.0) && d1 >= btScalar(0.0) && d3 <= btScalar(0.0)) {
btScalar v = d1 / (d1 - d3);
result.m_closestPointOnSimplex = a + v * ab;
result.m_usedVertices.usedVertexA = true;
result.m_usedVertices.usedVertexB = true;
result.setBarycentricCoordinates(1-v,v,0);
return true;
//return a + v * ab; // barycentric coordinates (1-v,v,0)
}
// Check if P in vertex region outside C
btVector3 cp = p - c;
btScalar d5 = ab.dot(cp);
btScalar d6 = ac.dot(cp);
if (d6 >= btScalar(0.0) && d5 <= d6)
{
result.m_closestPointOnSimplex = c;
result.m_usedVertices.usedVertexC = true;
result.setBarycentricCoordinates(0,0,1);
return true;//c; // barycentric coordinates (0,0,1)
}
// Check if P in edge region of AC, if so return projection of P onto AC
btScalar vb = d5*d2 - d1*d6;
if (vb <= btScalar(0.0) && d2 >= btScalar(0.0) && d6 <= btScalar(0.0)) {
btScalar w = d2 / (d2 - d6);
result.m_closestPointOnSimplex = a + w * ac;
result.m_usedVertices.usedVertexA = true;
result.m_usedVertices.usedVertexC = true;
result.setBarycentricCoordinates(1-w,0,w);
return true;
//return a + w * ac; // barycentric coordinates (1-w,0,w)
}
// Check if P in edge region of BC, if so return projection of P onto BC
btScalar va = d3*d6 - d5*d4;
if (va <= btScalar(0.0) && (d4 - d3) >= btScalar(0.0) && (d5 - d6) >= btScalar(0.0)) {
btScalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
result.m_closestPointOnSimplex = b + w * (c - b);
result.m_usedVertices.usedVertexB = true;
result.m_usedVertices.usedVertexC = true;
result.setBarycentricCoordinates(0,1-w,w);
return true;
// return b + w * (c - b); // barycentric coordinates (0,1-w,w)
}
// P inside face region. Compute Q through its barycentric coordinates (u,v,w)
btScalar denom = btScalar(1.0) / (va + vb + vc);
btScalar v = vb * denom;
btScalar w = vc * denom;
result.m_closestPointOnSimplex = a + ab * v + ac * w;
result.m_usedVertices.usedVertexA = true;
result.m_usedVertices.usedVertexB = true;
result.m_usedVertices.usedVertexC = true;
result.setBarycentricCoordinates(1-v-w,v,w);
return true;
// return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = btScalar(1.0) - v - w
}
/// Test if point p and d lie on opposite sides of plane through abc
int SpuVoronoiSimplexSolver::pointOutsideOfPlane(const btPoint3& p, const btPoint3& a, const btPoint3& b, const btPoint3& c, const btPoint3& d)
{
btVector3 normal = (b-a).cross(c-a);
btScalar signp = (p - a).dot(normal); // [AP AB AC]
btScalar signd = (d - a).dot( normal); // [AD AB AC]
#ifdef CATCH_DEGENERATE_TETRAHEDRON
#ifdef BT_USE_DOUBLE_PRECISION
if (signd * signd < (btScalar(1e-8) * btScalar(1e-8)))
{
return -1;
}
#else
if (signd * signd < (btScalar(1e-4) * btScalar(1e-4)))
{
// printf("affine dependent/degenerate\n");//
return -1;
}
#endif
#endif
// Points on opposite sides if expression signs are opposite
return signp * signd < btScalar(0.);
}
bool SpuVoronoiSimplexSolver::closestPtPointTetrahedron(const btPoint3& p, const btPoint3& a, const btPoint3& b, const btPoint3& c, const btPoint3& d, SpuSubSimplexClosestResult& finalResult)
{
SpuSubSimplexClosestResult tempResult;
// Start out assuming point inside all halfspaces, so closest to itself
finalResult.m_closestPointOnSimplex = p;
finalResult.m_usedVertices.reset();
finalResult.m_usedVertices.usedVertexA = true;
finalResult.m_usedVertices.usedVertexB = true;
finalResult.m_usedVertices.usedVertexC = true;
finalResult.m_usedVertices.usedVertexD = true;
int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d);
int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b);
int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c);
int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a);
if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0)
{
finalResult.m_degenerate = true;
return false;
}
if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC)
{
return false;
}
btScalar bestSqDist = FLT_MAX;
// If point outside face abc then compute closest point on abc
if (pointOutsideABC)
{
closestPtPointTriangle(p, a, b, c,tempResult);
btPoint3 q = tempResult.m_closestPointOnSimplex;
btScalar sqDist = (q - p).dot( q - p);
// Update best closest point if (squared) distance is less than current best
if (sqDist < bestSqDist) {
bestSqDist = sqDist;
finalResult.m_closestPointOnSimplex = q;
//convert result bitmask!
finalResult.m_usedVertices.reset();
finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB;
finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC;
finalResult.setBarycentricCoordinates(
tempResult.m_barycentricCoords[VERTA],
tempResult.m_barycentricCoords[VERTB],
tempResult.m_barycentricCoords[VERTC],
0
);
}
}
// Repeat test for face acd
if (pointOutsideACD)
{
closestPtPointTriangle(p, a, c, d,tempResult);
btPoint3 q = tempResult.m_closestPointOnSimplex;
//convert result bitmask!
btScalar sqDist = (q - p).dot( q - p);
if (sqDist < bestSqDist)
{
bestSqDist = sqDist;
finalResult.m_closestPointOnSimplex = q;
finalResult.m_usedVertices.reset();
finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB;
finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC;
finalResult.setBarycentricCoordinates(
tempResult.m_barycentricCoords[VERTA],
0,
tempResult.m_barycentricCoords[VERTB],
tempResult.m_barycentricCoords[VERTC]
);
}
}
// Repeat test for face adb
if (pointOutsideADB)
{
closestPtPointTriangle(p, a, d, b,tempResult);
btPoint3 q = tempResult.m_closestPointOnSimplex;
//convert result bitmask!
btScalar sqDist = (q - p).dot( q - p);
if (sqDist < bestSqDist)
{
bestSqDist = sqDist;
finalResult.m_closestPointOnSimplex = q;
finalResult.m_usedVertices.reset();
finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;
finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC;
finalResult.setBarycentricCoordinates(
tempResult.m_barycentricCoords[VERTA],
tempResult.m_barycentricCoords[VERTC],
0,
tempResult.m_barycentricCoords[VERTB]
);
}
}
// Repeat test for face bdc
if (pointOutsideBDC)
{
closestPtPointTriangle(p, b, d, c,tempResult);
btPoint3 q = tempResult.m_closestPointOnSimplex;
//convert result bitmask!
btScalar sqDist = (q - p).dot( q - p);
if (sqDist < bestSqDist)
{
bestSqDist = sqDist;
finalResult.m_closestPointOnSimplex = q;
finalResult.m_usedVertices.reset();
finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA;
finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;
finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC;
finalResult.setBarycentricCoordinates(
0,
tempResult.m_barycentricCoords[VERTA],
tempResult.m_barycentricCoords[VERTC],
tempResult.m_barycentricCoords[VERTB]
);
}
}
//help! we ended up full !
if (finalResult.m_usedVertices.usedVertexA &&
finalResult.m_usedVertices.usedVertexB &&
finalResult.m_usedVertices.usedVertexC &&
finalResult.m_usedVertices.usedVertexD)
{
return true;
}
return true;
}

View File

@@ -0,0 +1,156 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPUVoronoiSimplexSolver_H
#define SPUVoronoiSimplexSolver_H
#include <LinearMath/btTransform.h>
#include <LinearMath/btPoint3.h>
#define VORONOI_SIMPLEX_MAX_VERTS 5
struct SpuUsageBitfield{
SpuUsageBitfield()
{
reset();
}
void reset()
{
usedVertexA = false;
usedVertexB = false;
usedVertexC = false;
usedVertexD = false;
}
unsigned short usedVertexA : 1;
unsigned short usedVertexB : 1;
unsigned short usedVertexC : 1;
unsigned short usedVertexD : 1;
unsigned short unused1 : 1;
unsigned short unused2 : 1;
unsigned short unused3 : 1;
unsigned short unused4 : 1;
};
struct SpuSubSimplexClosestResult
{
btVector3 m_closestPointOnSimplex;
//MASK for m_usedVertices
//stores the simplex vertex-usage, using the MASK,
// if m_usedVertices & MASK then the related vertex is used
SpuUsageBitfield m_usedVertices;
float m_barycentricCoords[4];
bool m_degenerate;
void reset()
{
m_degenerate = false;
setBarycentricCoordinates();
m_usedVertices.reset();
}
bool isValid()
{
bool valid = (m_barycentricCoords[0] >= float(0.)) &&
(m_barycentricCoords[1] >= float(0.)) &&
(m_barycentricCoords[2] >= float(0.)) &&
(m_barycentricCoords[3] >= float(0.));
return valid;
}
void setBarycentricCoordinates(float a=float(0.),float b=float(0.),float c=float(0.),float d=float(0.))
{
m_barycentricCoords[0] = a;
m_barycentricCoords[1] = b;
m_barycentricCoords[2] = c;
m_barycentricCoords[3] = d;
}
};
/// SpuVoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin.
/// Can be used with GJK, as an alternative to Johnson distance algorithm.
class SpuVoronoiSimplexSolver
{
public:
int m_numVertices;
btVector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS];
btVector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS];
btVector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS];
int m_VertexIndexA[VORONOI_SIMPLEX_MAX_VERTS];
int m_VertexIndexB[VORONOI_SIMPLEX_MAX_VERTS];
btVector3 m_cachedP1;
btVector3 m_cachedP2;
btVector3 m_cachedV;
btVector3 m_lastW;
bool m_cachedValidClosest;
SpuSubSimplexClosestResult m_cachedBC;
bool m_needsUpdate;
void removeVertex(int index);
void reduceVertices (const SpuUsageBitfield& usedVerts);
bool updateClosestVectorAndPoints();
bool closestPtPointTetrahedron(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, SpuSubSimplexClosestResult& finalResult);
int pointOutsideOfPlane(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d);
bool closestPtPointTriangle(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c,SpuSubSimplexClosestResult& result);
int RemoveDegenerateIndices (const int *inArray, int numIndices, int *outArray) const;
public:
void reset();
void addVertex(const btVector3& w, const btPoint3& p, const btPoint3& q);
bool closest(btVector3& v);
btScalar maxVertex();
bool fullSimplex() const
{
return (m_numVertices == 4);
}
int getSimplex(btVector3 *pBuf, btVector3 *qBuf, btVector3 *yBuf) const;
bool inSimplex(const btVector3& w);
void backup_closest(btVector3& v) ;
bool emptySimplex() const ;
void compute_points(btVector3& p1, btVector3& p2) ;
int numVertices() const
{
return m_numVertices;
}
};
#endif //SpuVoronoiSimplexSolver

View File

@@ -0,0 +1 @@
Empty placeholder for future Libspe2 SPU task

View File

@@ -0,0 +1,604 @@
/*
Bullet Continuous Collision Detection and Physics Library - Parallel solver
Copyright (c) 2007 Starbreeze Studios
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Written by: Marten Svanfeldt
*/
#include "SpuParallelSolver.h"
//#include "SpuFakeDma.h"
#include "SpuSync.h"
#include "LinearMath/btVector3.h"
#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
#include "BulletDynamics/Dynamics/btRigidBody.h"
#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
#include "LinearMath/btMinMax.h"
#include "BulletCollision/CollisionShapes/btCollisionShape.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
#include "LinearMath/btQuickprof.h"
#include "SpuSolverTask/SpuParallellSolverTask.h"
#include <stdio.h>
enum
{
PARALLEL_SOLVER_BODIES_PER_TASK = 64,
PARALLEL_SOLVER_CELLS_PER_TASK = SPU_HASH_NUMCELLS >> 3
};
//-- Hash handling
static void recordDependency(SpuSolverHash* hash, unsigned int i, unsigned int j)
{
hash->m_dependencyMatrix[i][j >> 5] |= (1 << (j & 31));
hash->m_dependencyMatrix[j][i >> 5] |= (1 << (i & 31));
}
// Clear the given hash
static void clearHash (SpuSolverHash* hash)
{
size_t hashSize = sizeof(SpuSolverHash);
memset(hash, 0, hashSize);
int i;
// Setup basic dependency
for ( i = 0; i < SPU_HASH_NUMCELLS; ++i)
{
hash->m_dependencyMatrix[i][i >> 5] |= (1 << (i & 31));
}
// Set some ones to "unused cells"
for ( i = SPU_HASH_WORDWIDTH-SPU_HASH_NUMUNUSEDBITS; i < SPU_HASH_WORDWIDTH; ++i)
{
hash->m_currentMask[0][SPU_HASH_NUMCELLDWORDS-1] |= (1 << i);
}
}
/*
static bool getDependency(SpuSolverHash* hash, unsigned int i, unsigned int j)
{
return (hash->m_dependencyMatrix[i][j >> 5] & (1 << (j & 31))) != 0;
}
*/
static unsigned int getObjectIndex (btCollisionObject* object)
{
btVector3 center = object->getWorldTransform().getOrigin();
int cx = (int)floorf(center.x() / SPU_HASH_PHYSSIZE);
int cy = (int)floorf(center.y() / SPU_HASH_PHYSSIZE);
int cz = (int)floorf(center.z() / SPU_HASH_PHYSSIZE);
return spuGetHashCellIndex(cx, cy, cz);
};
btParallelSequentialImpulseSolver::btParallelSequentialImpulseSolver (btThreadSupportInterface* threadIf, int maxOutstandingTasks)
: m_numberOfContacts(0), m_taskScheduler (threadIf, maxOutstandingTasks)
{
m_solverHash = new SpuSolverHash;
clearHash(m_solverHash);
}
btParallelSequentialImpulseSolver::~btParallelSequentialImpulseSolver ()
{
delete m_solverHash;
}
void btParallelSequentialImpulseSolver::prepareSolve(int numBodies, int numManifolds)
{
m_sortedManifolds.reserve(numManifolds);
m_allObjects.reserve(numBodies);
}
btScalar btParallelSequentialImpulseSolver::solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints, const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher)
{
BT_PROFILE("parallel_solveGroup");
if (!numManifolds && !numConstraints)
return 0;
int i;
///refresh contact points is not needed anymore, it has been moved into the processCollision detection part.
#ifdef FORCE_REFESH_CONTACT_MANIFOLDS
for ( i = 0; i < numManifolds; ++i)
{
btPersistentManifold* currManifold = manifold[i];
btRigidBody* rb0 = (btRigidBody*)currManifold->getBody0();
btRigidBody* rb1 = (btRigidBody*)currManifold->getBody1();
currManifold->refreshContactPoints(rb0->getCenterOfMassTransform(),rb1->getCenterOfMassTransform());
}
#endif //FORCE_REFESH_CONTACT_MANIFOLDS
// Record and mark the manifolds to the cells
for ( i = 0; i < numManifolds; ++i)
{
// Compute a hash cell for this manifold
btPersistentManifold* currManifold = manifold[i];
btCollisionObject *ownerObject, *otherObject;
btRigidBody* rb0 = (btRigidBody*)currManifold->getBody0();
btRigidBody* rb1 = (btRigidBody*)currManifold->getBody1();
if (rb0->getIslandTag() >= 0)
{
ownerObject = rb0;
otherObject = rb1;
}
else
{
ownerObject = rb1;
otherObject = rb0;
}
// Save the cell
unsigned int ownerCellIdx = getObjectIndex(ownerObject);
ManifoldCellHolder holder = {ownerCellIdx, currManifold};
m_sortedManifolds.push_back(holder);
m_solverHash->m_Hash[ownerCellIdx].m_numManifolds++;
// Record dependency
if (rb0->getIslandTag() >= 0 && rb1->getIslandTag() >= 0)
{
unsigned int otherCellIdx = getObjectIndex(otherObject);
recordDependency(m_solverHash, ownerCellIdx, otherCellIdx);
}
// Save statistics
int numContacts = currManifold->getNumContacts();
m_solverHash->m_Hash[ownerCellIdx].m_numContacts += numContacts;
m_numberOfContacts += numContacts;
}
// Record and mark constraints to the cells
for ( i = 0; i < numConstraints; ++i)
{
// Compute a hash cell for this manifold
btTypedConstraint* currConstraint = constraints[i];
if (!constraintTypeSupported(currConstraint->getConstraintType()))
continue;
btCollisionObject *ownerObject, *otherObject;
btRigidBody* rb0 = &currConstraint->getRigidBodyA();
btRigidBody* rb1 = &currConstraint->getRigidBodyB();
if (rb0->getIslandTag() >= 0)
{
ownerObject = rb0;
otherObject = rb1;
}
else
{
ownerObject = rb1;
otherObject = rb0;
}
// Save the cell
unsigned int ownerCellIdx = getObjectIndex(ownerObject);
ConstraintCellHolder holder = {ownerCellIdx, currConstraint->getConstraintType(), currConstraint};
m_sortedConstraints.push_back(holder);
m_solverHash->m_Hash[ownerCellIdx].m_numConstraints++;
// Record dependency
if (rb0 && rb1 && rb0->getIslandTag() >= 0 && rb1->getIslandTag() >= 0)
{
unsigned int otherCellIdx = getObjectIndex(otherObject);
recordDependency(m_solverHash, ownerCellIdx, otherCellIdx);
}
}
// Save all RBs
for ( i = 0; i < numBodies; ++i)
{
btCollisionObject* obj = bodies[i];
//unsigned int cellIdx = getObjectIndex(obj);
btRigidBody* rb = btRigidBody::upcast(obj);
m_allObjects.push_back(rb);
}
return 0;
}
template<typename T>
class CellHolderPredicate
{
public:
SIMD_FORCE_INLINE bool operator() ( const T& lhs, const T& rhs )
{
return lhs.m_hashCellIndex < rhs.m_hashCellIndex;
}
};
/*static void printDependencyMatrix(SpuSolverHash* hash)
{
for (int r = 0; r < SPU_HASH_NUMCELLS; ++r)
{
for (int c = 0; c < SPU_HASH_NUMCELLS; ++c)
{
if (getDependency(hash, r, c))
{
printf("1");
}
else
{
printf("0");
}
}
printf("\n");
}
printf("\n");
fflush(stdout);
}
*/
// Solver caches
btAlignedObjectArray<SpuSolverBody> solverBodyPool_persist;
btAlignedObjectArray<uint32_t> solverBodyOffsetList_persist;
btAlignedObjectArray<SpuSolverInternalConstraint> solverInternalConstraintPool_persist;
btAlignedObjectArray<SpuSolverConstraint> solverConstraintPool_persist;
void btParallelSequentialImpulseSolver::allSolved (const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc)
{
BT_PROFILE("parallel_allSolved");
if (!m_numberOfContacts && !m_sortedConstraints.size())
{
m_sortedManifolds.clear();
m_sortedConstraints.clear();
m_allObjects.clear();
clearHash(m_solverHash);
return;
}
//printDependencyMatrix(m_solverHash);
// Sort the manifolds list
int numManifolds = m_sortedManifolds.size();
m_sortedManifolds.quickSort(CellHolderPredicate<ManifoldCellHolder>());
// Sort the constraint list
int numConstraints = m_sortedConstraints.size();
m_sortedConstraints.quickSort(CellHolderPredicate<ConstraintCellHolder>());
// Sort the body list
int numBodies = m_allObjects.size();
// Reassign the hash offset
uint32_t emptyCellMask[SPU_HASH_NUMCELLDWORDS] = {0};
int numBodyOffsets = 0;
{
int manifoldRunner = 0;
int bodyOffsetRunner = 0;
int internalConstraintRunner = 0;
int constraintRunner = 0;
for (int i = 0; i < SPU_HASH_NUMCELLS; ++i)
{
bool empty = true;
SpuSolverHashCell& hashCell = m_solverHash->m_Hash[i];
hashCell.m_solverBodyOffsetListOffset = bodyOffsetRunner;
if (hashCell.m_numManifolds)
{
hashCell.m_manifoldListOffset = manifoldRunner;
manifoldRunner += hashCell.m_numManifolds;
bodyOffsetRunner += hashCell.m_numManifolds*2;
}
if (hashCell.m_numContacts)
{
hashCell.m_internalConstraintListOffset = internalConstraintRunner*3;
internalConstraintRunner += hashCell.m_numContacts;
empty = false;
}
if (hashCell.m_numConstraints)
{
hashCell.m_constraintListOffset = constraintRunner;
constraintRunner += hashCell.m_numConstraints;
bodyOffsetRunner += hashCell.m_numConstraints*2;
empty = false;
}
emptyCellMask[i >> 5] |= (empty ? (1 << (i&31)) : 0);
// Align the bodyOffsetRunner to a whole number of 4 for right alignment in the list
bodyOffsetRunner = (bodyOffsetRunner+3)&~0x3;
}
numBodyOffsets = bodyOffsetRunner;
}
// Setup rigid bodies
// Allocate temporary data
solverBodyPool_persist.resize(numBodies + numManifolds + numConstraints);
SpuSolverBody* solverBodyPool = &solverBodyPool_persist[0];
solverBodyOffsetList_persist.resize(numBodyOffsets);
uint32_t* solverBodyOffsetList = &solverBodyOffsetList_persist[0];
solverInternalConstraintPool_persist.resize(m_numberOfContacts*3);
SpuSolverInternalConstraint* solverInternalConstraintPool = &solverInternalConstraintPool_persist[0];
solverConstraintPool_persist.resize(numConstraints);
SpuSolverConstraint* solverConstraintPool = &solverConstraintPool_persist[0];
// Setup all the moving rigid bodies
{
BT_PROFILE("setup moving rigidbodies");
int bodiesPerTask = PARALLEL_SOLVER_BODIES_PER_TASK;
int bodiesToSchedule = numBodies;
int startBody = 0;
while (bodiesToSchedule > 0)
{
// Schedule a bunch of hash cells
int numBodiesInTask = bodiesToSchedule > bodiesPerTask ? bodiesPerTask : bodiesToSchedule;
SpuSolverTaskDesc* desc = m_taskScheduler.getTask();
desc->m_solverCommand = CMD_SOLVER_SETUP_BODIES;
desc->m_solverData.m_solverHash = m_solverHash;
desc->m_solverData.m_solverBodyList = solverBodyPool;
desc->m_commandData.m_bodySetup.m_startBody = startBody;
desc->m_commandData.m_bodySetup.m_numBodies = numBodiesInTask;
desc->m_commandData.m_bodySetup.m_rbList = &m_allObjects[0];
m_taskScheduler.issueTask();
bodiesToSchedule -= numBodiesInTask;
startBody += numBodiesInTask;
}
m_taskScheduler.flushTasks();
}
// Manifold setup
{
int cellsPerTask = PARALLEL_SOLVER_CELLS_PER_TASK;
int cellsToSchedule = SPU_HASH_NUMCELLS;
int startCell = 0;
while (cellsToSchedule > 0)
{
int numCellsInTask = cellsToSchedule > cellsPerTask ? cellsPerTask : cellsToSchedule;
SpuSolverTaskDesc* desc = m_taskScheduler.getTask();
desc->m_solverCommand = CMD_SOLVER_MANIFOLD_SETUP;
desc->m_solverData.m_solverHash = m_solverHash;
desc->m_solverData.m_solverBodyList = solverBodyPool;
desc->m_solverData.m_solverBodyOffsetList = solverBodyOffsetList;
desc->m_solverData.m_solverInternalConstraintList = solverInternalConstraintPool;
desc->m_solverData.m_solverConstraintList = solverConstraintPool;
desc->m_commandData.m_manifoldSetup.m_startCell = startCell;
desc->m_commandData.m_manifoldSetup.m_numCells = numCellsInTask;
desc->m_commandData.m_manifoldSetup.m_numBodies = numBodies;
desc->m_commandData.m_manifoldSetup.m_numManifolds = numManifolds;
desc->m_commandData.m_manifoldSetup.m_manifoldHolders = &m_sortedManifolds[0];
desc->m_commandData.m_manifoldSetup.m_constraintHolders = &m_sortedConstraints[0];
desc->m_commandData.m_manifoldSetup.m_solverInfo = info;
m_taskScheduler.issueTask();
cellsToSchedule -= numCellsInTask;
startCell += numCellsInTask;
}
m_taskScheduler.flushTasks();
}
{
BT_PROFILE("parallel_solve_iterations");
btSpinlock::SpinVariable* spinVar = (btSpinlock::SpinVariable*)btAlignedAlloc(sizeof(btSpinlock::SpinVariable), 128);
for (int iter = 0; iter < info.m_numIterations; ++iter)
{
btSpinlock lock (spinVar);
lock.Init();
// Clear the "processed cells" part of the hash
memcpy(m_solverHash->m_currentMask[0], emptyCellMask, sizeof(uint32_t)*SPU_HASH_NUMCELLDWORDS);
for (int task = 0; task < m_taskScheduler.getMaxOutstandingTasks(); ++task)
{
SpuSolverTaskDesc* desc = m_taskScheduler.getTask();
desc->m_solverCommand = CMD_SOLVER_SOLVE_ITERATE;
desc->m_solverData.m_solverHash = m_solverHash;
desc->m_solverData.m_solverBodyList = solverBodyPool;
desc->m_solverData.m_solverBodyOffsetList = solverBodyOffsetList;
desc->m_solverData.m_solverInternalConstraintList = solverInternalConstraintPool;
desc->m_solverData.m_solverConstraintList = solverConstraintPool;
desc->m_commandData.m_iterate.m_spinLockVar = spinVar;
m_taskScheduler.issueTask();
}
m_taskScheduler.flushTasks();
}
btAlignedFree((void*)spinVar);
}
// Write back velocity
{
int bodiesPerTask = PARALLEL_SOLVER_BODIES_PER_TASK;
int bodiesToSchedule = numBodies;
int startBody = 0;
while (bodiesToSchedule > 0)
{
// Schedule a bunch of hash cells
int numBodiesInTask = bodiesToSchedule > bodiesPerTask ? bodiesPerTask : bodiesToSchedule;
SpuSolverTaskDesc* desc = m_taskScheduler.getTask();
desc->m_solverCommand = CMD_SOLVER_COPYBACK_BODIES;
desc->m_solverData.m_solverHash = m_solverHash;
desc->m_solverData.m_solverBodyList = solverBodyPool;
desc->m_commandData.m_bodyCopyback.m_startBody = startBody;
desc->m_commandData.m_bodyCopyback.m_numBodies = numBodiesInTask;
desc->m_commandData.m_bodyCopyback.m_rbList = &m_allObjects[0];
m_taskScheduler.issueTask();
bodiesToSchedule -= numBodiesInTask;
startBody += numBodiesInTask;
}
m_taskScheduler.flushTasks();
}
// Clean up
m_sortedManifolds.resize(0);
m_sortedConstraints.resize(0);
m_allObjects.resize(0);
clearHash(m_solverHash);
m_numberOfContacts = 0;
}
void btParallelSequentialImpulseSolver::reset()
{
m_sortedManifolds.clear();
m_allObjects.clear();
m_numberOfContacts = 0;
clearHash(m_solverHash);
solverBodyPool_persist.clear();
solverBodyOffsetList_persist.clear();
solverConstraintPool_persist.clear();
solverInternalConstraintPool_persist.clear();
}
SolverTaskScheduler::SolverTaskScheduler(btThreadSupportInterface* threadIf, int maxOutstandingTasks)
: m_threadInterface (threadIf), m_maxNumOutstandingTasks (maxOutstandingTasks > SPU_MAX_SPUS ? SPU_MAX_SPUS : maxOutstandingTasks),
m_currentTask (0), m_numBusyTasks (0)
{
m_taskDescriptors.resize(m_maxNumOutstandingTasks);
m_taskBusy.resize(m_maxNumOutstandingTasks);
m_threadInterface->startSPU();
}
SolverTaskScheduler::~SolverTaskScheduler()
{
m_threadInterface->stopSPU();
}
SpuSolverTaskDesc* SolverTaskScheduler::getTask()
{
int taskIdx = -1;
if (m_taskBusy[m_currentTask])
{
//try to find a new one
for (int i = 0; i < m_maxNumOutstandingTasks; ++i)
{
if (!m_taskBusy[i])
{
taskIdx = i;
break;
}
}
if (taskIdx < 0)
{
// Have to wait
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
m_threadInterface->waitForResponse(&taskId, &outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
taskIdx = taskId;
}
m_currentTask = taskIdx;
}
SpuSolverTaskDesc* result = &m_taskDescriptors[m_currentTask];
memset(result, 0, sizeof(SpuSolverTaskDesc));
result->m_taskId = m_currentTask;
return result;
}
void SolverTaskScheduler::issueTask()
{
m_taskBusy[m_currentTask] = true;
m_numBusyTasks++;
SpuSolverTaskDesc& desc = m_taskDescriptors[m_currentTask];
m_threadInterface->sendRequest(1, (uint32_t)&desc, m_currentTask);
}
void SolverTaskScheduler::flushTasks()
{
while (m_numBusyTasks > 0)
{
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
m_threadInterface->waitForResponse(&taskId, &outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}

View File

@@ -0,0 +1,75 @@
/*
Bullet Continuous Collision Detection and Physics Library - Parallel solver
Copyright (c) 2007 Starbreeze Studios
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Written by: Marten Svanfeldt
*/
#ifndef SPU_PARALLELSOLVER_H
#define SPU_PARALLELSOLVER_H
#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
#include "btThreadSupportInterface.h"
#include "LinearMath/btAlignedObjectArray.h"
class SolverTaskScheduler
{
protected:
class btThreadSupportInterface* m_threadInterface;
int m_maxNumOutstandingTasks;
unsigned int m_currentTask;
unsigned int m_numBusyTasks;
btAlignedObjectArray<struct SpuSolverTaskDesc> m_taskDescriptors;
btAlignedObjectArray<bool> m_taskBusy;
public:
SolverTaskScheduler (btThreadSupportInterface* threadIf, int maxOutstandingTasks);
~SolverTaskScheduler ();
struct SpuSolverTaskDesc* getTask ();
void issueTask();
void flushTasks();
int getMaxOutstandingTasks()
{
return m_maxNumOutstandingTasks;
}
};
class btParallelSequentialImpulseSolver : public btConstraintSolver
{
protected:
struct SpuSolverHash* m_solverHash;
btAlignedObjectArray<struct ManifoldCellHolder> m_sortedManifolds;
btAlignedObjectArray<struct ConstraintCellHolder> m_sortedConstraints;
btAlignedObjectArray<class btRigidBody*> m_allObjects;
int m_numberOfContacts;
SolverTaskScheduler m_taskScheduler;
public:
btParallelSequentialImpulseSolver (btThreadSupportInterface* threadIf, int maxOutstandingTasks);
virtual ~btParallelSequentialImpulseSolver();
virtual void prepareSolve (int numBodies, int numManifolds);
virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints, const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
virtual void allSolved (const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc);
virtual void reset ();
};
#endif

View File

@@ -0,0 +1,786 @@
#include "../PlatformDefinitions.h"
#include "SpuRaycastTask.h"
#include "../SpuCollisionObjectWrapper.h"
#include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
#include "SpuSubSimplexConvexCast.h"
#include "LinearMath/btAabbUtil2.h"
/* Future optimization strategies:
1. BBOX prune before loading shape data
2. Could reduce number of dmas for ray output data to a single read and write.
By sharing the temporary work unit output structures across objects.
3. The reason SpuRaycastNodeCallback1 is slower is because the triangle data isn't
being cached across calls. Fix that by doing the final ray pruning inside the callback.
*/
/* Future work:
1. support first hit, closest hit, etc rather than just closest hit.
2. support compound objects
*/
#define CALLBACK_ALL
struct RaycastTask_LocalStoreMemory
{
ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
btCollisionObject* getColObj()
{
return (btCollisionObject*) gColObj;
}
ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
{
return &gCollisionObjectWrapper;
}
CollisionShape_LocalStoreMemory gCollisionShape;
ATTRIBUTE_ALIGNED16(int spuIndices[16]);
bvhMeshShape_LocalStoreMemory bvhShapeData;
SpuConvexPolyhedronVertexData convexVertexData;
CompoundShape_LocalStoreMemory compoundShapeData;
};
#ifdef WIN32
void* createRaycastLocalStoreMemory()
{
return new RaycastTask_LocalStoreMemory;
};
#elif defined(__CELLOS_LV2__)
ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);
void* createRaycastLocalStoreMemory()
{
return &gLocalStoreMemory;
}
#endif
void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
{
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
/* DMA Collision object wrapper into local store */
dmaSize = sizeof(SpuCollisionObjectWrapper);
dmaPpuAddress2 = objectWrapper;
cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
/* DMA Collision object into local store */
dmaSize = sizeof(btCollisionObject);
dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr();
cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(2));
/* Gather information about collision object and shape */
gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform();
gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin ();
gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType ();
gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape();
gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape;
/* DMA shape data */
dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
cellDmaWaitTagStatusAll(DMA_MASK(1));
if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
{
btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions ();
} else {
gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
}
}
void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
{
cellDmaGet(rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0);
}
void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
{
cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0);
}
#if 0
SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
{
#if USE_SOFTWARE_CACHE
// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
// so the first and last bytes should fall on the same cache line
btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));
void* ls = spe_cache_read(ea);
memcpy(buffer, ls, size);
#else
stallingUnalignedDmaSmallGet(buffer,ea,size);
#endif
}
#endif
void small_cache_read_triple( void* ls0, ppu_address_t ea0,
void* ls1, ppu_address_t ea1,
void* ls2, ppu_address_t ea2,
size_t size)
{
btAssert(size<16);
ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]);
ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]);
ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]);
uint32_t i;
///make sure last 4 bits are the same, for cellDmaSmallGet
char* localStore0 = (char*)ls0;
uint32_t last4BitsOffset = ea0 & 0x0f;
char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
char* localStore1 = (char*)ls1;
last4BitsOffset = ea1 & 0x0f;
char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
char* localStore2 = (char*)ls2;
last4BitsOffset = ea2 & 0x0f;
char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
cellDmaWaitTagStatusAll( DMA_MASK(1) );
//this is slowish, perhaps memcpy on SPU is smarter?
for (i=0; btLikely( i<size );i++)
{
localStore0[i] = tmpTarget0[i];
localStore1[i] = tmpTarget1[i];
localStore2[i] = tmpTarget2[i];
}
}
void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr);
class spuRaycastNodeCallback1 : public btNodeOverlapCallback
{
RaycastGatheredObjectData* m_gatheredObjectData;
const SpuRaycastTaskWorkUnit* m_workUnits;
SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
int m_workUnit;
RaycastTask_LocalStoreMemory* m_lsMemPtr;
ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
: m_gatheredObjectData(gatheredObjectData),
m_workUnits(workUnits),
m_workUnitsOut(workUnitsOut),
m_workUnit(0),
m_lsMemPtr (lsMemPtr)
{
}
void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
virtual void processNode(int subPart, int triangleIndex)
{
///Create a triangle on the stack, call process collision, with GJK
///DMA the vertices, can benefit from software caching
// spu_printf("processNode with triangleIndex %d\n",triangleIndex);
// ugly solution to support both 16bit and 32bit indices
if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
{
short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
sizeof(short int));
m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
} else
{
int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
sizeof(int));
}
//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
// spu_printf("SPU index0=%d ,",spuIndices[0]);
// spu_printf("SPU index1=%d ,",spuIndices[1]);
// spu_printf("SPU index2=%d ,",spuIndices[2]);
// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
for (int j=2;btLikely( j>=0 );j--)
{
int graphicsindex = m_lsMemPtr->spuIndices[j];
//spu_printf("SPU index=%d ,",graphicsindex);
btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
// spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
///handle un-aligned vertices...
//another DMA for each vertex
small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
sizeof(btScalar));
//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
spuTriangleVertices[j] = btVector3(
spuUnscaledVertex[0]*meshScaling.getX(),
spuUnscaledVertex[1]*meshScaling.getY(),
spuUnscaledVertex[2]*meshScaling.getZ());
//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
}
RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];
//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
SpuRaycastTaskWorkUnitOut out;
out.hitFraction = 1.0;
performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
/* XXX: For now only take the closest hit */
if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
{
m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
}
}
};
class spuRaycastNodeCallback : public btNodeOverlapCallback
{
RaycastGatheredObjectData* m_gatheredObjectData;
const SpuRaycastTaskWorkUnit* m_workUnits;
SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
int m_numWorkUnits;
RaycastTask_LocalStoreMemory* m_lsMemPtr;
ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]);
ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]);
//ATTRIBUTE_ALIGNED16(int spuIndices[16]);
public:
spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
: m_gatheredObjectData(gatheredObjectData),
m_workUnits(workUnits),
m_workUnitsOut(workUnitsOut),
m_numWorkUnits(numWorkUnits),
m_lsMemPtr (lsMemPtr)
{
}
virtual void processNode(int subPart, int triangleIndex)
{
///Create a triangle on the stack, call process collision, with GJK
///DMA the vertices, can benefit from software caching
// spu_printf("processNode with triangleIndex %d\n",triangleIndex);
// ugly solution to support both 16bit and 32bit indices
if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
{
short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
sizeof(short int));
m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
} else
{
int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
sizeof(int));
}
//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
// spu_printf("SPU index0=%d ,",spuIndices[0]);
// spu_printf("SPU index1=%d ,",spuIndices[1]);
// spu_printf("SPU index2=%d ,",spuIndices[2]);
// spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
for (int j=2;btLikely( j>=0 );j--)
{
int graphicsindex = m_lsMemPtr->spuIndices[j];
//spu_printf("SPU index=%d ,",graphicsindex);
btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
// spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
///handle un-aligned vertices...
//another DMA for each vertex
small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
sizeof(btScalar));
//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
spuTriangleVertices[j] = btVector3(
spuUnscaledVertex[0]*meshScaling.getX(),
spuUnscaledVertex[1]*meshScaling.getY(),
spuUnscaledVertex[2]*meshScaling.getZ());
//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
}
RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0];
//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
for (int i = 0; i < m_numWorkUnits; i++)
{
SpuRaycastTaskWorkUnitOut out;
out.hitFraction = 1.0;
performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
/* XXX: For now only take the closest hit */
if (out.hitFraction < m_workUnitsOut[i].hitFraction)
{
m_workUnitsOut[i].hitFraction = out.hitFraction;
m_workUnitsOut[i].hitNormal = out.hitNormal;
}
}
}
};
void spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr,
btNodeOverlapCallback* nodeCallback,
const btVector3* rayFrom,
const btVector3* rayTo,
int numWorkUnits,
unsigned short int* quantizedQueryAabbMin,
unsigned short int* quantizedQueryAabbMax,
const btQuantizedBvhNode* rootNode,
int startNodeIndex,int endNodeIndex)
{
int curIndex = startNodeIndex;
int walkIterations = 0;
int subTreeSize = endNodeIndex - startNodeIndex;
int escapeIndex;
unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap;
unsigned int isLeafNode;
#define RAYAABB2
#ifdef RAYAABB2
unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK];
btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK];
for (int i = 0; i < numWorkUnits; i++)
{
btVector3 rayDirection = (rayTo[i]-rayFrom[i]);
rayDirection.normalize ();
lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]);
rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0];
rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1];
rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2];
sign[i][0] = rayDirection[0] < 0.0;
sign[i][1] = rayDirection[1] < 0.0;
sign[i][2] = rayDirection[2] < 0.0;
}
#endif
while (curIndex < endNodeIndex)
{
//catch bugs in tree data
assert (walkIterations < subTreeSize);
walkIterations++;
isLeafNode = rootNode->isLeafNode();
anyRayBoxOverlap = 0;
for (int i = 0; i < numWorkUnits; i++)
{
unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i);
unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i);
boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
if (!boxBoxOverlap)
continue;
rayBoxOverlap = 0;
btScalar param = 1.0;
btVector3 normal;
btVector3 bounds[2];
bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin);
bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax);
#ifdef RAYAABB2
rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]);
#else
rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal);
#endif
#ifndef CALLBACK_ALL
anyRayBoxOverlap = rayBoxOverlap || anyRayBoxOverlap;
/* If we have any ray vs. box overlap and this isn't a leaf node
we know that we need to dig deeper
*/
if (!isLeafNode && anyRayBoxOverlap)
break;
if (isLeafNode && rayBoxOverlap)
{
spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback;
callback->setWorkUnit (i);
nodeCallback->processNode (0, rootNode->getTriangleIndex());
}
#else
/* If we have any ray vs. box overlap and this isn't a leaf node
we know that we need to dig deeper
*/
if (rayBoxOverlap)
{
anyRayBoxOverlap = 1;
break;
}
#endif
}
#ifdef CALLBACK_ALL
if (isLeafNode && anyRayBoxOverlap)
{
nodeCallback->processNode (0, rootNode->getTriangleIndex());
}
#endif
if (anyRayBoxOverlap || isLeafNode)
{
rootNode++;
curIndex++;
} else
{
escapeIndex = rootNode->getEscapeIndex();
rootNode += escapeIndex;
curIndex += escapeIndex;
}
}
}
void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
{
//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape;
//need the mesh interface, for access to triangle vertices
dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape);
unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
/* Calculate the AABB for the ray in the triangle mesh shape */
btTransform rayInTriangleSpace;
rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse();
for (int i = 0; i < numWorkUnits; i++)
{
btVector3 aabbMin;
btVector3 aabbMax;
rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom);
rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo);
aabbMin = rayFromInTriangleSpace[i];
aabbMin.setMin (rayToInTriangleSpace[i]);
aabbMax = rayFromInTriangleSpace[i];
aabbMax.setMax (rayToInTriangleSpace[i]);
lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0);
lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1);
}
QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
//spu_printf("SPU: numNodes = %d\n",nodeArray.size());
BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();
#ifdef CALLBACK_ALL
spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr);
#else
spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr);
#endif
IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
// spu_printf("SPU: numSubTrees = %d\n",subTrees.size());
//not likely to happen
if (subTrees.size() && indexArray.size() == 1)
{
///DMA in the index info
dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
cellDmaWaitTagStatusAll(DMA_MASK(1));
//display the headers
int numBatch = subTrees.size();
for (int i=0;i<numBatch;)
{
// BEN: TODO - can reorder DMA transfers for less stall
int remaining = subTrees.size() - i;
int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
// spu_printf("nextBatch = %d\n",nextBatch);
for (int j=0;j<nextBatch;j++)
{
const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
unsigned int overlap = 1;
for (int boxId = 0; boxId < numWorkUnits; boxId++)
{
overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
if (overlap)
break;
}
if (overlap)
{
btAssert(subtree.m_subtreeSize);
//dma the actual nodes of this subtree
dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
cellDmaWaitTagStatusAll(DMA_MASK(2));
/* Walk this subtree */
{
spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr,
&nodeCallback,
&rayFromInTriangleSpace[0],
&rayToInTriangleSpace[0],
numWorkUnits,
&quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0],
&lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize);
}
}
// spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize);
}
// unsigned short int m_quantizedAabbMin[3];
// unsigned short int m_quantizedAabbMax[3];
// int m_rootNodeIndex;
// int m_subtreeSize;
i+=nextBatch;
}
//pre-fetch first tree, then loop and double buffer
}
}
void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
//XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n");
}
void
performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
{
SpuVoronoiSimplexSolver simplexSolver;
btTransform rayFromTrans, rayToTrans;
rayFromTrans.setIdentity ();
rayFromTrans.setOrigin (workUnit.rayFrom);
rayToTrans.setIdentity ();
rayToTrans.setOrigin (workUnit.rayTo);
SpuCastResult result;
/* Load the vertex data if the shape is a convex hull */
/* XXX: We might be loading the shape twice */
ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
{
register int dmaSize;
register ppu_address_t dmaPpuAddress2;
dmaSize = sizeof(btConvexHullShape);
dmaPpuAddress2 = gatheredObjectData->m_collisionShape;
cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
}
/* performRaycast */
SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result);
if (r)
{
workUnitOut->hitFraction = result.m_fraction;
workUnitOut->hitNormal = result.m_normal;
}
}
void processRaycastTask(void* userPtr, void* lsMemory)
{
RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory;
SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr;
SpuRaycastTaskDesc& taskDesc = *taskDescPtr;
SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers;
//spu_printf("in processRaycastTask %d\n", taskDesc.numSpuCollisionObjectWrappers);
/* for each object */
RaycastGatheredObjectData gatheredObjectData;
for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++)
{
//spu_printf("%d / %d\n", objectId, taskDesc.numSpuCollisionObjectWrappers);
/* load initial collision shape */
GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]);
if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType))
{
SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK];
for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
{
tWorkUnitsOut[rayId].hitFraction = 1.0;
}
performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory);
for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
{
const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
if (tWorkUnitsOut[rayId].hitFraction == 1.0)
continue;
ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
/* XXX Only support taking the closest hit for now */
if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction)
{
workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction;
workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal;
}
/* write ray cast data back */
dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
}
} else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) {
btVector3 objectBoxMin, objectBoxMax;
computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform);
for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
{
const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
btScalar ignored_param = 1.0;
btVector3 ignored_normal;
if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal))
{
ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
SpuRaycastTaskWorkUnitOut tWorkUnitOut;
tWorkUnitOut.hitFraction = 1.0;
performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
if (tWorkUnitOut.hitFraction == 1.0)
continue;
dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
/* XXX Only support taking the closest hit for now */
if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
{
workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
/* write ray cast data back */
dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
}
}
}
} else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) {
for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
{
const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
SpuRaycastTaskWorkUnitOut tWorkUnitOut;
tWorkUnitOut.hitFraction = 1.0;
performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
if (tWorkUnitOut.hitFraction == 1.0)
continue;
dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
/* XXX Only support taking the closest hit for now */
if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
{
workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
}
/* write ray cast data back */
dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
cellDmaWaitTagStatusAll(DMA_MASK(1));
}
}
}
}

View File

@@ -0,0 +1,50 @@
#ifndef __SPU_RAYCAST_TASK_H
#define __SPU_RAYCAST_TASK_H
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
#include "LinearMath/btVector3.h"
#include "../PlatformDefinitions.h"
ATTRIBUTE_ALIGNED16(struct) RaycastGatheredObjectData
{
ppu_address_t m_collisionShape;
void* m_spuCollisionShape;
btVector3 m_primitiveDimensions;
int m_shapeType;
float m_collisionMargin;
btTransform m_worldTransform;
};
ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnitOut
{
btVector3 hitNormal; /* out */
btScalar hitFraction; /* out */
btCollisionWorld::LocalShapeInfo shapeInfo; /* out */
};
/* Perform a raycast on collision object */
ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnit
{
btVector3 rayFrom; /* in */
btVector3 rayTo; /* in */
SpuRaycastTaskWorkUnitOut* output; /* out */
};
#define SPU_RAYCAST_WORK_UNITS_PER_TASK 16
ATTRIBUTE_ALIGNED128(struct) SpuRaycastTaskDesc
{
SpuRaycastTaskWorkUnit workUnits[SPU_RAYCAST_WORK_UNITS_PER_TASK];
unsigned int numWorkUnits;
void* spuCollisionObjectsWrappers;
unsigned int numSpuCollisionObjectWrappers;
int taskId;
};
void processRaycastTask (void* userPtr, void* lsMemory);
void* createRaycastLocalStoreMemory ();
#endif

View File

@@ -0,0 +1,152 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuSubSimplexConvexCast.h"
#include "BulletCollision/CollisionShapes/btConvexShape.h"
#include "BulletCollision/CollisionShapes/btMinkowskiSumShape.h"
#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
SpuSubsimplexRayCast::SpuSubsimplexRayCast (void* shapeB, SpuConvexPolyhedronVertexData* convexDataB, int shapeTypeB, float marginB,
SpuVoronoiSimplexSolver* simplexSolver)
:m_simplexSolver(simplexSolver), m_shapeB(shapeB), m_convexDataB(convexDataB), m_shapeTypeB(shapeTypeB), m_marginB(marginB)
{
}
///Typically the conservative advancement reaches solution in a few iterations, clip it to 32 for degenerate cases.
///See discussion about this here http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=565
#ifdef BT_USE_DOUBLE_PRECISION
#define MAX_ITERATIONS 64
#else
#define MAX_ITERATIONS 32
#endif
/* Returns the support point of the minkowski sum:
* MSUM(Pellet, ConvexShape)
*
*/
void supportPoints (const btTransform xformRay,
const btTransform xformB,
const int shapeType,
const void* shape,
SpuConvexPolyhedronVertexData* convexVertexData,
const btScalar marginB,
const btVector3& seperatingAxis,
btVector3& w,
btVector3& supVertexRay,
btVector3& supVertexB)
{
btVector3 saUnit = seperatingAxis;
saUnit.normalize();
btVector3 SupportPellet = xformRay(0.0001 * -saUnit);
btVector3 rotatedSeperatingAxis = seperatingAxis * xformB.getBasis();
btVector3 SupportShape = xformB(localGetSupportingVertexWithoutMargin(shapeType, (void*)shape, rotatedSeperatingAxis, convexVertexData));
SupportShape += saUnit * marginB;
w = SupportPellet - SupportShape;
supVertexRay = SupportPellet;
supVertexB = SupportShape;
}
bool SpuSubsimplexRayCast::calcTimeOfImpact(const btTransform& fromRay,
const btTransform& toRay,
const btTransform& fromB,
const btTransform& toB,
SpuCastResult& result)
{
m_simplexSolver->reset();
btVector3 linVelRay, linVelB;
linVelRay = toRay.getOrigin() - fromRay.getOrigin();
linVelB = toB.getOrigin() - fromB.getOrigin ();
btScalar lambda = btScalar(0.);
btTransform interpolatedTransRay = fromRay;
btTransform interpolatedTransB = fromB;
btVector3 r = (linVelRay-linVelB);
btVector3 supVertexRay;
btVector3 supVertexB;
btVector3 v;
supportPoints (fromRay, fromB, m_shapeTypeB, m_shapeB, m_convexDataB, m_marginB, r, v, supVertexRay, supVertexB);
btVector3 n;
n.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
bool hasResult = false;
btVector3 c;
int maxIter = MAX_ITERATIONS;
btScalar lastLambda = lambda;
btScalar dist2 = v.length2();
#ifdef BT_USE_DOUBLE_PRECISION
btScalar epsilon = btScalar(0.0001);
#else
btScalar epsilon = btScalar(0.0001);
#endif //BT_USE_DOUBLE_PRECISION
btVector3 w,p;
btScalar VdotR;
while ( (dist2 > epsilon) && maxIter--)
{
supportPoints (interpolatedTransRay, interpolatedTransB, m_shapeTypeB, m_shapeB, m_convexDataB, m_marginB, v, w, supVertexRay, supVertexB);
btScalar VdotW = v.dot(w);
if (lambda > btScalar(1.0))
{
return false;
}
if ( VdotW > btScalar(0.))
{
VdotR = v.dot(r);
if (VdotR >= -(SIMD_EPSILON*SIMD_EPSILON))
return false;
else
{
lambda = lambda - VdotW / VdotR;
interpolatedTransRay.getOrigin().setInterpolate3(fromRay.getOrigin(), toRay.getOrigin(), lambda);
interpolatedTransB.getOrigin().setInterpolate3(fromB.getOrigin(), toB.getOrigin(), lambda);
lastLambda = lambda;
n = v;
hasResult = true;
}
}
m_simplexSolver->addVertex(w, supVertexRay, supVertexB);
if (m_simplexSolver->closest(v))
{
dist2 = v.length2();
hasResult = true;
//printf("V=%f , %f, %f\n",v[0],v[1],v[2]);
//printf("DIST2=%f\n",dist2);
//printf("numverts = %i\n",m_simplexSolver->numVertices());
} else
{
dist2 = btScalar(0.);
}
}
result.m_fraction = lambda;
result.m_normal = n;
btVector3 hitRay, hitB;
m_simplexSolver->compute_points (hitRay, hitB);
/* TODO: We could output hit point here (hitB) */
return true;
}

View File

@@ -0,0 +1,60 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_SUBSIMPLEX_RAY_CAST_H
#define SPU_SUBSIMPLEX_RAY_CAST_H
#include "../SpuNarrowPhaseCollisionTask/SpuVoronoiSimplexSolver.h"
#include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
#include "SpuRaycastTask.h"
class btConvexShape;
struct SpuCastResult
{
float m_fraction;
btVector3 m_normal;
};
/// btSubsimplexConvexCast implements Gino van den Bergens' paper
///"Ray Casting against bteral Convex Objects with Application to Continuous Collision Detection"
/// GJK based Ray Cast, optimized version
/// Objects should not start in overlap, otherwise results are not defined.
class SpuSubsimplexRayCast
{
SpuVoronoiSimplexSolver* m_simplexSolver;
void* m_shapeB;
SpuConvexPolyhedronVertexData* m_convexDataB;
int m_shapeTypeB;
float m_marginB;
public:
SpuSubsimplexRayCast (void* shapeB, SpuConvexPolyhedronVertexData* convexDataB, int shapeTypeB, float marginB,
SpuVoronoiSimplexSolver* simplexSolver);
//virtual ~btSubsimplexConvexCast();
///SimsimplexConvexCast calculateTimeOfImpact calculates the time of impact+normal for the linear cast (sweep) between two moving objects.
///Precondition is that objects should not penetration/overlap at the start from the interval. Overlap can be tested using btGjkPairDetector.
bool calcTimeOfImpact(const btTransform& fromRay,
const btTransform& toRay,
const btTransform& fromB,
const btTransform& toB,
SpuCastResult& result);
};
#endif //SUBSIMPLEX_RAY_CAST_H

View File

@@ -0,0 +1,189 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuRaycastTaskProcess.h"
SpuRaycastTaskProcess::SpuRaycastTaskProcess(class btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
:m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
m_workUnitTaskBuffers = (unsigned char *)0;
m_taskBusy.resize(m_maxNumOutstandingTasks);
m_spuRaycastTaskDesc.resize(m_maxNumOutstandingTasks);
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_currentWorkUnitInTask = 0;
m_threadInterface->startSPU();
//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
//printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput));
}
SpuRaycastTaskProcess::~SpuRaycastTaskProcess()
{
if (m_workUnitTaskBuffers != 0)
{
btAlignedFree(m_workUnitTaskBuffers);
m_workUnitTaskBuffers = 0;
}
m_threadInterface->stopSPU();
}
void SpuRaycastTaskProcess::initialize2(void* spuCollisionObjectsWrappers, int numSpuCollisionObjectWrappers)
{
m_spuCollisionObjectWrappers = spuCollisionObjectsWrappers;
m_numSpuCollisionObjectWrappers = numSpuCollisionObjectWrappers;
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_currentWorkUnitInTask = 0;
#ifdef DEBUG_SpuRaycastTaskProcess
m_initialized = true;
#endif
}
void SpuRaycastTaskProcess::issueTask2()
{
m_taskBusy[m_currentTask] = true;
m_numBusyTasks++;
SpuRaycastTaskDesc& taskDesc = m_spuRaycastTaskDesc[m_currentTask];
taskDesc.taskId = m_currentTask;
m_threadInterface->sendRequest(1, (uint32_t) &taskDesc,m_currentTask);
//printf("send thread requested for task %d\n", m_currentTask);
// if all tasks busy, wait for spu event to clear the task.
if (m_numBusyTasks >= m_maxNumOutstandingTasks)
{
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
m_threadInterface->waitForResponse(&taskId, &outputSize);
//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
} else {
//printf("Sent request, not enough busy tasks\n");
}
}
void SpuRaycastTaskProcess::addWorkToTask(SpuRaycastTaskWorkUnit& workunit)
{
m_spuRaycastTaskDesc[m_currentTask].workUnits[m_currentWorkUnitInTask] = workunit;
m_currentWorkUnitInTask++;
if (m_currentWorkUnitInTask == SPU_RAYCAST_WORK_UNITS_PER_TASK)
{
m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask;
m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers;
m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers;
//printf("Task buffer full, issuing\n");
issueTask2 ();
//printf("Returned from issueTask2()\n");
m_currentWorkUnitInTask = 0;
// find new task buffer
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
if (!m_taskBusy[i])
{
m_currentTask = i;
//init the task data
break;
}
}
//printf("next task = %d\n", m_currentTask);
}
}
void
SpuRaycastTaskProcess::flush2()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("\nSpuRaycastTaskProcess::flush()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
// if there's a partially filled task buffer, submit that task
//printf("Flushing... %d remaining\n", m_currentWorkUnitInTask);
if (m_currentWorkUnitInTask > 0)
{
m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask;
m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers;
m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers;
issueTask2();
m_currentWorkUnitInTask = 0;
}
// all tasks are issued, wait for all tasks to be complete
while(m_numBusyTasks > 0)
{
// Consolidating SPU code
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
//printf("Busy tasks... %d\n", m_numBusyTasks);
{
// SPURS support.
m_threadInterface->waitForResponse(&taskId, &outputSize);
}
//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
//postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}

View File

@@ -0,0 +1,72 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_RAY_TASK_PROCESS_H
#define SPU_RAY_TASK_PROCESS_H
#include <assert.h>
#include <string.h>
#include <LinearMath/btScalar.h>
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include <LinearMath/btAlignedAllocator.h>
#include "PlatformDefinitions.h"
#include "LinearMath/btAlignedObjectArray.h"
#include "SpuRaycastTask/SpuRaycastTask.h"
#include "btThreadSupportInterface.h"
/// SpuRaycastTaskProcess handles SPU processing of raycast requests
class SpuRaycastTaskProcess
{
unsigned char *m_workUnitTaskBuffers;
// track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy;
btAlignedObjectArray<SpuRaycastTaskDesc> m_spuRaycastTaskDesc;
btThreadSupportInterface* m_threadInterface;
int m_maxNumOutstandingTasks;
int m_numBusyTasks;
// the current task and the current entry to insert a new work unit
int m_currentTask;
int m_currentWorkUnitInTask;
int m_numSpuCollisionObjectWrappers;
void* m_spuCollisionObjectWrappers;
void issueTask2();
//void postProcess(unsigned int taskId, int outputSize);
public:
SpuRaycastTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks);
~SpuRaycastTaskProcess();
/// call initialize in the beginning of the frame, before addCollisionPairToTask
void initialize2(void* spuCollisionObjectsWrappers, int numSpuCollisionObjectWrappers);
/// batch up additional work to a current task for SPU processing. When batch is full, it issues the task.
void addWorkToTask(struct SpuRaycastTaskWorkUnit&);
/// call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
void flush2();
};
#endif // SPU_COLLISION_TASK_PROCESS_H

View File

@@ -0,0 +1,214 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "SpuSampleTask.h"
#include "BulletDynamics/Dynamics/btRigidBody.h"
#include "../PlatformDefinitions.h"
#include "../SpuFakeDma.h"
#include "LinearMath/btMinMax.h"
#ifdef __SPU__
#include <spu_printf.h>
#else
#include <stdio.h>
#define spu_printf printf
#endif
#define MAX_NUM_BODIES 8192
struct SampleTask_LocalStoreMemory
{
ATTRIBUTE_ALIGNED16(char gLocalRigidBody [sizeof(btRigidBody)+16]);
ATTRIBUTE_ALIGNED16(void* gPointerArray[MAX_NUM_BODIES]);
};
//-- MAIN METHOD
void processSampleTask(void* userPtr, void* lsMemory)
{
// BT_PROFILE("processSampleTask");
SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory;
SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr;
SpuSampleTaskDesc& taskDesc = *taskDescPtr;
switch (taskDesc.m_sampleCommand)
{
case CMD_SAMPLE_INTEGRATE_BODIES:
{
btTransform predictedTrans;
btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
int batchSize = taskDesc.m_sampleValue;
if (batchSize>MAX_NUM_BODIES)
{
spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
break;
}
int dmaArraySize = batchSize*sizeof(void*);
uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
// spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
if (dmaArraySize>=16)
{
cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
} else
{
stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize);
}
for ( int i=0;i<batchSize;i++)
{
///DMA rigid body
void* localPtr = &localMemory->gLocalRigidBody[0];
void* shortAdd = localMemory->gPointerArray[i];
uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
// spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
int dmaBodySize = sizeof(btRigidBody);
cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
float timeStep = 1.f/60.f;
btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
if (body)
{
if (body->isActive() && (!body->isStaticOrKinematicObject()))
{
body->predictIntegratedTransform(timeStep, predictedTrans);
body->proceedToTransform( predictedTrans);
void* ptr = (void*)localPtr;
// spu_printf("cellDmaLargePut from %llx to LS %llx\n",ptr,ppuRigidBodyAddress);
cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
}
}
}
break;
}
case CMD_SAMPLE_PREDICT_MOTION_BODIES:
{
btTransform predictedTrans;
btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
int batchSize = taskDesc.m_sampleValue;
int dmaArraySize = batchSize*sizeof(void*);
if (batchSize>MAX_NUM_BODIES)
{
spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
break;
}
uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
// spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
if (dmaArraySize>=16)
{
cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
} else
{
stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress , dmaArraySize);
}
for ( int i=0;i<batchSize;i++)
{
///DMA rigid body
void* localPtr = &localMemory->gLocalRigidBody[0];
void* shortAdd = localMemory->gPointerArray[i];
uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
// spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
int dmaBodySize = sizeof(btRigidBody);
cellDmaGet((void*)localPtr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
float timeStep = 1.f/60.f;
btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
if (body)
{
if (!body->isStaticOrKinematicObject())
{
if (body->isActive())
{
body->integrateVelocities( timeStep);
//damping
body->applyDamping(timeStep);
body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform());
void* ptr = (void*)localPtr;
cellDmaLargePut(ptr, ppuRigidBodyAddress , dmaBodySize, DMA_TAG(1), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(1));
}
}
}
}
break;
}
default:
{
}
};
}
#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
ATTRIBUTE_ALIGNED16(SampleTask_LocalStoreMemory gLocalStoreMemory);
void* createSampleLocalStoreMemory()
{
return &gLocalStoreMemory;
}
#else
void* createSampleLocalStoreMemory()
{
return new SampleTask_LocalStoreMemory;
};
#endif

View File

@@ -0,0 +1,54 @@
/*
Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_SAMPLE_TASK_H
#define SPU_SAMPLE_TASK_H
#include "../PlatformDefinitions.h"
#include "LinearMath/btScalar.h"
#include "LinearMath/btVector3.h"
#include "LinearMath/btMatrix3x3.h"
#include "LinearMath/btAlignedAllocator.h"
enum
{
CMD_SAMPLE_INTEGRATE_BODIES = 1,
CMD_SAMPLE_PREDICT_MOTION_BODIES
};
ATTRIBUTE_ALIGNED16(struct) SpuSampleTaskDesc
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint32_t m_sampleCommand;
uint32_t m_taskId;
uint64_t m_mainMemoryPtr;
int m_sampleValue;
};
void processSampleTask(void* userPtr, void* lsMemory);
void* createSampleLocalStoreMemory();
#endif //SPU_SAMPLE_TASK_H

View File

@@ -0,0 +1 @@
Empty placeholder for future Libspe2 SPU task

View File

@@ -0,0 +1,222 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//#define __CELLOS_LV2__ 1
#define USE_SAMPLE_PROCESS 1
#ifdef USE_SAMPLE_PROCESS
#include "SpuSampleTaskProcess.h"
#include <stdio.h>
#ifdef __SPU__
void SampleThreadFunc(void* userPtr,void* lsMemory)
{
//do nothing
printf("hello world\n");
}
void* SamplelsMemoryFunc()
{
//don't create local store memory, just return 0
return 0;
}
#else
#include "btThreadSupportInterface.h"
//# include "SPUAssert.h"
#include <string.h>
extern "C" {
extern char SPU_SAMPLE_ELF_SYMBOL[];
};
SpuSampleTaskProcess::SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks)
:m_threadInterface(threadInterface),
m_maxNumOutstandingTasks(maxNumOutstandingTasks)
{
m_taskBusy.resize(m_maxNumOutstandingTasks);
m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_initialized = false;
m_threadInterface->startSPU();
}
SpuSampleTaskProcess::~SpuSampleTaskProcess()
{
m_threadInterface->stopSPU();
}
void SpuSampleTaskProcess::initialize()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("SpuSampleTaskProcess::initialize()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
for (int i = 0; i < m_maxNumOutstandingTasks; i++)
{
m_taskBusy[i] = false;
}
m_numBusyTasks = 0;
m_currentTask = 0;
m_initialized = true;
}
void SpuSampleTaskProcess::issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand)
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("SpuSampleTaskProcess::issueTask (m_currentTask= %d\)n", m_currentTask);
#endif //DEBUG_SPU_TASK_SCHEDULING
m_taskBusy[m_currentTask] = true;
m_numBusyTasks++;
SpuSampleTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
{
// send task description in event message
// no error checking here...
// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
taskDesc.m_mainMemoryPtr = reinterpret_cast<uint64_t>(sampleMainMemPtr);
taskDesc.m_sampleValue = sampleValue;
taskDesc.m_sampleCommand = sampleCommand;
//some bookkeeping to recognize finished tasks
taskDesc.m_taskId = m_currentTask;
}
m_threadInterface->sendRequest(1, (uint32_t) &taskDesc, m_currentTask);
// if all tasks busy, wait for spu event to clear the task.
if (m_numBusyTasks >= m_maxNumOutstandingTasks)
{
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
m_threadInterface->waitForResponse(&taskId, &outputSize);
//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
// find new task buffer
for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
{
if (!m_taskBusy[i])
{
m_currentTask = i;
break;
}
}
}
///Optional PPU-size post processing for each task
void SpuSampleTaskProcess::postProcess(int taskId, int outputSize)
{
}
void SpuSampleTaskProcess::flush()
{
#ifdef DEBUG_SPU_TASK_SCHEDULING
printf("\nSpuCollisionTaskProcess::flush()\n");
#endif //DEBUG_SPU_TASK_SCHEDULING
// all tasks are issued, wait for all tasks to be complete
while(m_numBusyTasks > 0)
{
// Consolidating SPU code
unsigned int taskId;
unsigned int outputSize;
for (int i=0;i<m_maxNumOutstandingTasks;i++)
{
if (m_taskBusy[i])
{
taskId = i;
break;
}
}
{
m_threadInterface->waitForResponse(&taskId, &outputSize);
}
//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
postProcess(taskId, outputSize);
m_taskBusy[taskId] = false;
m_numBusyTasks--;
}
}
#endif
#endif //USE_SAMPLE_PROCESS

View File

@@ -0,0 +1,153 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef SPU_SAMPLE_TASK_PROCESS_H
#define SPU_SAMPLE_TASK_PROCESS_H
#include <assert.h>
#include "PlatformDefinitions.h"
#include <stdlib.h>
#include "LinearMath/btAlignedObjectArray.h"
#include "SpuSampleTask/SpuSampleTask.h"
//just add your commands here, try to keep them globally unique for debugging purposes
#define CMD_SAMPLE_TASK_COMMAND 10
/// SpuSampleTaskProcess handles SPU processing of collision pairs.
/// When PPU issues a task, it will look for completed task buffers
/// PPU will do postprocessing, dependent on workunit output (not likely)
class SpuSampleTaskProcess
{
// track task buffers that are being used, and total busy tasks
btAlignedObjectArray<bool> m_taskBusy;
btAlignedObjectArray<SpuSampleTaskDesc>m_spuSampleTaskDesc;
unsigned int m_numBusyTasks;
// the current task and the current entry to insert a new work unit
unsigned int m_currentTask;
bool m_initialized;
void postProcess(int taskId, int outputSize);
class btThreadSupportInterface* m_threadInterface;
unsigned int m_maxNumOutstandingTasks;
public:
SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks);
~SpuSampleTaskProcess();
///call initialize in the beginning of the frame, before addCollisionPairToTask
void initialize();
void issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand);
///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
void flush();
};
#if defined(USE_LIBSPE2) && defined(__SPU__)
////////////////////MAIN/////////////////////////////
#include "../SpuLibspe2Support.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <SpuFakeDma.h>
void * SamplelsMemoryFunc();
void SampleThreadFunc(void* userPtr,void* lsMemory);
//#define DEBUG_LIBSPE2_MAINLOOP
int main(unsigned long long speid, addr64 argp, addr64 envp)
{
printf("SPU is up \n");
ATTRIBUTE_ALIGNED128(btSpuStatus status);
ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
unsigned int received_message = Spu_Mailbox_Event_Nothing;
bool shutdown = false;
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
status.m_status = Spu_Status_Free;
status.m_lsMemory.p = SamplelsMemoryFunc();
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
while (!shutdown)
{
received_message = spu_read_in_mbox();
switch(received_message)
{
case Spu_Mailbox_Event_Shutdown:
shutdown = true;
break;
case Spu_Mailbox_Event_Task:
// refresh the status
#ifdef DEBUG_LIBSPE2_MAINLOOP
printf("SPU recieved Task \n");
#endif //DEBUG_LIBSPE2_MAINLOOP
cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
btAssert(status.m_status==Spu_Status_Occupied);
cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
break;
case Spu_Mailbox_Event_Nothing:
default:
break;
}
// set to status free and wait for next task
status.m_status = Spu_Status_Free;
cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
cellDmaWaitTagStatusAll(DMA_MASK(3));
}
return 0;
}
//////////////////////////////////////////////////////
#endif
#endif // SPU_SAMPLE_TASK_PROCESS_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,279 @@
/*
Bullet Continuous Collision Detection and Physics Library - Parallel solver
Copyright (c) 2007 Starbreeze Studios
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Written by: Marten Svanfeldt
*/
#ifndef SPU_PARALLELSOLVERTASK_H
#define SPU_PARALLELSOLVERTASK_H
#include "../PlatformDefinitions.h"
#include "LinearMath/btScalar.h"
#include "LinearMath/btVector3.h"
#include "LinearMath/btMatrix3x3.h"
#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
#include "../SpuSync.h"
#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
#include "LinearMath/btAlignedAllocator.h"
ATTRIBUTE_ALIGNED16(struct) ManifoldCellHolder
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint32_t m_hashCellIndex;
class btPersistentManifold* m_manifold;
};
ATTRIBUTE_ALIGNED16(struct) ConstraintCellHolder
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint32_t m_hashCellIndex;
uint32_t m_constraintType;
class btTypedConstraint* m_constraint;
};
enum
{
SPU_HASH_NUMCELLS = 128,
SPU_HASH_WORDWIDTH = sizeof(uint32_t)*8,
SPU_HASH_NUMCELLDWORDS = ((SPU_HASH_NUMCELLS + SPU_HASH_WORDWIDTH - 1) / SPU_HASH_WORDWIDTH),
SPU_HASH_NUMUNUSEDBITS = (SPU_HASH_NUMCELLDWORDS * SPU_HASH_WORDWIDTH) - SPU_HASH_NUMCELLS,
SPU_HASH_PHYSSIZE = 4, //TODO: MAKE CONFIGURABLE
SPU_MAX_BODIES_PER_CELL = 1024,
SPU_MAX_SPUS = 6
};
enum
{
CMD_SOLVER_SETUP_BODIES = 1,
CMD_SOLVER_MANIFOLD_SETUP = 2,
CMD_SOLVER_CONSTRAINT_SETUP = 3,
CMD_SOLVER_SOLVE_ITERATE = 4,
CMD_SOLVER_COPYBACK_BODIES = 5
};
struct SpuSolverHashCell
{
uint16_t m_numLocalBodies;
uint16_t m_solverBodyOffsetListOffset;
uint16_t m_numManifolds;
uint16_t m_manifoldListOffset;
uint16_t m_numContacts;
uint16_t m_internalConstraintListOffset;
uint16_t m_numConstraints;
uint16_t m_constraintListOffset;
};
// Shared data structures
struct SpuSolverHash
{
// Dependency matrix
ATTRIBUTE_ALIGNED16(uint32_t m_dependencyMatrix[SPU_HASH_NUMCELLS][SPU_HASH_NUMCELLDWORDS]);
ATTRIBUTE_ALIGNED16(uint32_t m_currentMask[SPU_MAX_SPUS+1][SPU_HASH_NUMCELLDWORDS]);
// The hash itself
ATTRIBUTE_ALIGNED16(SpuSolverHashCell m_Hash[SPU_HASH_NUMCELLS]);
// Hash meta-data
};
inline unsigned int spuHash(unsigned int k) { return k*2654435769u; };
inline unsigned int spuGetHashCellIndex(int x, int y, int z)
{
//int n = 0x8da6b343 * x + 0xd8163841 * y + 0xcb1ab31f * z;
int n = x ^ spuHash(y ^ spuHash (z));
return ((unsigned int)n) & (SPU_HASH_NUMCELLS-1);
}
ATTRIBUTE_ALIGNED16(struct) SpuSolverBody
{
BT_DECLARE_ALIGNED_ALLOCATOR();
btVector3 m_linearVelocity;
btVector3 m_angularVelocity;
btMatrix3x3 m_worldInvInertiaTensor;
btScalar m_angularFactor;
btScalar m_invertedMass;
};
ATTRIBUTE_ALIGNED16(struct) SpuSolverInternalConstraint
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint32_t m_localOffsetBodyA;
uint32_t m_localOffsetBodyB;
btScalar m_appliedImpulse;
btScalar m_friction;
btScalar m_restitution;
btScalar m_jacDiagABInv;
btScalar m_penetration;
btVector3 m_normal;
btVector3 m_relpos1CrossNormal;
btVector3 m_relpos2CrossNormal;
btVector3 m_angularComponentA;
btVector3 m_angularComponentB;
};
ATTRIBUTE_ALIGNED16(struct) SpuSolverConstraint
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint16_t m_localOffsetBodyA;
uint16_t m_localOffsetBodyB;
uint16_t m_constraintType;
struct
{
uint16_t m_useLinear : 1;
uint16_t m_limit1 : 1;
uint16_t m_limit2 : 1;
uint16_t m_limit3 : 1;
uint16_t m_limit4 : 1;
uint16_t m_limit5 : 1;
uint16_t m_limit6 : 1;
uint16_t m_motor1 : 1;
uint16_t m_motor2 : 1;
uint16_t m_motor3 : 1;
uint16_t m_motor4 : 1;
uint16_t m_motor5 : 1;
uint16_t m_motor6 : 1;
} m_flags;
// Linear parts, used by all constraints
btQuadWordStorage m_relPos1;
btQuadWordStorage m_relPos2;
btQuadWordStorage m_jacdiagABInv; //Jacobian inverse multiplied by gamma (damping) for each axis
btQuadWordStorage m_linearBias; //depth*tau/(dt*gamma) along each axis
// Joint-specific parts
union
{
struct
{
btQuadWordStorage m_frameAinW[3];
btQuadWordStorage m_frameBinW[3];
// For angular
btQuadWordStorage m_angJacdiagABInv; //1/j
btQuadWordStorage m_angularBias; //error/dt, in x/y. limit error*bias factor / (dt * relaxation factor) in z
// For limit
float m_limitAccumulatedImpulse;
float m_limitJacFactor; //limitSign*relaxation factor
// For motor
float m_motorVelocity;
float m_motorImpulse;
} hinge;
struct
{
btQuadWordStorage m_swingAxis;
btQuadWordStorage m_twistAxis;
float m_swingError;
float m_swingJacInv;
float m_swingLimitImpulse;
float m_twistError;
float m_twistJacInv;
float m_twistLimitImpulse;
} conetwist;
};
};
ATTRIBUTE_ALIGNED16(struct) SpuSolverDataDesc
{
BT_DECLARE_ALIGNED_ALLOCATOR();
SpuSolverHash* m_solverHash;
SpuSolverBody* m_solverBodyList;
SpuSolverInternalConstraint* m_solverInternalConstraintList;
SpuSolverConstraint* m_solverConstraintList;
uint32_t* m_solverBodyOffsetList;
};
ATTRIBUTE_ALIGNED16(struct) SpuSolverTaskDesc
{
BT_DECLARE_ALIGNED_ALLOCATOR();
uint32_t m_solverCommand;
uint32_t m_taskId;
SpuSolverDataDesc m_solverData;
// command specific data
union
{
// Body setup
struct
{
uint32_t m_startBody;
uint32_t m_numBodies;
class btRigidBody** m_rbList;
} m_bodySetup, m_bodyCopyback;
struct
{
uint32_t m_startCell;
uint32_t m_numCells;
uint32_t m_numBodies;
uint32_t m_numManifolds;
ManifoldCellHolder* m_manifoldHolders;
ConstraintCellHolder* m_constraintHolders;
btContactSolverInfoData m_solverInfo;
} m_manifoldSetup;
struct
{
btSpinlock::SpinVariable* m_spinLockVar;
} m_iterate;
} m_commandData;
};
void processSolverTask(void* userPtr, void* lsMemory);
void* createSolverLocalStoreMemory();
// Helper
inline bool constraintTypeSupported(btTypedConstraintType type)
{
return type == POINT2POINT_CONSTRAINT_TYPE ||
type == HINGE_CONSTRAINT_TYPE ||
type == CONETWIST_CONSTRAINT_TYPE ||
type == D6_CONSTRAINT_TYPE;
}
#endif

View File

@@ -0,0 +1,146 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2007 Starbreeze Studios
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Written by: Marten Svanfeldt
*/
#ifndef SPU_SYNC_H
#define SPU_SYNC_H
#include "PlatformDefinitions.h"
#if defined(WIN32)
#define WIN32_LEAN_AND_MEAN
#ifdef _XBOX
#include <Xtl.h>
#else
#include <Windows.h>
#endif
class btSpinlock
{
public:
//typedef volatile LONG SpinVariable;
typedef CRITICAL_SECTION SpinVariable;
btSpinlock (SpinVariable* var)
: spinVariable (var)
{}
void Init ()
{
//*spinVariable = 0;
InitializeCriticalSection(spinVariable);
}
void Lock ()
{
EnterCriticalSection(spinVariable);
}
void Unlock ()
{
LeaveCriticalSection(spinVariable);
}
private:
SpinVariable* spinVariable;
};
#elif defined (__CELLOS_LV2__)
//#include <cell/atomic.h>
#include <cell/sync/mutex.h>
class btSpinlock
{
public:
typedef CellSyncMutex SpinVariable;
btSpinlock (SpinVariable* var)
: spinVariable (var)
{}
void Init ()
{
#ifndef __SPU__
//*spinVariable = 1;
cellSyncMutexInitialize(spinVariable);
#endif
}
void Lock ()
{
#ifdef __SPU__
// lock semaphore
/*while (cellAtomicTestAndDecr32(atomic_buf, (uint64_t)spinVariable) == 0)
{
};*/
cellSyncMutexLock((uint64_t)spinVariable);
#endif
}
void Unlock ()
{
#ifdef __SPU__
//cellAtomicIncr32(atomic_buf, (uint64_t)spinVariable);
cellSyncMutexUnlock((uint64_t)spinVariable);
#endif
}
private:
SpinVariable* spinVariable;
ATTRIBUTE_ALIGNED128(uint32_t atomic_buf[32]);
};
#else
//create a dummy implementation (without any locking) useful for serial processing
class btSpinlock
{
public:
typedef int SpinVariable;
btSpinlock (SpinVariable* var)
: spinVariable (var)
{}
void Init ()
{
}
void Lock ()
{
}
void Unlock ()
{
}
private:
SpinVariable* spinVariable;
};
#endif
#endif

View File

@@ -0,0 +1,259 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "Win32ThreadSupport.h"
#ifdef USE_WIN32_THREADING
#include <windows.h>
#include "SpuCollisionTaskProcess.h"
#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
///The number of threads should be equal to the number of available cores
///Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
///Setup and initialize SPU/CELL/Libspe2
Win32ThreadSupport::Win32ThreadSupport(const Win32ThreadConstructionInfo & threadConstructionInfo)
{
startThreads(threadConstructionInfo);
}
///cleanup/shutdown Libspe2
Win32ThreadSupport::~Win32ThreadSupport()
{
stopSPU();
}
#include <stdio.h>
DWORD WINAPI Thread_no_1( LPVOID lpParam )
{
Win32ThreadSupport::btSpuStatus* status = (Win32ThreadSupport::btSpuStatus*)lpParam;
while (1)
{
WaitForSingleObject(status->m_eventStartHandle,INFINITE);
void* userPtr = status->m_userPtr;
if (userPtr)
{
btAssert(status->m_status);
status->m_userThreadFunc(userPtr,status->m_lsMemory);
status->m_status = 2;
SetEvent(status->m_eventCompletetHandle);
} else
{
//exit Thread
status->m_status = 3;
SetEvent(status->m_eventCompletetHandle);
printf("Thread with taskId %i with handle %i exiting\n",status->m_taskId, status->m_threadHandle);
break;
}
}
printf("Thread TERMINATED\n");
return 0;
}
///send messages to SPUs
void Win32ThreadSupport::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t taskId)
{
/// gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (uint32_t) &taskDesc);
///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
switch (uiCommand)
{
case CMD_GATHER_AND_PROCESS_PAIRLIST:
{
//#define SINGLE_THREADED 1
#ifdef SINGLE_THREADED
btSpuStatus& spuStatus = m_activeSpuStatus[0];
spuStatus.m_userPtr=(void*)uiArgument0;
spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
HANDLE handle =0;
#else
btSpuStatus& spuStatus = m_activeSpuStatus[taskId];
btAssert(taskId>=0);
btAssert(taskId<m_activeSpuStatus.size());
spuStatus.m_commandId = uiCommand;
spuStatus.m_status = 1;
spuStatus.m_userPtr = (void*)uiArgument0;
///fire event to start new task
SetEvent(spuStatus.m_eventStartHandle);
#endif //CollisionTask_LocalStoreMemory
break;
}
default:
{
///not implemented
btAssert(0);
}
};
}
///check for messages from SPUs
void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
{
///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
btAssert(m_activeSpuStatus.size());
int last = -1;
#ifndef SINGLE_THREADED
DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
btAssert(res != WAIT_FAILED);
last = res - WAIT_OBJECT_0;
btSpuStatus& spuStatus = m_activeSpuStatus[last];
btAssert(spuStatus.m_threadHandle);
btAssert(spuStatus.m_eventCompletetHandle);
//WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
btAssert(spuStatus.m_status > 1);
spuStatus.m_status = 0;
///need to find an active spu
btAssert(last>=0);
#else
last=0;
btSpuStatus& spuStatus = m_activeSpuStatus[last];
#endif //SINGLE_THREADED
*puiArgument0 = spuStatus.m_taskId;
*puiArgument1 = spuStatus.m_status;
}
void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo)
{
m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
m_completeHandles.resize(threadConstructionInfo.m_numThreads);
for (int i=0;i<threadConstructionInfo.m_numThreads;i++)
{
printf("starting thread %d\n",i);
btSpuStatus& spuStatus = m_activeSpuStatus[i];
LPSECURITY_ATTRIBUTES lpThreadAttributes=NULL;
SIZE_T dwStackSize=threadConstructionInfo.m_threadStackSize;
LPTHREAD_START_ROUTINE lpStartAddress=&Thread_no_1;
LPVOID lpParameter=&spuStatus;
DWORD dwCreationFlags=0;
LPDWORD lpThreadId=0;
spuStatus.m_userPtr=0;
sprintf(spuStatus.m_eventStartHandleName,"eventStart%s%d",threadConstructionInfo.m_uniqueName,i);
spuStatus.m_eventStartHandle = CreateEvent(0,false,false,spuStatus.m_eventStartHandleName);
sprintf(spuStatus.m_eventCompletetHandleName,"eventComplete%s%d",threadConstructionInfo.m_uniqueName,i);
spuStatus.m_eventCompletetHandle = CreateEvent(0,false,false,spuStatus.m_eventCompletetHandleName);
m_completeHandles[i] = spuStatus.m_eventCompletetHandle;
HANDLE handle = CreateThread(lpThreadAttributes,dwStackSize,lpStartAddress,lpParameter, dwCreationFlags,lpThreadId);
SetThreadPriority(handle,THREAD_PRIORITY_HIGHEST);
//SetThreadPriority(handle,THREAD_PRIORITY_TIME_CRITICAL);
SetThreadAffinityMask(handle, 1<<i);
spuStatus.m_taskId = i;
spuStatus.m_commandId = 0;
spuStatus.m_status = 0;
spuStatus.m_threadHandle = handle;
spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
printf("started thread %d with threadHandle %d\n",i,handle);
}
}
void Win32ThreadSupport::startSPU()
{
}
///tell the task scheduler we are done with the SPU tasks
void Win32ThreadSupport::stopSPU()
{
int i;
for (i=0;i<m_activeSpuStatus.size();i++)
{
btSpuStatus& spuStatus = m_activeSpuStatus[i];
if (spuStatus.m_status>0)
{
WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
}
spuStatus.m_userPtr = 0;
SetEvent(spuStatus.m_eventStartHandle);
WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
CloseHandle(spuStatus.m_eventCompletetHandle);
CloseHandle(spuStatus.m_eventStartHandle);
CloseHandle(spuStatus.m_threadHandle);
}
m_activeSpuStatus.clear();
m_completeHandles.clear();
}
#endif //USE_WIN32_THREADING

View File

@@ -0,0 +1,121 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btScalar.h"
#include "PlatformDefinitions.h"
#ifdef USE_WIN32_THREADING //platform specific defines are defined in PlatformDefinitions.h
#ifndef WIN32_THREAD_SUPPORT_H
#define WIN32_THREAD_SUPPORT_H
#include "LinearMath/btAlignedObjectArray.h"
#include "btThreadSupportInterface.h"
typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory);
typedef void* (*Win32lsMemorySetupFunc)();
///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
class Win32ThreadSupport : public btThreadSupportInterface
{
public:
///placeholder, until libspe2 support is there
struct btSpuStatus
{
uint32_t m_taskId;
uint32_t m_commandId;
uint32_t m_status;
Win32ThreadFunc m_userThreadFunc;
void* m_userPtr; //for taskDesc etc
void* m_lsMemory; //initialized using Win32LocalStoreMemorySetupFunc
void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
void* m_eventStartHandle;
char m_eventStartHandleName[32];
void* m_eventCompletetHandle;
char m_eventCompletetHandleName[32];
};
private:
btAlignedObjectArray<btSpuStatus> m_activeSpuStatus;
btAlignedObjectArray<void*> m_completeHandles;
public:
///Setup and initialize SPU/CELL/Libspe2
struct Win32ThreadConstructionInfo
{
Win32ThreadConstructionInfo(char* uniqueName,
Win32ThreadFunc userThreadFunc,
Win32lsMemorySetupFunc lsMemoryFunc,
int numThreads=1,
int threadStackSize=65535
)
:m_uniqueName(uniqueName),
m_userThreadFunc(userThreadFunc),
m_lsMemoryFunc(lsMemoryFunc),
m_numThreads(numThreads),
m_threadStackSize(threadStackSize)
{
}
char* m_uniqueName;
Win32ThreadFunc m_userThreadFunc;
Win32lsMemorySetupFunc m_lsMemoryFunc;
int m_numThreads;
int m_threadStackSize;
};
Win32ThreadSupport(const Win32ThreadConstructionInfo& threadConstructionInfo);
///cleanup/shutdown Libspe2
virtual ~Win32ThreadSupport();
void startThreads(const Win32ThreadConstructionInfo& threadInfo);
///send messages to SPUs
virtual void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1);
///check for messages from SPUs
virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
virtual void startSPU();
///tell the task scheduler we are done with the SPU tasks
virtual void stopSPU();
};
#endif //WIN32_THREAD_SUPPORT_H
#endif //USE_WIN32_THREADING

View File

@@ -0,0 +1,22 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "btThreadSupportInterface.h"
btThreadSupportInterface::~btThreadSupportInterface()
{
}

View File

@@ -0,0 +1,43 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef THREAD_SUPPORT_INTERFACE_H
#define THREAD_SUPPORT_INTERFACE_H
//#include <LinearMath/btScalar.h> //for uint32_t etc.
#include "PlatformDefinitions.h"
class btThreadSupportInterface
{
public:
virtual ~btThreadSupportInterface();
///send messages to SPUs
virtual void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1) =0;
///check for messages from SPUs
virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0;
///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
virtual void startSPU() =0;
///tell the task scheduler we are done with the SPU tasks
virtual void stopSPU()=0;
};
#endif //THREAD_SUPPORT_INTERFACE_H