Delete obsolete SpuRaycast files; attempt to get demos to compile/run under 64-bit GLUT.

2009-09-06 23:01:49 +00:00
parent 86518d9aed
commit 819622fbe8
8 changed files with 27 additions and 1053 deletions
--- a/Demos/AllBulletDemos/Main.cpp
+++ b/Demos/AllBulletDemos/Main.cpp
@@ -378,7 +378,25 @@ void KeyboardSpecialUp(int key, int x, int y)
 }
 /// GLUT idle callback: advances and redraws the active demo.
 ///
 /// If a GLUI window exists, its main graphics window is made current for the
 /// duration of the demo step; the window that was current on entry is restored
 /// before returning.
 void	GlutIdleFunc()
 {
 	int current_window = glutGetWindow();
 	// Must start at a known value: when no GLUI window exists nothing assigns
 	// it, and the test below would otherwise read an uninitialized int.
 	int new_window = 0;
 	if (GLUI_Master.gluis.first_child() != NULL )
 	{
 		new_window = ((GLUI_Main*)GLUI_Master.gluis.first_child())->getMainWindowId();
 	}
 	//--- Window is changed only if it is not already the current window ---
 	if ( (new_window > 0) && (new_window != current_window ))
 	{
 		glutSetWindow( new_window );
 	}
 	if (demo)
 	{
 		demo->moveAndDisplay();
 	}
 	glutSetWindow( current_window );
 }
 void KeyboardSpecial(int key, int x, int y)
 {
@@ -443,6 +461,7 @@ int main(int argc, char** argv)
 	GLUI_Master.set_glutReshapeFunc(Resize);  
 	GLUI_Master.set_glutKeyboardFunc(Keyboard);
 	GLUI_Master.set_glutSpecialFunc(KeyboardSpecial);
 	GLUI_Master.set_glutIdleFunc(GlutIdleFunc);
 	GLUI_Master.set_glutSpecialUpFunc(KeyboardSpecialUp);
 	GLUI_Master.set_glutMouseFunc(Mouse);
 	glutMotionFunc(MouseMotion);
--- a/Extras/glui/GL/glui.h
+++ b/Extras/glui/GL/glui.h
@@ -794,6 +794,10 @@ public:
    */
    bool         should_redraw_now(GLUI_Control *ctl);
 	/** Returns the value of main_gfx_window_id for this GLUI object. */
 	int getMainWindowId()
 	{
 		const int windowId = main_gfx_window_id;
 		return windowId;
 	}
    /** Switch to the appropriate draw buffer now.  Returns the old draw buffer. 
       This routine should probably only be called from inside the GLUI_DrawingSentinal,
       in glui_internal_control.h
--- a/Glut/GL/glut.h
+++ b/Glut/GL/glut.h
@@ -51,11 +51,14 @@ typedef unsigned short int wchar_t;
 #pragma comment (lib, "winmm.lib")     /* link with Windows MultiMedia lib */
 #pragma comment (lib, "opengl32.lib")  /* link with Microsoft OpenGL lib */
 #pragma comment (lib, "glu32.lib")     /* link with OpenGL Utility lib */
-#pragma message("Note: including lib: glut32.lib\n")
+
 /* Pick the GLUT import library that matches the target architecture. */
 #ifdef _WIN64
 #pragma message("Note: including lib: glut64.lib\n")
 #pragma comment (lib, "glut64.lib")    /* link with Win64 GLUT lib */
 #else
 #pragma message("Note: including lib: glut32.lib\n")
 #pragma comment (lib, "glut32.lib")    /* link with Win32 GLUT lib */
 #endif
 #pragma warning (disable:4244)	/* Disable bogus conversion warnings. */
--- a/src/BulletMultiThreaded/SpuIntegrationTask/readme.txt
+++ b/src/BulletMultiThreaded/SpuIntegrationTask/readme.txt
@@ -1 +0,0 @@
 Empty placeholder for future Libspe2 SPU task
--- a/src/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp
+++ b/src/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp
@@ -1,790 +0,0 @@
 #include "../PlatformDefinitions.h"
 #include "SpuRaycastTask.h"
 #include "../SpuCollisionObjectWrapper.h"
 #include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
 #include "SpuSubSimplexConvexCast.h"
 #include "LinearMath/btAabbUtil2.h"
 #include "BulletCollision/CollisionShapes/btTriangleShape.h"
 /* Future optimization strategies: 
 1. BBOX prune before loading shape data
 2. Could reduce number of dmas for ray output data to a single read and write.
   By sharing the temporary work unit output structures across objects.
 3. The reason SpuRaycastNodeCallback1 is slower is because the triangle data isn't
   being cached across calls. Fix that by doing the final ray pruning inside the callback.
 */
 /* Future work:
 1. support first hit, closest hit, etc rather than just closest hit.
 2. support compound objects
 */
 #define CALLBACK_ALL
 /* Working memory for one raycast task: DMA destinations for the collision
    object, its wrapper and its shape data, plus scratch used while walking
    triangle meshes. Member order and alignment are part of the layout that
    the DMA calls in this file write into. */
 struct RaycastTask_LocalStoreMemory
 {
 	/* Raw aligned bytes that receive a btCollisionObject via DMA (16 bytes of slack). */
 	ATTRIBUTE_ALIGNED16(char gColObj [sizeof(btCollisionObject)+16]);
 	/* Views the raw gColObj bytes as a btCollisionObject. */
 	btCollisionObject* getColObj()
 	{
 		return (btCollisionObject*) gColObj;
 	}
 	/* DMA destination for the object's SpuCollisionObjectWrapper. */
 	ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper);
 	SpuCollisionObjectWrapper* getCollisionObjectWrapper ()
 	{
 		return &gCollisionObjectWrapper;
 	}
 	/* Local copy of the object's collision shape. */
 	CollisionShape_LocalStoreMemory gCollisionShape;
 	/* Triangle vertex indices fetched by the BVH node callbacks (only [0..2] are used there). */
 	ATTRIBUTE_ALIGNED16(int	spuIndices[16]);
 	/* BVH, subtree headers/nodes and index-mesh info for triangle mesh shapes. */
 	bvhMeshShape_LocalStoreMemory bvhShapeData;
 	/* Vertex buffer used when casting against convex hull shapes. */
 	SpuConvexPolyhedronVertexData convexVertexData;
 	/* Not referenced by the raycast code visible in this file. */
 	CompoundShape_LocalStoreMemory compoundShapeData;
 };
 /* Returns a RaycastTask_LocalStoreMemory block (presumably the lsMemory later
    given to processRaycastTask). On WIN32 each call heap-allocates a new block;
    on __CELLOS_LV2__ every call returns the same static, 16-byte aligned block.
    NOTE(review): the WIN32 path uses plain operator new, which may not honour
    the ATTRIBUTE_ALIGNED16 members - confirm against the allocator in use. */
 #if defined(WIN32)
 void* createRaycastLocalStoreMemory()
 {
 	RaycastTask_LocalStoreMemory* freshBlock = new RaycastTask_LocalStoreMemory;
 	return freshBlock;
 }
 #elif defined(__CELLOS_LV2__)
 ATTRIBUTE_ALIGNED16(RaycastTask_LocalStoreMemory gLocalStoreMemory);
 void* createRaycastLocalStoreMemory()
 {
 	void* sharedBlock = &gLocalStoreMemory;
 	return sharedBlock;
 }
 #endif
 /* Copies one collision object (wrapper, object, then shape) into lsMemPtr and
    summarises it in gatheredObjectData.
    objectWrapper is the main-memory address of the SpuCollisionObjectWrapper.
    Each DMA is waited on before its result is read. */
 void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper)
 {
 	/* 1. The wrapper: it tells us where the collision object itself lives. */
 	const int wrapperBytes = sizeof(SpuCollisionObjectWrapper);
 	cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, objectWrapper, wrapperBytes, DMA_TAG(1), 0, 0);
 	cellDmaWaitTagStatusAll(DMA_MASK(1));
 	SpuCollisionObjectWrapper* wrapper = lsMemPtr->getCollisionObjectWrapper();
 	/* 2. The collision object the wrapper points at. */
 	const int colObjBytes = sizeof(btCollisionObject);
 	const ppu_address_t colObjAddress = wrapper->getCollisionObjectPtr();
 	cellDmaGet(&lsMemPtr->gColObj, colObjAddress, colObjBytes, DMA_TAG(2), 0, 0);
 	cellDmaWaitTagStatusAll(DMA_MASK(2));
 	btCollisionObject* colObj = lsMemPtr->getColObj();
 	/* 3. Record transform, margin, shape type and both shape addresses. */
 	gatheredObjectData->m_worldTransform = colObj->getWorldTransform();
 	gatheredObjectData->m_collisionMargin = wrapper->getCollisionMargin ();
 	gatheredObjectData->m_shapeType = wrapper->getShapeType ();
 	gatheredObjectData->m_collisionShape = (ppu_address_t)colObj->getCollisionShape();
 	gatheredObjectData->m_spuCollisionShape = (btConvexShape*)&lsMemPtr->gCollisionShape.collisionShape;
 	/* 4. The shape data itself. */
 	dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType);
 	cellDmaWaitTagStatusAll(DMA_MASK(1));
 	/* 5. Non-convex shapes get unit dimensions; convex ones report their own. */
 	if (!btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType))
 	{
 		gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0);
 		return;
 	}
 	btConvexInternalShape* localConvex = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape;
 	gatheredObjectData->m_primitiveDimensions = localConvex->getImplicitShapeDimensions ();
 }
 /* Starts a DMA read of one ray result from rayOutputAddr into *rayOutput.
    The caller must wait on DMA_MASK(dmaTag) before reading *rayOutput. */
 void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
 {
 	cellDmaGet(rayOutput,
 			   rayOutputAddr,
 			   sizeof(SpuRaycastTaskWorkUnitOut),
 			   DMA_TAG(dmaTag), 0, 0);
 }
 /* Starts a DMA write of *rayOutput back to rayOutputAddr.
    The caller must wait on DMA_MASK(dmaTag) before reusing *rayOutput. */
 void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag)
 {
 	cellDmaLargePut (rayOutput,
 					 rayOutputAddr,
 					 sizeof(SpuRaycastTaskWorkUnitOut),
 					 DMA_TAG(dmaTag), 0, 0);
 }
 /* Compiled out by the enclosing #if 0. Reads `size` bytes from main-memory
    address `ea` into `buffer`: through the software cache when
    USE_SOFTWARE_CACHE is set, otherwise through stallingUnalignedDmaSmallGet. */
 #if 0
 SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
 {
 #if USE_SOFTWARE_CACHE
 	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
 	// so the first and last bytes should fall on the same cache line
 	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));
 	void* ls = spe_cache_read(ea);
 	memcpy(buffer, ls, size);
 #else
 	stallingUnalignedDmaSmallGet(buffer,ea,size);
 #endif
 }
 #endif
 void small_cache_read_triple(	void* ls0, ppu_address_t ea0,
 												void* ls1, ppu_address_t ea1,
 												void* ls2, ppu_address_t ea2,
 												size_t size)
 {
 		btAssert(size<16);
 		ATTRIBUTE_ALIGNED16(char	tmpBuffer0[32]);
 		ATTRIBUTE_ALIGNED16(char	tmpBuffer1[32]);
 		ATTRIBUTE_ALIGNED16(char	tmpBuffer2[32]);
 		uint32_t i;
 		///make sure last 4 bits are the same, for cellDmaSmallGet
 		char* localStore0 = (char*)ls0;
 		uint32_t last4BitsOffset = ea0 & 0x0f;
 		char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
 		tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
 		char* localStore1 = (char*)ls1;
 		last4BitsOffset = ea1 & 0x0f;
 		char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
 		tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
 		char* localStore2 = (char*)ls2;
 		last4BitsOffset = ea2 & 0x0f;
 		char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
 		tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
 		cellDmaWaitTagStatusAll( DMA_MASK(1) );
 		//this is slowish, perhaps memcpy on SPU is smarter?
 		for (i=0; btLikely( i<size );i++)
 		{
 			localStore0[i] = tmpTarget0[i];
 			localStore1[i] = tmpTarget1[i];
 			localStore2[i] = tmpTarget2[i];
 		}
 }
 void performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr);
 class spuRaycastNodeCallback1 : public btNodeOverlapCallback
 {
 	RaycastGatheredObjectData* m_gatheredObjectData;
 	const SpuRaycastTaskWorkUnit* m_workUnits;
 	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
 	int m_workUnit;
 	RaycastTask_LocalStoreMemory* m_lsMemPtr;
 	ATTRIBUTE_ALIGNED16(btVector3	spuTriangleVertices[3]);
 	ATTRIBUTE_ALIGNED16(btScalar	spuUnscaledVertex[4]);
 	//ATTRIBUTE_ALIGNED16(int	spuIndices[16]);
 public:
 	spuRaycastNodeCallback1(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, RaycastTask_LocalStoreMemory* lsMemPtr)
 		: m_gatheredObjectData(gatheredObjectData),
 		  m_workUnits(workUnits),
 		  m_workUnitsOut(workUnitsOut),
 		  m_workUnit(0),
 		  m_lsMemPtr (lsMemPtr)
 	{
 	}
 	void setWorkUnit (int workUnit) { m_workUnit = workUnit; }
 	virtual void processNode(int subPart, int triangleIndex)
 	{
 		///Create a triangle on the stack, call process collision, with GJK
 		///DMA the vertices, can benefit from software caching
 		//		spu_printf("processNode with triangleIndex %d\n",triangleIndex);
 			// ugly solution to support both 16bit and 32bit indices
 		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
 		{
 			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
 			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
 			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
 									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
 									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
 									sizeof(short int));
 			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
 			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
 			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
 		} else
 		{
 			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
 			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
 								&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
 								&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
 								sizeof(int));
 		}
 		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
 		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
 		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
 		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
 		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
 		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
 		for (int j=2;btLikely( j>=0 );j--)
 		{
 			int graphicsindex = m_lsMemPtr->spuIndices[j];
 						//spu_printf("SPU index=%d ,",graphicsindex);
 			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
 			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
 			///handle un-aligned vertices...
 			//another DMA for each vertex
 			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
 									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
 									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
 									sizeof(btScalar));
 			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
 			spuTriangleVertices[j] = btVector3(
 				spuUnscaledVertex[0]*meshScaling.getX(),
 				spuUnscaledVertex[1]*meshScaling.getY(),
 				spuUnscaledVertex[2]*meshScaling.getZ());
 				//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
 		}
 		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
 		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
 		btTriangleShape triangle(spuTriangleVertices[0],spuTriangleVertices[1],spuTriangleVertices[2]);
 		triangleGatheredObjectData.m_spuCollisionShape = &triangle;
 		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
 		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
 		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
 		SpuRaycastTaskWorkUnitOut out;
 		out.hitFraction = 1.0;
 		performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[m_workUnit], &out, m_lsMemPtr);
 		/* XXX: For now only take the closest hit */
 		if (out.hitFraction < m_workUnitsOut[m_workUnit].hitFraction)
 		{
 			m_workUnitsOut[m_workUnit].hitFraction = out.hitFraction;
 			m_workUnitsOut[m_workUnit].hitNormal = out.hitNormal;
 		}
 	}
 };
 class spuRaycastNodeCallback : public btNodeOverlapCallback
 {
 	RaycastGatheredObjectData* m_gatheredObjectData;
 	const SpuRaycastTaskWorkUnit* m_workUnits;
 	SpuRaycastTaskWorkUnitOut* m_workUnitsOut;
 	int m_numWorkUnits;
 	RaycastTask_LocalStoreMemory* m_lsMemPtr;
 	ATTRIBUTE_ALIGNED16(btVector3	spuTriangleVertices[3]);
 	ATTRIBUTE_ALIGNED16(btScalar	spuUnscaledVertex[4]);
 	//ATTRIBUTE_ALIGNED16(int	spuIndices[16]);
 public:
 	spuRaycastNodeCallback(RaycastGatheredObjectData* gatheredObjectData,const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
 		: m_gatheredObjectData(gatheredObjectData),
 		  m_workUnits(workUnits),
 		  m_workUnitsOut(workUnitsOut),
 		  m_numWorkUnits(numWorkUnits),
 		  m_lsMemPtr (lsMemPtr)
 	{
 	}
 	virtual void processNode(int subPart, int triangleIndex)
 	{
 		///Create a triangle on the stack, call process collision, with GJK
 		///DMA the vertices, can benefit from software caching
 		//		spu_printf("processNode with triangleIndex %d\n",triangleIndex);
 			// ugly solution to support both 16bit and 32bit indices
 		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
 		{
 			short int* indexBasePtr = (short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
 			ATTRIBUTE_ALIGNED16(short int tmpIndices[3]);
 			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
 									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
 									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
 									sizeof(short int));
 			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
 			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
 			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
 		} else
 		{
 			int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
 			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
 								&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
 								&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
 								sizeof(int));
 		}
 		//printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]);
 		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
 		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
 		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
 		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
 		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
 		for (int j=2;btLikely( j>=0 );j--)
 		{
 			int graphicsindex = m_lsMemPtr->spuIndices[j];
 						//spu_printf("SPU index=%d ,",graphicsindex);
 			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
 			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
 			///handle un-aligned vertices...
 			//another DMA for each vertex
 			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
 									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
 									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
 									sizeof(btScalar));
 			//printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]);
 			spuTriangleVertices[j] = btVector3(
 				spuUnscaledVertex[0]*meshScaling.getX(),
 				spuUnscaledVertex[1]*meshScaling.getY(),
 				spuUnscaledVertex[2]*meshScaling.getZ());
 				//spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
 		}
 		RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData);
 		triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
 		btTriangleShape triangle(spuTriangleVertices[0],spuTriangleVertices[1],spuTriangleVertices[2]);
 		triangleGatheredObjectData.m_spuCollisionShape = &triangle;
 		//printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]);
 		//printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]);
 		//printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]);
 		for (int i = 0; i < m_numWorkUnits; i++)
 		{
 			SpuRaycastTaskWorkUnitOut out;
 			out.hitFraction = 1.0;
 			performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnits[i], &out, m_lsMemPtr);
 			/* XXX: For now only take the closest hit */
 			if (out.hitFraction < m_workUnitsOut[i].hitFraction)
 			{
 				m_workUnitsOut[i].hitFraction = out.hitFraction;
 				m_workUnitsOut[i].hitNormal = out.hitNormal;
 			}
 		}
 	}
 };
 /* Walks the quantized BVH nodes [startNodeIndex, endNodeIndex), starting at
    rootNode, against numWorkUnits rays.
    rayFrom/rayTo hold one entry per ray. quantizedQueryAabbMin/Max hold three
    unsigned shorts per ray (ray i starts at offset 3*i).
    A ray is only tested against a node's box if its quantized AABB overlaps it.
    With CALLBACK_ALL defined, the per-ray loop stops at the first ray that
    hits the node's box, and an overlapping leaf triggers one
    nodeCallback->processNode() call. Without it, processNode() is called per
    overlapping ray after selecting that ray on a spuRaycastNodeCallback1.
    Nodes that are neither leaves nor overlapped are skipped by their escape
    index; all others advance to the next node. */
 void	spuWalkStacklessQuantizedTreeAgainstRays(RaycastTask_LocalStoreMemory* lsMemPtr, 
 						 btNodeOverlapCallback* nodeCallback,
 						 const btVector3* rayFrom,
 						 const btVector3* rayTo,
 						 int numWorkUnits,
 						 unsigned short int* quantizedQueryAabbMin,
 						 unsigned short int* quantizedQueryAabbMax,
 						 const btQuantizedBvhNode* rootNode,
 						 int startNodeIndex,int endNodeIndex)
 {
 	int curIndex = startNodeIndex;
 	int walkIterations = 0;
 	//int subTreeSize = endNodeIndex - startNodeIndex;
 	int escapeIndex;
 	unsigned int boxBoxOverlap, rayBoxOverlap, anyRayBoxOverlap;
 	unsigned int isLeafNode;
 #define RAYAABB2
 #ifdef RAYAABB2
 	/* Per-ray values precomputed once for btRayAabb2: the sign of each
 	   direction component, the reciprocal direction, and the ray length. */
 	unsigned int sign[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
 	btVector3 rayInvDirection[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 	btScalar lambda_max[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 	for (int i = 0; i < numWorkUnits; i++)
 	{
 		btVector3 rayDirection = (rayTo[i]-rayFrom[i]);
 		rayDirection.normalize ();
 		lambda_max[i] = rayDirection.dot(rayTo[i]-rayFrom[i]);
 		rayInvDirection[i][0] = btScalar(1.0) / rayDirection[0];
 		rayInvDirection[i][1] = btScalar(1.0) / rayDirection[1];
 		rayInvDirection[i][2] = btScalar(1.0) / rayDirection[2];
 		sign[i][0] = rayDirection[0] < 0.0;
 		sign[i][1] = rayDirection[1] < 0.0;
 		sign[i][2] = rayDirection[2] < 0.0;
 	}
 #endif
 	while (curIndex < endNodeIndex)
 	{
 		//catch bugs in tree data
 //		btAssert (walkIterations < subTreeSize);
 		walkIterations++;
 		isLeafNode = rootNode->isLeafNode();
 		anyRayBoxOverlap = 0;
 		for (int i = 0; i < numWorkUnits; i++)
 		{
 			/* Cheap rejection first: quantized ray AABB vs. quantized node AABB. */
 			unsigned short int* quamin = (quantizedQueryAabbMin + 3 * i);
 			unsigned short int* quamax = (quantizedQueryAabbMax + 3 * i);
 			boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quamin,quamax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
 			if (!boxBoxOverlap)
 				continue;
 			rayBoxOverlap = 0;
 			btScalar param = 1.0;
 			btVector3 normal;
 			/* Exact ray vs. box test against the unquantized node bounds. */
 			btVector3 bounds[2];
 			bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin);
 			bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax);
 #ifdef RAYAABB2
 			rayBoxOverlap = btRayAabb2 (rayFrom[i], rayInvDirection[i], sign[i], bounds, param, 0.0, lambda_max[i]);
 #else
 			rayBoxOverlap = btRayAabb(rayFrom[i], rayTo[i], bounds[0], bounds[1], param, normal);
 #endif
 #ifndef CALLBACK_ALL
 			anyRayBoxOverlap = rayBoxOverlap || anyRayBoxOverlap;
 			/* If we have any ray vs. box overlap and this isn't a leaf node
 			   we know that we need to dig deeper
 			*/
 			if (!isLeafNode && anyRayBoxOverlap)
 				break;
 			if (isLeafNode && rayBoxOverlap)
 			{
 				spuRaycastNodeCallback1* callback = (spuRaycastNodeCallback1*)nodeCallback;
 				callback->setWorkUnit (i);
 				nodeCallback->processNode (0, rootNode->getTriangleIndex());
 			}
 #else
 			/* If we have any ray vs. box overlap and this isn't a leaf node
 			   we know that we need to dig deeper
 			*/
 			if (rayBoxOverlap)
 			{
 				anyRayBoxOverlap = 1;
 				break;
 			}
 #endif
 		}
 #ifdef CALLBACK_ALL
 		if (isLeafNode && anyRayBoxOverlap)
 		{
 			nodeCallback->processNode (0, rootNode->getTriangleIndex());
 		}
 #endif
 		if (anyRayBoxOverlap || isLeafNode)
 		{
 			/* Step to the next node in the array. */
 			rootNode++;
 			curIndex++;
 		} else
 		{
 			/* Internal node with no overlap: jump ahead by its escape index. */
 			escapeIndex = rootNode->getEscapeIndex();
 			rootNode += escapeIndex;
 			curIndex += escapeIndex;
 		}
 	}
 }
 /* Casts numWorkUnits rays against a BVH triangle mesh shape.
    gatheredObjectData->m_spuCollisionShape is treated as a
    btBvhTriangleMeshShape. Rays are transformed by the inverse of the object's
    world transform, and their AABBs are quantized for the BVH.
    Subtree headers are fetched in batches of at most MAX_SPU_SUBTREE_HEADERS;
    each subtree whose box overlaps at least one ray AABB has its nodes fetched
    and walked. Hits are recorded in workUnitsOut by the node callback.
    Nothing is done unless the mesh has subtrees and exactly one indexed mesh. */
 void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit* workUnits, SpuRaycastTaskWorkUnitOut* workUnitsOut, int numWorkUnits, RaycastTask_LocalStoreMemory* lsMemPtr)
 {
 	//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
 //	register int dmaSize;
 //	register ppu_address_t	dmaPpuAddress2;
 	btBvhTriangleMeshShape*	trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape;
 	//need the mesh interface, for access to triangle vertices
 	dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape);
 	unsigned short int quantizedQueryAabbMin[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
 	unsigned short int quantizedQueryAabbMax[SPU_RAYCAST_WORK_UNITS_PER_TASK][3];
 	btVector3 rayFromInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 	btVector3 rayToInTriangleSpace[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 	/* Calculate the AABB for the ray in the triangle mesh shape */
 	btTransform rayInTriangleSpace;
 	rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse();
 	for (int i = 0; i < numWorkUnits; i++)
 	{
 		btVector3 aabbMin;
 		btVector3 aabbMax;
 		rayFromInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayFrom);
 		rayToInTriangleSpace[i] = rayInTriangleSpace(workUnits[i].rayTo);
 		aabbMin = rayFromInTriangleSpace[i];
 		aabbMin.setMin (rayToInTriangleSpace[i]);
 		aabbMax = rayFromInTriangleSpace[i];
 		aabbMax.setMax (rayToInTriangleSpace[i]);
 		lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin[i],aabbMin,0);
 		lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax[i],aabbMax,1);
 	}
 	QuantizedNodeArray&	nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
 	BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();	
 	/* The callback type must match the walk mode selected by CALLBACK_ALL. */
 #ifdef CALLBACK_ALL
 	spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnits, workUnitsOut, numWorkUnits, lsMemPtr);
 #else
 	spuRaycastNodeCallback1 nodeCallback (gatheredObjectData, workUnits, workUnitsOut, lsMemPtr);
 #endif
 	IndexedMeshArray&	indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
 	//not likely to happen
 	if (subTrees.size() && indexArray.size() == 1)
 	{
 		///DMA in the index info
 		dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
 		cellDmaWaitTagStatusAll(DMA_MASK(1));
 		//display the headers
 		int numBatch = subTrees.size();
 		for (int i=0;i<numBatch;)
 		{
 // BEN: TODO - can reorder DMA transfers for less stall
 			int remaining = subTrees.size() - i;
 			int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
 			dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
 			cellDmaWaitTagStatusAll(DMA_MASK(1));
 			for (int j=0;j<nextBatch;j++)
 			{
 				const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
 				/* Stays 1 when numWorkUnits is 0, because the loop below never runs. */
 				unsigned int overlap = 1;
 				for (int boxId = 0; boxId < numWorkUnits; boxId++)
 				{
 					overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin[boxId],quantizedQueryAabbMax[boxId],subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
 					if (overlap)
 						break;
 				}
 				if (overlap)
 				{
 					btAssert(subtree.m_subtreeSize);
 					//dma the actual nodes of this subtree
 					dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
 					cellDmaWaitTagStatusAll(DMA_MASK(2));
 					/* Walk this subtree */
 					{
 						spuWalkStacklessQuantizedTreeAgainstRays(lsMemPtr,
 										        &nodeCallback,
 										        &rayFromInTriangleSpace[0],
 											&rayToInTriangleSpace[0],
 											numWorkUnits,
 											&quantizedQueryAabbMin[0][0],&quantizedQueryAabbMax[0][0],
 											&lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize);
 					}
 				}
 			}
 			//	unsigned short int	m_quantizedAabbMin[3];
 			//	unsigned short int	m_quantizedAabbMax[3];
 			//	int			m_rootNodeIndex;
 			//	int			m_subtreeSize;
 			i+=nextBatch;
 		}
 		//pre-fetch first tree, then loop and double buffer
 	}
 }
 /* Placeholder: ray vs. compound shape casting is not implemented.
    No parameter is read and *workUnitOut is left exactly as the caller set it. */
 void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
 {
 	(void)gatheredObjectData;
 	(void)workUnit;
 	(void)workUnitOut;
 	(void)lsMemPtr;
 	//XXX spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n");
 }
 /* Casts workUnit's ray against the convex shape described by
    gatheredObjectData. On a hit, the fraction and normal are written to
    *workUnitOut; on a miss *workUnitOut is left untouched.
    For convex hull shapes the hull object and its vertex data are first
    fetched into lsMemPtr->convexVertexData. */
 void
 performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr)
 {
 	btVoronoiSimplexSolver simplexSolver;
 	/* The ray is expressed as two pure translations: its start and its end. */
 	btTransform rayStart;
 	btTransform rayEnd;
 	rayStart.setIdentity ();
 	rayStart.setOrigin (workUnit.rayFrom);
 	rayEnd.setIdentity ();
 	rayEnd.setOrigin (workUnit.rayTo);
 	SpuCastResult castResult;
 	/* Load the vertex data if the shape is a convex hull */
 	/* XXX: We might be loading the shape twice */
 	ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]);
 	if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE)
 	{
 		const int hullBytes = sizeof(btConvexHullShape);
 		const ppu_address_t hullAddress = gatheredObjectData->m_collisionShape;
 		cellDmaGet(&convexHullShape, hullAddress, hullBytes, DMA_TAG(1), 0, 0);
 		cellDmaWaitTagStatusAll(DMA_MASK(1));
 		dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape);
 		cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2!
 		lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape;
 		lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0];
 	}
 	/* Run the cast; the object's world transform is passed for both shape poses. */
 	SpuSubsimplexRayCast caster ((btConvexShape*)gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, gatheredObjectData->m_collisionMargin, &simplexSolver);
 	const bool hit = caster.calcTimeOfImpact (rayStart, rayEnd, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,castResult);
 	if (!hit)
 	{
 		return;
 	}
 	workUnitOut->hitFraction = castResult.m_fraction;
 	workUnitOut->hitNormal = castResult.m_normal;
 }
 /* Entry point of a raycast task.
    userPtr is treated as a SpuRaycastTaskDesc, lsMemory as a
    RaycastTask_LocalStoreMemory.
    For every collision object wrapper in the task, the object is gathered into
    local memory and all rays of the task are cast against it, dispatching on
    the shape type (concave, convex or compound).
    A local hitFraction still equal to 1.0 after the cast is treated as "no
    hit". Otherwise the ray's output record is fetched from workUnit.output,
    updated if the new hit is closer, and written back.
    NOTE(review): the convex branch writes back only when the hit is closer;
    the concave and compound branches write back whenever a hit was found. */
 void	processRaycastTask(void* userPtr, void* lsMemory)
 {
 	RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory;
 	SpuRaycastTaskDesc* taskDescPtr = (SpuRaycastTaskDesc*)userPtr;
 	SpuRaycastTaskDesc& taskDesc = *taskDescPtr;
 	SpuCollisionObjectWrapper* cows = (SpuCollisionObjectWrapper*)taskDesc.spuCollisionObjectsWrappers;
 	/* for each object */
 	RaycastGatheredObjectData gatheredObjectData;
 	for (unsigned int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++)
 	{
 		/* load initial collision shape */
 		GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]);
 		if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType))
 		{
 			/* Concave: all rays are cast in one pass into a local result array. */
 			SpuRaycastTaskWorkUnitOut tWorkUnitsOut[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 			unsigned int rayId ;
 			for (rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
 			{
 				tWorkUnitsOut[rayId].hitFraction = 1.0;
 			}
 			performRaycastAgainstConcave (&gatheredObjectData, &taskDesc.workUnits[0], &tWorkUnitsOut[0], taskDesc.numWorkUnits, localMemory);
 			for (rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
 			{
 				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
 				if (tWorkUnitsOut[rayId].hitFraction == 1.0)
 					continue;
 				ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
 				dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 				cellDmaWaitTagStatusAll(DMA_MASK(1));
 				/* XXX Only support taking the closest hit for now */
 				if (tWorkUnitsOut[rayId].hitFraction < workUnitOut.hitFraction)
 				{
 					workUnitOut.hitFraction = tWorkUnitsOut[rayId].hitFraction;
 					workUnitOut.hitNormal = tWorkUnitsOut[rayId].hitNormal;
 				}
 				/* write ray cast data back */
 				dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 				cellDmaWaitTagStatusAll(DMA_MASK(1));
 			}
 		} else if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) {
 			/* Convex: rays that miss the object's AABB are skipped without a full cast. */
 			btVector3 objectBoxMin, objectBoxMax;
 			computeAabb (objectBoxMin, objectBoxMax, (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, gatheredObjectData.m_shapeType, gatheredObjectData.m_worldTransform);
 			for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
 			{
 				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
 				btScalar ignored_param = 1.0;
 				btVector3 ignored_normal;
 				if (btRayAabb(workUnit.rayFrom, workUnit.rayTo, objectBoxMin, objectBoxMax, ignored_param, ignored_normal))
 				{
 					ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
 					SpuRaycastTaskWorkUnitOut tWorkUnitOut;
 					tWorkUnitOut.hitFraction = 1.0;
 					performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
 					if (tWorkUnitOut.hitFraction == 1.0)
 						continue;
 					dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 					cellDmaWaitTagStatusAll(DMA_MASK(1));
 					/* XXX Only support taking the closest hit for now */
 					if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
 					{
 						workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
 						workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
 						/* write ray cast data back */
 						dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 						cellDmaWaitTagStatusAll(DMA_MASK(1));
 					}
 				}
 			}
 		} else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) {
 			/* Compound: performRaycastAgainstCompound is an empty stub in this file,
 			   so hitFraction stays 1.0 and every ray is skipped. */
 			for (unsigned int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++)
 			{
 				const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId];
 				ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut);
 				SpuRaycastTaskWorkUnitOut tWorkUnitOut;
 				tWorkUnitOut.hitFraction = 1.0;
 				performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory);
 				if (tWorkUnitOut.hitFraction == 1.0)
 					continue;
 				dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 				cellDmaWaitTagStatusAll(DMA_MASK(1));
 				/* XXX Only support taking the closest hit for now */
 				if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction)
 				{
 					workUnitOut.hitFraction = tWorkUnitOut.hitFraction;
 					workUnitOut.hitNormal = tWorkUnitOut.hitNormal;
 				}
 				/* write ray cast data back */
 				dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1);
 				cellDmaWaitTagStatusAll(DMA_MASK(1));
 			}
 		}
 	}
 }
--- a/src/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h
+++ b/src/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h
@@ -1,50 +0,0 @@
 #ifndef __SPU_RAYCAST_TASK_H
 #define __SPU_RAYCAST_TASK_H
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
 #include "LinearMath/btVector3.h"
 #include "../PlatformDefinitions.h"
 /* Description of one collision object as consumed by the raycast task:
  * the shape reference, its type and margin, and the object's world transform.
  * The struct is declared through ATTRIBUTE_ALIGNED16; do not reorder fields
  * without checking every place that copies it. */
 ATTRIBUTE_ALIGNED16(struct) RaycastGatheredObjectData
 {
 	/* presumably the main-memory (PPU) address of the collision shape -- TODO confirm against the gather code */
 	ppu_address_t m_collisionShape;
 	/* presumably the local copy of that shape usable by the task -- TODO confirm */
 	btCollisionShape* m_spuCollisionShape;
 	/* NOTE(review): looks like the implicit shape dimensions of a primitive shape -- confirm */
 	btVector3	m_primitiveDimensions;
 	/* Shape type; the task tests it with btBroadphaseProxy::isCompound() to pick the raycast path. */
 	int		m_shapeType;
 	/* Collision margin of the shape. */
 	float	m_collisionMargin;
 	/* World transform of the collision object. */
 	btTransform	m_worldTransform;
 };
 /* Result record of one ray. The task loads it from and stores it back to the
  * address held in SpuRaycastTaskWorkUnit::output, keeping only the closest hit. */
 ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnitOut
 {
 	/* Normal of the closest hit found so far; updated together with hitFraction. */
 	btVector3 hitNormal; /* out */
 	/* Fraction along the ray of the closest hit; the task treats 1.0 as "no hit". */
 	btScalar hitFraction; /* out */
 	/* NOTE(review): not written by the raycast paths visible in this file -- confirm who fills it. */
 	btCollisionWorld::LocalShapeInfo shapeInfo; /* out */
 };
 /* One ray to be cast against the gathered collision objects: the ray end
  * points plus the location where the result record is kept. */
 ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnit
 {
 	btVector3 rayFrom; /* in */
 	btVector3 rayTo; /* in */
 	/* Address of the result record. The task casts this pointer to ppu_address_t
 	 * and uses it as the source/target of its load/store of the output, so it is
 	 * used as an address rather than dereferenced directly by the task.
 	 * NOTE(review): a native pointer may not have the width of ppu_address_t on every
 	 * platform -- confirm. */
 	SpuRaycastTaskWorkUnitOut* output; /* out */
 };
 /* Capacity of the workUnits array of a single task description. */
 #define SPU_RAYCAST_WORK_UNITS_PER_TASK 16
 /* Description of one raycast task: a batch of rays plus the collision objects
  * they are cast against. Declared through ATTRIBUTE_ALIGNED128. */
 ATTRIBUTE_ALIGNED128(struct) SpuRaycastTaskDesc
 {
 	/* Rays of this task; only the first numWorkUnits entries are processed. */
 	SpuRaycastTaskWorkUnit workUnits[SPU_RAYCAST_WORK_UNITS_PER_TASK];
 	/* Number of valid entries in workUnits (expected to be <= SPU_RAYCAST_WORK_UNITS_PER_TASK). */
 	unsigned int numWorkUnits;
 	/* presumably the address of an array of collision object wrappers -- TODO confirm element type */
 	void* spuCollisionObjectsWrappers;
 	/* presumably the element count of spuCollisionObjectsWrappers -- TODO confirm */
 	unsigned int numSpuCollisionObjectWrappers;
 	/* Identifier of this task. */
 	int taskId;
 };
 /* Entry point of the raycast task.
  * NOTE(review): userPtr presumably points to a SpuRaycastTaskDesc and lsMemory to the
  * block returned by createRaycastLocalStoreMemory() -- confirm against the implementation. */
 void	processRaycastTask (void* userPtr, void* lsMemory);
 /* Creates the working memory handed to processRaycastTask as lsMemory.
  * NOTE(review): ownership/lifetime of the returned block is not visible here -- confirm. */
 void*	createRaycastLocalStoreMemory ();
 #endif
--- a/src/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp
+++ b/src/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp
@@ -1,151 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #include "SpuSubSimplexConvexCast.h"
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 #include "BulletCollision/CollisionShapes/btMinkowskiSumShape.h"
 #include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
 /// Remembers the description of the shape to cast against (shape pointer, vertex data,
 /// shape type, margin) and the simplex solver that calcTimeOfImpact will drive.
 /// Only pointers and scalars are stored; nothing is copied or allocated.
 SpuSubsimplexRayCast::SpuSubsimplexRayCast (
 	btConvexShape* shapeB,
 	SpuConvexPolyhedronVertexData* convexDataB,
 	int shapeTypeB,
 	float marginB,
 	btVoronoiSimplexSolver* simplexSolver)
 	: m_simplexSolver(simplexSolver)
 	, m_shapeB(shapeB)
 	, m_convexDataB(convexDataB)
 	, m_shapeTypeB(shapeTypeB)
 	, m_marginB(marginB)
 {
 }
 ///Typically the conservative advancement reaches a solution in a few iterations; for degenerate cases the
 ///iteration count is clipped to MAX_ITERATIONS (32, or 64 when BT_USE_DOUBLE_PRECISION is defined).
 ///See discussion about this here http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=565
 #ifdef BT_USE_DOUBLE_PRECISION
 #define MAX_ITERATIONS 64
 #else
 #define MAX_ITERATIONS 32
 #endif
 /* Returns the support point of the minkowski sum:
 * MSUM(Pellet, ConvexShape)
 *
 */
 void supportPoints (const btTransform& xformRay,
 		    const btTransform& xformB,
 		    const int shapeType,
 		    const btConvexShape* shape,
 		    const btScalar marginB,
 		    const btVector3& seperatingAxis,
 		    btVector3& w,
 		    btVector3& supVertexRay,
 		    btVector3& supVertexB)
 {
 	btVector3 saUnit = seperatingAxis;
 	saUnit.normalize();
 	btVector3 SupportPellet = xformRay(0.0001 * -saUnit);
 	btVector3 rotatedSeperatingAxis = seperatingAxis * xformB.getBasis();
 	btVector3 SupportShape = shape->localGetSupportVertexWithoutMarginNonVirtual(rotatedSeperatingAxis);
 	SupportShape += saUnit * marginB;
 	w = SupportPellet - SupportShape;
 	supVertexRay = SupportPellet;
 	supVertexB = SupportShape;
 }
 bool	SpuSubsimplexRayCast::calcTimeOfImpact(const btTransform& fromRay,
 											   const btTransform& toRay,
 											   const btTransform& fromB,
 											   const btTransform& toB,
 											   SpuCastResult& result)
 {
 	m_simplexSolver->reset();
 	btVector3 linVelRay, linVelB;
 	linVelRay = toRay.getOrigin() - fromRay.getOrigin();
 	linVelB = toB.getOrigin() - fromB.getOrigin ();
 	btScalar lambda = btScalar(0.);
 	btTransform interpolatedTransRay = fromRay;
 	btTransform interpolatedTransB = fromB;
 	btVector3 r = (linVelRay-linVelB);
 	btVector3 supVertexRay;
 	btVector3 supVertexB;
 	btVector3 v;
 	supportPoints (fromRay, fromB, m_shapeTypeB, m_shapeB, m_marginB, r, v, supVertexRay, supVertexB);
 	btVector3 n;
 	n.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
 	bool hasResult = false;
 	btVector3 c;
 	int maxIter = MAX_ITERATIONS;
 	btScalar lastLambda = lambda;
 	btScalar dist2 = v.length2();
 #ifdef BT_USE_DOUBLE_PRECISION
 	btScalar epsilon = btScalar(0.0001);
 #else
 	btScalar epsilon = btScalar(0.0001);
 #endif //BT_USE_DOUBLE_PRECISION
 	btVector3 w,p;
 	btScalar VdotR;
 	while ( (dist2 > epsilon) && maxIter--)
 	{
 		supportPoints (interpolatedTransRay, interpolatedTransB, m_shapeTypeB, m_shapeB,  m_marginB, v, w, supVertexRay, supVertexB);
 		btScalar VdotW = v.dot(w);
 		if (lambda > btScalar(1.0))
 		{
 			return false;
 		}
 		if ( VdotW > btScalar(0.))
 		{
 			VdotR = v.dot(r);
 			if (VdotR >= -(SIMD_EPSILON*SIMD_EPSILON))
 				return false;
 			else
 			{
 				lambda = lambda - VdotW / VdotR;
 				interpolatedTransRay.getOrigin().setInterpolate3(fromRay.getOrigin(), toRay.getOrigin(), lambda);
 				interpolatedTransB.getOrigin().setInterpolate3(fromB.getOrigin(), toB.getOrigin(), lambda);
 				lastLambda = lambda;
 				n = v;
 				hasResult = true;
 			}
 		} 
 		m_simplexSolver->addVertex(w, supVertexRay, supVertexB);
 		if (m_simplexSolver->closest(v))
 		{
 			dist2 = v.length2();
 			hasResult = true;
 			//printf("V=%f , %f, %f\n",v[0],v[1],v[2]);
 			//printf("DIST2=%f\n",dist2);
 			//printf("numverts = %i\n",m_simplexSolver->numVertices());
 		} else
 		{
 			dist2 = btScalar(0.);
 		} 
 	}
 	result.m_fraction = lambda;
 	result.m_normal = n;
 	btVector3 hitRay, hitB;
 	m_simplexSolver->compute_points (hitRay, hitB);
 	/* TODO: We could output hit point here (hitB) */
 	return true;
 }
--- a/src/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h
+++ b/src/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h
@@ -1,60 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef SPU_SUBSIMPLEX_RAY_CAST_H
 #define SPU_SUBSIMPLEX_RAY_CAST_H
 #include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
 #include "../SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
 #include "SpuRaycastTask.h"
 class btConvexShape;
 /// Output of SpuSubsimplexRayCast::calcTimeOfImpact.
 struct SpuCastResult
 {
 	/// Fraction (lambda) along the cast at which the hit was found.
 	float m_fraction;
 	/// Hit normal as produced by the cast; calcTimeOfImpact does not normalize it.
 	btVector3 m_normal;
 };
 /// SpuSubsimplexRayCast implements Gino van den Bergen's paper
 ///"Ray Casting against General Convex Objects with Application to Continuous Collision Detection"
 /// GJK based Ray Cast, optimized version
 /// Objects should not start in overlap, otherwise results are not defined.
 class SpuSubsimplexRayCast
 {
 	/// Simplex solver driven by calcTimeOfImpact; it is reset at the start of every cast.
 	btVoronoiSimplexSolver* m_simplexSolver;
 	/// Convex shape the ray is cast against.
 	btConvexShape* m_shapeB;
 	/// Vertex data passed to the constructor alongside shape B.
 	SpuConvexPolyhedronVertexData* m_convexDataB;
 	/// Shape type of shape B.
 	int m_shapeTypeB;
 	/// Collision margin added to the support points of shape B.
 	float m_marginB;
 public:
 	/// Stores the given shape description and simplex solver; nothing is copied.
 	SpuSubsimplexRayCast (btConvexShape* shapeB, SpuConvexPolyhedronVertexData* convexDataB, int shapeTypeB, float marginB,
 						  btVoronoiSimplexSolver* simplexSolver);
 	//virtual ~btSubsimplexConvexCast();
 	///calcTimeOfImpact calculates the time of impact and normal for the linear cast (sweep) between the ray and shape B.
 	///Precondition: the objects must not penetrate/overlap at the start of the interval. Overlap can be tested using btGjkPairDetector.
 	///Returns true and fills result on a hit; returns false (result untouched) otherwise.
 	bool calcTimeOfImpact(const btTransform& fromRay,
 						  const btTransform& toRay,
 						  const btTransform& fromB,
 						  const btTransform& toB,
 						  SpuCastResult& result);
 };
 #endif //SPU_SUBSIMPLEX_RAY_CAST_H
		`@@ -1 +0,0 @@`
			`Empty placeholder for future Libspe2 SPU task`