fix: some files didn't have svn:eol-style native set yet

2010-03-06 15:23:36 +00:00
parent 4fd48ac691
commit 81f04a4d48
641 changed files with 301123 additions and 301123 deletions
--- a/src/BulletMultiThreaded/MiniCL.cpp
+++ b/src/BulletMultiThreaded/MiniCL.cpp
--- a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
+++ b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
@@ -1,74 +1,74 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-
-#include "MiniCLTask.h"
-#include "BulletMultiThreaded/PlatformDefinitions.h"
-#include "BulletMultiThreaded/SpuFakeDma.h"
-#include "LinearMath/btMinMax.h"
-#include "MiniCLTask.h"
-#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
-
-
-#ifdef __SPU__
-#include <spu_printf.h>
-#else
-#include <stdio.h>
-#define spu_printf printf
-#endif
-
-int gMiniCLNumOutstandingTasks = 0;
-
-struct MiniCLTask_LocalStoreMemory
-{
-	
-};
-
-
-//-- MAIN METHOD
-void processMiniCLTask(void* userPtr, void* lsMemory)
-{
-	//	BT_PROFILE("processSampleTask");
-
-	MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory;
-
-	MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
-	MiniCLTaskDesc& taskDesc = *taskDescPtr;
-
-	for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
-	{
-		taskDesc.m_kernel->m_launcher(&taskDesc, i);
-	}
-
-//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
-	
-}
-
-
-#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
-
-ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory	gLocalStoreMemory);
-
-void* createMiniCLLocalStoreMemory()
-{
-	return &gLocalStoreMemory;
-}
-#else
-void* createMiniCLLocalStoreMemory()
-{
-	return new MiniCLTask_LocalStoreMemory;
-};
-
-#endif
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCLTask.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "BulletMultiThreaded/SpuFakeDma.h"
+#include "LinearMath/btMinMax.h"
+#include "MiniCLTask.h"
+#include "BulletMultiThreaded/MiniCLTaskScheduler.h"
+
+
+#ifdef __SPU__
+#include <spu_printf.h>
+#else
+#include <stdio.h>
+#define spu_printf printf
+#endif
+
+int gMiniCLNumOutstandingTasks = 0;
+
+struct MiniCLTask_LocalStoreMemory // per-task scratch type; currently declares no members
+{
+	
+};
+
+
+//-- MAIN METHOD: treats userPtr as a MiniCLTaskDesc and calls its kernel launcher once per work unit in [m_firstWorkUnit, m_lastWorkUnit)
+void processMiniCLTask(void* userPtr, void* lsMemory)
+{
+	//	BT_PROFILE("processSampleTask");
+
+	MiniCLTask_LocalStoreMemory* localMemory = (MiniCLTask_LocalStoreMemory*)lsMemory; // never read below in this function
+
+	MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr; // no null check: userPtr is dereferenced on the next line
+	MiniCLTaskDesc& taskDesc = *taskDescPtr;
+
+	for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++) // half-open range: m_lastWorkUnit itself is not launched
+	{
+		taskDesc.m_kernel->m_launcher(&taskDesc, i); // the launcher receives the whole descriptor plus the work unit index
+	}
+
+//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
+	
+}
+
+
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2) // NOTE(review): SpuFakeDma.h tests USE_LIBSPE2, not LIBSPE2 - confirm both spellings are intended
+
+ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory	gLocalStoreMemory);
+
+void* createMiniCLLocalStoreMemory() // this variant returns the address of the single global instance above
+{
+	return &gLocalStoreMemory;
+}
+#else
+void* createMiniCLLocalStoreMemory() // this variant returns a fresh heap allocation on every call
+{
+	return new MiniCLTask_LocalStoreMemory; // no matching delete appears in MiniCLTask.cpp
+}; // NOTE(review): the ';' after the closing brace is an empty declaration and can be dropped
+
+#endif
--- a/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
+++ b/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
@@ -1,62 +1,62 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef MINICL__TASK_H
-#define MINICL__TASK_H
-
-#include "BulletMultiThreaded/PlatformDefinitions.h"
-#include "LinearMath/btScalar.h"
-
-#include "LinearMath/btAlignedAllocator.h"
-
-
-#define MINICL_MAX_ARGLENGTH (sizeof(void*))
-#define MINI_CL_MAX_ARG 16
-#define MINI_CL_MAX_KERNEL_NAME 256
-
-struct MiniCLKernel;
-
-ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
-{
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	MiniCLTaskDesc()
-	{
-		for (int i=0;i<MINI_CL_MAX_ARG;i++)
-		{
-			m_argSizes[i]=0;
-		}
-	}
-
-	uint32_t		m_taskId;
-
-	uint32_t		m_firstWorkUnit;
-	uint32_t		m_lastWorkUnit;
-
-	MiniCLKernel*	m_kernel;
-
-	void*			m_argData[MINI_CL_MAX_ARG];
-	int				m_argSizes[MINI_CL_MAX_ARG];
-};
-
-extern "C" int gMiniCLNumOutstandingTasks;
-
-
-void	processMiniCLTask(void* userPtr, void* lsMemory);
-void*	createMiniCLLocalStoreMemory();
-
-
-#endif //MINICL__TASK_H
-
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef MINICL__TASK_H
+#define MINICL__TASK_H
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+
+#define MINICL_MAX_ARGLENGTH (sizeof(void*))
+#define MINI_CL_MAX_ARG 16
+#define MINI_CL_MAX_KERNEL_NAME 256
+
+struct MiniCLKernel;
+
+ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc // descriptor handed to processMiniCLTask() through its userPtr argument
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	MiniCLTaskDesc() // zeroes m_argSizes only; every other member is left uninitialized by this constructor
+	{
+		for (int i=0;i<MINI_CL_MAX_ARG;i++)
+		{
+			m_argSizes[i]=0;
+		}
+	}
+
+	uint32_t		m_taskId;
+
+	uint32_t		m_firstWorkUnit; // first work unit index processMiniCLTask() launches (inclusive)
+	uint32_t		m_lastWorkUnit; // loop bound in processMiniCLTask() (exclusive)
+
+	MiniCLKernel*	m_kernel; // only forward-declared in this header; processMiniCLTask() calls m_kernel->m_launcher
+
+	void*			m_argData[MINI_CL_MAX_ARG]; // one slot per kernel argument, MINI_CL_MAX_ARG slots
+	int				m_argSizes[MINI_CL_MAX_ARG]; // parallel to m_argData; all entries start at 0
+};
+
+extern "C" int gMiniCLNumOutstandingTasks;
+
+
+void	processMiniCLTask(void* userPtr, void* lsMemory);
+void*	createMiniCLLocalStoreMemory();
+
+
+#endif //MINICL__TASK_H
+
--- a/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp
+++ b/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp
--- a/src/BulletMultiThreaded/SpuFakeDma.h
+++ b/src/BulletMultiThreaded/SpuFakeDma.h
@@ -1,135 +1,135 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef FAKE_DMA_H
-#define FAKE_DMA_H
-
-
-#include "PlatformDefinitions.h"
-#include "LinearMath/btScalar.h"
-
-
-#ifdef __SPU__
-
-#ifndef USE_LIBSPE2
-
-#include <cell/dma.h>
-#include <stdint.h>
-
-#define DMA_TAG(xfer) (xfer + 1)
-#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
-
-#else // !USE_LIBSPE2
-
-#define DMA_TAG(xfer) (xfer + 1)
-#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
-		
-#include <spu_mfcio.h>		
-		
-#define DEBUG_DMA		
-#ifdef DEBUG_DMA
-#define dUASSERT(a,b) if (!(a)) { printf(b);}
-#define uintsize ppu_address_t
-		
-#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-															dUASSERT(size < 16384, "size too big: "); \
-															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-															} \
-															mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-														dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-														dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-														dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-														dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-    													dUASSERT(size < 16384, "size too big: "); \
-														dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    													printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-														} \
-														mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-        													dUASSERT(size < 16384, "size too big: "); \
-															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-        													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    														printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
-															} \
-															mfc_put(ls, ea, size, tag, tid, rid)
-#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
-																dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
-																dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
-																dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
-    															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
-    															dUASSERT(size < 16384, "size too big: "); \
-    															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
-    	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
-    															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
-																} \
-																mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
-
-#else
-#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
-#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
-#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
-#endif // DEBUG_DMA
-
-		
-		
-		
-		
-		
-		
-		
-#endif // USE_LIBSPE2
-#else // !__SPU__
-//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
-//Potential to add networked simulation using this interface
-
-#define DMA_TAG(a) (a)
-#define DMA_MASK(a) (a)
-
-		/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-		/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
-		void	cellDmaWaitTagStatusAll(int ignore);
-
-
-#endif //__CELLOS_LV2__
-
-///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
-int	stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
-
-
-void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
-
-
-#endif //FAKE_DMA_H
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef FAKE_DMA_H
+#define FAKE_DMA_H
+
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+
+#ifdef __SPU__
+
+#ifndef USE_LIBSPE2
+
+#include <cell/dma.h>
+#include <stdint.h>
+
+#define DMA_TAG(xfer) (xfer + 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
+
+#else // USE_LIBSPE2 is defined (else-branch of the #ifndef USE_LIBSPE2 above)
+
+#define DMA_TAG(xfer) (xfer + 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
+		
+#include <spu_mfcio.h>		
+		
+#define DEBUG_DMA		// defined unconditionally, so the #ifdef on the next line always selects the checking macros
+#ifdef DEBUG_DMA
+#define dUASSERT(a,b) if (!(a)) { printf(b);} // prints message b when condition a is false; NOTE(review): expands to a bare if, so an else written after a use would bind to it
+#define uintsize ppu_address_t // integer type the checking macros cast ls, ea and size to
+		
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+															dUASSERT(size < 16384, "size too big: "); \
+															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+															} \
+															mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+														dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+														dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+														dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+														dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+    													dUASSERT(size < 16384, "size too big: "); \
+														dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    													printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+														} \
+														mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+        													dUASSERT(size < 16384, "size too big: "); \
+															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+        													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    														printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
+															} \
+															mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+																dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+																dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+																dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+    															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+    															dUASSERT(size < 16384, "size too big: "); \
+    															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+    	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+																} \
+																mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() // NOTE(review): expands to two statements; under an unbraced if/for only the first one is guarded
+
+#else
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all()
+#endif // DEBUG_DMA
+
+		
+		
+		
+		
+		
+		
+		
+#endif // USE_LIBSPE2
+#else // !__SPU__
+//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
+//Potential to add networked simulation using this interface
+
+#define DMA_TAG(a) (a)
+#define DMA_MASK(a) (a)
+
+		/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		void	cellDmaWaitTagStatusAll(int ignore);
+
+
+#endif // __SPU__ (closes the #ifdef __SPU__ / #else pair opened above)
+
+///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
+int	stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
+
+
+void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+
+
+#endif //FAKE_DMA_H
--- a/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
+++ b/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
--- a/src/BulletMultiThreaded/btGpu3DGridBroadphase.h
+++ b/src/BulletMultiThreaded/btGpu3DGridBroadphase.h
@@ -1,138 +1,138 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASE_H
-#define BTGPU3DGRIDBROADPHASE_H
-
-//----------------------------------------------------------------------------------------
-
-#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
-
-#include "btGpu3DGridBroadphaseSharedTypes.h"
-
-//----------------------------------------------------------------------------------------
-
-///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
-
-class btGpu3DGridBroadphase : public btSimpleBroadphase
-{
-protected:
-	bool			m_bInitialized;
-    unsigned int	m_numBodies;
-    unsigned int	m_numCells;
-	unsigned int	m_maxPairsPerBody;
-	btScalar		m_cellFactorAABB;
-    unsigned int	m_maxBodiesPerCell;
-	bt3DGridBroadphaseParams m_params;
-	btScalar		m_maxRadius;
-	// CPU data
-    unsigned int*	m_hBodiesHash;
-    unsigned int*	m_hCellStart;
-	unsigned int*	m_hPairBuffStartCurr;
-	bt3DGrid3F1U*		m_hAABB;
-	unsigned int*	m_hPairBuff;
-	unsigned int*	m_hPairScan;
-	unsigned int*	m_hPairOut;
-// large proxies
-	int		m_numLargeHandles;						
-	int		m_maxLargeHandles;						
-	int		m_LastLargeHandleIndex;							
-	btSimpleBroadphaseProxy* m_pLargeHandles;
-	void* m_pLargeHandlesRawPtr;
-	int		m_firstFreeLargeHandle;
-	int allocLargeHandle()
-	{
-		btAssert(m_numLargeHandles < m_maxLargeHandles);
-		int freeLargeHandle = m_firstFreeLargeHandle;
-		m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
-		m_numLargeHandles++;
-		if(freeLargeHandle > m_LastLargeHandleIndex)
-		{
-			m_LastLargeHandleIndex = freeLargeHandle;
-		}
-		return freeLargeHandle;
-	}
-	void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
-	{
-		int handle = int(proxy - m_pLargeHandles);
-		btAssert((handle >= 0) && (handle < m_maxHandles));
-		if(handle == m_LastLargeHandleIndex)
-		{
-			m_LastLargeHandleIndex--;
-		}
-		proxy->SetNextFree(m_firstFreeLargeHandle);
-		m_firstFreeLargeHandle = handle;
-		proxy->m_clientObject = 0;
-		m_numLargeHandles--;
-	}
-	bool isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax);
-	bool isLargeProxy(btBroadphaseProxy* proxy);
-// debug
-	unsigned int	m_numPairsAdded;
-	unsigned int	m_numPairsRemoved;
-	unsigned int	m_numOverflows;
-// 
-public:
-	btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-					   int gridSizeX, int gridSizeY, int gridSizeZ, 
-					   int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-					   int maxBodiesPerCell = 8,
-					   btScalar cellFactorAABB = btScalar(1.0f));
-	btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
-						const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-						int gridSizeX, int gridSizeY, int gridSizeZ, 
-						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-						int maxBodiesPerCell = 8,
-						btScalar cellFactorAABB = btScalar(1.0f));
-	virtual ~btGpu3DGridBroadphase();
-	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
-
-	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
-	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
-	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
-	virtual void	resetPool(btDispatcher* dispatcher);
-
-protected:
-	void _initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
-						int gridSizeX, int gridSizeY, int gridSizeZ, 
-						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
-						int maxBodiesPerCell = 8,
-						btScalar cellFactorAABB = btScalar(1.0f));
-	void _finalize();
-	void addPairsToCache(btDispatcher* dispatcher);
-	void addLarge2LargePairsToCache(btDispatcher* dispatcher);
-
-// overrides for CPU version
-	virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
-	virtual void prepareAABB();
-	virtual void calcHashAABB();
-	virtual void sortHash();	
-	virtual void findCellStart();
-	virtual void findOverlappingPairs();
-	virtual void findPairsLarge();
-	virtual void computePairCacheChanges();
-	virtual void scanOverlappingPairBuff();
-	virtual void squeezeOverlappingPairBuff();
-};
-
-//----------------------------------------------------------------------------------------
-
-#endif //BTGPU3DGRIDBROADPHASE_H
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASE_H
+#define BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs.
+///It also manages a free-list pool of "large proxy" handles (m_pLargeHandles); see allocLargeHandle()/freeLargeHandle().
+class btGpu3DGridBroadphase : public btSimpleBroadphase
+{
+protected:
+	bool			m_bInitialized;
+	unsigned int	m_numBodies;
+	unsigned int	m_numCells;
+	unsigned int	m_maxPairsPerBody;
+	btScalar		m_cellFactorAABB;
+	unsigned int	m_maxBodiesPerCell;
+	bt3DGridBroadphaseParams m_params;
+	btScalar		m_maxRadius;
+	// CPU data
+	unsigned int*	m_hBodiesHash;
+	unsigned int*	m_hCellStart;
+	unsigned int*	m_hPairBuffStartCurr;
+	bt3DGrid3F1U*		m_hAABB;
+	unsigned int*	m_hPairBuff;
+	unsigned int*	m_hPairScan;
+	unsigned int*	m_hPairOut;
+// large proxies
+	int		m_numLargeHandles;			// number of large handles currently allocated
+	int		m_maxLargeHandles;			// upper bound checked by allocLargeHandle() and freeLargeHandle()
+	int		m_LastLargeHandleIndex;		// highest index returned by allocLargeHandle(); lowered by one when that handle is freed
+	btSimpleBroadphaseProxy* m_pLargeHandles;
+	void* m_pLargeHandlesRawPtr;
+	int		m_firstFreeLargeHandle;		// head of the free list linked through GetNextFree()/SetNextFree()
+	int allocLargeHandle()	// pops the head of the free list and returns its index into m_pLargeHandles
+	{
+		btAssert(m_numLargeHandles < m_maxLargeHandles);
+		int freeLargeHandle = m_firstFreeLargeHandle;
+		m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
+		m_numLargeHandles++;
+		if(freeLargeHandle > m_LastLargeHandleIndex)
+		{
+			m_LastLargeHandleIndex = freeLargeHandle;
+		}
+		return freeLargeHandle;
+	}
+	void freeLargeHandle(btSimpleBroadphaseProxy* proxy)	// pushes proxy (an element of m_pLargeHandles) back onto the free list
+	{
+		int handle = int(proxy - m_pLargeHandles);
+		btAssert((handle >= 0) && (handle < m_maxLargeHandles));	// check against the large-handle bound, matching allocLargeHandle()
+		if(handle == m_LastLargeHandleIndex)
+		{
+			m_LastLargeHandleIndex--;
+		}
+		proxy->SetNextFree(m_firstFreeLargeHandle);
+		m_firstFreeLargeHandle = handle;
+		proxy->m_clientObject = 0;
+		m_numLargeHandles--;
+	}
+	bool isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax);
+	bool isLargeProxy(btBroadphaseProxy* proxy);
+// debug
+	unsigned int	m_numPairsAdded;
+	unsigned int	m_numPairsRemoved;
+	unsigned int	m_numOverflows;
+// public interface
+public:
+	btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+					   int gridSizeX, int gridSizeY, int gridSizeZ, 
+					   int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+					   int maxBodiesPerCell = 8,
+					   btScalar cellFactorAABB = btScalar(1.0f));
+	btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
+						const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+						int gridSizeX, int gridSizeY, int gridSizeZ, 
+						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+						int maxBodiesPerCell = 8,
+						btScalar cellFactorAABB = btScalar(1.0f));
+	virtual ~btGpu3DGridBroadphase();
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
+
+	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
+	virtual void	resetPool(btDispatcher* dispatcher);
+
+protected:
+	void _initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+						int gridSizeX, int gridSizeY, int gridSizeZ, 
+						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+						int maxBodiesPerCell = 8,
+						btScalar cellFactorAABB = btScalar(1.0f));
+	void _finalize();
+	void addPairsToCache(btDispatcher* dispatcher);
+	void addLarge2LargePairsToCache(btDispatcher* dispatcher);
+
+// overrides for CPU version
+	virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
+	virtual void prepareAABB();
+	virtual void calcHashAABB();
+	virtual void sortHash();	
+	virtual void findCellStart();
+	virtual void findOverlappingPairs();
+	virtual void findPairsLarge();
+	virtual void computePairCacheChanges();
+	virtual void scanOverlappingPairBuff();
+	virtual void squeezeOverlappingPairBuff();
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif //BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
--- a/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
+++ b/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
@@ -1,430 +1,430 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//               K E R N E L    F U N C T I O N S 
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-
-// calculate position in uniform grid
-BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
-{
-    int3 gridPos;
-    gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
-    gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
-    gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
-    return gridPos;
-} // bt3DGrid_calcGridPos()
-
-//----------------------------------------------------------------------------------------
-
-// calculate address in grid from position (clamping to edges)
-BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
-{
-    gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
-    gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
-    gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
-    return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
-} // bt3DGrid_calcGridHash()
-
-//----------------------------------------------------------------------------------------
-
-// calculate grid hash value for each body using its AABB
-BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index*2];
-	bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
-	float4 pos;
-	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
-	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
-	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
-    // get address in grid
-    int3 gridPos = bt3DGrid_calcGridPos(pos);
-    uint gridHash = bt3DGrid_calcGridHash(gridPos);
-    // store grid hash and body index
-    pHash[index] = BT_GPU_make_uint2(gridHash, index);
-} // calcHashAABBD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	// Load hash data into shared memory so that we can look 
-	// at neighboring body's hash value without loading
-	// two hash values per thread
-	BT_GPU___shared__ uint sharedHash[257];
-	sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
-	if((index > 0) && (BT_GPU_threadIdx.x == 0))
-	{
-		// first thread in block must load neighbor body hash
-		volatile uint2 prevData = pHash[index-1];
-		sharedHash[0] = prevData.x;
-	}
-	BT_GPU___syncthreads();
-	if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
-	{
-		cellStart[sortedData.x] = index;
-	}
-} // findCellStartD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
-{
-	return	(min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && 
-			(min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && 
-			(min0.fz <= max1.fz)&& (min1.fz <= max0.fz); 
-} // cudaTestAABBOverlap()
- 
-//----------------------------------------------------------------------------------------
-
-BT_GPU___device__ void findPairsInCell(	int3	gridPos,
-										uint    index,
-										uint2*  pHash,
-										uint*   pCellStart,
-										bt3DGrid3F1U* pAABB, 
-										uint*   pPairBuff,
-										uint2*	pPairBuffStartCurr,
-										uint	numBodies)
-{
-    if (	(gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
-		||	(gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
-		||  (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1)) 
-    {
-		return;
-	}
-    uint gridHash = bt3DGrid_calcGridHash(gridPos);
-    // get start of bucket for this cell
-    uint bucketStart = pCellStart[gridHash];
-    if (bucketStart == 0xffffffff)
-	{
-        return;   // cell empty
-	}
-	// iterate over bodies in this cell
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); 
-	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	uint handleIndex =  min0.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
-	uint curr_max = start_curr_next.x - start - 1;
-	uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
-	bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
-	for(uint index2 = bucketStart; index2 < bucketEnd; index2++) 
-	{
-        uint2 cellData = pHash[index2];
-        if (cellData.x != gridHash)
-        {
-			break;   // no longer in same bucket
-		}
-		uint unsorted_indx2 = cellData.y;
-        if (unsorted_indx2 < unsorted_indx) // check not colliding with self
-        {   
-			bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
-			bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
-			if(cudaTestAABBOverlap(min0, max0, min1, max1))
-			{
-				uint handleIndex2 = min1.uw;
-				uint k;
-				for(k = 0; k < curr; k++)
-				{
-					uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
-					if(old_pair == handleIndex2)
-					{
-						pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
-						break;
-					}
-				}
-				if(k == curr)
-				{
-					if(curr >= curr_max) 
-					{ // not a good solution, but let's avoid crash
-						break;
-					}
-					pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
-					curr++;
-				}
-			}
-		}
-	}
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
-    return;
-} // findPairsInCell()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U*	pAABB, uint2* pHash, uint* pCellStart, 
-												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-	bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
-	bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	float4 pos;
-	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
-	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
-	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
-    // get address in grid
-    int3 gridPos = bt3DGrid_calcGridPos(pos);
-    // examine only neighbouring cells
-    for(int z=-1; z<=1; z++) {
-        for(int y=-1; y<=1; y++) {
-            for(int x=-1; x<=1; x++) {
-                findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
-            }
-        }
-    }
-} // findOverlappingPairsD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, 
-										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-    uint2 sortedData = pHash[index];
-	uint unsorted_indx = sortedData.y;
-	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
-	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
-	uint handleIndex =  min0.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
-	uint curr_max = start_curr_next.x - start - 1;
-    for(uint i = 0; i < numLarge; i++)
-    {
-		uint indx2 = numBodies + i;
-		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
-		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
-		if(cudaTestAABBOverlap(min0, max0, min1, max1))
-		{
-			uint k;
-			uint handleIndex2 =  min1.uw;
-			for(k = 0; k < curr; k++)
-			{
-				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
-				if(old_pair == handleIndex2)
-				{
-					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
-					break;
-				}
-			}
-			if(k == curr)
-			{
-				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
-				if(curr >= curr_max) 
-				{ // not a good solution, but let's avoid crash
-					break;
-				}
-				curr++;
-			}
-		}
-    }
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
-    return;
-} // findPairsLargeD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, 
-												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index * 2];
-	uint handleIndex = bbMin.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint *pInp = pPairBuff + start;
-	uint num_changes = 0;
-	for(uint k = 0; k < curr; k++, pInp++)
-	{
-		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
-		{
-			num_changes++;
-		}
-	}
-	pPairScan[index+1] = num_changes;
-} // computePairCacheChangesD()
-
-//----------------------------------------------------------------------------------------
-
-BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
-												   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
-{
-    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
-    if(index >= (int)numBodies)
-	{
-		return;
-	}
-	bt3DGrid3F1U bbMin = pAABB[index * 2];
-	uint handleIndex = bbMin.uw;
-	uint2 start_curr = pPairBuffStartCurr[handleIndex];
-	uint start = start_curr.x;
-	uint curr = start_curr.y;
-	uint* pInp = pPairBuff + start;
-	uint* pOut = pPairOut + pPairScan[index];
-	uint* pOut2 = pInp;
-	uint num = 0; 
-	for(uint k = 0; k < curr; k++, pInp++)
-	{
-		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
-		{
-			*pOut = *pInp;
-			pOut++;
-		}
-		if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
-		{
-			*pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
-			pOut2++;
-			num++;
-		}
-	}
-	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);
-} // squeezeOverlappingPairBuffD()
-
-
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//               E N D   O F    K E R N E L    F U N C T I O N S 
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies)
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    // execute the kernel
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
-    // check if kernel invocation generated an error
-    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
-} // calcHashAABB()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
-	BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
-} // findCellStart()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies))
-{
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
-#endif
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
-#endif
-} // findOverlappingPairs()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
-{
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
-#endif
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
-#if B_CUDA_USE_TEX
-    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
-#endif
-} // findPairsLarge()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
-} // computePairCacheChanges()
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
-{
-    int numThreads, numBlocks;
-    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
-    BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
-    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
-} // btCuda_squeezeOverlappingPairBuff()
-
-//------------------------------------------------------------------------------------------------
-
-} // extern "C"
-
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
-//------------------------------------------------------------------------------------------------
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//               K E R N E L    F U N C T I O N S 
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+// Convert point p to integer cell coordinates, floor((p - worldOrigin) / cellSize) per axis; nothing is clamped here, so the result may lie outside the grid
+BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
+{
+    int3 gridPos;
+    gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
+    gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
+    gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
+    return gridPos; // p.w is not read
+} // bt3DGrid_calcGridPos()
+
+//----------------------------------------------------------------------------------------
+
+// Clamp gridPos to [0, gridSize - 1] on every axis, then return the linear cell index (z * gridSizeY + y) * gridSizeX + x
+BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
+{
+    gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1)); // out-of-range coordinates map onto the nearest border cell
+    gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
+    gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
+    return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x; // NOTE(review): BT_GPU___mul24 is presumably a 24-bit multiply, which would limit usable grid dimensions - confirm
+} // bt3DGrid_calcGridHash()
+
+//----------------------------------------------------------------------------------------
+
+// Kernel, one thread per body: hash the grid cell that contains the centre of the body's AABB and store (hash, body index) in pHash[index]
+BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return; // index is past the last body: nothing to do
+	}
+	bt3DGrid3F1U bbMin = pAABB[index*2]; // pAABB holds two entries per body: min corner first, max corner second
+	bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
+	float4 pos;
+	pos.x = (bbMin.fx + bbMax.fx) * 0.5f; // centre of the box
+	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
+	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
+    // cell coordinates of the centre, then the (clamped) linear cell index
+    int3 gridPos = bt3DGrid_calcGridPos(pos);
+    uint gridHash = bt3DGrid_calcGridHash(gridPos);
+    // store grid hash (.x) and body index (.y)
+    pHash[index] = BT_GPU_make_uint2(gridHash, index);
+} // calcHashAABBD()
+
+//----------------------------------------------------------------------------------------
+// Kernel, one thread per pHash entry: write into cellStart[hash] the index of the first entry of each run of equal hashes (presumably pHash is sorted by hash beforehand - verify against caller)
+BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+    uint2 sortedData = pHash[index];
+	// Load hash data into shared memory so that we can look
+	// at the neighboring body's hash value without loading
+	// two hash values per thread
+	BT_GPU___shared__ uint sharedHash[257]; // slot 0 holds the predecessor's hash, slots 1.. one hash per thread, so at most 256 threads per block fit
+	sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
+	if((index > 0) && (BT_GPU_threadIdx.x == 0))
+	{
+		// first thread in block must load the hash of the entry just before this block
+		volatile uint2 prevData = pHash[index-1];
+		sharedHash[0] = prevData.x;
+	}
+	BT_GPU___syncthreads(); // NOTE(review): threads that returned above never reach this barrier - confirm this is acceptable on the target
+	if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
+	{
+		cellStart[sortedData.x] = index; // hash differs from the previous entry (or there is none): a new run starts here
+	}
+} // findCellStartD()
+
+//----------------------------------------------------------------------------------------
+// Returns non-zero when boxes [min0, max0] and [min1, max1] overlap on all three axes; boxes that merely touch count as overlapping (<=). The uw fields are not read.
+BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
+{
+	return	(min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && 
+			(min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && 
+			(min0.fz <= max1.fz)&& (min1.fz <= max0.fz); 
+} // cudaTestAABBOverlap()
+ 
+//----------------------------------------------------------------------------------------
+// Test the body at sorted position 'index' against the bodies hashed to cell gridPos and record the overlaps in that body's slice of pPairBuff
+BT_GPU___device__ void findPairsInCell(	int3	gridPos,
+										uint    index,
+										uint2*  pHash,
+										uint*   pCellStart,
+										bt3DGrid3F1U* pAABB, 
+										uint*   pPairBuff,
+										uint2*	pPairBuffStartCurr,
+										uint	numBodies)
+{
+    if (	(gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
+		||	(gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
+		||  (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1)) 
+    {
+		return; // cell lies outside the grid
+	}
+    uint gridHash = bt3DGrid_calcGridHash(gridPos);
+    // get start of bucket for this cell
+    uint bucketStart = pCellStart[gridHash];
+    if (bucketStart == 0xffffffff)
+	{
+        return;   // cell empty
+	}
+	// load this body's box and the bookkeeping for its slice of pPairBuff
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y;
+    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); 
+	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	uint handleIndex =  min0.uw; // uw of the min entry selects this body's record in pPairBuffStartCurr
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x; // first slot of this body's slice of pPairBuff
+	uint curr = start_curr.y; // number of entries currently stored in the slice
+	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
+	uint curr_max = start_curr_next.x - start - 1; // slice length minus one; the next record's start marks the end of this slice
+	uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell; // scan at most m_maxBodiesPerCell entries of the bucket
+	bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
+	for(uint index2 = bucketStart; index2 < bucketEnd; index2++) 
+	{
+        uint2 cellData = pHash[index2];
+        if (cellData.x != gridHash)
+        {
+			break;   // no longer in same bucket
+		}
+		uint unsorted_indx2 = cellData.y;
+        if (unsorted_indx2 < unsorted_indx) // skips self; each pair is recorded only by the body with the larger index
+        {   
+			bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
+			bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
+			if(cudaTestAABBOverlap(min0, max0, min1, max1))
+			{
+				uint handleIndex2 = min1.uw;
+				uint k;
+				for(k = 0; k < curr; k++)
+				{
+					uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); // stored partner handle without its flag bits
+					if(old_pair == handleIndex2)
+					{
+						pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; // pair already stored: mark it as found again
+						break;
+					}
+				}
+				if(k == curr) // partner is not in the slice yet
+				{
+					if(curr >= curr_max) 
+					{ // slice is full: the pair is dropped and the bucket scan stops (not a good solution, but let's avoid crash)
+						break;
+					}
+					pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; // append as a new pair
+					curr++;
+				}
+			}
+		}
+	}
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); // write back the updated entry count
+    return;
+} // findPairsInCell()
+
+//----------------------------------------------------------------------------------------
+// Kernel, one thread per pHash entry: find the cell holding the centre of the body's AABB and search that cell and its 26 neighbours for overlapping bodies
+BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U*	pAABB, uint2* pHash, uint* pCellStart, 
+												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y; // body index stored next to the hash
+	bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
+	bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	float4 pos;
+	pos.x = (bbMin.fx + bbMax.fx) * 0.5f; // centre of the box
+	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
+	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
+    // cell coordinates of the centre (not clamped)
+    int3 gridPos = bt3DGrid_calcGridPos(pos);
+    // examine the 3x3x3 block of cells centred on gridPos
+    for(int z=-1; z<=1; z++) {
+        for(int y=-1; y<=1; y++) {
+            for(int x=-1; x<=1; x++) {
+                findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies); // cells outside the grid are rejected inside findPairsInCell
+            }
+        }
+    }
+} // findOverlappingPairsD()
+
+//----------------------------------------------------------------------------------------
+// Kernel, one thread per pHash entry: test the body against each of the numLarge boxes stored after the first numBodies boxes in pAABB and record overlaps in the body's slice of pPairBuff (pCellStart is not used)
+BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, 
+										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y;
+	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
+	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	uint handleIndex =  min0.uw; // uw of the min entry selects this body's record in pPairBuffStartCurr
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x; // first slot of this body's slice of pPairBuff
+	uint curr = start_curr.y; // number of entries currently stored in the slice
+	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
+	uint curr_max = start_curr_next.x - start - 1; // slice length minus one
+    for(uint i = 0; i < numLarge; i++)
+    {
+		uint indx2 = numBodies + i; // the large boxes follow the numBodies regular ones in pAABB
+		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
+		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
+		if(cudaTestAABBOverlap(min0, max0, min1, max1))
+		{
+			uint k;
+			uint handleIndex2 =  min1.uw;
+			for(k = 0; k < curr; k++)
+			{
+				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG); // stored partner handle without its flag bits
+				if(old_pair == handleIndex2)
+				{
+					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG; // pair already stored: mark it as found again
+					break;
+				}
+			}
+			if(k == curr) // partner is not in the slice yet
+			{
+				if(curr >= curr_max) 
+				{ // slice is full: stop before storing, as findPairsInCell does (not a good solution, but let's avoid crash)
+					break;
+				}
+				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG; // append as a new pair
+				curr++;
+			}
+		}
+    }
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr); // write back the updated entry count
+    return;
+} // findPairsLargeD()
+
+//----------------------------------------------------------------------------------------
+// Kernel, one thread per body: count the entries in the body's slice of pPairBuff whose FOUND flag is clear and store the count in pPairScan[index+1]
+BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, 
+												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+	bt3DGrid3F1U bbMin = pAABB[index * 2]; // only the min entry is needed, for its uw field
+	uint handleIndex = bbMin.uw;
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x; // first slot of this body's slice
+	uint curr = start_curr.y; // number of entries stored in the slice
+	uint *pInp = pPairBuff + start;
+	uint num_changes = 0;
+	for(uint k = 0; k < curr; k++, pInp++)
+	{
+		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) // FOUND clear: entry carries only the NEW flag or no flag at all
+		{
+			num_changes++;
+		}
+	}
+	pPairScan[index+1] = num_changes; // NOTE(review): the +1 offset suggests the caller turns pPairScan into a running sum afterwards - confirm
+} // computePairCacheChangesD()
+
+//----------------------------------------------------------------------------------------
+// Kernel, one thread per body: copy entries lacking the FOUND flag to pPairOut (starting at offset pPairScan[index]) and compact the flagged entries in place with their flags cleared
+BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
+												   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+	bt3DGrid3F1U bbMin = pAABB[index * 2]; // only the min entry is needed, for its uw field
+	uint handleIndex = bbMin.uw;
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x; // first slot of this body's slice
+	uint curr = start_curr.y; // number of entries stored in the slice
+	uint* pInp = pPairBuff + start; // read cursor
+	uint* pOut = pPairOut + pPairScan[index]; // where this body's reported entries go
+	uint* pOut2 = pInp; // in-place write cursor; it never runs ahead of pInp
+	uint num = 0; // entries kept in the slice
+	for(uint k = 0; k < curr; k++, pInp++)
+	{
+		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG)) // FOUND clear: report the entry, flag bits included
+		{
+			*pOut = *pInp;
+			pOut++;
+		}
+		if((*pInp) & BT_3DGRID_PAIR_ANY_FLG) // FOUND or NEW set: keep the entry; entries with neither flag are dropped from the slice
+		{
+			*pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
+			pOut2++;
+			num++;
+		}
+	}
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num); // slice start is unchanged, count is the number kept
+} // squeezeOverlappingPairBuffD()
+
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//               E N D   O F    K E R N E L    F U N C T I O N S 
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: run calcHashAABBD over numBodies bodies in blocks of at most 256 threads; hash is handed to the kernel as an array of uint2
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    // execute the kernel
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
+    // check if kernel invocation generated an error
+    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
+} // calcHashAABB()
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: fill cellStart with the 0xffffffff "cell empty" marker, then run findCellStartD over numBodies hash entries in blocks of at most 256 threads
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint))); // NOTE(review): presumably a byte-wise fill like memset, so every byte becomes 0xff - confirm
+	BT_GPU_EXECKERNEL(numBlocks, numThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
+} // findCellStart()
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: run findOverlappingPairsD over numBodies bodies in blocks of at most 64 threads; when B_CUDA_USE_TEX is set, pAABB is bound to the pAABBTex texture for the duration of the launch
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies)
+{
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U))); // two entries (min, max) per body
+#endif
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD"); // NOTE(review): the message text does not match the kernel name findOverlappingPairsD
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findOverlappingPairs()
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: run findPairsLargeD over numBodies bodies in blocks of at most 64 threads; when B_CUDA_USE_TEX is set, the boxes of all numBodies + numLarge bodies are bound to the pAABBTex texture for the launch
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
+{
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U))); // two entries (min, max) per body
+#endif
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD"); // NOTE(review): the message text does not match the kernel name findPairsLargeD
+#if B_CUDA_USE_TEX
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findPairsLarge()
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: run computePairCacheChangesD over numBodies bodies in blocks of at most 256 threads; pPairBuffStartCurr is handed to the kernel as an array of uint2
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD"); // NOTE(review): the message text does not match the kernel name computePairCacheChangesD
+} // computePairCacheChanges()
+
+//----------------------------------------------------------------------------------------
+// Launch wrapper: run squeezeOverlappingPairBuffD over numBodies bodies in blocks of at most 256 threads; pPairBuffStartCurr is handed to the kernel as an array of uint2
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int numThreads, numBlocks;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, numBlocks, numThreads);
+    BT_GPU_EXECKERNEL(numBlocks, numThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD"); // NOTE(review): the message text does not match the kernel name squeezeOverlappingPairBuffD
+} // squeezeOverlappingPairBuff()
+
+//------------------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
--- a/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
+++ b/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
@@ -1,61 +1,61 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared definitions for GPU-based 3D Grid collision detection broadphase
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-
-//----------------------------------------------------------------------------------------
-
-#include "btGpu3DGridBroadphaseSharedTypes.h"
-
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies);
-
-void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
-
-void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies);
-
-void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
-
-void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
-
-void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
-
-
-//----------------------------------------------------------------------------------------
-
-} // extern "C"
-
-//----------------------------------------------------------------------------------------
-
-#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
-
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+// Per body: hash the grid cell holding the centre of its AABB; hash receives (cell hash, body index) pairs, i.e. two unsigned ints per body
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies);
+// Reset the numCells entries of cellStart to 0xffffffff, then store for each hash value present the index of its first entry in hash
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
+// Per body: search its cell and the neighbouring cells for overlapping AABBs and record them in pPairBuff; pPairBuffStartCurr holds a (start, count) pair per handle
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies);
+// Per body: test against the numLarge boxes that follow the first numBodies boxes in pAABB and record overlaps in pPairBuff
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
+// Per body: write to pPairScan[body + 1] the number of its pPairBuff entries that lack the FOUND flag
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+// Per body: copy entries lacking the FOUND flag to pPairOut at offset pPairScan[body] and compact the flagged entries in pPairBuff with their flags cleared
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
--- a/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
+++ b/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
@@ -1,67 +1,67 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared definitions for GPU-based 3D Grid collision detection broadphase
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-
-//----------------------------------------------------------------------------------------
-
-#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000)
-#define BT_3DGRID_PAIR_NEW_FLG   (0x20000000)
-#define BT_3DGRID_PAIR_ANY_FLG   (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
-
-//----------------------------------------------------------------------------------------
-
-struct bt3DGridBroadphaseParams 
-{
-	unsigned int	m_gridSizeX;
-	unsigned int	m_gridSizeY;
-	unsigned int	m_gridSizeZ;
-	unsigned int	m_numCells;
-	float			m_worldOriginX;
-	float			m_worldOriginY;
-	float			m_worldOriginZ;
-	float			m_cellSizeX;
-	float			m_cellSizeY;
-	float			m_cellSizeZ;
-	unsigned int	m_numBodies;
-	unsigned int	m_maxBodiesPerCell;
-};
-
-//----------------------------------------------------------------------------------------
-
-struct bt3DGrid3F1U
-{
-	float			fx;
-	float			fy;
-	float			fz;
-	unsigned int	uw;
-};
-
-//----------------------------------------------------------------------------------------
-
-#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
-
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
+//----------------------------------------------------------------------------------------
+// Flag bits OR-ed into pair buffer entries; the remaining bits hold the partner's handle index
+#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000) // set on an already stored pair when it is detected again
+#define BT_3DGRID_PAIR_NEW_FLG   (0x20000000) // set on a pair when it is first appended
+#define BT_3DGRID_PAIR_ANY_FLG   (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG) // mask covering both flag bits
+
+//----------------------------------------------------------------------------------------
+// Parameters of the uniform grid: dimensions in cells, world-space origin, cell size, and per-cell scan limit
+struct bt3DGridBroadphaseParams 
+{
+	unsigned int	m_gridSizeX; // number of cells along X; valid cell coordinates are 0 .. m_gridSizeX - 1
+	unsigned int	m_gridSizeY; // number of cells along Y
+	unsigned int	m_gridSizeZ; // number of cells along Z
+	unsigned int	m_numCells; // NOTE(review): presumably m_gridSizeX * m_gridSizeY * m_gridSizeZ - confirm
+	float			m_worldOriginX; // subtracted from a position before it is divided by the cell size
+	float			m_worldOriginY;
+	float			m_worldOriginZ;
+	float			m_cellSizeX; // extent of one cell along X, in the same units as the positions
+	float			m_cellSizeY;
+	float			m_cellSizeZ;
+	unsigned int	m_numBodies; // NOTE(review): presumably the current body count - confirm
+	unsigned int	m_maxBodiesPerCell; // upper bound on the hash entries scanned per cell during the pair search
+};
+
+//----------------------------------------------------------------------------------------
+// Three floats plus one unsigned int; used for AABB corners, stored as two consecutive entries (min, max) per body
+struct bt3DGrid3F1U
+{
+	float			fx; // x coordinate of the corner
+	float			fy; // y coordinate of the corner
+	float			fz; // z coordinate of the corner
+	unsigned int	uw; // in a min-corner entry: the body's handle index, used to index the pair buffer bookkeeping
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
--- a/src/BulletMultiThreaded/btGpuUtilsSharedCode.h
+++ b/src/BulletMultiThreaded/btGpuUtilsSharedCode.h
@@ -1,55 +1,55 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//----------------------------------------------------------------------------------------
-
-// Shared code for GPU-based utilities
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  will be compiled by both CPU and CUDA compilers
-//	file with definitions of BT_GPU_xxx should be included first
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-//----------------------------------------------------------------------------------------
-
-#include "btGpuUtilsSharedDefs.h"
-
-//----------------------------------------------------------------------------------------
-
-extern "C"
-{
-
-//----------------------------------------------------------------------------------------
-
-//Round a / b to nearest higher integer value
-int BT_GPU_PREF(iDivUp)(int a, int b)
-{
-    return (a % b != 0) ? (a / b + 1) : (a / b);
-} // iDivUp()
-
-//----------------------------------------------------------------------------------------
-
-// compute grid and thread block size for a given number of elements
-void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
-{
-    numThreads = BT_GPU_min(blockSize, n);
-    numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
-} // computeGridSize()
-
-//----------------------------------------------------------------------------------------
-
-} // extern "C"
-
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared code for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  will be compiled by both CPU and CUDA compilers
+//	file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpuUtilsSharedDefs.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+
+// Integer quotient a / b, plus one when the division leaves a remainder (rounds up for a >= 0 and b > 0); b must not be zero
+int BT_GPU_PREF(iDivUp)(int a, int b)
+{
+    return (a % b != 0) ? (a / b + 1) : (a / b);
+} // iDivUp()
+
+//----------------------------------------------------------------------------------------
+
+// compute grid and thread block size for n elements: numThreads = min(blockSize, n), numBlocks = ceil(n / numThreads); when n or blockSize is <= 0, numBlocks is 0 instead of dividing by zero
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
+{
+    numThreads = BT_GPU_min(blockSize, n);
+    numBlocks = (numThreads > 0) ? BT_GPU_PREF(iDivUp)(n, numThreads) : 0; // guard: iDivUp takes a modulo by its second argument
+} // computeGridSize()
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
--- a/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h
+++ b/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h
@@ -1,52 +1,52 @@
-/*
-Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-// Shared definitions for GPU-based utilities
-
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-//  Keep this file free from Bullet headers
-//  it is included into both CUDA and CPU code
-//	file with definitions of BT_GPU_xxx should be included first
-//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-#ifndef BTGPUUTILSDHAREDDEFS_H
-#define BTGPUUTILSDHAREDDEFS_H
-
-
-extern "C"
-{
-
-
-//Round a / b to nearest higher integer value
-int BT_GPU_PREF(iDivUp)(int a, int b);
-
-// compute grid and thread block size for a given number of elements
-void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
-
-void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
-void BT_GPU_PREF(freeArray)(void* devPtr);
-void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
-void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
-void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
-void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
-void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
-
-
-} // extern "C"
-
-
-#endif // BTGPUUTILSDHAREDDEFS_H
-
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+// Shared definitions for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//	file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+#ifndef BTGPUUTILSDHAREDDEFS_H
+#define BTGPUUTILSDHAREDDEFS_H
+
+
+extern "C"
+{
+
+
+//Round a / b to nearest higher integer value
+int BT_GPU_PREF(iDivUp)(int a, int b);
+
+// compute grid and thread block size for a given number of elements
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
+
+void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
+void BT_GPU_PREF(freeArray)(void* devPtr);
+void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
+void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
+void BT_GPU_PREF(registerGLBufferObject)(unsigned int vbo); // only the name goes inside BT_GPU_PREF(), as in the prototypes above
+void* BT_GPU_PREF(mapGLBufferObject)(unsigned int vbo);
+void BT_GPU_PREF(unmapGLBufferObject)(unsigned int vbo);
+
+
+} // extern "C"
+
+
+#endif // BTGPUUTILSDHAREDDEFS_H
+
--- a/src/BulletMultiThreaded/btParallelConstraintSolver.cpp
+++ b/src/BulletMultiThreaded/btParallelConstraintSolver.cpp
@@ -1,74 +1,74 @@
-/*
-   Copyright (C) 2010 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#include "btParallelConstraintSolver.h"
-#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
-
-btParallelConstraintSolver::btParallelConstraintSolver()
-{
-
-	//initialize MiniCL here
-
-}
-	
-btParallelConstraintSolver::~btParallelConstraintSolver()
-{
-	//exit MiniCL
-
-}
-
-	
-btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
-{
-	{
-			int i;
-			btPersistentManifold* manifold = 0;
-//			btCollisionObject* colObj0=0,*colObj1=0;
-
-
-			for (i=0;i<numManifolds;i++)
-			{
-				manifold = manifoldPtr[i];
-				convertContact(manifold,infoGlobal);
-			}
-		
-	}
-
-	btContactSolverInfo info = infoGlobal;
-
-
-
-	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
-	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
-
-	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
-	m_orderTmpConstraintPool.resize(numConstraintPool);
-	m_orderFrictionConstraintPool.resize(numFrictionPool);
-	{
-		int i;
-		for (i=0;i<numConstraintPool;i++)
-		{
-			m_orderTmpConstraintPool[i] = i;
-		}
-		for (i=0;i<numFrictionPool;i++)
-		{
-			m_orderFrictionConstraintPool[i] = i;
-		}
-	}
-
-	return 0.f;
-}
-
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#include "btParallelConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+btParallelConstraintSolver::btParallelConstraintSolver()
+{
+
+	//initialize MiniCL here
+
+}
+	
+btParallelConstraintSolver::~btParallelConstraintSolver()
+{
+	//exit MiniCL
+
+}
+
+// Setup pass: runs convertContact() on every manifold, then sizes the two order arrays and fills them with 0..size-1. Always returns 0.f.
+btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	{
+			int i;
+			btPersistentManifold* manifold = 0;
+//			btCollisionObject* colObj0=0,*colObj1=0;
+			// bodies/numBodies, constraints/numConstraints, debugDrawer and stackAlloc are not referenced in this body.
+
+			for (i=0;i<numManifolds;i++)
+			{
+				manifold = manifoldPtr[i];
+				convertContact(manifold,infoGlobal);
+			}
+		
+	}
+
+	btContactSolverInfo info = infoGlobal; // NOTE(review): this local copy is never read below
+
+
+	// Pool sizes are read after the convertContact() loop; presumably that call fills these pools - confirm.
+	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
+	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
+
+	///@todo: use stack allocator for such temporary memory, same for solver bodies/constraints
+	m_orderTmpConstraintPool.resize(numConstraintPool);
+	m_orderFrictionConstraintPool.resize(numFrictionPool);
+	{
+		int i;
+		for (i=0;i<numConstraintPool;i++)
+		{
+			m_orderTmpConstraintPool[i] = i; // identity ordering: entry i refers to index i
+		}
+		for (i=0;i<numFrictionPool;i++)
+		{
+			m_orderFrictionConstraintPool[i] = i; // identity ordering: entry i refers to index i
+		}
+	}
+
+	return 0.f;
+}
+
--- a/src/BulletMultiThreaded/btParallelConstraintSolver.h
+++ b/src/BulletMultiThreaded/btParallelConstraintSolver.h
@@ -1,42 +1,42 @@
-/*
-   Copyright (C) 2010 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
-#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
-
-#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
-									      
-class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver
-{
-protected:
-
-public:
-
-	btParallelConstraintSolver();
-	
-	virtual ~btParallelConstraintSolver();
-
-	//virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
-	
-	btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
-
-
-
-};
-
-
-
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
+#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+									      
+class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver
+{
+protected:
+
+public:
+
+	btParallelConstraintSolver();
+	
+	virtual ~btParallelConstraintSolver();
+
+	//virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
+	
+	btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+
+
+
+};
+
+
+
 #endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H
--- a/src/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
+++ b/src/BulletMultiThreaded/vectormath/scalar/cpp/boolInVec.h
@@ -1,225 +1,225 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <math.h>
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-private:
-    unsigned int mData;
-
-public:
-    // Default constructor; does no initialization
-    //
-    inline boolInVec( ) { };
-
-    // Construct from a value converted from float
-    //
-    inline boolInVec(floatInVec vec);
-
-    // Explicit cast from bool
-    //
-    explicit inline boolInVec(bool scalar);
-
-    // Explicit cast to bool
-    //
-    inline bool getAsBool() const;
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-    // Implicit cast to bool
-    //
-    inline operator bool() const;
-#endif
-
-    // Boolean negation operator
-    //
-    inline const boolInVec operator ! () const;
-
-    // Assignment operator
-    //
-    inline boolInVec& operator = (boolInVec vec);
-
-    // Boolean and assignment operator
-    //
-    inline boolInVec& operator &= (boolInVec vec);
-
-    // Boolean exclusive or assignment operator
-    //
-    inline boolInVec& operator ^= (boolInVec vec);
-
-    // Boolean or assignment operator
-    //
-    inline boolInVec& operator |= (boolInVec vec);
-
-};
-
-// Equal operator
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-
-// Not equal operator
-//
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-
-// And operator
-//
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-
-// Exclusive or operator
-//
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-
-// Or operator
-//
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// Conditionally select between two values
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-
-} // namespace Vectormath
-
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-    mData = -(int)scalar;
-}
-
-inline
-bool
-boolInVec::getAsBool() const
-{
-    return (mData > 0);
-}
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-inline
-boolInVec::operator bool() const
-{
-    return getAsBool();
-}
-#endif
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(!mData);
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() == vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() & vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() | vec1.getAsBool());
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec0.getAsBool() ^ vec1.getAsBool());
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
-}
-
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+private:
+    unsigned int mData;
+
+public:
+    // Default constructor; does no initialization
+    //
+    inline boolInVec( ) { };
+
+    // Construct from a value converted from float
+    //
+    inline boolInVec(floatInVec vec);
+
+    // Explicit cast from bool
+    //
+    explicit inline boolInVec(bool scalar);
+
+    // Explicit cast to bool
+    //
+    inline bool getAsBool() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to bool
+    //
+    inline operator bool() const;
+#endif
+
+    // Boolean negation operator
+    //
+    inline const boolInVec operator ! () const;
+
+    // Assignment operator
+    //
+    inline boolInVec& operator = (boolInVec vec);
+
+    // Boolean and assignment operator
+    //
+    inline boolInVec& operator &= (boolInVec vec);
+
+    // Boolean exclusive or assignment operator
+    //
+    inline boolInVec& operator ^= (boolInVec vec);
+
+    // Boolean or assignment operator
+    //
+    inline boolInVec& operator |= (boolInVec vec);
+
+};
+
+// Equal operator
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+
+// And operator
+//
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+
+// Exclusive or operator
+//
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+
+// Or operator
+//
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// Conditionally select between two values
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+inline
+boolInVec::boolInVec(floatInVec vec)
+{
+    *this = (vec != floatInVec(0.0f));
+}
+
+inline
+boolInVec::boolInVec(bool scalar)
+{
+    mData = -(int)scalar; // true -> -1, which the unsigned mData holds as all bits set; false -> 0
+}
+
+inline
+bool
+boolInVec::getAsBool() const
+{
+    return (mData > 0);
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+inline
+boolInVec::operator bool() const
+{
+    return getAsBool();
+}
+#endif
+
+inline
+const boolInVec
+boolInVec::operator ! () const
+{
+    return boolInVec(!mData);
+}
+
+inline
+boolInVec&
+boolInVec::operator = (boolInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator &= (boolInVec vec)
+{
+    *this = *this & vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator ^= (boolInVec vec)
+{
+    *this = *this ^ vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator |= (boolInVec vec)
+{
+    *this = *this | vec;
+    return *this;
+}
+
+inline
+const boolInVec
+operator == (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() == vec1.getAsBool());
+}
+
+inline
+const boolInVec
+operator != (boolInVec vec0, boolInVec vec1)
+{
+    return !(vec0 == vec1);
+}
+
+inline
+const boolInVec
+operator & (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() & vec1.getAsBool());
+}
+
+inline
+const boolInVec
+operator | (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() | vec1.getAsBool());
+}
+
+inline
+const boolInVec
+operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() ^ vec1.getAsBool());
+}
+
+inline
+const boolInVec
+select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
+}
+
+} // namespace Vectormath
+
+#endif // boolInVec_h
--- a/src/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
+++ b/src/BulletMultiThreaded/vectormath/scalar/cpp/floatInVec.h
@@ -1,343 +1,343 @@
-/*
-   Copyright (C) 2009 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-
-*/
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-// A class representing a scalar float value contained in a vector register
-// This class does not support fastmath
-class floatInVec
-{
-private:
-    float mData;
-
-public:
-    // Default constructor; does no initialization
-    //
-    inline floatInVec( ) { };
-
-    // Construct from a value converted from bool
-    //
-    inline floatInVec(boolInVec vec);
-
-    // Explicit cast from float
-    //
-    explicit inline floatInVec(float scalar);
-
-    // Explicit cast to float
-    //
-    inline float getAsFloat() const;
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-    // Implicit cast to float
-    //
-    inline operator float() const;
-#endif
-
-    // Post increment (add 1.0f)
-    //
-    inline const floatInVec operator ++ (int);
-
-    // Post decrement (subtract 1.0f)
-    //
-    inline const floatInVec operator -- (int);
-
-    // Pre increment (add 1.0f)
-    //
-    inline floatInVec& operator ++ ();
-
-    // Pre decrement (subtract 1.0f)
-    //
-    inline floatInVec& operator -- ();
-
-    // Negation operator
-    //
-    inline const floatInVec operator - () const;
-
-    // Assignment operator
-    //
-    inline floatInVec& operator = (floatInVec vec);
-
-    // Multiplication assignment operator
-    //
-    inline floatInVec& operator *= (floatInVec vec);
-
-    // Division assignment operator
-    //
-    inline floatInVec& operator /= (floatInVec vec);
-
-    // Addition assignment operator
-    //
-    inline floatInVec& operator += (floatInVec vec);
-
-    // Subtraction assignment operator
-    //
-    inline floatInVec& operator -= (floatInVec vec);
-
-};
-
-// Multiplication operator
-//
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-
-// Division operator
-//
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-
-// Addition operator
-//
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-
-// Subtraction operator
-//
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-
-// Less than operator
-//
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-
-// Less than or equal operator
-//
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-
-// Greater than operator
-//
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-
-// Greater than or equal operator
-//
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-
-// Equal operator
-//
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-
-// Not equal operator
-//
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// Conditionally select between two values
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-
-} // namespace Vectormath
-
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = float(vec.getAsBool());
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-    mData = scalar;
-}
-
-inline
-float
-floatInVec::getAsFloat() const
-{
-    return mData;
-}
-
-#ifndef _VECTORMATH_NO_SCALAR_CAST
-inline
-floatInVec::operator float() const
-{
-    return getAsFloat();
-}
-#endif
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    float olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    float olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec(1.0f);
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec(1.0f);
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec(-mData);
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(num.getAsFloat() / den.getAsFloat());
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() < vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() > vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class boolInVec;  // forward declaration only; boolInVec.h is included ahead of the inline definitions
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// A class representing a scalar float value contained in a vector register
+// This class does not support fastmath; in this version the value is held in a plain float member
+class floatInVec
+{
+private:
+    float mData;  // the wrapped scalar value
+
+public:
+    // Default constructor; does no initialization
+    // mData is left unset until a value is assigned
+    inline floatInVec( ) { };
+
+    // Construct from a value converted from bool
+    // Stores float(vec.getAsBool())
+    inline floatInVec(boolInVec vec);
+
+    // Explicit cast from float
+    // Declared explicit, so a bare float is never converted implicitly
+    explicit inline floatInVec(float scalar);
+
+    // Explicit cast to float
+    // Returns the stored value unchanged
+    inline float getAsFloat() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to float
+    // Compiled out when _VECTORMATH_NO_SCALAR_CAST is defined
+    inline operator float() const;
+#endif
+
+    // Post increment (add 1.0f)
+    // Returns the value held before the increment
+    inline const floatInVec operator ++ (int);
+
+    // Post decrement (subtract 1.0f)
+    // Returns the value held before the decrement
+    inline const floatInVec operator -- (int);
+
+    // Pre increment (add 1.0f)
+    // Returns a reference to this object after the update
+    inline floatInVec& operator ++ ();
+
+    // Pre decrement (subtract 1.0f)
+    // Returns a reference to this object after the update
+    inline floatInVec& operator -- ();
+
+    // Negation operator
+    // Returns a new value; this object is not modified
+    inline const floatInVec operator - () const;
+
+    // Assignment operator
+    // Copies the stored value from vec and returns *this
+    inline floatInVec& operator = (floatInVec vec);
+
+    // Multiplication assignment operator
+    // Equivalent to *this = *this * vec
+    inline floatInVec& operator *= (floatInVec vec);
+
+    // Division assignment operator
+    // Equivalent to *this = *this / vec
+    inline floatInVec& operator /= (floatInVec vec);
+
+    // Addition assignment operator
+    // Equivalent to *this = *this + vec
+    inline floatInVec& operator += (floatInVec vec);
+
+    // Subtraction assignment operator
+    // Equivalent to *this = *this - vec
+    inline floatInVec& operator -= (floatInVec vec);
+
+};
+
+// Multiplication operator
+// Multiplies the two underlying floats
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+
+// Division operator
+// Divides vec0 by vec1 as plain floats; the divisor is not checked for zero
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+
+// Addition operator
+// Adds the two underlying floats
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+
+// Subtraction operator
+// Subtracts vec1 from vec0 as plain floats
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+
+// Less than operator
+// Wraps the result of the float comparison in a boolInVec
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+
+// Less than or equal operator
+// Defined as !(vec0 > vec1). NOTE(review): presumably true when an operand is NaN - confirm
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+
+// Greater than operator
+// Wraps the result of the float comparison in a boolInVec
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+
+// Greater than or equal operator
+// Defined as !(vec0 < vec1). NOTE(review): presumably true when an operand is NaN - confirm
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+
+// Equal operator
+// Exact float equality, with no tolerance
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+
+// Not equal operator
+// Defined as !(vec0 == vec1)
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// Conditionally select between two values
+// Returns vec0 when select_vec1.getAsBool() == 0, otherwise vec1
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {  // inline definitions for the floatInVec members and operators
+
+inline
+floatInVec::floatInVec(boolInVec vec)
+{
+    mData = float(vec.getAsBool());  // NOTE(review): presumably 1.0f/0.0f - depends on getAsBool() in boolInVec.h
+}
+
+inline
+floatInVec::floatInVec(float scalar)
+{
+    mData = scalar;
+}
+
+inline
+float
+floatInVec::getAsFloat() const
+{
+    return mData;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+inline
+floatInVec::operator float() const
+{
+    return getAsFloat();  // same value as the explicit accessor
+}
+#endif
+
+inline
+const floatInVec
+floatInVec::operator ++ (int)
+{
+    float olddata = mData;  // remember the pre-increment value; it is what gets returned
+    operator ++();  // reuse the prefix form for the actual update
+    return floatInVec(olddata);
+}
+
+inline
+const floatInVec
+floatInVec::operator -- (int)
+{
+    float olddata = mData;  // remember the pre-decrement value; it is what gets returned
+    operator --();  // reuse the prefix form for the actual update
+    return floatInVec(olddata);
+}
+
+inline
+floatInVec&
+floatInVec::operator ++ ()
+{
+    *this += floatInVec(1.0f);  // the float constructor is explicit, so 1.0f has to be wrapped
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -- ()
+{
+    *this -= floatInVec(1.0f);  // the float constructor is explicit, so 1.0f has to be wrapped
+    return *this;
+}
+
+inline
+const floatInVec
+floatInVec::operator - () const
+{
+    return floatInVec(-mData);  // new object; mData itself is untouched
+}
+
+inline
+floatInVec&
+floatInVec::operator = (floatInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator *= (floatInVec vec)
+{
+    *this = *this * vec;  // delegates to the non-member operator *
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator /= (floatInVec vec)
+{
+    *this = *this / vec;  // delegates to the non-member operator /
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator += (floatInVec vec)
+{
+    *this = *this + vec;  // delegates to the non-member operator +
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -= (floatInVec vec)
+{
+    *this = *this - vec;  // delegates to the non-member operator -
+    return *this;
+}
+
+inline
+const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
+}
+
+inline
+const floatInVec
+operator / (floatInVec num, floatInVec den)
+{
+    return floatInVec(num.getAsFloat() / den.getAsFloat());  // plain float division; den is not checked for zero
+}
+
+inline
+const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
+}
+
+inline
+const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
+}
+
+inline
+const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() < vec1.getAsFloat());
+}
+
+inline
+const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 > vec1);  // negates '>' instead of comparing with <=; NOTE(review): presumably true for NaN operands - confirm
+}
+
+inline
+const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() > vec1.getAsFloat());
+}
+
+inline
+const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 < vec1);  // negates '<' instead of comparing with >=; NOTE(review): presumably true for NaN operands - confirm
+}
+
+inline
+const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());  // exact comparison, no tolerance
+}
+
+inline
+const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 == vec1);  // negation of operator ==; operator ! presumably comes from boolInVec.h
+}
+
+inline
+const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;  // zero selects vec0, anything else selects vec1
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H