bt -> b3 rename

add docs
2013-04-15 18:26:09 -07:00
parent 76e74523f6
commit faabffc23d
88 changed files with 720 additions and 6695 deletions
--- a/btgui/OpenGLWindow/renderscene.cpp
+++ b/btgui/OpenGLWindow/renderscene.cpp
@@ -18,7 +18,7 @@ bool keepStaticObjects = false;
 //#include "LinearMath/btQuickprof.h"
 #include "BulletCommon/btQuaternion.h"
 #include "BulletCommon/btMatrix3x3.h"
-//#include "../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"
+//#include "../opencl/gpu_rigidbody_pipeline/b3ConvexUtility.h"
 #include "ShapeData.h"
 ///work-in-progress 
 ///This ReadBulletSample is kept as simple as possible without dependencies to the Bullet SDK.
--- a/build/stringify.bat
+++ b/build/stringify.bat
@@ -11,11 +11,10 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/parallel_primitives/
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sap.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapKernels.h" --stringname="sapCL" stringify
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sapFast.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/sat.cl" --headerfile="../opencl/gpu_sat/kernels/satKernels.h" --stringname="satKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/sat.cl" --headerfile="../opencl/gpu_narrowphase/kernels/satKernels.h" --stringname="satKernelsCL" stringify
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_sat/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_narrowphase/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/primitiveContacts.cl" --headerfile="../opencl/gpu_sat/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/primitiveContacts.cl" --headerfile="../opencl/gpu_narrowphase/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
-
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/bvhTraversal.cl" --headerfile="../opencl/gpu_narrowphase/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/bvhTraversal.cl" --headerfile="../opencl/gpu_sat/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/integrateKernel.cl" --headerfile="../opencl/gpu_rigidbody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify
--- a/demo/donttouch/OpenGL3CoreRenderer.cpp
+++ b/demo/donttouch/OpenGL3CoreRenderer.cpp
@@ -195,7 +195,7 @@ GraphicsShape* createGraphicsShapeFromConcaveMesh(const btBvhTriangleMeshShape*
 	btAlignedObjectArray<GraphicsVertex>* vertices = new btAlignedObjectArray<GraphicsVertex>;
 	btAlignedObjectArray<int>* indicesPtr = new btAlignedObjectArray<int>;
-	const btStridingMeshInterface* meshInterface = trimesh->getMeshInterface();
+	const b3StridingMeshInterface* meshInterface = trimesh->getMeshInterface();
 	btVector3 trimeshScaling(1,1,1);
 	for (int partId=0;partId<meshInterface->getNumSubParts();partId++)
--- a/demo/donttouch/btGpuDynamicsWorld.cpp
+++ b/demo/donttouch/btGpuDynamicsWorld.cpp
@@ -127,7 +127,7 @@ int btGpuDynamicsWorld::findOrRegisterCollisionShape(const btCollisionShape* col
 				m_uniqueShapes.push_back(colShape);
 				btBvhTriangleMeshShape* trimesh = (btBvhTriangleMeshShape*) colShape;
-				btStridingMeshInterface* meshInterface = trimesh->getMeshInterface();
+				b3StridingMeshInterface* meshInterface = trimesh->getMeshInterface();
 				btAlignedObjectArray<btVector3> vertices;
 				btAlignedObjectArray<int> indices;
--- a/demo/gpudemo/GpuDemo.cpp
+++ b/demo/gpudemo/GpuDemo.cpp
@@ -1,7 +1,7 @@
 #include "GpuDemo.h"
 #include "GpuDemoInternalData.h"
 #include "BulletCommon/btScalar.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/ShapeData.h"
 #include "OpenGLWindow/GLInstancingRenderer.h"
@@ -50,27 +50,27 @@ void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 	//	if (useInterop)
 	//	{
-	//		m_data->m_clContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
+	//		m_data->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
 	//	} else
 	{
-		m_clData->m_clContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId);
+		m_clData->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId);
-		btOpenCLUtils::printPlatformInfo(platformId);
+		b3OpenCLUtils::printPlatformInfo(platformId);
 	}
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(m_clData->m_clContext);
+	int numDev = b3OpenCLUtils::getNumDevices(m_clData->m_clContext);
 	if (numDev>0)
 	{
-		m_clData->m_clDevice= btOpenCLUtils::getDevice(m_clData->m_clContext,0);
+		m_clData->m_clDevice= b3OpenCLUtils::getDevice(m_clData->m_clContext,0);
 		m_clData->m_clQueue = clCreateCommandQueue(m_clData->m_clContext, m_clData->m_clDevice, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(m_clData->m_clDevice);
+        b3OpenCLUtils::printDeviceInfo(m_clData->m_clDevice);
 		btOpenCLDeviceInfo info;
-		btOpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info);
+		b3OpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info);
 		m_clData->m_clDeviceName = info.m_deviceName;
 		m_clData->m_clInitialized = true;
--- a/demo/gpudemo/GpuDemoInternalData.h
+++ b/demo/gpudemo/GpuDemoInternalData.h
@@ -1,7 +1,7 @@
 #ifndef GPU_DEMO_INTERNAL_DATA_H
 #define GPU_DEMO_INTERNAL_DATA_H
-#include "basic_initialize/btOpenCLInclude.h"
+#include "basic_initialize/b3OpenCLInclude.h"
 struct GpuDemoInternalData
 {
--- a/demo/gpudemo/ParticleDemo.cpp
+++ b/demo/gpudemo/ParticleDemo.cpp
@@ -2,7 +2,7 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "OpenGLWindow/ShapeData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #define MSTRINGIFY(A) #A
 static char* particleKernelsString = 
@@ -15,7 +15,7 @@ static char* particleKernelsString =
 #include "parallel_primitives/host/btLauncherCL.h"
 //#include "../../opencl/primitives/AdlPrimitives/Math/Math.h"
 //#include "../../opencl/broadphase_benchmark/btGridBroadphaseCL.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "GpuDemoInternalData.h"
@@ -92,7 +92,7 @@ struct ParticleInternalData
 	cl_kernel m_collideParticlesKernel;
-	btGpuSapBroadphase*	m_broadphaseGPU;
+	b3GpuSapBroadphase*	m_broadphaseGPU;
 	cl_mem		m_clPositionBuffer;
@@ -168,7 +168,7 @@ void ParticleDemo::setupScene(const ConstructionInfo& ci)
 	int maxPairsSmallProxy = 32;
 	float radius = 3.f*m_data->m_simParamCPU[0].m_particleRad;
-	m_data->m_broadphaseGPU = new btGpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
+	m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
 	/*m_data->m_broadphaseGPU = new btGridBroadphaseCl(overlappingPairCache,btVector3(radius,radius,radius), 128, 128, 128,
 		maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
@@ -188,16 +188,16 @@ void ParticleDemo::setupScene(const ConstructionInfo& ci)
 	cl_int pErrNum;
-	cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH);
+	cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH);
-	m_data->m_updatePositionsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog);
+	m_data->m_updatePositionsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
-	m_data->m_updatePositionsKernel2 = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog);
+	m_data->m_updatePositionsKernel2 = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
-	m_data->m_updateAabbsKernel= btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog);
+	m_data->m_updateAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
-	m_data->m_collideParticlesKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog);
+	m_data->m_collideParticlesKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
 	m_instancingRenderer = ci.m_instancingRenderer;
--- a/demo/gpudemo/broadphase/PairBench.cpp
+++ b/demo/gpudemo/broadphase/PairBench.cpp
@@ -4,9 +4,9 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
@@ -95,7 +95,7 @@ __kernel void updateAabbSimple( __global float4* posOrnColors, const int numNode
 struct	PairBenchInternalData
 {
-	btGpuSapBroadphase*	m_broadphaseGPU;
+	b3GpuSapBroadphase*	m_broadphaseGPU;
 	cl_kernel	m_moveObjectsKernel;
 	cl_kernel	m_sineWaveKernel;
@@ -152,13 +152,13 @@ void	PairBench::initPhysics(const ConstructionInfo& ci)
 	initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex);
 	if (m_clData->m_clContext)
 	{
-		m_data->m_broadphaseGPU = new btGpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
+		m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 		cl_program pairBenchProg=0;
 		int errNum=0;
-		m_data->m_moveObjectsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"moveObjectsKernel",&errNum,pairBenchProg);
+		m_data->m_moveObjectsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"moveObjectsKernel",&errNum,pairBenchProg);
-		m_data->m_sineWaveKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"sineWaveKernel",&errNum,pairBenchProg);
+		m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"sineWaveKernel",&errNum,pairBenchProg);
-		m_data->m_colorPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"colorPairsKernel",&errNum,pairBenchProg);
+		m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"colorPairsKernel",&errNum,pairBenchProg);
-		m_data->m_updateAabbSimple = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"updateAabbSimple",&errNum,pairBenchProg);
+		m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"updateAabbSimple",&errNum,pairBenchProg);
 	}
--- a/demo/gpudemo/premake4.lua
+++ b/demo/gpudemo/premake4.lua
@@ -61,9 +61,9 @@ function createProject(vendor)
 			"../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.cpp",
 			"../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.h",
 			"../../btgui/FontFiles/OpenSans.cpp",
-			"../../opencl/basic_initialize/btOpenCLUtils.cpp",
+			"../../opencl/basic_initialize/b3OpenCLUtils.cpp",
-			"../../opencl/basic_initialize/btOpenCLUtils.h",
+			"../../opencl/basic_initialize/b3OpenCLUtils.h",
-			"../../opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp",
+			"../../opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp",
 			"../../opencl/gpu_narrowphase/host/**.cpp",
 			"../../opencl/gpu_narrowphase/host/**.h",
 			"../../opencl/parallel_primitives/host/btBoundSearchCL.cpp",
--- a/demo/gpudemo/rigidbody/ConcaveScene.cpp
+++ b/demo/gpudemo/rigidbody/ConcaveScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include"../../ObjLoader/objLoader.h"
 #include "BulletCommon/btTransform.h"
--- a/demo/gpudemo/rigidbody/GpuCompoundScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuCompoundScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "BulletCommon/btTransform.h"
--- a/demo/gpudemo/rigidbody/GpuConvexScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuConvexScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "../gwenUserInterface.h"
--- a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp
+++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp
@@ -4,15 +4,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 static btKeyboardCallback oldCallback = 0;
@@ -104,15 +104,15 @@ void	GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
 		int errNum=0;
 		cl_program rbProg=0;
-		m_data->m_copyTransformsToVBOKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_rigidBodyKernelString,"copyTransformsToVBOKernel",&errNum,rbProg);
+		m_data->m_copyTransformsToVBOKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_rigidBodyKernelString,"copyTransformsToVBOKernel",&errNum,rbProg);
-		btConfig config;
+		b3Config config;
-		btGpuNarrowPhase* np = new btGpuNarrowPhase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue,config);
+		b3GpuNarrowPhase* np = new b3GpuNarrowPhase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue,config);
-		btGpuSapBroadphase* bp = new btGpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
+		b3GpuSapBroadphase* bp = new b3GpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 		m_data->m_np = np;
 		m_data->m_bp = bp;
-		m_data->m_rigidBodyPipeline = new btGpuRigidBodyPipeline(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue, np, bp);
+		m_data->m_rigidBodyPipeline = new b3GpuRigidBodyPipeline(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue, np, bp);
 		setupScene(ci);
--- a/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h
+++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h
@@ -1,7 +1,7 @@
 #ifndef GPU_RIGIDBODY_INTERNAL_DATA_H
 #define GPU_RIGIDBODY_INTERNAL_DATA_H
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "parallel_primitives/host/btOpenCLArray.h"
 #include "BulletCommon/btVector3.h"
@@ -12,10 +12,10 @@ struct	GpuRigidBodyDemoInternalData
 	btOpenCLArray<btVector4>*	m_instancePosOrnColor;
-	class btGpuRigidBodyPipeline* m_rigidBodyPipeline;
+	class b3GpuRigidBodyPipeline* m_rigidBodyPipeline;
-	class btGpuNarrowPhase* m_np;
+	class b3GpuNarrowPhase* m_np;
-	class btGpuSapBroadphase* m_bp;
+	class b3GpuSapBroadphase* m_bp;
 	GpuRigidBodyDemoInternalData()
 		:m_instancePosOrnColor(0),
--- a/demo/gpudemo/rigidbody/GpuSphereScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuSphereScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "../gwenUserInterface.h"
--- a/docs/GDC2013_ErwinCoumans_GPU_rigid_body_simulation.pdf
+++ b/docs/GDC2013_ErwinCoumans_GPU_rigid_body_simulation.pdf
--- a/opencl/basic_initialize/b3OpenCLInclude.h
+++ b/opencl/basic_initialize/b3OpenCLInclude.h
--- a/opencl/basic_initialize/b3OpenCLUtils.cpp
+++ b/opencl/basic_initialize/b3OpenCLUtils.cpp
@@ -25,8 +25,8 @@ bool gDebugSkipLoadingBinary = false;
 #ifdef _WIN32
 #pragma warning (disable:4996)
 #endif
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
-//#include "btOpenCLInclude.h"
+//#include "b3OpenCLInclude.h"
 #include <stdio.h>
 #include <stdlib.h>
@@ -134,7 +134,7 @@ cl_platform_id btOpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
 	return platform;
 }
-void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo)
+void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo)
 {
 	cl_int ciErrNum;
 	ciErrNum = clGetPlatformInfo(	platform,CL_PLATFORM_VENDOR,BT_MAX_STRING_LENGTH,platformInfo->m_platformVendor,NULL);
@@ -148,7 +148,7 @@ void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInf
 void btOpenCLUtils_printPlatformInfo(cl_platform_id platform)
 {
 	btOpenCLPlatformInfo platformInfo;
-	btOpenCLUtils::getPlatformInfo (platform, &platformInfo);
+	b3OpenCLUtils::getPlatformInfo (platform, &platformInfo);
 	printf("Platform info:\n");
 	printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
 	printf("  CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
@@ -310,7 +310,7 @@ cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int
 //				printf("OpenCL platform details:\n");
 				btOpenCLPlatformInfo platformInfo;
-				btOpenCLUtils::getPlatformInfo(platform, &platformInfo);
+				b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
 				if (retPlatformId)
 					*retPlatformId = platform;
@@ -368,7 +368,7 @@ int btOpenCLUtils_getNumDevices(cl_context cxMainContext)
-void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
+void b3OpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
 {
 	// CL_DEVICE_NAME
 	clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
@@ -453,7 +453,7 @@ void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
 void btOpenCLUtils_printDeviceInfo(cl_device_id device)
 {
 	btOpenCLDeviceInfo info;
-	btOpenCLUtils::getDeviceInfo(device,&info);
+	b3OpenCLUtils::getDeviceInfo(device,&info);
 	printf("Device Info:\n");
 	printf("  CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
 	printf("  CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
--- a/opencl/basic_initialize/b3OpenCLUtils.h
+++ b/opencl/basic_initialize/b3OpenCLUtils.h
@@ -19,7 +19,7 @@ subject to the following restrictions:
 #ifndef BT_OPENCL_UTILS_H
 #define BT_OPENCL_UTILS_H
-#include "btOpenCLInclude.h"
+#include "b3OpenCLInclude.h"
 #ifdef __cplusplus
 extern "C" {
@@ -110,7 +110,7 @@ typedef struct
 ///C++ API for OpenCL utilities: convenience functions
-struct btOpenCLUtils
+struct b3OpenCLUtils
 {
 	/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
 	/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
--- a/opencl/basic_initialize/main.cpp
+++ b/opencl/basic_initialize/main.cpp
@@ -15,7 +15,7 @@ subject to the following restrictions:
 ///original author: Erwin Coumans
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include <stdio.h>
 cl_context			g_cxMainContext;
@@ -28,33 +28,33 @@ int main(int argc, char* argv[])
 	int ciErrNum = 0;
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
-	const char* vendorSDK = btOpenCLUtils::getSdkVendorName();
+	const char* vendorSDK = b3OpenCLUtils::getSdkVendorName();
 	printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
-	int numPlatforms = btOpenCLUtils::getNumPlatforms();
+	int numPlatforms = b3OpenCLUtils::getNumPlatforms();
 	printf("Num Platforms = %d\n", numPlatforms);
 	for (int i=0;i<numPlatforms;i++)
 	{
-		cl_platform_id platform = btOpenCLUtils::getPlatform(i);
+		cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
 		btOpenCLPlatformInfo platformInfo;
-		btOpenCLUtils::getPlatformInfo(platform,&platformInfo);
+		b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
 		printf("--------------------------------\n");
 		printf("Platform info for platform nr %d:\n",i);
 		printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
 		printf("  CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
 		printf("  CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
-		cl_context context = btOpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
+		cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
-		int numDevices = btOpenCLUtils::getNumDevices(context);
+		int numDevices = b3OpenCLUtils::getNumDevices(context);
 		printf("Num Devices = %d\n", numDevices);
 		for (int j=0;j<numDevices;j++)
 		{
-			cl_device_id dev = btOpenCLUtils::getDevice(context,j);
+			cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
 			btOpenCLDeviceInfo devInfo;
-			btOpenCLUtils::getDeviceInfo(dev,&devInfo);
+			b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
-			btOpenCLUtils::printDeviceInfo(dev);
+			b3OpenCLUtils::printDeviceInfo(dev);
 		}
 		clReleaseContext(context);
@@ -65,21 +65,21 @@ int main(int argc, char* argv[])
 	void* glCtx=0;
 	void* glDC = 0;
-	printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
+	printf("Initialize OpenCL using b3OpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
-	g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
+	g_cxMainContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	if (g_cxMainContext)
 	{
-		int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
+		int numDev = b3OpenCLUtils::getNumDevices(g_cxMainContext);
 		for (int i=0;i<numDev;i++)
 		{
 			cl_device_id		device;
-			device = btOpenCLUtils::getDevice(g_cxMainContext,i);
+			device = b3OpenCLUtils::getDevice(g_cxMainContext,i);
 			btOpenCLDeviceInfo clInfo;
-			btOpenCLUtils::getDeviceInfo(device,&clInfo);
+			b3OpenCLUtils::getDeviceInfo(device,&clInfo);
-			btOpenCLUtils::printDeviceInfo(device);
+			b3OpenCLUtils::printDeviceInfo(device);
 			// create a command-queue
 			g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
 			oclCHECKERROR(ciErrNum, CL_SUCCESS);
--- a/opencl/basic_initialize/premake4.lua
+++ b/opencl/basic_initialize/premake4.lua
@@ -15,8 +15,8 @@ function createProject(vendor)
 		files {
 			"main.cpp",
-			"btOpenCLUtils.cpp",
+			"b3OpenCLUtils.cpp",
-			"btOpenCLUtils.h"
+			"b3OpenCLUtils.h"
 		}
 	end
--- a/opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp
+++ b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp
@@ -1,9 +1,9 @@
-#include "btGpuSapBroadphase.h"
+#include "b3GpuSapBroadphase.h"
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btLauncherCL.h"
 #include "BulletCommon/btQuickprof.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "../kernels/sapKernels.h"
@@ -11,7 +11,7 @@
 #include "BulletCommon/btMinMax.h"
-btGpuSapBroadphase::btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q )
+b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q )
 :m_context(ctx),
 m_device(device),
 m_queue(q),
@@ -28,44 +28,44 @@ m_currentBuffer(-1)
 	cl_int errNum=0;
-	cl_program sapProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
+	cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
 	btAssert(errNum==CL_SUCCESS);
-	cl_program sapFastProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
+	cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
 	btAssert(errNum==CL_SUCCESS);
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
-	m_sap2Kernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
+	m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 #if 0
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 #else
 #ifndef __APPLE__
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
 	btAssert(errNum==CL_SUCCESS);
 #else
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 #endif
 #endif
-	m_flipFloatKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
+	m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
-	m_copyAabbsKernel= btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
+	m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
-	m_scatterKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
+	m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
 	m_sorter = new btRadixSort32CL(m_context,m_device,m_queue);
 }
-btGpuSapBroadphase::~btGpuSapBroadphase()
+b3GpuSapBroadphase::~b3GpuSapBroadphase()
 {
 	delete m_sorter;
 	clReleaseKernel(m_scatterKernel);
@@ -97,7 +97,7 @@ static unsigned int FloatFlip(float fl)
 	return f ^ mask;
 };
-void  btGpuSapBroadphase::init3dSap()
+void  b3GpuSapBroadphase::init3dSap()
 {
 	if (m_currentBuffer<0)
 	{
@@ -123,7 +123,7 @@ void  btGpuSapBroadphase::init3dSap()
 		}
 	}
 }
-void  btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
+void  b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
 {
 	btAssert(m_currentBuffer>=0);
 	if (m_currentBuffer<0)
@@ -155,7 +155,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
 }
-void  btGpuSapBroadphase::calculateOverlappingPairsHost()
+void  b3GpuSapBroadphase::calculateOverlappingPairsHost()
 {
 	//test
 	//if (m_currentBuffer>=0)
@@ -249,7 +249,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairsHost()
 }
-void  btGpuSapBroadphase::calculateOverlappingPairs()
+void  b3GpuSapBroadphase::calculateOverlappingPairs()
 {
 	int axis = 0;//todo on GPU for now hardcode
@@ -512,7 +512,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairs()
 }
-void btGpuSapBroadphase::writeAabbsToGpu()
+void b3GpuSapBroadphase::writeAabbsToGpu()
 {
 	m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
 	m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
@@ -520,10 +520,10 @@ void btGpuSapBroadphase::writeAabbsToGpu()
 }
-void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
+void b3GpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
 {
 	int index = userPtr;
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	for (int i=0;i<4;i++)
 	{
 		aabb.m_min[i] = aabbMin[i];
@@ -535,10 +535,10 @@ void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVec
 	m_allAabbsCPU.push_back(aabb);
 }
-void btGpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
+void b3GpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
 {
 	int index = userPtr;
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	for (int i=0;i<4;i++)
 	{
 		aabb.m_min[i] = aabbMin[i];
@@ -550,16 +550,16 @@ void btGpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3&
 	m_allAabbsCPU.push_back(aabb);
 }
-cl_mem	btGpuSapBroadphase::getAabbBufferWS()
+cl_mem	b3GpuSapBroadphase::getAabbBufferWS()
 {
 	return m_allAabbsGPU.getBufferCL();
 }
-int	btGpuSapBroadphase::getNumOverlap()
+int	b3GpuSapBroadphase::getNumOverlap()
 {
 	return m_overlappingPairs.size();
 }
-cl_mem	btGpuSapBroadphase::getOverlappingPairBuffer()
+cl_mem	b3GpuSapBroadphase::getOverlappingPairBuffer()
 {
 	return m_overlappingPairs.getBufferCL();
 }
--- a/opencl/gpu_broadphase/host/b3GpuSapBroadphase.h
+++ b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.h
@@ -6,11 +6,11 @@
 class btVector3;
 #include "parallel_primitives/host/btRadixSort32CL.h"
-#include "btSapAabb.h"
+#include "b3SapAabb.h"
-class btGpuSapBroadphase
+class b3GpuSapBroadphase
 {
 	cl_context				m_context;
@@ -30,24 +30,24 @@ class btGpuSapBroadphase
 	public:
-	btOpenCLArray<btSapAabb>	m_allAabbsGPU;
+	btOpenCLArray<b3SapAabb>	m_allAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_allAabbsCPU;
+	btAlignedObjectArray<b3SapAabb>	m_allAabbsCPU;
-	btOpenCLArray<btSapAabb>	m_smallAabbsGPU;
+	btOpenCLArray<b3SapAabb>	m_smallAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_smallAabbsCPU;
+	btAlignedObjectArray<b3SapAabb>	m_smallAabbsCPU;
-	btOpenCLArray<btSapAabb>	m_largeAabbsGPU;
+	btOpenCLArray<b3SapAabb>	m_largeAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_largeAabbsCPU;
+	btAlignedObjectArray<b3SapAabb>	m_largeAabbsCPU;
 	btOpenCLArray<btInt2>		m_overlappingPairs;
 	//temporary gpu work memory
 	btOpenCLArray<btSortData>	m_gpuSmallSortData;
-	btOpenCLArray<btSapAabb>	m_gpuSmallSortedAabbs;
+	btOpenCLArray<b3SapAabb>	m_gpuSmallSortedAabbs;
-	btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q );
+	b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q );
-	virtual ~btGpuSapBroadphase();
+	virtual ~b3GpuSapBroadphase();
 	void  calculateOverlappingPairs();
 	void  calculateOverlappingPairsHost();
--- a/opencl/gpu_broadphase/host/b3SapAabb.h
+++ b/opencl/gpu_broadphase/host/b3SapAabb.h
@@ -1,7 +1,7 @@
 #ifndef BT_SAP_AABB_H
 #define BT_SAP_AABB_H
-struct btSapAabb
+struct b3SapAabb
 {
 	union
 	{
--- a/opencl/gpu_broadphase/test/main.cpp
+++ b/opencl/gpu_broadphase/test/main.cpp
@@ -14,8 +14,8 @@ subject to the following restrictions:
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
-#include "../host/btGpuSapBroadphase.h"
+#include "../host/b3GpuSapBroadphase.h"
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btFillCL.h"
 #include "parallel_primitives/host/btBoundSearchCL.h"
@@ -47,17 +47,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
+        b3OpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
@@ -73,7 +73,7 @@ inline void broadphaseTest()
 {
 	TEST_INIT;
-	btGpuSapBroadphase* sap = new btGpuSapBroadphase(g_context,g_device,g_queue);
+	b3GpuSapBroadphase* sap = new b3GpuSapBroadphase(g_context,g_device,g_queue);
 	int group=1;
 	int mask=1;
 	btVector3 aabbMin(0,0,0);
--- a/opencl/gpu_broadphase/test/premake4.lua
+++ b/opencl/gpu_broadphase/test/premake4.lua
@@ -16,11 +16,11 @@ function createProject(vendor)
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLUtils.h",
-			"../host/btGpuSapBroadphase.cpp",
+			"../host/b3GpuSapBroadphase.cpp",
-			"../host/btGpuSapBroadphase.h",
+			"../host/b3GpuSapBroadphase.h",
 			"../../parallel_primitives/host/btFillCL.cpp",
 			"../../parallel_primitives/host/btFillCL.h",
 			"../../parallel_primitives/host/btBoundSearchCL.cpp",
--- a/opencl/gpu_narrowphase/host/b3Collidable.h
+++ b/opencl/gpu_narrowphase/host/b3Collidable.h
@@ -14,7 +14,7 @@ enum btShapeTypes
 	MAX_NUM_SHAPE_TYPES,
 };
-struct btCollidable
+struct b3Collidable
 {
 	int m_numChildShapes;
 	float m_radius;
--- a/opencl/gpu_narrowphase/host/b3Contact4.h
+++ b/opencl/gpu_narrowphase/host/b3Contact4.h
@@ -4,7 +4,7 @@
 #include "BulletCommon/btVector3.h"
-ATTRIBUTE_ALIGNED16(struct) btContact4
+ATTRIBUTE_ALIGNED16(struct) b3Contact4
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
--- a/opencl/gpu_narrowphase/host/b3ConvexHullContact.cpp
+++ b/opencl/gpu_narrowphase/host/b3ConvexHullContact.cpp
@@ -20,16 +20,16 @@ subject to the following restrictions:
 //#define BT_DEBUG_SAT_FACE
-#include "ConvexHullContact.h"
+#include "b3ConvexHullContact.h"
 #include <string.h>//memcpy
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
 typedef btAlignedObjectArray<btVector3> btVertexArray;
 #include "BulletCommon/btQuickprof.h"
 #include <float.h> //for FLT_MAX
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "parallel_primitives/host/btLauncherCL.h"
 //#include "AdlQuaternion.h"
@@ -63,21 +63,21 @@ m_totalContactsOut(m_context, m_queue)
 //		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
 //#endif
-		cl_program satProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl");
+		cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_findSeparatingAxisKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg );
+		m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg );
 		btAssert(m_findSeparatingAxisKernel);
 		btAssert(errNum==CL_SUCCESS);
-		m_findConcaveSeparatingAxisKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findConcaveSeparatingAxisKernel",&errNum,satProg );
+		m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findConcaveSeparatingAxisKernel",&errNum,satProg );
 		btAssert(m_findConcaveSeparatingAxisKernel);
 		btAssert(errNum==CL_SUCCESS);
-		m_findCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg );
+		m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg );
 		btAssert(m_findCompoundPairsKernel);
 		btAssert(errNum==CL_SUCCESS);
-		m_processCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg );
+		m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg );
 		btAssert(m_processCompoundPairsKernel);
 		btAssert(errNum==CL_SUCCESS);
 	}
@@ -91,29 +91,29 @@ m_totalContactsOut(m_context, m_queue)
 //		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
 //#endif
-		cl_program satClipContactsProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
+		cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_clipHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg);
+		m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
-		m_clipCompoundsHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg);
+		m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
-        m_findClippingFacesKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg);
+        m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
-        m_clipFacesAndContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg);
+        m_clipFacesAndContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);        
-		m_clipHullHullConcaveConvexKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
+		m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
-		m_extractManifoldAndAddContactKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
+		m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
-        m_newContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
+        m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
                            "newContactReductionKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 	}
@@ -131,27 +131,27 @@ m_totalContactsOut(m_context, m_queue)
 	 if (1)
 	{
 		const char* srcBvh = bvhTraversalKernelCL;
-		cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl");
+		cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_bvhTraversalKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"");
+		m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"");
 		btAssert(errNum==CL_SUCCESS);
 	}
 	 {
 		 const char* primitiveContactsSrc = primitiveContactsKernelsCL;
-		cl_program primitiveContactsProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl");
+		cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_primitiveContactsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,"");
+		m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,"");
 		btAssert(errNum==CL_SUCCESS);
-		m_findConcaveSphereContactsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg );
+		m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg );
 		btAssert(errNum==CL_SUCCESS);
 		btAssert(m_findConcaveSphereContactsKernel);
-		m_processCompoundPairsPrimitivesKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,"");
+		m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,"");
 		btAssert(errNum==CL_SUCCESS);
 		btAssert(m_processCompoundPairsPrimitivesKernel);
@@ -408,19 +408,19 @@ int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4&
 void computeContactPlaneConvex(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
+																const b3RigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
+																const b3Collidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
 		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
-	const btConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
+	const b3ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
 	btVector3 posB = rigidBodies[bodyIndexB].m_pos;
 	btQuaternion ornB = rigidBodies[bodyIndexB].m_quat;
@@ -513,7 +513,7 @@ void computeContactPlaneConvex(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = planeNormalWorld;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -541,13 +541,13 @@ void computeContactPlaneConvex(int pairIndex,
 void computeContactPlaneCompound(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
+																const b3RigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
+																const b3Collidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
@@ -558,7 +558,7 @@ void computeContactPlaneCompound(int pairIndex,
 	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
-	const btConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
+	const b3ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
 	btVector3 posB = rigidBodies[bodyIndexB].m_pos;
 	btQuaternion ornB = rigidBodies[bodyIndexB].m_quat;
@@ -651,7 +651,7 @@ void computeContactPlaneCompound(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = planeNormalWorld;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -680,13 +680,13 @@ void computeContactPlaneCompound(int pairIndex,
 void	computeContactSphereConvex(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
+																const b3RigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
+																const b3Collidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
@@ -814,7 +814,7 @@ void	computeContactSphereConvex(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = normalOnSurfaceB1;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -833,15 +833,15 @@ void	computeContactSphereConvex(int pairIndex,
 void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs,
-			const btOpenCLArray<btRigidBodyCL>* bodyBuf,
+			const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
-			btOpenCLArray<btContact4>* contactOut, int& nContacts,
+			btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
 			int maxContactCapacity,
-			const btOpenCLArray<btConvexPolyhedronCL>& convexData,
+			const btOpenCLArray<b3ConvexPolyhedronCL>& convexData,
 			const btOpenCLArray<btVector3>& gpuVertices,
 			const btOpenCLArray<btVector3>& gpuUniqueEdges,
 			const btOpenCLArray<btGpuFace>& gpuFaces,
 			const btOpenCLArray<int>& gpuIndices,
-			const btOpenCLArray<btCollidable>& gpuCollidables,
+			const btOpenCLArray<b3Collidable>& gpuCollidables,
 			const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
 			const btOpenCLArray<btYetAnotherAabb>& clAabbsWS,
@@ -850,7 +850,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
            btOpenCLArray<btVector3>& worldNormalsAGPU,
            btOpenCLArray<btVector3>& worldVertsA1GPU,
            btOpenCLArray<btVector3>& worldVertsB2GPU,    
-			btAlignedObjectArray<class btOptimizedBvh*>& bvhData,
+			btAlignedObjectArray<class b3OptimizedBvh*>& bvhData,
 			btOpenCLArray<btQuantizedBvhNode>*	treeNodesGPU,
 			btOpenCLArray<btBvhSubtreeInfo>*	subTreesGPU,
 			int numObjects,
@@ -870,12 +870,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 	btAlignedObjectArray<btInt2> hostPairs;
 	pairs->copyToHost(hostPairs);
-	btAlignedObjectArray<btRigidBodyCL> hostBodyBuf;
+	btAlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
 	bodyBuf->copyToHost(hostBodyBuf);
-	btAlignedObjectArray<btConvexPolyhedronCL> hostConvexData;
+	btAlignedObjectArray<b3ConvexPolyhedronCL> hostConvexData;
 	convexData.copyToHost(hostConvexData);
 	btAlignedObjectArray<btVector3> hostVertices;
@@ -887,7 +887,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 	gpuFaces.copyToHost(hostFaces);
 	btAlignedObjectArray<int> hostIndices;
 	gpuIndices.copyToHost(hostIndices);
-	btAlignedObjectArray<btCollidable> hostCollidables;
+	btAlignedObjectArray<b3Collidable> hostCollidables;
 	gpuCollidables.copyToHost(hostCollidables);
 	btAlignedObjectArray<btGpuChildShape> cpuChildShapes;
@@ -896,7 +896,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 	btAlignedObjectArray<btInt4> hostTriangleConvexPairs;
-	btAlignedObjectArray<btContact4> hostContacts;
+	btAlignedObjectArray<b3Contact4> hostContacts;
 	if (nContacts)
 	{
 		contactOut->copyToHost(hostContacts);
@@ -1355,7 +1355,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 			clFinish(m_queue);
 			nContacts = m_totalContactsOut.at(0);
 			contactOut->resize(nContacts);
-			btAlignedObjectArray<btContact4> cpuContacts;
+			btAlignedObjectArray<b3Contact4> cpuContacts;
 			contactOut->copyToHost(cpuContacts);
 //			printf("nContacts after = %d\n", nContacts);
 		}
--- a/opencl/gpu_narrowphase/host/b3ConvexHullContact.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexHullContact.h
@@ -3,15 +3,15 @@
 #define _CONVEX_HULL_CONTACT_H
 #include "parallel_primitives/host/btOpenCLArray.h"
-#include "btRigidBodyCL.h"
+#include "b3RigidBodyCL.h"
 #include "BulletCommon/btAlignedObjectArray.h"
-#include "btConvexUtility.h"
+#include "b3ConvexUtility.h"
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
-#include "btCollidable.h"
+#include "b3Collidable.h"
-#include "btContact4.h"
+#include "b3Contact4.h"
 #include "parallel_primitives/host/btInt2.h"
 #include "parallel_primitives/host/btInt4.h"
-#include "btOptimizedBvh.h"
+#include "b3OptimizedBvh.h"
 //#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
@@ -65,15 +65,15 @@ struct GpuSatCollision
 	void computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs, 
-			const btOpenCLArray<btRigidBodyCL>* bodyBuf,
+			const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
-			btOpenCLArray<btContact4>* contactOut, int& nContacts,
+			btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
 			int maxContactCapacity,
-			const btOpenCLArray<btConvexPolyhedronCL>& hostConvexData,
+			const btOpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
 			const btOpenCLArray<btVector3>& vertices,
 			const btOpenCLArray<btVector3>& uniqueEdges,
 			const btOpenCLArray<btGpuFace>& faces,
 			const btOpenCLArray<int>& indices,
-			const btOpenCLArray<btCollidable>& gpuCollidables,
+			const btOpenCLArray<b3Collidable>& gpuCollidables,
 			const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
 			const btOpenCLArray<btYetAnotherAabb>& clAabbs,
@@ -82,7 +82,7 @@ struct GpuSatCollision
           btOpenCLArray<btVector3>& worldNormalsAGPU,
           btOpenCLArray<btVector3>& worldVertsA1GPU,
           btOpenCLArray<btVector3>& worldVertsB2GPU,
-		   btAlignedObjectArray<class btOptimizedBvh*>& bvhData,
+		   btAlignedObjectArray<class b3OptimizedBvh*>& bvhData,
 		   btOpenCLArray<btQuantizedBvhNode>*	treeNodesGPU,
 			btOpenCLArray<btBvhSubtreeInfo>*	subTreesGPU,
 			int numObjects,
--- a/opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h
@@ -10,7 +10,7 @@ struct btGpuFace
 	int m_numIndices;
 };
-ATTRIBUTE_ALIGNED16(struct) btConvexPolyhedronCL
+ATTRIBUTE_ALIGNED16(struct) b3ConvexPolyhedronCL
 {
 	btVector3		m_localCenter;
 	btVector3		m_extents;
--- a/opencl/gpu_narrowphase/host/b3ConvexUtility.cpp
+++ b/opencl/gpu_narrowphase/host/b3ConvexUtility.cpp
@@ -14,21 +14,21 @@ subject to the following restrictions:
 //Originally written by Erwin Coumans
-#include "btConvexUtility.h"
+#include "b3ConvexUtility.h"
 #include "BulletGeometry/btConvexHullComputer.h"
 #include "BulletGeometry/btGrahamScan2dConvexHull.h"
 #include "BulletCommon/btQuaternion.h"
 #include "BulletCommon/btHashMap.h"
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
-btConvexUtility::~btConvexUtility()
+b3ConvexUtility::~b3ConvexUtility()
 {
 }
-bool	btConvexUtility::initializePolyhedralFeatures(const btVector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
+bool	b3ConvexUtility::initializePolyhedralFeatures(const btVector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
 {
@@ -310,7 +310,7 @@ struct btInternalEdge
 //
 #ifdef TEST_INTERNAL_OBJECTS
-bool btConvexUtility::testContainment() const
+bool b3ConvexUtility::testContainment() const
 {
 	for(int p=0;p<8;p++)
 	{
@@ -336,7 +336,7 @@ bool btConvexUtility::testContainment() const
 }
 #endif
-void	btConvexUtility::initialize()
+void	b3ConvexUtility::initialize()
 {
 	btHashMap<btInternalVertexPair,btInternalEdge> edges;
--- a/opencl/gpu_narrowphase/host/b3ConvexUtility.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexUtility.h
@@ -20,7 +20,7 @@ subject to the following restrictions:
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btTransform.h"
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
 struct btMyFace
@@ -29,7 +29,7 @@ struct btMyFace
 	btScalar	m_plane[4];
 };
-ATTRIBUTE_ALIGNED16(class) btConvexUtility
+ATTRIBUTE_ALIGNED16(class) b3ConvexUtility
 {
 	public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
@@ -45,10 +45,10 @@ ATTRIBUTE_ALIGNED16(class) btConvexUtility
 	btAlignedObjectArray<btVector3> m_uniqueEdges;
-	btConvexUtility()
+	b3ConvexUtility()
 	{
 	}
-	virtual ~btConvexUtility();
+	virtual ~b3ConvexUtility();
 	bool	initializePolyhedralFeatures(const btVector3* orgVertices, int numVertices, bool mergeCoplanarTriangles=true);
--- a/opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp
+++ b/opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp
@@ -14,22 +14,22 @@ subject to the following restrictions:
 */
-#include "btOptimizedBvh.h"
+#include "b3OptimizedBvh.h"
-#include "btStridingMeshInterface.h"
+#include "b3StridingMeshInterface.h"
 #include "BulletGeometry/btAabbUtil2.h"
 #include "BulletCommon/btIDebugDraw.h"
-btOptimizedBvh::btOptimizedBvh()
+b3OptimizedBvh::b3OptimizedBvh()
 { 
 }
-btOptimizedBvh::~btOptimizedBvh()
+b3OptimizedBvh::~b3OptimizedBvh()
 {
 }
-void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
+void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
 {
 	m_useQuantization = useQuantizedAabbCompression;
@@ -80,7 +80,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 	struct	QuantizedNodeTriangleCallback : public btInternalTriangleIndexCallback
 	{
 		QuantizedNodeArray&	m_triangleNodes;
-		const btQuantizedBvh* m_optimizedTree; // for quantization
+		const b3QuantizedBvh* m_optimizedTree; // for quantization
 		QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other)
 		{
@@ -89,7 +89,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 			return *this;
 		}
-		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const btQuantizedBvh* tree)
+		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const b3QuantizedBvh* tree)
 			:m_triangleNodes(triangleNodes),m_optimizedTree(tree)
 		{
 		}
@@ -203,7 +203,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
-void	btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+void	b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
 {
 	if (m_useQuantization)
 	{
@@ -230,7 +230,7 @@ void	btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector
-void	btOptimizedBvh::refitPartial(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+void	b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
 {
 	//incrementally initialize quantization values
 	btAssert(m_useQuantization);
@@ -269,7 +269,7 @@ void	btOptimizedBvh::refitPartial(btStridingMeshInterface* meshInterface,const b
 }
-void	btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index)
+void	b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index)
 {
 	(void)index;
@@ -382,10 +382,10 @@ void	btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int f
 }
 ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-btOptimizedBvh* btOptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
 {
-	btQuantizedBvh* bvh = btQuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
+	b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
 	//we don't add additional data so just do a static upcast
-	return static_cast<btOptimizedBvh*>(bvh);
+	return static_cast<b3OptimizedBvh*>(bvh);
 }
--- a/opencl/gpu_narrowphase/host/b3OptimizedBvh.h
+++ b/opencl/gpu_narrowphase/host/b3OptimizedBvh.h
@@ -18,13 +18,13 @@ subject to the following restrictions:
 #ifndef BT_OPTIMIZED_BVH_H
 #define BT_OPTIMIZED_BVH_H
-#include "btQuantizedBvh.h"
+#include "b3QuantizedBvh.h"
-class btStridingMeshInterface;
+class b3StridingMeshInterface;
-///The btOptimizedBvh extends the btQuantizedBvh to create AABB tree for triangle meshes, through the btStridingMeshInterface.
+///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface.
-ATTRIBUTE_ALIGNED16(class) btOptimizedBvh : public btQuantizedBvh
+ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh
 {
 public:
@@ -34,27 +34,27 @@ protected:
 public:
-	btOptimizedBvh();
+	b3OptimizedBvh();
-	virtual ~btOptimizedBvh();
+	virtual ~b3OptimizedBvh();
-	void	build(btStridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax);
+	void	build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax);
-	void	refit(btStridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax);
+	void	refit(b3StridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax);
-	void	refitPartial(btStridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax);
+	void	refitPartial(b3StridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax);
-	void	updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
+	void	updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
 	/// Data buffer MUST be 16 byte aligned
 	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const
 	{
-		return btQuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
+		return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
 	}
 	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-	static btOptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+	static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
 };
--- a/opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp
+++ b/opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp
@@ -13,7 +13,7 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
-#include "btQuantizedBvh.h"
+#include "b3QuantizedBvh.h"
 #include "BulletGeometry/btAabbUtil2.h"
 #include "BulletCommon/btIDebugDraw.h"
@@ -21,7 +21,7 @@ subject to the following restrictions:
 #define RAYAABB2
-btQuantizedBvh::btQuantizedBvh() : 
+b3QuantizedBvh::b3QuantizedBvh() : 
 					m_bulletVersion(BT_BULLET_VERSION),
 					m_useQuantization(false), 
 					m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY)
@@ -37,7 +37,7 @@ btQuantizedBvh::btQuantizedBvh() :
-void btQuantizedBvh::buildInternal()
+void b3QuantizedBvh::buildInternal()
 {
 	///assumes that caller filled in the m_quantizedLeafNodes
 	m_useQuantization = true;
@@ -88,7 +88,7 @@ btVector3 color[4]=
-void	btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin)
+void	b3QuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin)
 {
 	//enlarge the AABB to avoid division by zero when initializing the quantization values
 	btVector3 clampValue(quantizationMargin,quantizationMargin,quantizationMargin);
@@ -102,7 +102,7 @@ void	btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btV
-btQuantizedBvh::~btQuantizedBvh()
+b3QuantizedBvh::~b3QuantizedBvh()
 {
 }
@@ -111,7 +111,7 @@ int gStackDepth = 0;
 int gMaxStackDepth = 0;
 #endif //DEBUG_TREE_BUILDING
-void	btQuantizedBvh::buildTree	(int startIndex,int endIndex)
+void	b3QuantizedBvh::buildTree	(int startIndex,int endIndex)
 {
 #ifdef DEBUG_TREE_BUILDING
 	gStackDepth++;
@@ -194,7 +194,7 @@ void	btQuantizedBvh::buildTree	(int startIndex,int endIndex)
 }
-void	btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
+void	b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
 {
 	btAssert(m_useQuantization);
@@ -227,7 +227,7 @@ void	btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChild
 }
-int	btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis)
+int	b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis)
 {
 	int i;
 	int splitIndex =startIndex;
@@ -281,7 +281,7 @@ int	btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int sp
 }
-int	btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
+int	b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
 {
 	int i;
@@ -310,7 +310,7 @@ int	btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
-void	btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	//either choose recursive traversal (walkTree) or stackless (walkStacklessTree)
@@ -350,7 +350,7 @@ void	btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb
 int maxIterations = 0;
-void	btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	btAssert(!m_useQuantization);
@@ -395,7 +395,7 @@ void	btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const
 /*
 ///this was the original recursive traversal, before we optimized towards stackless traversal
-void	btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax);
 	if (aabbOverlap)
@@ -414,7 +414,7 @@ void	btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback
 }
 */
-void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
 {
 	btAssert(m_useQuantization);
@@ -446,7 +446,7 @@ void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantize
-void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(!m_useQuantization);
@@ -538,7 +538,7 @@ void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
-void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(m_useQuantization);
@@ -664,7 +664,7 @@ void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
 }
-void	btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(m_useQuantization);
@@ -730,7 +730,7 @@ void	btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
 }
 //This traversal can be called from Playstation 3 SPU
-void	btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+void	b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
 {
 	btAssert(m_useQuantization);
@@ -753,13 +753,13 @@ void	btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallba
 }
-void	btQuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const
+void	b3QuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const
 {
 	reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,btVector3(0,0,0),btVector3(0,0,0));
 }
-void	btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	//always use stackless
@@ -787,7 +787,7 @@ void	btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCa
 }
-void	btQuantizedBvh::swapLeafNodes(int i,int splitIndex)
+void	b3QuantizedBvh::swapLeafNodes(int i,int splitIndex)
 {
 	if (m_useQuantization)
 	{
@@ -802,7 +802,7 @@ void	btQuantizedBvh::swapLeafNodes(int i,int splitIndex)
 	}
 }
-void	btQuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex)
+void	b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex)
 {
 	if (m_useQuantization)
 	{
@@ -825,15 +825,15 @@ static const unsigned BVH_ALIGNMENT_BLOCKS = 2;
 #endif
-unsigned int btQuantizedBvh::getAlignmentSerializationPadding()
+unsigned int b3QuantizedBvh::getAlignmentSerializationPadding()
 {
 	// I changed this to 0 since the extra padding is not needed or used.
 	return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT;
 }
-unsigned btQuantizedBvh::calculateSerializeBufferSize() const
+unsigned b3QuantizedBvh::calculateSerializeBufferSize() const
 {
-	unsigned baseSize = sizeof(btQuantizedBvh) + getAlignmentSerializationPadding();
+	unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding();
 	baseSize += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
 	if (m_useQuantization)
 	{
@@ -842,7 +842,7 @@ unsigned btQuantizedBvh::calculateSerializeBufferSize() const
 	return baseSize + m_curNodeIndex * sizeof(btOptimizedBvhNode);
 }
-bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
+bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
 {
 	btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
 	m_subtreeHeaderCount = m_SubtreeHeaders.size();
@@ -855,11 +855,11 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	}
 */
-	btQuantizedBvh *targetBvh = (btQuantizedBvh *)o_alignedDataBuffer;
+	b3QuantizedBvh *targetBvh = (b3QuantizedBvh *)o_alignedDataBuffer;
 	// construct the class so the virtual function table, etc will be set up
 	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
-	new (targetBvh) btQuantizedBvh;
+	new (targetBvh) b3QuantizedBvh;
 	if (i_swapEndian)
 	{
@@ -886,7 +886,7 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	targetBvh->m_useQuantization = m_useQuantization;
 	unsigned char *nodeData = (unsigned char *)targetBvh;
-	nodeData += sizeof(btQuantizedBvh);
+	nodeData += sizeof(b3QuantizedBvh);
 	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
 	nodeData += sizeToAdd;
@@ -1028,14 +1028,14 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	return true;
 }
-btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
 {
 	if (i_alignedDataBuffer == NULL)// || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))
 	{
 		return NULL;
 	}
-	btQuantizedBvh *bvh = (btQuantizedBvh *)i_alignedDataBuffer;
+	b3QuantizedBvh *bvh = (b3QuantizedBvh *)i_alignedDataBuffer;
 	if (i_swapEndian)
 	{
@@ -1058,7 +1058,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 	}
 	unsigned char *nodeData = (unsigned char *)bvh;
-	nodeData += sizeof(btQuantizedBvh);
+	nodeData += sizeof(b3QuantizedBvh);
 	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
 	nodeData += sizeToAdd;
@@ -1067,7 +1067,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 	// Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor
 	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
-	new (bvh) btQuantizedBvh(*bvh, false);
+	new (bvh) b3QuantizedBvh(*bvh, false);
 	if (bvh->m_useQuantization)
 	{
@@ -1135,7 +1135,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 }
 // Constructor that prevents btVector3's default constructor from being called
-btQuantizedBvh::btQuantizedBvh(btQuantizedBvh &self, bool /* ownsMemory */) :
+b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) :
 m_bvhAabbMin(self.m_bvhAabbMin),
 m_bvhAabbMax(self.m_bvhAabbMax),
 m_bvhQuantization(self.m_bvhQuantization),
@@ -1144,7 +1144,7 @@ m_bulletVersion(BT_BULLET_VERSION)
 }
-void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
+void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
 {
 	m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax);
 	m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin);
@@ -1215,7 +1215,7 @@ void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
 	}
 }
-void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
+void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
 {
 	m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax);
 	m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin);
@@ -1290,7 +1290,7 @@ void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
 ///fills the dataBuffer and returns the struct name (and 0 on failure)
-const char*	btQuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
+const char*	b3QuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
 {
 	btAssert(0);
 	return 0;
--- a/opencl/gpu_narrowphase/host/b3QuantizedBvh.h
+++ b/opencl/gpu_narrowphase/host/b3QuantizedBvh.h
@@ -168,10 +168,10 @@ typedef btAlignedObjectArray<btQuantizedBvhNode>	QuantizedNodeArray;
 typedef btAlignedObjectArray<btBvhSubtreeInfo>		BvhSubtreeInfoArray;
-///The btQuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
+///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
 ///It is used by the btBvhTriangleMeshShape as midphase, and by the btMultiSapBroadphase.
 ///It is recommended to use quantization for better performance and lower memory requirements.
-ATTRIBUTE_ALIGNED16(class) btQuantizedBvh
+ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh
 {
 public:
 	enum btTraversalMode
@@ -334,9 +334,9 @@ public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
-	btQuantizedBvh();
+	b3QuantizedBvh();
-	virtual ~btQuantizedBvh();
+	virtual ~b3QuantizedBvh();
 	///***************************************** expert/internal use only *************************
@@ -468,7 +468,7 @@ public:
 	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const;
 	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-	static btQuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+	static b3QuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
 	static unsigned int getAlignmentSerializationPadding();
 //////////////////////////////////////////////////////////////////////
@@ -495,7 +495,7 @@ private:
 	// Special "copy" constructor that allows for in-place deserialization
 	// Prevents btVector3's default constructor from being called, but doesn't inialize much else
 	// ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need)
-	btQuantizedBvh(btQuantizedBvh &other, bool ownsMemory);
+	b3QuantizedBvh(b3QuantizedBvh &other, bool ownsMemory);
 }
 ;
@@ -572,7 +572,7 @@ struct	btQuantizedBvhDoubleData
 };
-SIMD_FORCE_INLINE	int	btQuantizedBvh::calculateSerializeBufferSizeNew() const
+SIMD_FORCE_INLINE	int	b3QuantizedBvh::calculateSerializeBufferSizeNew() const
 {
 	return sizeof(btQuantizedBvhData);
 }
--- a/opencl/gpu_narrowphase/host/b3RigidBodyCL.h
+++ b/opencl/gpu_narrowphase/host/b3RigidBodyCL.h
@@ -4,7 +4,7 @@
 #include "BulletCommon/btScalar.h"
 #include "BulletCommon/btMatrix3x3.h"
-ATTRIBUTE_ALIGNED16(struct) btRigidBodyCL
+ATTRIBUTE_ALIGNED16(struct) b3RigidBodyCL
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
--- a/opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp
+++ b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp
@@ -13,16 +13,16 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
-#include "btStridingMeshInterface.h"
+#include "b3StridingMeshInterface.h"
-btStridingMeshInterface::~btStridingMeshInterface()
+b3StridingMeshInterface::~b3StridingMeshInterface()
 {
 }
-void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	(void)aabbMin;
 	(void)aabbMax;
@@ -173,7 +173,7 @@ void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
 	}
 }
-void	btStridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax)
+void	b3StridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax)
 {
 	struct	AabbCalculationCallback : public btInternalTriangleIndexCallback
--- a/opencl/gpu_narrowphase/host/b3StridingMeshInterface.h
+++ b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.h
@@ -17,7 +17,7 @@ subject to the following restrictions:
 #define BT_STRIDING_MESHINTERFACE_H
 #include "BulletCommon/btVector3.h"
-#include "btTriangleCallback.h"
+#include "b3TriangleCallback.h"
 //#include "btConcaveShape.h"
@@ -27,10 +27,10 @@ enum  	PHY_ScalarType {
 };
-///	The btStridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
+///	The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
 /// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
 /// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
-ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface
+ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
 {
 	protected:
@@ -39,12 +39,12 @@ ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface
 	public:
 		BT_DECLARE_ALIGNED_ALLOCATOR();
-		btStridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
+		b3StridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
 		{
 		}
-		virtual ~btStridingMeshInterface();
+		virtual ~b3StridingMeshInterface();
@@ -157,7 +157,7 @@ struct	btStridingMeshInterfaceData
-SIMD_FORCE_INLINE	int	btStridingMeshInterface::calculateSerializeBufferSize() const
+SIMD_FORCE_INLINE	int	b3StridingMeshInterface::calculateSerializeBufferSize() const
 {
 	return sizeof(btStridingMeshInterfaceData);
 }
--- a/opencl/gpu_narrowphase/host/b3TriangleCallback.cpp
+++ b/opencl/gpu_narrowphase/host/b3TriangleCallback.cpp
@@ -13,9 +13,9 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
-#include "btTriangleCallback.h"
+#include "b3TriangleCallback.h"
-btTriangleCallback::~btTriangleCallback()
+b3TriangleCallback::~b3TriangleCallback()
 {
 }
--- a/opencl/gpu_narrowphase/host/b3TriangleCallback.h
+++ b/opencl/gpu_narrowphase/host/b3TriangleCallback.h
@@ -19,13 +19,13 @@ subject to the following restrictions:
 #include "BulletCommon/btVector3.h"
-///The btTriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
+///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
 ///This callback is called by processAllTriangles for all btConcaveShape derived class, such as  btBvhTriangleMeshShape, btStaticPlaneShape and btHeightfieldTerrainShape.
-class btTriangleCallback
+class b3TriangleCallback
 {
 public:
-	virtual ~btTriangleCallback();
+	virtual ~b3TriangleCallback();
 	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex) = 0;
 };
--- a/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp
+++ b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp
@@ -13,9 +13,9 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
-#include "btTriangleIndexVertexArray.h"
+#include "b3TriangleIndexVertexArray.h"
-btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride)
+b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride)
 : m_hasAabb(0)
 {
 	btIndexedMesh mesh;
@@ -31,12 +31,12 @@ btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* tri
 }
-btTriangleIndexVertexArray::~btTriangleIndexVertexArray()
+b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray()
 {
 }
-void	btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
+void	b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
 {
 	btAssert(subpart< getNumSubParts() );
@@ -56,7 +56,7 @@ void	btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertex
 	indicestype = mesh.m_indexType;
 }
-void	btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
+void	b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
 {
 	const btIndexedMesh& mesh = m_indexedMeshes[subpart];
@@ -73,20 +73,20 @@ void	btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned
 	indicestype = mesh.m_indexType;
 }
-bool	btTriangleIndexVertexArray::hasPremadeAabb() const
+bool	b3TriangleIndexVertexArray::hasPremadeAabb() const
 {
 	return (m_hasAabb == 1);
 }
-void	btTriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
+void	b3TriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
 {
 	m_aabbMin = aabbMin;
 	m_aabbMax = aabbMax;
 	m_hasAabb = 1; // this is intentionally an int see notes in header
 }
-void	btTriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
+void	b3TriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
 {
 	*aabbMin = m_aabbMin;
 	*aabbMax = m_aabbMax;
--- a/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h
+++ b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h
@@ -16,12 +16,12 @@ subject to the following restrictions:
 #ifndef BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
 #define BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
-#include "btStridingMeshInterface.h"
+#include "b3StridingMeshInterface.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btScalar.h"
-///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a btTriangleIndexVertexArray using addIndexedMesh.
+///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh.
 ///Instead of the number of indices, we pass the number of triangles.
 ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
 {
@@ -37,7 +37,7 @@ ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
   int                     m_vertexStride;
   // The index type is set when adding an indexed mesh to the
-   // btTriangleIndexVertexArray, do not set it manually
+   // b3TriangleIndexVertexArray, do not set it manually
   PHY_ScalarType m_indexType;
   // The vertex type has a default type similar to Bullet's precision mode (float or double)
@@ -61,11 +61,11 @@ ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
 typedef btAlignedObjectArray<btIndexedMesh>	IndexedMeshArray;
-///The btTriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.
+///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.
 ///Additional meshes can be added using addIndexedMesh
 ///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays.
-///So keep those arrays around during the lifetime of this btTriangleIndexVertexArray.
+///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray.
-ATTRIBUTE_ALIGNED16( class) btTriangleIndexVertexArray : public btStridingMeshInterface
+ATTRIBUTE_ALIGNED16( class) b3TriangleIndexVertexArray : public b3StridingMeshInterface
 {
 protected:
 	IndexedMeshArray	m_indexedMeshes;
@@ -78,14 +78,14 @@ public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
-	btTriangleIndexVertexArray() : m_hasAabb(0)
+	b3TriangleIndexVertexArray() : m_hasAabb(0)
 	{
 	}
-	virtual ~btTriangleIndexVertexArray();
+	virtual ~b3TriangleIndexVertexArray();
 	//just to be backwards compatible
-	btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride);
+	b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride);
 	void	addIndexedMesh(const btIndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)
 	{
--- a/opencl/gpu_narrowphase/test/main.cpp
+++ b/opencl/gpu_narrowphase/test/main.cpp
@@ -14,8 +14,8 @@ subject to the following restrictions:
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
-#include "../host/ConvexHullContact.h"
+#include "../host/b3ConvexHullContact.h"
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btFillCL.h"
@@ -23,7 +23,7 @@ subject to the following restrictions:
 #include "parallel_primitives/host/btRadixSort32CL.h"
 #include "parallel_primitives/host/btPrefixScanCL.h"
 #include "BulletCommon/CommandLineArgs.h"
-#include "../host/ConvexHullContact.h"
+#include "../host/b3ConvexHullContact.h"
 #include "BulletCommon/btMinMax.h"
 int g_nPassed = 0;
@@ -49,17 +49,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
+        b3OpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
--- a/opencl/gpu_narrowphase/test/premake4.lua
+++ b/opencl/gpu_narrowphase/test/premake4.lua
@@ -16,9 +16,9 @@ function createProject(vendor)
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/**.cpp",
 			"../host/**.h",
 			"../../parallel_primitives/host/btFillCL.cpp",
--- a/opencl/gpu_rigidbody/host/b3Config.h
+++ b/opencl/gpu_rigidbody/host/b3Config.h
@@ -1,7 +1,7 @@
 #ifndef BT_CONFIG_H
 #define BT_CONFIG_H
-struct	btConfig
+struct	b3Config
 {
 	int	m_maxConvexBodies;
 	int	m_maxConvexShapes;
@@ -18,7 +18,7 @@ struct	btConfig
 	int m_maxTriConvexPairCapacity;
-	btConfig()
+	b3Config()
 		:m_maxConvexBodies(32*1024),
 		m_maxConvexShapes(8192),
 		m_maxVerticesPerFace(64),
@@ -27,7 +27,8 @@ struct	btConfig
 		m_maxConvexIndices(8192),
 		m_maxConvexUniqueEdges(8192),
 		m_maxCompoundChildShapes(8192),
-		m_maxTriConvexPairCapacity(512*1024)
+		//m_maxTriConvexPairCapacity(512*1024)
 		m_maxTriConvexPairCapacity(256*1024)
 	{
 		m_maxBroadphasePairs = 16*m_maxConvexBodies;
 		m_maxContactCapacity = m_maxBroadphasePairs;
--- a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp
@@ -1,15 +1,15 @@
-#include "btGpuBatchingPgsSolver.h"
+#include "b3GpuBatchingPgsSolver.h"
 #include "../../parallel_primitives/host/btRadixSort32CL.h"
 #include "BulletCommon/btQuickprof.h"
 #include "../../parallel_primitives/host/btLauncherCL.h"
 #include "../../parallel_primitives/host/btBoundSearchCL.h"
 #include "../../parallel_primitives/host/btPrefixScanCL.h"
 #include <string.h>
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
-#include "../host/btConfig.h"
+#include "../host/b3Config.h"
-#include "Solver.h"
+#include "b3Solver.h"
 #define SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl"
@@ -49,11 +49,11 @@ struct	btGpuBatchingPgsSolverInternalData
 	int m_pairCapacity;
 	int m_nIterations;
-	btOpenCLArray<btGpuConstraint4>* m_contactCGPU;
+	btOpenCLArray<b3GpuConstraint4>* m_contactCGPU;
 	btOpenCLArray<unsigned int>* m_numConstraints;
 	btOpenCLArray<unsigned int>* m_offsets;
-	Solver*		m_solverGPU;		
+	b3Solver*		m_solverGPU;		
 	cl_kernel m_batchingKernel;
 	cl_kernel m_batchingKernelNew;
@@ -69,21 +69,21 @@ struct	btGpuBatchingPgsSolverInternalData
 	class btPrefixScanCL*	m_scan;
 	btOpenCLArray<btSortData>* m_sortDataBuffer;
-	btOpenCLArray<btContact4>* m_contactBuffer;
+	btOpenCLArray<b3Contact4>* m_contactBuffer;
-	btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
+	btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
 	btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
-	btOpenCLArray<btContact4>* m_pBufContactOutGPU;
+	btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
 	btAlignedObjectArray<unsigned int> m_idxBuffer;
 	btAlignedObjectArray<btSortData> m_sortData;
-	btAlignedObjectArray<btContact4> m_old;
+	btAlignedObjectArray<b3Contact4> m_old;
 };
-btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity)
+b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity)
 {
 	m_data = new btGpuBatchingPgsSolverInternalData;
 	m_data->m_context = ctx;
@@ -92,11 +92,11 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	m_data->m_pairCapacity = pairCapacity;
 	m_data->m_nIterations = 4;
-	m_data->m_bodyBufferGPU = new btOpenCLArray<btRigidBodyCL>(ctx,q);
+	m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,q);
 	m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,q);
-	m_data->m_pBufContactOutGPU = new btOpenCLArray<btContact4>(ctx,q);
+	m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,q);
-	m_data->m_solverGPU = new Solver(ctx,device,q,512*1024);
+	m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024);
 	m_data->m_sort32 = new btRadixSort32CL(ctx,device,m_data->m_queue);
 	m_data->m_scan = new btPrefixScanCL(ctx,device,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
@@ -105,12 +105,12 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
 	m_data->m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,m_data->m_queue,sortSize);
-	m_data->m_contactBuffer = new btOpenCLArray<btContact4>(ctx,m_data->m_queue);
+	m_data->m_contactBuffer = new btOpenCLArray<b3Contact4>(ctx,m_data->m_queue);
 	m_data->m_numConstraints = new btOpenCLArray<unsigned int>(ctx,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
 	m_data->m_numConstraints->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
-	m_data->m_contactCGPU = new btOpenCLArray<btGpuConstraint4>(ctx,q,pairCapacity);
+	m_data->m_contactCGPU = new btOpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
 	m_data->m_offsets = new btOpenCLArray<unsigned int>( ctx,m_data->m_queue, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
 	m_data->m_offsets->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
@@ -131,54 +131,54 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	{
-		cl_program solveContactProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
+		cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
 		btAssert(solveContactProg);
-		cl_program solveFrictionProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
+		cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
 		btAssert(solveFrictionProg);
-		cl_program solverSetup2Prog= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
+		cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
 		btAssert(solverSetup2Prog);
-		cl_program solverSetupProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
+		cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
 		btAssert(solverSetupProg);
-		m_data->m_solveFrictionKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
+		m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
 		btAssert(m_data->m_solveFrictionKernel);
-		m_data->m_solveContactKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
+		m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
 		btAssert(m_data->m_solveContactKernel);
-		m_data->m_contactToConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
+		m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
 		btAssert(m_data->m_contactToConstraintKernel);
-		m_data->m_setSortDataKernel =  btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_setSortDataKernel =  b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_setSortDataKernel);
-		m_data->m_reorderContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_reorderContactKernel);
-		m_data->m_copyConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_copyConstraintKernel);
 	}
 	{
-		cl_program batchingProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
+		cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
 		btAssert(batchingProg);
-		m_data->m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
+		m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
 		btAssert(m_data->m_batchingKernel);
 	}
 	{
-		cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
+		cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
 		btAssert(batchingNewProg);
-		m_data->m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
+		m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
 		btAssert(m_data->m_batchingKernelNew);
 	}
@@ -190,7 +190,7 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 }
-btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
+b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver()
 {
 	delete m_data->m_sortDataBuffer;
 	delete m_data->m_contactBuffer;
@@ -232,8 +232,8 @@ struct btConstraintCfg
-void btGpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
+void b3GpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
 {
@@ -404,7 +404,7 @@ void btGpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<btRigi
-void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const btConfig& config)
+void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config)
 {
 	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
 	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
@@ -423,11 +423,11 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
        csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
-        btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
+        btOpenCLArray<b3RigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
        void* additionalData = 0;//m_data->m_frictionCGPU;
        const btOpenCLArray<btInertiaCL>* shapeBuf = m_data->m_inertiaBufferGPU;
-        btOpenCLArray<btGpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
+        btOpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
        int nContacts = nContactOut;
@@ -442,7 +442,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
            if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
            {
-				m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
+				m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
@@ -456,13 +456,13 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
-                const btOpenCLArray<btRigidBodyCL>* bodyNative = bodyBuf;
+                const btOpenCLArray<b3RigidBodyCL>* bodyNative = bodyBuf;
                {
-                    //btOpenCLArray<btRigidBodyCL>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
+                    //btOpenCLArray<b3RigidBodyCL>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
-                    //btOpenCLArray<btContact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
+                    //btOpenCLArray<b3Contact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
                    const int sortAlignment = 512; // todo. get this out of sort
                    if( csCfg.m_enableParallelSolve )
@@ -596,8 +596,8 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 					} else
 					{
 						BT_PROFILE("cpu batchContacts");
-						btAlignedObjectArray<btContact4> cpuContacts;
+						btAlignedObjectArray<b3Contact4> cpuContacts;
-						btOpenCLArray<btContact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
+						btOpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
 						contactsIn->copyToHost(cpuContacts);
 						btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
@@ -641,7 +641,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 						}
 						{
 							BT_PROFILE("m_contactBuffer->copyFromHost");
-							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
+							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<b3Contact4>&)cpuContacts);
 						}
 					} 
@@ -658,7 +658,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 						shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
 						contactConstraintOut, 
 						additionalData, nContacts, 
-						(SolverBase::ConstraintCfg&) csCfg );
+						(b3SolverBase::ConstraintCfg&) csCfg );
                    clFinish(m_data->m_queue);
                }
@@ -711,7 +711,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 }
-void btGpuBatchingPgsSolver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx )
+void b3GpuBatchingPgsSolver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx )
 {
 }
@@ -731,10 +731,10 @@ btAlignedObjectArray<int> bodyUsed;
 btAlignedObjectArray<unsigned int> idxBuffer;
 btAlignedObjectArray<btSortData> sortData;
-btAlignedObjectArray<btContact4> old;
+btAlignedObjectArray<b3Contact4> old;
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
 {
 	btAlignedObjectArray<int> bodyUsed;
 	bodyUsed.resize(numBodies);
@@ -841,7 +841,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
        BT_PROFILE("reorder");
 		//	reorder
-		memcpy( &old[0], cs, sizeof(btContact4)*n);
+		memcpy( &old[0], cs, sizeof(b3Contact4)*n);
 		for(int i=0; i<n; i++)
 		{
 			int idx = sortData[i].m_value;
@@ -861,7 +861,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
 }
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
 {
 	BT_PROFILE("sortConstraintByBatch");
@@ -993,7 +993,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int n
        BT_PROFILE("reorder");
 		//	reorder
-		memcpy( &m_data->m_old[0], cs, sizeof(btContact4)*numConstraints);
+		memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints);
 		for(int i=0; i<numConstraints; i++)
 		{
@@ -1016,7 +1016,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int n
 }
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch3( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
 {
 	BT_PROFILE("sortConstraintByBatch");
--- a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h
+++ b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h
@@ -0,0 +1,40 @@
 #ifndef BT_GPU_BATCHING_PGS_SOLVER_H
 #define BT_GPU_BATCHING_PGS_SOLVER_H
 #include "../../basic_initialize/b3OpenCLInclude.h"
 #include "../../parallel_primitives/host/btOpenCLArray.h"
 #include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
 #include "../../gpu_narrowphase/host/b3Contact4.h"
 #include "b3GpuConstraint4.h"
 class b3GpuBatchingPgsSolver
 {
 protected:
 	struct btGpuBatchingPgsSolverInternalData*		m_data;
 	void batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
 	inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	inline int sortConstraintByBatch3( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	void solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
 			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
 public:
 	b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity);
 	virtual ~b3GpuBatchingPgsSolver();
 	void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config);
 };
 #endif //BT_GPU_BATCHING_PGS_SOLVER_H
--- a/opencl/gpu_rigidbody/host/b3GpuConstraint4.h
+++ b/opencl/gpu_rigidbody/host/b3GpuConstraint4.h
@@ -3,7 +3,7 @@
 #define BT_CONSTRAINT4_h
 #include "BulletCommon/btVector3.h"
-ATTRIBUTE_ALIGNED16(struct) btGpuConstraint4
+ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
--- a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp
@@ -1,26 +1,26 @@
-#include "btGpuNarrowPhase.h"
+#include "b3GpuNarrowPhase.h"
 #include "parallel_primitives/host/btOpenCLArray.h"
-#include "../../gpu_narrowphase/host/btConvexPolyhedronCL.h"
+#include "../../gpu_narrowphase/host/b3ConvexPolyhedronCL.h"
-#include "../../gpu_narrowphase/host/ConvexHullContact.h"
+#include "../../gpu_narrowphase/host/b3ConvexHullContact.h"
-#include "../../gpu_broadphase/host/btSapAabb.h"
+#include "../../gpu_broadphase/host/b3SapAabb.h"
 #include <string.h>
-#include "btConfig.h"
+#include "b3Config.h"
-#include "../../gpu_narrowphase/host/btOptimizedBvh.h"
+#include "../../gpu_narrowphase/host/b3OptimizedBvh.h"
-#include "../../gpu_narrowphase/host/btTriangleIndexVertexArray.h"
+#include "../../gpu_narrowphase/host/b3TriangleIndexVertexArray.h"
 #include "BulletGeometry/btAabbUtil2.h"
 struct btGpuNarrowPhaseInternalData
 {
-	btAlignedObjectArray<btConvexUtility*>* m_convexData;
+	btAlignedObjectArray<b3ConvexUtility*>* m_convexData;
-	btAlignedObjectArray<btConvexPolyhedronCL> m_convexPolyhedra;
+	btAlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra;
 	btAlignedObjectArray<btVector3> m_uniqueEdges;
 	btAlignedObjectArray<btVector3> m_convexVertices;
 	btAlignedObjectArray<int> m_convexIndices;
-	btOpenCLArray<btConvexPolyhedronCL>* m_convexPolyhedraGPU;
+	btOpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
 	btOpenCLArray<btVector3>* m_uniqueEdgesGPU;
 	btOpenCLArray<btVector3>* m_convexVerticesGPU;
 	btOpenCLArray<int>* m_convexIndicesGPU;
@@ -44,12 +44,12 @@ struct btGpuNarrowPhaseInternalData
 	btOpenCLArray<btInt2>* m_convexPairsOutGPU;
 	btOpenCLArray<btInt2>* m_planePairs;
-	btOpenCLArray<btContact4>* m_pBufContactOutGPU;
+	btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
-	btAlignedObjectArray<btContact4>* m_pBufContactOutCPU;
+	btAlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
-	btAlignedObjectArray<btRigidBodyCL>* m_bodyBufferCPU;
+	btAlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
-	btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
+	btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
 	btAlignedObjectArray<btInertiaCL>*	m_inertiaBufferCPU;
 	btOpenCLArray<btInertiaCL>*	m_inertiaBufferGPU;
@@ -57,18 +57,18 @@ struct btGpuNarrowPhaseInternalData
 	int m_numAcceleratedShapes;
 	int m_numAcceleratedRigidBodies;
-	btAlignedObjectArray<btCollidable>	m_collidablesCPU;
+	btAlignedObjectArray<b3Collidable>	m_collidablesCPU;
-	btOpenCLArray<btCollidable>*	m_collidablesGPU;
+	btOpenCLArray<b3Collidable>*	m_collidablesGPU;
-	btOpenCLArray<btSapAabb>* m_localShapeAABBGPU;
+	btOpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
-	btAlignedObjectArray<btSapAabb>* m_localShapeAABBCPU;
+	btAlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
-	btAlignedObjectArray<class btOptimizedBvh*> m_bvhData;
+	btAlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
 	btOpenCLArray<btQuantizedBvhNode>*	m_treeNodesGPU;
 	btOpenCLArray<btBvhSubtreeInfo>*	m_subTreesGPU;
-	btConfig	m_config;
+	b3Config	m_config;
 };
@@ -76,7 +76,7 @@ struct btGpuNarrowPhaseInternalData
-btGpuNarrowPhase::btGpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const btConfig& config)
+b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config)
 :m_data(0) ,m_planeBodyIndex(-1),m_static0Index(-1),
 m_context(ctx),
 m_device(device),
@@ -95,30 +95,30 @@ m_queue(queue)
 	m_data->m_convexPairsOutGPU = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
 	m_data->m_planePairs = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
-	m_data->m_pBufContactOutCPU = new btAlignedObjectArray<btContact4>();
+	m_data->m_pBufContactOutCPU = new btAlignedObjectArray<b3Contact4>();
 	m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs);
-	m_data->m_bodyBufferCPU = new btAlignedObjectArray<btRigidBodyCL>();
+	m_data->m_bodyBufferCPU = new btAlignedObjectArray<b3RigidBodyCL>();
 	m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies);
 	m_data->m_inertiaBufferCPU = new btAlignedObjectArray<btInertiaCL>();
 	m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies);
-	m_data->m_pBufContactOutGPU = new btOpenCLArray<btContact4>(ctx,queue, config.m_maxContactCapacity,true);
+	m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true);
 	m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,queue,config.m_maxConvexBodies,false);
-	m_data->m_collidablesGPU = new btOpenCLArray<btCollidable>(ctx,queue,config.m_maxConvexShapes);
+	m_data->m_collidablesGPU = new btOpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes);
-	m_data->m_localShapeAABBCPU = new btAlignedObjectArray<btSapAabb>;
+	m_data->m_localShapeAABBCPU = new btAlignedObjectArray<b3SapAabb>;
-	m_data->m_localShapeAABBGPU = new btOpenCLArray<btSapAabb>(ctx,queue,config.m_maxConvexShapes);
+	m_data->m_localShapeAABBGPU = new btOpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes);
 	//m_data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(ctx,queue, config.m_maxBroadphasePairs,false);
-	m_data->m_bodyBufferGPU = new btOpenCLArray<btRigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
+	m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
 	m_data->m_convexFacesGPU = new btOpenCLArray<btGpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false);
 	m_data->m_gpuChildShapes = new btOpenCLArray<btGpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false);
-	m_data->m_convexPolyhedraGPU = new btOpenCLArray<btConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
+	m_data->m_convexPolyhedraGPU = new btOpenCLArray<b3ConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
 	m_data->m_uniqueEdgesGPU = new btOpenCLArray<btVector3>(ctx,queue,config.m_maxConvexUniqueEdges,true);
 	m_data->m_convexVerticesGPU = new btOpenCLArray<btVector3>(ctx,queue,config.m_maxConvexVertices,true);
 	m_data->m_convexIndicesGPU = new btOpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true);
@@ -132,7 +132,7 @@ m_queue(queue)
-	m_data->m_convexData = new btAlignedObjectArray<btConvexUtility* >();
+	m_data->m_convexData = new btAlignedObjectArray<b3ConvexUtility* >();
 	m_data->m_convexData->resize(config.m_maxConvexShapes);
@@ -151,7 +151,7 @@ m_queue(queue)
 }
-btGpuNarrowPhase::~btGpuNarrowPhase()
+b3GpuNarrowPhase::~b3GpuNarrowPhase()
 {
 	delete m_data->m_gpuSatCollision;
 	delete m_data->m_pBufPairsCPU;
@@ -186,7 +186,7 @@ btGpuNarrowPhase::~btGpuNarrowPhase()
 }
-int	btGpuNarrowPhase::allocateCollidable()
+int	b3GpuNarrowPhase::allocateCollidable()
 {
 	int curSize = m_data->m_collidablesCPU.size();
 	m_data->m_collidablesCPU.expand();
@@ -197,18 +197,18 @@ int	btGpuNarrowPhase::allocateCollidable()
-int		btGpuNarrowPhase::registerSphereShape(float radius)
+int		b3GpuNarrowPhase::registerSphereShape(float radius)
 {
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_SPHERE;
 	col.m_shapeIndex = 0;
 	col.m_radius = radius;
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		btVector3 myAabbMin(-radius,-radius,-radius);
 		btVector3 myAabbMax(radius,radius,radius);
@@ -231,7 +231,7 @@ int		btGpuNarrowPhase::registerSphereShape(float radius)
 }
-int btGpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConstant)
+int b3GpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConstant)
 {
 	int faceOffset = m_data->m_convexFaces.size();
 	btGpuFace& face = m_data->m_convexFaces.expand();
@@ -243,18 +243,18 @@ int btGpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConsta
 	return faceOffset;
 }
-int		btGpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float planeConstant)
+int		b3GpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float planeConstant)
 {
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_PLANE;
 	col.m_shapeIndex = registerFace(planeNormal,planeConstant);
 	col.m_radius = planeConstant;
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		aabb.m_min[0] = -1e30f;
 		aabb.m_min[1] = -1e30f;
 		aabb.m_min[2] = -1e30f;
@@ -274,13 +274,13 @@ int		btGpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float pl
 }
-int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* convexPtr,btCollidable& col)
+int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Collidable& col)
 {
 	m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1);
 	m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);
-	btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
+	b3ConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
 	convex.mC = convexPtr->mC;
 	convex.mE = convexPtr->mE;
 	convex.m_extents= convexPtr->m_extents;
@@ -344,7 +344,7 @@ int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* convexPtr,btColli
 }
-int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
+int		b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
 {
 	btAlignedObjectArray<btVector3> verts;
@@ -355,7 +355,7 @@ int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int stride
 		verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
 	}
-	btConvexUtility* utilPtr = new btConvexUtility();
+	b3ConvexUtility* utilPtr = new b3ConvexUtility();
 	bool merge = true;
 	if (numVertices)
 	{
@@ -366,10 +366,10 @@ int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int stride
 	return collidableIndex;
 }
-int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
+int		b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
 {
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_CONVEX_HULL;
 	col.m_shapeIndex = -1;
@@ -386,7 +386,7 @@ int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		btVector3 myAabbMin(1e30f,1e30f,1e30f);
 		btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
@@ -414,11 +414,11 @@ int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
 }
-int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShape>* childShapes)
+int		b3GpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShape>* childShapes)
 {
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS;
 	col.m_shapeIndex = m_data->m_cpuChildShapes.size();
@@ -437,7 +437,7 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 	col.m_numChildShapes = childShapes->size();
-	btSapAabb aabbWS;
+	b3SapAabb aabbWS;
 	btVector3 myAabbMin(1e30f,1e30f,1e30f);
 	btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
@@ -445,8 +445,8 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 	for (int i=0;i<childShapes->size();i++)
 	{
 		int childColIndex = childShapes->at(i).m_shapeIndex;
-		btCollidable& childCol = getCollidableCpu(childColIndex);
+		b3Collidable& childCol = getCollidableCpu(childColIndex);
-		btSapAabb aabbLoc =m_data->m_localShapeAABBCPU->at(childColIndex);
+		b3SapAabb aabbLoc =m_data->m_localShapeAABBCPU->at(childColIndex);
 		btVector3 childLocalAabbMin(aabbLoc.m_min[0],aabbLoc.m_min[1],aabbLoc.m_min[2]);
 		btVector3 childLocalAabbMax(aabbLoc.m_max[0],aabbLoc.m_max[1],aabbLoc.m_max[2]);
@@ -485,7 +485,7 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 }
-int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
+int		b3GpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
 {
 	//right now we only support one single mesh, it is on the todo to merge all mesh data etc
 	btAssert(m_data->m_treeNodesGPU ==0);
@@ -499,14 +499,14 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]);
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_CONCAVE_TRIMESH;
 	col.m_shapeIndex = registerConcaveMeshShape(vertices,indices,col,scaling);
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	btVector3 myAabbMin(1e30f,1e30f,1e30f);
 	btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
@@ -529,11 +529,11 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	m_data->m_localShapeAABBCPU->push_back(aabb);
 	m_data->m_localShapeAABBGPU->push_back(aabb);
-	btOptimizedBvh* bvh = new btOptimizedBvh();
+	b3OptimizedBvh* bvh = new b3OptimizedBvh();
-	//void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
+	//void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
 	bool useQuantizedAabbCompression = true;
-	btTriangleIndexVertexArray* meshInterface=new btTriangleIndexVertexArray();
+	b3TriangleIndexVertexArray* meshInterface=new b3TriangleIndexVertexArray();
 	btIndexedMesh mesh;
 	mesh.m_numTriangles = indices->size()/3;
 	mesh.m_numVertices = vertices->size();
@@ -560,7 +560,7 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	return collidableIndex;
 }
-int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,btCollidable& col, const float* scaling1)
+int b3GpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,b3Collidable& col, const float* scaling1)
 {
@@ -570,7 +570,7 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
 	m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);
-	btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
+	b3ConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
 	convex.mC = btVector3(0,0,0);
 	convex.mE = btVector3(0,0,0);
 	convex.m_extents= btVector3(0,0,0);
@@ -639,48 +639,48 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
-cl_mem	btGpuNarrowPhase::getBodiesGpu()
+cl_mem	b3GpuNarrowPhase::getBodiesGpu()
 {
 	return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL();
 }
-int	btGpuNarrowPhase::getNumBodiesGpu() const
+int	b3GpuNarrowPhase::getNumBodiesGpu() const
 {
 	return m_data->m_bodyBufferGPU->size();
 }
-cl_mem	btGpuNarrowPhase::getBodyInertiasGpu()
+cl_mem	b3GpuNarrowPhase::getBodyInertiasGpu()
 {
 	return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL();
 }
-int	btGpuNarrowPhase::getNumBodyInertiasGpu() const
+int	b3GpuNarrowPhase::getNumBodyInertiasGpu() const
 {
 	return m_data->m_inertiaBufferGPU->size();
 }
-btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex)
+b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex)
 {
 	return m_data->m_collidablesCPU[collidableIndex];
 }
-const btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex) const
+const b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) const
 {
 	return m_data->m_collidablesCPU[collidableIndex];
 }
-cl_mem btGpuNarrowPhase::getCollidablesGpu()
+cl_mem b3GpuNarrowPhase::getCollidablesGpu()
 {
 	return m_data->m_collidablesGPU->getBufferCL();
 }
-cl_mem	btGpuNarrowPhase::getAabbBufferGpu()
+cl_mem	b3GpuNarrowPhase::getAabbBufferGpu()
 {
 	return m_data->m_localShapeAABBGPU->getBufferCL();
 }
-int	btGpuNarrowPhase::getNumCollidablesGpu() const
+int	b3GpuNarrowPhase::getNumCollidablesGpu() const
 {
 	return m_data->m_collidablesGPU->size();
 }
@@ -689,17 +689,17 @@ int	btGpuNarrowPhase::getNumCollidablesGpu() const
-int	btGpuNarrowPhase::getNumContactsGpu() const
+int	b3GpuNarrowPhase::getNumContactsGpu() const
 {
 	return m_data->m_pBufContactOutGPU->size();
 }
-cl_mem btGpuNarrowPhase::getContactsGpu()
+cl_mem b3GpuNarrowPhase::getContactsGpu()
 {
 	return m_data->m_pBufContactOutGPU->getBufferCL();
 }
-void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
+void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
 {
 	int nContactOut = 0;
@@ -742,7 +742,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
 }
-const btSapAabb& btGpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
+const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
 {
 	return m_data->m_localShapeAABBCPU->at(collidableIndex);
 }
@@ -751,7 +751,7 @@ const btSapAabb& btGpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
-int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu)
+int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu)
 {
 	btVector3 aabbMin(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]);
 	btVector3 aabbMax (aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]);
@@ -760,7 +760,7 @@ int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
 	m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1);
-	btRigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
+	b3RigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
 	float friction = 1.f;
 	float restitution = 0.f;
@@ -844,7 +844,7 @@ int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
 	return m_data->m_numAcceleratedRigidBodies++;
 }
-void	btGpuNarrowPhase::writeAllBodiesToGpu()
+void	b3GpuNarrowPhase::writeAllBodiesToGpu()
 {
 	m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);
 	m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);
--- a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h
+++ b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h
@@ -1,12 +1,12 @@
 #ifndef BT_GPU_NARROWPHASE_H
 #define BT_GPU_NARROWPHASE_H
-#include "../../gpu_narrowphase/host/btCollidable.h"
+#include "../../gpu_narrowphase/host/b3Collidable.h"
-#include "basic_initialize/btOpenCLInclude.h"
+#include "basic_initialize/b3OpenCLInclude.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btVector3.h"
-class btGpuNarrowPhase
+class b3GpuNarrowPhase
 {
 protected:
@@ -19,17 +19,17 @@ protected:
 	cl_device_id m_device;
 	cl_command_queue m_queue;
-	int registerConvexHullShape(class btConvexUtility* convexPtr, btCollidable& col);
+	int registerConvexHullShape(class b3ConvexUtility* convexPtr, b3Collidable& col);
-	int registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices, btCollidable& col, const float* scaling);
+	int registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling);
 public:
-	btGpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct btConfig& config);
+	b3GpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct b3Config& config);
-	virtual ~btGpuNarrowPhase(void);
+	virtual ~b3GpuNarrowPhase(void);
 	int		registerSphereShape(float radius);
 	int		registerPlaneShape(const btVector3& planeNormal, float planeConstant);
@@ -41,7 +41,7 @@ public:
 	//do they need to be merged?
-	int	registerConvexHullShape(btConvexUtility* utilPtr);
+	int	registerConvexHullShape(b3ConvexUtility* utilPtr);
 	int	registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
 	int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu);
@@ -72,10 +72,10 @@ public:
 	int allocateCollidable();
-	btCollidable& getCollidableCpu(int collidableIndex);
+	b3Collidable& getCollidableCpu(int collidableIndex);
-	const btCollidable& getCollidableCpu(int collidableIndex) const;
+	const b3Collidable& getCollidableCpu(int collidableIndex) const;
-	const struct btSapAabb& getLocalSpaceAabb(int collidableIndex) const;
+	const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
 };
 #endif //BT_GPU_NARROWPHASE_H
--- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp
@@ -1,34 +1,41 @@
-#include "btGpuRigidBodyPipeline.h"
+#include "b3GpuRigidBodyPipeline.h"
-#include "btGpuRigidBodyPipelineInternalData.h"
+#include "b3GpuRigidBodyPipelineInternalData.h"
 #include "../kernels/integrateKernel.h"
 #include "../kernels/updateAabbsKernel.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
-#include "btGpuNarrowPhase.h"
+#include "b3GpuNarrowPhase.h"
 #include "BulletGeometry/btAabbUtil2.h"
-#include "../../gpu_broadphase/host/btSapAabb.h"
+#include "../../gpu_broadphase/host/b3SapAabb.h"
-#include "../../gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "../../gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "btPgsJacobiSolver.h"
+//#define TEST_OTHER_GPU_SOLVER
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
+#ifdef TEST_OTHER_GPU_SOLVER
 #include "../../gpu_narrowphase/host/btContact4.h"
 #include "btGpuBatchingPgsSolver.h"
 #include "Solver.h"
 #include "btGpuJacobiSolver.h"
-#include "BulletCommon/btQuickprof.h"
+#include "btPgsJacobiSolver.h"
-#include "btConfig.h"
+#endif //TEST_OTHER_GPU_SOLVER
-btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q,class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap )
+#include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
 #include "../../gpu_narrowphase/host/b3Contact4.h"
 #include "b3GpuBatchingPgsSolver.h"
 #include "b3Solver.h"
 #include "BulletCommon/btQuickprof.h"
 #include "b3Config.h"
 b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap )
 {
-	m_data = new btGpuRigidBodyPipelineInternalData;
+	m_data = new b3GpuRigidBodyPipelineInternalData;
 	m_data->m_context = ctx;
 	m_data->m_device = device;
 	m_data->m_queue = q;
 #ifdef TEST_OTHER_GPU_SOLVER
 	m_data->m_solver = new btPgsJacobiSolver();
 	btConfig config;
 	m_data->m_solver2 = new btGpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs);
 	m_data->m_solver3 = new btGpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs);	
 #endif //	TEST_OTHER_GPU_SOLVER
 	b3Config config;
 	m_data->m_solver2 = new b3GpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs);
 	m_data->m_broadphaseSap = broadphaseSap;
@@ -37,16 +44,16 @@ btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
 	cl_int errNum=0;
 	{
-		cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl");
+		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_data->m_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog);
+		m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog);
 		btAssert(errNum==CL_SUCCESS);
 		clReleaseProgram(prog);
 	}
 	{
-		cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl");
+		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_data->m_updateAabbsKernel = btOpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog);
+		m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog);
 		btAssert(errNum==CL_SUCCESS);
 		clReleaseProgram(prog);
 	}
@@ -54,17 +61,22 @@ btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
 }
-btGpuRigidBodyPipeline::~btGpuRigidBodyPipeline()
+b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
 {
 	clReleaseKernel(m_data->m_integrateTransformsKernel);
 #ifdef TEST_OTHER_GPU_SOLVER
 	delete m_data->m_solver;
 	delete m_data->m_solver2;
 	delete m_data->m_solver3;
 #endif //TEST_OTHER_GPU_SOLVER
 	delete m_data->m_solver2;
 	delete m_data;
 }
-void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
+void	b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
 {
 	//update worldspace AABBs from local AABB/worldtransform
@@ -104,14 +116,15 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 	if (numContacts)
 	{
-		btOpenCLArray<btRigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
+		btOpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
 		gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(),m_data->m_narrowphase->getNumBodiesGpu());
 		btOpenCLArray<btInertiaCL> gpuInertias(m_data->m_context,m_data->m_queue,0,true);
 		gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(),m_data->m_narrowphase->getNumBodiesGpu());
-		btOpenCLArray<btContact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
+		btOpenCLArray<b3Contact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
 		gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(),m_data->m_narrowphase->getNumContactsGpu());
 		bool useJacobi = false;
 #ifdef TEST_OTHER_GPU_SOLVER
 		if (useJacobi)
 		{
 			bool useGpu = true;
@@ -120,9 +133,9 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 				bool forceHost = false;
 				if (forceHost)
 				{
-					btAlignedObjectArray<btRigidBodyCL> hostBodies;
+					btAlignedObjectArray<b3RigidBodyCL> hostBodies;
 					btAlignedObjectArray<btInertiaCL> hostInertias;
-					btAlignedObjectArray<btContact4> hostContacts;
+					btAlignedObjectArray<b3Contact4> hostContacts;
 					{
 						BT_PROFILE("copyToHost");
@@ -148,11 +161,11 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 				}
 			} else
 			{
-				btAlignedObjectArray<btRigidBodyCL> hostBodies;
+				btAlignedObjectArray<b3RigidBodyCL> hostBodies;
 				gpuBodies.copyToHost(hostBodies);
 				btAlignedObjectArray<btInertiaCL> hostInertias;
 				gpuInertias.copyToHost(hostInertias);
-				btAlignedObjectArray<btContact4> hostContacts;
+				btAlignedObjectArray<b3Contact4> hostContacts;
 				gpuContacts.copyToHost(hostContacts);
 				{
 					m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
@@ -161,8 +174,9 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 			}
 		} else
 #endif //TEST_OTHER_GPU_SOLVER
 		{
-			btConfig config;
+			b3Config config;
 			m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),config);
 			//m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
@@ -181,7 +195,7 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 }
-void	btGpuRigidBodyPipeline::integrate(float timeStep)
+void	b3GpuRigidBodyPipeline::integrate(float timeStep)
 {
 	//integrate
@@ -201,7 +215,7 @@ void	btGpuRigidBodyPipeline::integrate(float timeStep)
-void	btGpuRigidBodyPipeline::setupGpuAabbsFull()
+void	b3GpuRigidBodyPipeline::setupGpuAabbsFull()
 {
 	cl_int ciErrNum=0;
@@ -226,12 +240,12 @@ void	btGpuRigidBodyPipeline::setupGpuAabbsFull()
-cl_mem	btGpuRigidBodyPipeline::getBodyBuffer()
+cl_mem	b3GpuRigidBodyPipeline::getBodyBuffer()
 {
 	return m_data->m_narrowphase->getBodiesGpu();
 }
-int	btGpuRigidBodyPipeline::getNumBodies() const
+int	b3GpuRigidBodyPipeline::getNumBodies() const
 {
 	return m_data->m_narrowphase->getNumBodiesGpu();
 }
@@ -240,12 +254,12 @@ int	btGpuRigidBodyPipeline::getNumBodies() const
-int		btGpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex)
+int		b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex)
 {
 	btVector3 aabbMin(0,0,0),aabbMax(0,0,0);
 	if (collidableIndex>=0)
 	{
-		btSapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
+		b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
 		btVector3 localAabbMin(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]);
 		btVector3 localAabbMax(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]);
--- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h
@@ -1,25 +1,25 @@
 #ifndef BT_GPU_RIGIDBODY_PIPELINE_H
 #define BT_GPU_RIGIDBODY_PIPELINE_H
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
-class btGpuRigidBodyPipeline
+class b3GpuRigidBodyPipeline
 {
 protected:
-	struct btGpuRigidBodyPipelineInternalData*	m_data;
+	struct b3GpuRigidBodyPipelineInternalData*	m_data;
 	int allocateCollidable();
 public:
-	btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q , class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap);
+	b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap);
-	virtual ~btGpuRigidBodyPipeline();
+	virtual ~b3GpuRigidBodyPipeline();
 	void	stepSimulation(float deltaTime);
 	void	integrate(float timeStep);
 	void	setupGpuAabbsFull();
-	int		registerConvexPolyhedron(class btConvexUtility* convex);
+	int		registerConvexPolyhedron(class b3ConvexUtility* convex);
 	//int		registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
 	//int		registerSphereShape(float radius);
--- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h
@@ -1,14 +1,14 @@
 #ifndef BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
 #define BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "../../parallel_primitives/host/btOpenCLArray.h"
-#include "../../gpu_narrowphase/host/btCollidable.h"
+#include "../../gpu_narrowphase/host/b3Collidable.h"
-struct btGpuRigidBodyPipelineInternalData
+struct b3GpuRigidBodyPipelineInternalData
 {
 	cl_context			m_context;
@@ -19,12 +19,12 @@ struct btGpuRigidBodyPipelineInternalData
 	cl_kernel	m_updateAabbsKernel;
 	class btPgsJacobiSolver* m_solver;
-	class btGpuBatchingPgsSolver* m_solver2;
+	class b3GpuBatchingPgsSolver* m_solver2;
 	class btGpuJacobiSolver* m_solver3;
-	class btGpuSapBroadphase* m_broadphaseSap;
+	class b3GpuSapBroadphase* m_broadphaseSap;
-	class btGpuNarrowPhase*	m_narrowphase;
+	class b3GpuNarrowPhase*	m_narrowphase;
 };
--- a/opencl/gpu_rigidbody/host/b3Solver.cpp
+++ b/opencl/gpu_rigidbody/host/b3Solver.cpp
@@ -14,7 +14,7 @@ subject to the following restrictions:
 //Originally written by Takahiro Harada
-#include "Solver.h"
+#include "b3Solver.h"
 ///useNewBatchingKernel  is a rewritten kernel using just a single thread of the warp, for experiments
 bool useNewBatchingKernel = false;
@@ -87,7 +87,7 @@ public:
-Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
+b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
 			:m_nIterations(4),
 			m_context(ctx),
 			m_device(device),
@@ -100,7 +100,7 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
 	const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
 	m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
-	m_contactBuffer2 = new btOpenCLArray<btContact4>(ctx,queue);
+	m_contactBuffer2 = new btOpenCLArray<b3Contact4>(ctx,queue);
 	m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
 	m_numConstraints->resize(N_SPLIT*N_SPLIT);
@@ -125,59 +125,59 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
 	{
-		cl_program solveContactProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
+		cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
 		btAssert(solveContactProg);
-		cl_program solveFrictionProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
+		cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
 		btAssert(solveFrictionProg);
-		cl_program solverSetup2Prog= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
+		cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
 		btAssert(solverSetup2Prog);
-		cl_program solverSetupProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
+		cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
 		btAssert(solverSetupProg);
-		m_solveFrictionKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
+		m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
 		btAssert(m_solveFrictionKernel);
-		m_solveContactKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
+		m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
 		btAssert(m_solveContactKernel);
-		m_contactToConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
+		m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
 		btAssert(m_contactToConstraintKernel);
-		m_setSortDataKernel =  btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_setSortDataKernel =  b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_setSortDataKernel);
-		m_reorderContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_reorderContactKernel);
-		m_copyConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_copyConstraintKernel);
 	}
 	{
-		cl_program batchingProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
+		cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
 		btAssert(batchingProg);
-		m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
+		m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
 		btAssert(m_batchingKernel);
 	}
 	{
-		cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
+		cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
 		btAssert(batchingNewProg);
-		m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
+		m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
-		//m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
+		//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
 		btAssert(m_batchingKernelNew);
 	}
 }
-Solver::~Solver()
+b3Solver::~b3Solver()
 {
 	delete m_sortDataBuffer;
 	delete m_contactBuffer2;
@@ -204,10 +204,10 @@ Solver::~Solver()
-/*void Solver::reorderConvertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+/*void b3Solver::reorderConvertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 	const btOpenCLArray<btInertiaCL>* shapeBuf,
-	btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
+	btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
-	int nContacts, const Solver::ConstraintCfg& cfg )
+	int nContacts, const b3Solver::ConstraintCfg& cfg )
 {
 	if( m_contactBuffer )
 	{
@@ -216,7 +216,7 @@ Solver::~Solver()
 	if( m_contactBuffer == 0 )
 	{
 		BT_PROFILE("new m_contactBuffer;");
-		m_contactBuffer = new btOpenCLArray<btContact4>(m_context,m_queue,nContacts );
+		m_contactBuffer = new btOpenCLArray<b3Contact4>(m_context,m_queue,nContacts );
 		m_contactBuffer->resize(nContacts);
 	}
@@ -256,7 +256,7 @@ Solver::~Solver()
 		{
 			BT_PROFILE("batchContacts");
-			Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
+			b3Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
 		}
 	}
@@ -269,7 +269,7 @@ Solver::~Solver()
 	{
 		BT_PROFILE("convertToConstraints");
-		Solver::convertToConstraints(  bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
+		b3Solver::convertToConstraints(  bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
 	}
 	{
@@ -303,7 +303,7 @@ Solver::~Solver()
 template<bool JACOBI>
 static
 __inline
-void solveContact(btGpuConstraint4& cs, 
+void solveContact(b3GpuConstraint4& cs, 
 	const btVector3& posA, btVector3& linVelA, btVector3& angVelA, float invMassA, const btMatrix3x3& invInertiaA,
 	const btVector3& posB, btVector3& linVelB, btVector3& angVelB, float invMassB, const btMatrix3x3& invInertiaB, 
 	float maxRambdaDt[4], float minRambdaDt[4])
@@ -380,7 +380,7 @@ void solveContact(btGpuConstraint4& cs,
 	static
 	__inline
-	void solveFriction(btGpuConstraint4& cs, 
+	void solveFriction(b3GpuConstraint4& cs, 
 		const btVector3& posA, btVector3& linVelA, btVector3& angVelA, float invMassA, const btMatrix3x3& invInertiaA,
 		const btVector3& posB, btVector3& linVelB, btVector3& angVelB, float invMassB, const btMatrix3x3& invInertiaB, 
 		float maxRambdaDt[4], float minRambdaDt[4])
@@ -454,7 +454,7 @@ void solveContact(btGpuConstraint4& cs,
 struct SolveTask// : public ThreadPool::Task
 {
-	SolveTask(btAlignedObjectArray<btRigidBodyCL>& bodies,  btAlignedObjectArray<btInertiaCL>& shapes, btAlignedObjectArray<btGpuConstraint4>& constraints,
+	SolveTask(btAlignedObjectArray<b3RigidBodyCL>& bodies,  btAlignedObjectArray<btInertiaCL>& shapes, btAlignedObjectArray<b3GpuConstraint4>& constraints,
 		int start, int nConstraints)
 		: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
 		m_solveFriction( true ){}
@@ -472,8 +472,8 @@ struct SolveTask// : public ThreadPool::Task
 			float frictionCoeff = m_constraints[i].getFrictionCoeff();
 			int aIdx = (int)m_constraints[i].m_bodyA;
 			int bIdx = (int)m_constraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = m_bodies[aIdx];
+			b3RigidBodyCL& bodyA = m_bodies[aIdx];
-			btRigidBodyCL& bodyB = m_bodies[bIdx];
+			b3RigidBodyCL& bodyB = m_bodies[bIdx];
 			if( !m_solveFriction )
 			{
@@ -512,24 +512,24 @@ struct SolveTask// : public ThreadPool::Task
 	}
-	btAlignedObjectArray<btRigidBodyCL>& m_bodies;
+	btAlignedObjectArray<b3RigidBodyCL>& m_bodies;
 	btAlignedObjectArray<btInertiaCL>& m_shapes;
-	btAlignedObjectArray<btGpuConstraint4>& m_constraints;
+	btAlignedObjectArray<b3GpuConstraint4>& m_constraints;
 	int m_start;
 	int m_nConstraints;
 	bool m_solveFriction;
 };
-void Solver::solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
+void b3Solver::solveContactConstraintHost(  btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
 {
-	btAlignedObjectArray<btRigidBodyCL> bodyNative;
+	btAlignedObjectArray<b3RigidBodyCL> bodyNative;
 	bodyBuf->copyToHost(bodyNative);
 	btAlignedObjectArray<btInertiaCL> shapeNative;
 	shapeBuf->copyToHost(shapeNative);
-	btAlignedObjectArray<btGpuConstraint4> constraintNative;
+	btAlignedObjectArray<b3GpuConstraint4> constraintNative;
 	constraint->copyToHost(constraintNative);
 	for(int iter=0; iter<m_nIterations; iter++)
@@ -553,8 +553,8 @@ void Solver::solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf,
 }
-void Solver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
+void b3Solver::solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
 {
@@ -712,12 +712,12 @@ void Solver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBu
 }
-void Solver::convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 	const btOpenCLArray<btInertiaCL>* shapeBuf, 
-	btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
+	btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
 	int nContacts, const ConstraintCfg& cfg )
 {
-	btOpenCLArray<btGpuConstraint4>* constraintNative =0;
+	btOpenCLArray<b3GpuConstraint4>* constraintNative =0;
 	struct CB
 	{
@@ -756,9 +756,9 @@ void Solver::convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 }
 /*
-void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+void b3Solver::sortContacts(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
-			btOpenCLArray<btContact4>* contactsIn, void* additionalData, 
+			btOpenCLArray<b3Contact4>* contactsIn, void* additionalData, 
-			int nContacts, const Solver::ConstraintCfg& cfg )
+			int nContacts, const b3Solver::ConstraintCfg& cfg )
 {
@@ -813,7 +813,7 @@ void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 		{	//	5. sort constraints by cellIdx
 			//	todo. preallocate this
 //			btAssert( contactsIn->getType() == TYPE_HOST );
-//			btOpenCLArray<btContact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn );	//	copying contacts to this buffer
+//			btOpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn );	//	copying contacts to this buffer
 			{
@@ -834,7 +834,7 @@ void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 */
-void	Solver::batchContacts(  btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
+void	b3Solver::batchContacts(  btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
 {
 	int numWorkItems = 64*N_SPLIT*N_SPLIT;
@@ -893,7 +893,7 @@ void	Solver::batchContacts(  btOpenCLArray<btContact4>* contacts, int nContacts,
 #ifdef BATCH_DEBUG
 	aaaa
-		btContact4* hostContacts = new btContact4[nContacts];
+		b3Contact4* hostContacts = new b3Contact4[nContacts];
 		m_contactBuffer->read(hostContacts,nContacts);
 		clFinish(m_queue);
--- a/opencl/gpu_rigidbody/host/b3Solver.h
+++ b/opencl/gpu_rigidbody/host/b3Solver.h
@@ -18,21 +18,21 @@ subject to the following restrictions:
 #define __ADL_SOLVER_H
 #include "../../parallel_primitives/host/btOpenCLArray.h"
-#include "../host/btGpuConstraint4.h"
+#include "../host/b3GpuConstraint4.h"
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
+#include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
+#include "../../gpu_narrowphase/host/b3Contact4.h"
-#include "../host/btGpuConstraint4.h"
+#include "../host/b3GpuConstraint4.h"
 #include "../../parallel_primitives/host/btPrefixScanCL.h"
 #include "../../parallel_primitives/host/btRadixSort32CL.h"
 #include "../../parallel_primitives/host/btBoundSearchCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 /// Rounds num up to the next multiple of alignment; a num that already is a multiple is returned unchanged.
 /// This holds for non-negative num and positive alignment (integer division/modulo are used).
 /// Both arguments are expanded more than once, so pass expressions without side effects.
 #define BTNEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
-class SolverBase
+class b3SolverBase
 {
 	public:
@@ -60,7 +60,7 @@ class SolverBase
 		};
 };
-class Solver : public SolverBase
+class b3Solver : public b3SolverBase
 {
 	public:
@@ -88,7 +88,7 @@ class Solver : public SolverBase
 		class btPrefixScanCL*	m_scan;
 		btOpenCLArray<btSortData>* m_sortDataBuffer;
-		btOpenCLArray<btContact4>* m_contactBuffer2;
+		btOpenCLArray<b3Contact4>* m_contactBuffer2;
 		enum
 		{
@@ -98,23 +98,23 @@ class Solver : public SolverBase
-		Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
+		b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
-		virtual ~Solver();
+		virtual ~b3Solver();
-		void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* inertiaBuf, 
+		void solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* inertiaBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
-		void solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
+		void solveContactConstraintHost(  btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
-		void convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+		void convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 			const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
+			btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
 			int nContacts, const ConstraintCfg& cfg );
-		void	batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
+		void	batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
 };
--- a/opencl/gpu_rigidbody/host/btBroadphaseProxy.h
+++ b/opencl/gpu_rigidbody/host/btBroadphaseProxy.h
@@ -1,270 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_BROADPHASE_PROXY_H
 #define BT_BROADPHASE_PROXY_H
 #include "BulletCommon/btScalar.h" //for SIMD_FORCE_INLINE
 #include "BulletCommon/btVector3.h"
 #include "BulletCommon/btAlignedAllocator.h"
 /// btDispatcher uses these types
 /// IMPORTANT NOTE: the types are ordered polyhedral, implicit convex and concave
 /// to facilitate type checking: btBroadphaseProxy::isPolyhedral/isConvex/isConcave compare a type
 /// against the *_START_HERE / *_END_HERE marker entries, so the relative order of the entries must be kept.
 /// CUSTOM_POLYHEDRAL_SHAPE_TYPE,CUSTOM_CONVEX_SHAPE_TYPE and CUSTOM_CONCAVE_SHAPE_TYPE can be used to extend Bullet without modifying source code
 enum BroadphaseNativeTypes
 {
 	// polyhedral convex shapes (everything listed before IMPLICIT_CONVEX_SHAPES_START_HERE)
 	BOX_SHAPE_PROXYTYPE,
 	TRIANGLE_SHAPE_PROXYTYPE,
 	TETRAHEDRAL_SHAPE_PROXYTYPE,
 	CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE,
 	CONVEX_HULL_SHAPE_PROXYTYPE,
 	CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE,
 	CUSTOM_POLYHEDRAL_SHAPE_TYPE,
 //implicit convex shapes
 //marker entry: isPolyhedral() tests proxyType < IMPLICIT_CONVEX_SHAPES_START_HERE
 IMPLICIT_CONVEX_SHAPES_START_HERE,
 	SPHERE_SHAPE_PROXYTYPE,
 	MULTI_SPHERE_SHAPE_PROXYTYPE,
 	CAPSULE_SHAPE_PROXYTYPE,
 	CONE_SHAPE_PROXYTYPE,
 	CONVEX_SHAPE_PROXYTYPE,
 	CYLINDER_SHAPE_PROXYTYPE,
 	UNIFORM_SCALING_SHAPE_PROXYTYPE,
 	MINKOWSKI_SUM_SHAPE_PROXYTYPE,
 	MINKOWSKI_DIFFERENCE_SHAPE_PROXYTYPE,
 	BOX_2D_SHAPE_PROXYTYPE,
 	CONVEX_2D_SHAPE_PROXYTYPE,
 	CUSTOM_CONVEX_SHAPE_TYPE,
 //concave shapes
 //marker entry: isConvex() tests proxyType < CONCAVE_SHAPES_START_HERE
 CONCAVE_SHAPES_START_HERE,
 	//every convex shape type has to be listed before the marker above (i.e. have a smaller value), for the isConvex check in btBroadphaseProxy
 	TRIANGLE_MESH_SHAPE_PROXYTYPE,
 	SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE,
 	///used for demo integration FAST/Swift collision library and Bullet
 	FAST_CONCAVE_MESH_PROXYTYPE,
 	//terrain
 	TERRAIN_SHAPE_PROXYTYPE,
 ///Used for GIMPACT Trimesh integration
 	GIMPACT_SHAPE_PROXYTYPE,
 ///Multimaterial mesh
     MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE,
 	EMPTY_SHAPE_PROXYTYPE,
 	STATIC_PLANE_PROXYTYPE,
 	CUSTOM_CONCAVE_SHAPE_TYPE,
 //marker entry: isConcave() tests CONCAVE_SHAPES_START_HERE < proxyType < CONCAVE_SHAPES_END_HERE
 CONCAVE_SHAPES_END_HERE,
 	//the remaining types fall outside the polyhedral/convex/concave ranges
 	COMPOUND_SHAPE_PROXYTYPE,
 	SOFTBODY_SHAPE_PROXYTYPE,
 	HFFLUID_SHAPE_PROXYTYPE,
 	HFFLUID_BUOYANT_CONVEX_SHAPE_PROXYTYPE,
 	INVALID_SHAPE_PROXYTYPE,
 	//last entry; its value equals the number of entries listed before it
 	MAX_BROADPHASE_COLLISION_TYPES
 };
 ///The btBroadphaseProxy is the main class that can be used with the Bullet broadphases.
 ///It stores a client object pointer (typically a btCollisionObject or btRigidBody), the collision filter
 ///group and mask, a unique id and an axis-aligned bounding box, and it offers static helpers that
 ///classify BroadphaseNativeTypes values.
 ATTRIBUTE_ALIGNED16(struct) btBroadphaseProxy
 {
 BT_DECLARE_ALIGNED_ALLOCATOR();
 	///optional filtering to cull potential collisions
 	enum CollisionFilterGroups
 	{
 	        DefaultFilter = 1,
 	        StaticFilter = 2,
 	        KinematicFilter = 4,
 	        DebrisFilter = 8,
 			SensorTrigger = 16,
 			CharacterFilter = 32,
 	        AllFilter = -1 //all bits set: every group listed above (CharacterFilter included) plus any other bit
 	};
 	//Usually the client btCollisionObject or Rigidbody class
 	void*	m_clientObject;
 	short int m_collisionFilterGroup;
 	short int m_collisionFilterMask;
 	void*	m_multiSapParentProxy;
 	int			m_uniqueId;//m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.
 	btVector3	m_aabbMin;
 	btVector3	m_aabbMax;
 	///Returns the unique id stored in m_uniqueId.
 	SIMD_FORCE_INLINE int getUid() const
 	{
 		return m_uniqueId;
 	}
 	///Default constructor, used for memory pools.
 	///All pointer and integer members are zeroed so that a pooled proxy never exposes indeterminate
 	///filter bits or an indeterminate id; the AABB vectors are left to btVector3's default constructor.
 	btBroadphaseProxy()
 		:m_clientObject(0),
 		m_collisionFilterGroup(0),
 		m_collisionFilterMask(0),
 		m_multiSapParentProxy(0),
 		m_uniqueId(0)
 	{
 	}
 	///Builds a proxy from its bounding box, client pointer and filter settings.
 	///m_uniqueId starts at 0; whoever owns the proxy is expected to assign the real id afterwards.
 	///Initializers are listed in member declaration order.
 	btBroadphaseProxy(const btVector3& aabbMin,const btVector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask,void* multiSapParentProxy=0)
 		:m_clientObject(userPtr),
 		m_collisionFilterGroup(collisionFilterGroup),
 		m_collisionFilterMask(collisionFilterMask),
 		m_multiSapParentProxy(multiSapParentProxy),
 		m_uniqueId(0),
 		m_aabbMin(aabbMin),
 		m_aabbMax(aabbMax)
 	{
 	}
 	///True for types listed before IMPLICIT_CONVEX_SHAPES_START_HERE.
 	static SIMD_FORCE_INLINE bool isPolyhedral(int proxyType)
 	{
 		return (proxyType  < IMPLICIT_CONVEX_SHAPES_START_HERE);
 	}
 	///True for types listed before CONCAVE_SHAPES_START_HERE (polyhedral and implicit convex types).
 	static SIMD_FORCE_INLINE bool	isConvex(int proxyType)
 	{
 		return (proxyType < CONCAVE_SHAPES_START_HERE);
 	}
 	///True for every concave type except GIMPACT_SHAPE_PROXYTYPE.
 	static SIMD_FORCE_INLINE bool	isNonMoving(int proxyType)
 	{
 		return (isConcave(proxyType) && !(proxyType==GIMPACT_SHAPE_PROXYTYPE));
 	}
 	///True for types strictly between CONCAVE_SHAPES_START_HERE and CONCAVE_SHAPES_END_HERE.
 	static SIMD_FORCE_INLINE bool	isConcave(int proxyType)
 	{
 		return ((proxyType > CONCAVE_SHAPES_START_HERE) &&
 			(proxyType < CONCAVE_SHAPES_END_HERE));
 	}
 	///True only for COMPOUND_SHAPE_PROXYTYPE.
 	static SIMD_FORCE_INLINE bool	isCompound(int proxyType)
 	{
 		return (proxyType == COMPOUND_SHAPE_PROXYTYPE);
 	}
 	///True only for SOFTBODY_SHAPE_PROXYTYPE.
 	static SIMD_FORCE_INLINE bool	isSoftBody(int proxyType)
 	{
 		return (proxyType == SOFTBODY_SHAPE_PROXYTYPE);
 	}
 	///True only for STATIC_PLANE_PROXYTYPE.
 	static SIMD_FORCE_INLINE bool isInfinite(int proxyType)
 	{
 		return (proxyType == STATIC_PLANE_PROXYTYPE);
 	}
 	///True for the two 2D types, BOX_2D_SHAPE_PROXYTYPE and CONVEX_2D_SHAPE_PROXYTYPE.
 	static SIMD_FORCE_INLINE bool isConvex2d(int proxyType)
 	{
 		return (proxyType == BOX_2D_SHAPE_PROXYTYPE) ||	(proxyType == CONVEX_2D_SHAPE_PROXYTYPE);
 	}
 }
 ;
 class btCollisionAlgorithm;
 struct btBroadphaseProxy;
 ///The btBroadphasePair class contains a pair of aabb-overlapping objects.
 ///A btDispatcher can search a btCollisionAlgorithm that performs exact/narrowphase collision detection on the actual collision shapes.
 ATTRIBUTE_ALIGNED16(struct) btBroadphasePair
 {
 	///Creates an empty pair: both proxies, the algorithm and the internal info are null.
 	btBroadphasePair ()
 		: m_pProxy0(0), m_pProxy1(0), m_algorithm(0), m_internalInfo1(0)
 	{
 	}
 BT_DECLARE_ALIGNED_ALLOCATOR();
 	///Member-wise copy of the proxies, the algorithm and the internal info.
 	btBroadphasePair(const btBroadphasePair& other)
 		: m_pProxy0(other.m_pProxy0), m_pProxy1(other.m_pProxy1),
 		  m_algorithm(other.m_algorithm), m_internalInfo1(other.m_internalInfo1)
 	{
 	}
 	///Pairs two proxies. The proxy with the strictly smaller m_uniqueId is stored first
 	///(on a tie proxy1 goes first), which keeps pairs in a canonical order for set-style operations.
 	btBroadphasePair(btBroadphaseProxy& proxy0,btBroadphaseProxy& proxy1)
 	{
 		const bool alreadyOrdered = proxy0.m_uniqueId < proxy1.m_uniqueId;
 		m_pProxy0 = alreadyOrdered ? &proxy0 : &proxy1;
 		m_pProxy1 = alreadyOrdered ? &proxy1 : &proxy0;
 		m_algorithm = 0;
 		m_internalInfo1 = 0;
 	}
 	btBroadphaseProxy* m_pProxy0;
 	btBroadphaseProxy* m_pProxy1;
 	///mutable, so it can be assigned through a const pair
 	mutable btCollisionAlgorithm* m_algorithm;
 	union { void* m_internalInfo1; int m_internalTmpValue;};//don't use this data, it will be removed in future version.
 };
 /*
 //comparison for set operation, see Solid DT_Encounter
 SIMD_FORCE_INLINE bool operator<(const btBroadphasePair& a, const btBroadphasePair& b) 
 { 
    return a.m_pProxy0 < b.m_pProxy0 || 
        (a.m_pProxy0 == b.m_pProxy0 && a.m_pProxy1 < b.m_pProxy1); 
 }
 */
 ///Ordering functor for btBroadphasePair: pairs are compared by the unique id of their first proxy,
 ///then of their second proxy, then by algorithm pointer, each in descending order.
 ///A null proxy counts as unique id -1.
 class btBroadphasePairSortPredicate
 {
 	public:
 		bool operator() ( const btBroadphasePair& a, const btBroadphasePair& b ) const
 		{
 			const int firstIdA = a.m_pProxy0 ? a.m_pProxy0->m_uniqueId : -1;
 			const int firstIdB = b.m_pProxy0 ? b.m_pProxy0->m_uniqueId : -1;
 			if (firstIdA > firstIdB)
 			{
 				return true;
 			}
 			//the remaining criteria only apply when both pairs share the same first proxy
 			if (a.m_pProxy0 != b.m_pProxy0)
 			{
 				return false;
 			}
 			const int secondIdA = a.m_pProxy1 ? a.m_pProxy1->m_uniqueId : -1;
 			const int secondIdB = b.m_pProxy1 ? b.m_pProxy1->m_uniqueId : -1;
 			if (secondIdA > secondIdB)
 			{
 				return true;
 			}
 			//the algorithm pointer is the final tie-breaker, used only for identical proxy pairs
 			if (a.m_pProxy1 != b.m_pProxy1)
 			{
 				return false;
 			}
 			return a.m_algorithm > b.m_algorithm;
 		}
 };
 ///Two pairs are equal when they reference the same two proxies in the same slots; m_algorithm is not compared.
 SIMD_FORCE_INLINE bool operator==(const btBroadphasePair& a, const btBroadphasePair& b) 
 {
 	const bool sameFirstProxy = (a.m_pProxy0 == b.m_pProxy0);
 	const bool sameSecondProxy = (a.m_pProxy1 == b.m_pProxy1);
 	return sameFirstProxy && sameSecondProxy;
 }
 #endif //BT_BROADPHASE_PROXY_H
--- a/opencl/gpu_rigidbody/host/btCollisionObject.h
+++ b/opencl/gpu_rigidbody/host/btCollisionObject.h
@@ -1,534 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_COLLISION_OBJECT_H
 #define BT_COLLISION_OBJECT_H
 #include "BulletCommon/btTransform.h"
 //island management, m_activationState1
 #define ACTIVE_TAG 1
 #define ISLAND_SLEEPING 2
 #define WANTS_DEACTIVATION 3
 #define DISABLE_DEACTIVATION 4
 #define DISABLE_SIMULATION 5
 struct	btBroadphaseProxy;
 class	btCollisionShape;
 struct btCollisionShapeData;
 #include "BulletCommon/btMotionState.h"
 #include "BulletCommon/btAlignedAllocator.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 typedef btAlignedObjectArray<class btCollisionObject*> btCollisionObjectArray;
 #ifdef BT_USE_DOUBLE_PRECISION
 #define btCollisionObjectData btCollisionObjectDoubleData
 #define btCollisionObjectDataName "btCollisionObjectDoubleData"
 #else
 #define btCollisionObjectData btCollisionObjectFloatData
 #define btCollisionObjectDataName "btCollisionObjectFloatData"
 #endif
 /// btCollisionObject can be used to manage collision detection objects. 
 /// btCollisionObject maintains all information that is needed for a collision detection: Shape, Transform and AABB proxy.
 /// They can be added to the btCollisionWorld.
 /// Most of the public interface consists of plain accessors over the protected members declared below.
 ATTRIBUTE_ALIGNED16(class)	btCollisionObject
 {
 protected:
 	btTransform	m_worldTransform;
 	///m_interpolationWorldTransform is used for CCD and interpolation
 	///it can be either previous or future (predicted) transform
 	btTransform	m_interpolationWorldTransform;
 	//those two are experimental: just added for bullet time effect, so you can still apply impulses (directly modifying velocities) 
 	//without destroying the continuous interpolated motion (which uses this interpolation velocities)
 	btVector3	m_interpolationLinearVelocity;
 	btVector3	m_interpolationAngularVelocity;
 	btVector3	m_anisotropicFriction;
 	///holds the AnisotropicFrictionFlags value stored by setAnisotropicFriction (0 when disabled)
 	int			m_hasAnisotropicFriction;
 	btScalar	m_contactProcessingThreshold;	
 	btBroadphaseProxy*		m_broadphaseHandle;
 	btCollisionShape*		m_collisionShape;
 	///m_extensionPointer is used by some internal low-level Bullet extensions.
 	void*					m_extensionPointer;
 	///m_rootCollisionShape is temporarily used to store the original collision shape
 	///The m_collisionShape might be temporarily replaced by a child collision shape during collision detection purposes
 	///If it is NULL, the m_collisionShape is not temporarily replaced.
 	btCollisionShape*		m_rootCollisionShape;
 	///bitwise combination of CollisionFlags values
 	int				m_collisionFlags;
 	int				m_islandTag1;
 	int				m_companionId;
 	///one of the activation state macros (ACTIVE_TAG, ISLAND_SLEEPING, ...); mutable so const methods can change it
 	mutable int				m_activationState1;
 	mutable btScalar			m_deactivationTime;
 	btScalar		m_friction;
 	btScalar		m_restitution;
 	btScalar		m_rollingFriction;
 	///m_internalType is reserved to distinguish Bullet's btCollisionObject, btRigidBody, btSoftBody, btGhostObject etc.
 	///do not assign your own m_internalType unless you write a new dynamics object class.
 	int				m_internalType;
 	///users can point to their objects, m_userPointer is not used by Bullet, see setUserPointer/getUserPointer
 	void*			m_userObjectPointer;
 	///time of impact calculation
 	btScalar		m_hitFraction; 
 	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
 	btScalar		m_ccdSweptSphereRadius;
 	/// Don't do continuous collision detection if the motion (in one step) is less than m_ccdMotionThreshold
 	btScalar		m_ccdMotionThreshold;
 	/// If some object should have elaborate collision filtering by sub-classes
 	int			m_checkCollideWith;
 	///Hook consulted by checkCollideWith when m_checkCollideWith is non-zero; this base version accepts every object.
 	virtual bool	checkCollideWithOverride(const btCollisionObject* /* co */) const
 	{
 		return true;
 	}
 public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 	///bit flags stored in m_collisionFlags
 	enum CollisionFlags
 	{
 		CF_STATIC_OBJECT= 1,
 		CF_KINEMATIC_OBJECT= 2,
 		CF_NO_CONTACT_RESPONSE = 4,
 		CF_CUSTOM_MATERIAL_CALLBACK = 8,//this allows per-triangle material (friction/restitution)
 		CF_CHARACTER_OBJECT = 16,
 		CF_DISABLE_VISUALIZE_OBJECT = 32, //disable debug drawing
 		CF_DISABLE_SPU_COLLISION_PROCESSING = 64//disable parallel/SPU processing
 	};
 	enum	CollisionObjectTypes
 	{
 		CO_COLLISION_OBJECT =1,
 		CO_RIGID_BODY=2,
 		///CO_GHOST_OBJECT keeps track of all objects overlapping its AABB and that pass its collision filter
 		///It is useful for collision sensors, explosion objects, character controller etc.
 		CO_GHOST_OBJECT=4,
 		CO_SOFT_BODY=8,
 		CO_HF_FLUID=16,
 		CO_USER_TYPE=32
 	};
 	///values accepted as frictionMode by setAnisotropicFriction/hasAnisotropicFriction
 	enum AnisotropicFrictionFlags
 	{
 		CF_ANISOTROPIC_FRICTION_DISABLED=0,
 		CF_ANISOTROPIC_FRICTION = 1,
 		CF_ANISOTROPIC_ROLLING_FRICTION = 2
 	};
 	SIMD_FORCE_INLINE bool mergesSimulationIslands() const
 	{
 		///static objects, kinematic and object without contact response don't merge islands
 		return  ((m_collisionFlags & (CF_STATIC_OBJECT | CF_KINEMATIC_OBJECT | CF_NO_CONTACT_RESPONSE) )==0);
 	}
 	const btVector3& getAnisotropicFriction() const
 	{
 		return m_anisotropicFriction;
 	}
 	///Stores the per-axis friction factors and records frictionMode in m_hasAnisotropicFriction,
 	///or 0 when all of the first three components are exactly 1.
 	void	setAnisotropicFriction(const btVector3& anisotropicFriction, int frictionMode = CF_ANISOTROPIC_FRICTION)
 	{
 		m_anisotropicFriction = anisotropicFriction;
 		//NOTE: despite its name, isUnity is true when at least one component differs from 1, i.e. when the friction is NOT unity
 		bool isUnity = (anisotropicFriction[0]!=1.f) || (anisotropicFriction[1]!=1.f) || (anisotropicFriction[2]!=1.f);
 		m_hasAnisotropicFriction = isUnity?frictionMode : 0;
 	}
 	///True when any bit of frictionMode is set in the stored anisotropic friction flags.
 	bool	hasAnisotropicFriction(int frictionMode = CF_ANISOTROPIC_FRICTION) const
 	{
 		return (m_hasAnisotropicFriction&frictionMode)!=0;
 	}
 	///the constraint solver can discard solving contacts, if the distance is above this threshold. 0 by default.
 	///Note that using contacts with positive distance can improve stability. It increases, however, the chance of colliding with degenerate contacts, such as 'interior' triangle edges
 	void	setContactProcessingThreshold( btScalar contactProcessingThreshold)
 	{
 		m_contactProcessingThreshold = contactProcessingThreshold;
 	}
 	btScalar	getContactProcessingThreshold() const
 	{
 		return m_contactProcessingThreshold;
 	}
 	///True when CF_STATIC_OBJECT is set in the collision flags.
 	SIMD_FORCE_INLINE bool		isStaticObject() const {
 		return (m_collisionFlags & CF_STATIC_OBJECT) != 0;
 	}
 	///True when CF_KINEMATIC_OBJECT is set in the collision flags.
 	SIMD_FORCE_INLINE bool		isKinematicObject() const
 	{
 		return (m_collisionFlags & CF_KINEMATIC_OBJECT) != 0;
 	}
 	///True when either CF_KINEMATIC_OBJECT or CF_STATIC_OBJECT is set.
 	SIMD_FORCE_INLINE bool		isStaticOrKinematicObject() const
 	{
 		return (m_collisionFlags & (CF_KINEMATIC_OBJECT | CF_STATIC_OBJECT)) != 0 ;
 	}
 	///True unless CF_NO_CONTACT_RESPONSE is set.
 	SIMD_FORCE_INLINE bool		hasContactResponse() const {
 		return (m_collisionFlags & CF_NO_CONTACT_RESPONSE)==0;
 	}
 	btCollisionObject();
 	virtual ~btCollisionObject();
 	///Sets both the current and the root collision shape to collisionShape.
 	virtual void	setCollisionShape(btCollisionShape* collisionShape)
 	{
 		m_collisionShape = collisionShape;
 		m_rootCollisionShape = collisionShape;
 	}
 	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const
 	{
 		return m_collisionShape;
 	}
 	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape()
 	{
 		return m_collisionShape;
 	}
 	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions. 
 	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
 	void*		internalGetExtensionPointer() const
 	{
 		return m_extensionPointer;
 	}
 	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions
 	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
 	void	internalSetExtensionPointer(void* pointer)
 	{
 		m_extensionPointer = pointer;
 	}
 	SIMD_FORCE_INLINE	int	getActivationState() const { return m_activationState1;}
 	//the following activation-state methods are only declared here; their definitions are not part of this header
 	void setActivationState(int newState) const;
 	void	setDeactivationTime(btScalar time)
 	{
 		m_deactivationTime = time;
 	}
 	btScalar	getDeactivationTime() const
 	{
 		return m_deactivationTime;
 	}
 	void forceActivationState(int newState) const;
 	void	activate(bool forceActivation = false) const;
 	///True unless the activation state is ISLAND_SLEEPING or DISABLE_SIMULATION.
 	SIMD_FORCE_INLINE bool isActive() const
 	{
 		return ((getActivationState() != ISLAND_SLEEPING) && (getActivationState() != DISABLE_SIMULATION));
 	}
 	void	setRestitution(btScalar rest)
 	{
 		m_restitution = rest;
 	}
 	btScalar	getRestitution() const
 	{
 		return m_restitution;
 	}
 	void	setFriction(btScalar frict)
 	{
 		m_friction = frict;
 	}
 	btScalar	getFriction() const
 	{
 		return m_friction;
 	}
 	void	setRollingFriction(btScalar frict)
 	{
 		m_rollingFriction = frict;
 	}
 	btScalar	getRollingFriction() const
 	{
 		return m_rollingFriction;
 	}
 	///reserved for Bullet internal usage
 	int	getInternalType() const
 	{
 		return m_internalType;
 	}
 	btTransform&	getWorldTransform()
 	{
 		return m_worldTransform;
 	}
 	const btTransform&	getWorldTransform() const
 	{
 		return m_worldTransform;
 	}
 	void	setWorldTransform(const btTransform& worldTrans)
 	{
 		m_worldTransform = worldTrans;
 	}
 	SIMD_FORCE_INLINE btBroadphaseProxy*	getBroadphaseHandle()
 	{
 		return m_broadphaseHandle;
 	}
 	SIMD_FORCE_INLINE const btBroadphaseProxy*	getBroadphaseHandle() const
 	{
 		return m_broadphaseHandle;
 	}
 	void	setBroadphaseHandle(btBroadphaseProxy* handle)
 	{
 		m_broadphaseHandle = handle;
 	}
 	const btTransform&	getInterpolationWorldTransform() const
 	{
 		return m_interpolationWorldTransform;
 	}
 	btTransform&	getInterpolationWorldTransform()
 	{
 		return m_interpolationWorldTransform;
 	}
 	void	setInterpolationWorldTransform(const btTransform&	trans)
 	{
 		m_interpolationWorldTransform = trans;
 	}
 	void	setInterpolationLinearVelocity(const btVector3& linvel)
 	{
 		m_interpolationLinearVelocity = linvel;
 	}
 	void	setInterpolationAngularVelocity(const btVector3& angvel)
 	{
 		m_interpolationAngularVelocity = angvel;
 	}
 	const btVector3&	getInterpolationLinearVelocity() const
 	{
 		return m_interpolationLinearVelocity;
 	}
 	const btVector3&	getInterpolationAngularVelocity() const
 	{
 		return m_interpolationAngularVelocity;
 	}
 	SIMD_FORCE_INLINE int getIslandTag() const
 	{
 		return	m_islandTag1;
 	}
 	void	setIslandTag(int tag)
 	{
 		m_islandTag1 = tag;
 	}
 	SIMD_FORCE_INLINE int getCompanionId() const
 	{
 		return	m_companionId;
 	}
 	void	setCompanionId(int id)
 	{
 		m_companionId = id;
 	}
 	SIMD_FORCE_INLINE btScalar			getHitFraction() const
 	{
 		return m_hitFraction; 
 	}
 	void	setHitFraction(btScalar hitFraction)
 	{
 		m_hitFraction = hitFraction;
 	}
 	SIMD_FORCE_INLINE int	getCollisionFlags() const
 	{
 		return m_collisionFlags;
 	}
 	///Replaces the whole flag word; combine with getCollisionFlags() to change individual bits.
 	void	setCollisionFlags(int flags)
 	{
 		m_collisionFlags = flags;
 	}
 	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
 	btScalar			getCcdSweptSphereRadius() const
 	{
 		return m_ccdSweptSphereRadius;
 	}
 	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
 	void	setCcdSweptSphereRadius(btScalar radius)
 	{
 		m_ccdSweptSphereRadius = radius;
 	}
 	btScalar 	getCcdMotionThreshold() const
 	{
 		return m_ccdMotionThreshold;
 	}
 	///Returns the motion threshold squared.
 	btScalar 	getCcdSquareMotionThreshold() const
 	{
 		return m_ccdMotionThreshold*m_ccdMotionThreshold;
 	}
 	/// Don't do continuous collision detection if the motion (in one step) is less than m_ccdMotionThreshold
 	void	setCcdMotionThreshold(btScalar ccdMotionThreshold)
 	{
 		m_ccdMotionThreshold = ccdMotionThreshold;
 	}
 	///users can point to their objects, userPointer is not used by Bullet
 	void*	getUserPointer() const
 	{
 		return m_userObjectPointer;
 	}
 	///users can point to their objects, userPointer is not used by Bullet
 	void	setUserPointer(void* userPointer)
 	{
 		m_userObjectPointer = userPointer;
 	}
 	///Returns the result of checkCollideWithOverride(co) when m_checkCollideWith is non-zero, and true otherwise.
 	inline bool checkCollideWith(const btCollisionObject* co) const
 	{
 		if (m_checkCollideWith)
 			return checkCollideWithOverride(co);
 		return true;
 	}
 	virtual	int	calculateSerializeBufferSize()	const;
 	///fills the dataBuffer and returns the struct name (and 0 on failure)
 	virtual	const char*	serialize(void* dataBuffer, class btSerializer* serializer) const;
 	virtual void serializeSingleObject(class btSerializer* serializer) const;
 };
 ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
 ///Double-precision serialization record; btCollisionObjectData expands to this struct when BT_USE_DOUBLE_PRECISION is defined.
 ///Apart from m_name and m_padding, the field names match members of btCollisionObject.
 struct	btCollisionObjectDoubleData
 {
 	void					*m_broadphaseHandle;
 	void					*m_collisionShape;
 	btCollisionShapeData	*m_rootCollisionShape;
 	char					*m_name;
 	btTransformDoubleData	m_worldTransform;
 	btTransformDoubleData	m_interpolationWorldTransform;
 	btVector3DoubleData		m_interpolationLinearVelocity;
 	btVector3DoubleData		m_interpolationAngularVelocity;
 	btVector3DoubleData		m_anisotropicFriction;
 	double					m_contactProcessingThreshold;	
 	double					m_deactivationTime;
 	double					m_friction;
 	double					m_rollingFriction;
 	double					m_restitution;
 	double					m_hitFraction; 
 	double					m_ccdSweptSphereRadius;
 	double					m_ccdMotionThreshold;
 	int						m_hasAnisotropicFriction;
 	int						m_collisionFlags;
 	int						m_islandTag1;
 	int						m_companionId;
 	int						m_activationState1;
 	int						m_internalType;
 	int						m_checkCollideWith;
 	//explicit trailing padding after the seven int fields; presumably keeps the struct size a multiple of 8 bytes - confirm before touching
 	char	m_padding[4];
 };
 ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
 ///Single-precision serialization record; btCollisionObjectData expands to this struct when BT_USE_DOUBLE_PRECISION is not defined.
 ///Apart from m_name and m_padding, the field names match members of btCollisionObject.
 struct	btCollisionObjectFloatData
 {
 	void					*m_broadphaseHandle;
 	void					*m_collisionShape;
 	btCollisionShapeData	*m_rootCollisionShape;
 	char					*m_name;
 	btTransformFloatData	m_worldTransform;
 	btTransformFloatData	m_interpolationWorldTransform;
 	btVector3FloatData		m_interpolationLinearVelocity;
 	btVector3FloatData		m_interpolationAngularVelocity;
 	btVector3FloatData		m_anisotropicFriction;
 	float					m_contactProcessingThreshold;	
 	float					m_deactivationTime;
 	float					m_friction;
 	float					m_rollingFriction;
 	float					m_restitution;
 	float					m_hitFraction; 
 	float					m_ccdSweptSphereRadius;
 	float					m_ccdMotionThreshold;
 	int						m_hasAnisotropicFriction;
 	int						m_collisionFlags;
 	int						m_islandTag1;
 	int						m_companionId;
 	int						m_activationState1;
 	int						m_internalType;
 	int						m_checkCollideWith;
 	//explicit trailing padding after the int fields; presumably there for alignment of the serialized layout - confirm before touching
 	char					m_padding[4];
 };
 ///Returns the size in bytes of btCollisionObjectData, i.e. of the float or double serialization struct
 ///selected by BT_USE_DOUBLE_PRECISION.
 SIMD_FORCE_INLINE	int	btCollisionObject::calculateSerializeBufferSize() const
 {
 	const int serializedSize = sizeof(btCollisionObjectData);
 	return serializedSize;
 }
 #endif //BT_COLLISION_OBJECT_H
--- a/opencl/gpu_rigidbody/host/btConstraintSolver.h
+++ b/opencl/gpu_rigidbody/host/btConstraintSolver.h
@@ -1,52 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_CONSTRAINT_SOLVER_H
 #define BT_CONSTRAINT_SOLVER_H
 #include "BulletCommon/btScalar.h"
 class btPersistentManifold;
 class btRigidBody;
 class btCollisionObject;
 class btTypedConstraint;
 struct btContactSolverInfo;
 struct btBroadphaseProxy;
 class btIDebugDraw;
 class btStackAlloc;
 class	btDispatcher;
 /// btConstraintSolver provides solver interface
 /// Implementations must supply solveGroup() and reset(); the two remaining hooks default to doing nothing.
 class btConstraintSolver
 {
 public:
 	virtual ~btConstraintSolver()
 	{
 	}
 	///optional hook, empty by default
 	virtual void prepareSolve(int /* numBodies */, int /* numManifolds */)
 	{
 	}
 	///solve a group of constraints
 	virtual btScalar solveGroup(
 		btCollisionObject** bodies, int numBodies,
 		btPersistentManifold** manifold, int numManifolds,
 		btTypedConstraint** constraints, int numConstraints,
 		const btContactSolverInfo& info,
 		class btIDebugDraw* debugDrawer,
 		btStackAlloc* stackAlloc,
 		btDispatcher* dispatcher) = 0;
 	///optional hook, empty by default
 	virtual void allSolved(
 		const btContactSolverInfo& /* info */,
 		class btIDebugDraw* /* debugDrawer */,
 		btStackAlloc* /* stackAlloc */)
 	{
 	}
 	///clear internal cached data and reset random seed
 	virtual	void	reset() = 0;
 };
 #endif //BT_CONSTRAINT_SOLVER_H
--- a/opencl/gpu_rigidbody/host/btContactSolverInfo.h
+++ b/opencl/gpu_rigidbody/host/btContactSolverInfo.h
@@ -1,159 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_CONTACT_SOLVER_INFO
 #define BT_CONTACT_SOLVER_INFO
 #include "BulletCommon/btScalar.h"
 /// Bit flags that are OR-ed together into btContactSolverInfoData::m_solverMode.
 /// Each enumerator occupies a single bit; bit 3 (value 8) is not assigned here.
 enum btSolverMode
 {
 	SOLVER_RANDMIZE_ORDER = 1 << 0,
 	SOLVER_FRICTION_SEPARATE = 1 << 1,
 	SOLVER_USE_WARMSTARTING = 1 << 2,
 	SOLVER_USE_2_FRICTION_DIRECTIONS = 1 << 4,
 	SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 1 << 5,
 	SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 1 << 6,
 	SOLVER_CACHE_FRIENDLY = 1 << 7,
 	SOLVER_SIMD = 1 << 8,
 	SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 1 << 9,
 	SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1 << 10
 };
 /// Plain data holder for the global solver settings.
 /// This struct has no constructor of its own; the derived btContactSolverInfo
 /// constructor assigns a default to every field declared here.
 struct btContactSolverInfoData
 {
 	btScalar	m_tau;
 	btScalar	m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
 	btScalar	m_friction;
 	btScalar	m_timeStep;
 	btScalar	m_restitution;
 	int		m_numIterations;
 	btScalar	m_maxErrorReduction;
 	btScalar	m_sor;
 	btScalar	m_erp;//used as Baumgarte factor
 	btScalar	m_erp2;//used in Split Impulse
 	btScalar	m_globalCfm;//constraint force mixing
 	//stored as an int; the btContactSolverInfo constructor assigns it 'true'
 	int			m_splitImpulse;
 	btScalar	m_splitImpulsePenetrationThreshold;
 	btScalar	m_splitImpulseTurnErp;
 	btScalar	m_linearSlop;
 	btScalar	m_warmstartingFactor;
 	//bitwise OR of btSolverMode flags
 	int			m_solverMode;
 	//marked "unused as of 2.81" where the default is assigned
 	int	m_restingContactRestitutionThreshold;
 	//try to combine islands until the amount of constraints reaches this limit
 	int			m_minimumSolverBatchSize;
 	//only used to clamp forces for bodies that have their BT_ENABLE_GYROPSCOPIC_FORCE flag set
 	btScalar	m_maxGyroscopicForce;
 	//if the velocity is above this threshold, a single constraint row (axis) is used, otherwise 3 rows
 	btScalar	m_singleAxisRollingFrictionThreshold;
 };
 /// btContactSolverInfo is btContactSolverInfoData plus a constructor that
 /// assigns a default value to every inherited field.
 struct btContactSolverInfo : public btContactSolverInfoData
 {
 	/// Assigns the defaults, grouped in the order the fields are declared.
 	btContactSolverInfo()
 	{
 		// general parameters
 		m_tau = btScalar(0.6);
 		m_damping = btScalar(1.0);
 		m_friction = btScalar(0.3);
 		m_timeStep = btScalar(1.f/60.f);
 		m_restitution = btScalar(0.);
 		// iteration count and error correction
 		m_numIterations = 10;
 		m_maxErrorReduction = btScalar(20.);
 		m_sor = btScalar(1.);
 		m_erp = btScalar(0.2);
 		m_erp2 = btScalar(0.8);
 		m_globalCfm = btScalar(0.);
 		// split impulse settings (the flag is an int field, enabled by default)
 		m_splitImpulse = true;
 		m_splitImpulsePenetrationThreshold = -.04f;
 		m_splitImpulseTurnErp = 0.1f;
 		m_linearSlop = btScalar(0.0);
 		m_warmstartingFactor=btScalar(0.85);
 		// Default mode: warmstarting and SIMD only. Not enabled by default:
 		// SOLVER_RANDMIZE_ORDER, SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION,
 		// SOLVER_USE_2_FRICTION_DIRECTIONS, SOLVER_ENABLE_FRICTION_DIRECTION_CACHING.
 		m_solverMode = SOLVER_USE_WARMSTARTING | SOLVER_SIMD;
 		// unused as of 2.81
 		m_restingContactRestitutionThreshold = 2;
 		// try to combine islands until the amount of constraints reaches this limit
 		m_minimumSolverBatchSize = 128;
 		// only used to clamp forces for bodies that have their BT_ENABLE_GYROPSCOPIC_FORCE flag set (using btRigidBody::setFlag)
 		m_maxGyroscopicForce = 100.f;
 		// if the velocity is above this threshold, a single constraint row (axis) is used, otherwise 3 rows
 		m_singleAxisRollingFrictionThreshold = 1e30f;
 	}
 };
 ///Serialization structure: do not change it, any change requires an updated sBulletDNAstr/sBulletDNAstr64.
 ///Double-precision layout carrying the same field names as btContactSolverInfoData,
 ///ordered with all floating-point members first, then the int members, then explicit padding.
 struct btContactSolverInfoDoubleData
 {
 	double		m_tau;
 	double		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
 	double		m_friction;
 	double		m_timeStep;
 	double		m_restitution;
 	double		m_maxErrorReduction;
 	double		m_sor;
 	double		m_erp;//used as Baumgarte factor
 	double		m_erp2;//used in Split Impulse
 	double		m_globalCfm;//constraint force mixing
 	double		m_splitImpulsePenetrationThreshold;
 	double		m_splitImpulseTurnErp;
 	double		m_linearSlop;
 	double		m_warmstartingFactor;
 	double		m_maxGyroscopicForce;
 	double		m_singleAxisRollingFrictionThreshold;
 	int			m_numIterations;
 	int			m_solverMode;
 	int			m_restingContactRestitutionThreshold;
 	int			m_minimumSolverBatchSize;
 	int			m_splitImpulse;
 	//explicit trailing bytes; presumably rounds the five ints up to a multiple of 8 bytes - TODO confirm
 	char		m_padding[4];
 };
 ///Serialization structure: do not change it, any change requires an updated sBulletDNAstr/sBulletDNAstr64.
 ///Single-precision layout carrying the same field names as btContactSolverInfoData,
 ///ordered with all floating-point members first, then the int members, then explicit padding.
 struct btContactSolverInfoFloatData
 {
 	float		m_tau;
 	float		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
 	float		m_friction;
 	float		m_timeStep;
 	float		m_restitution;
 	float		m_maxErrorReduction;
 	float		m_sor;
 	float		m_erp;//used as Baumgarte factor
 	float		m_erp2;//used in Split Impulse
 	float		m_globalCfm;//constraint force mixing
 	float		m_splitImpulsePenetrationThreshold;
 	float		m_splitImpulseTurnErp;
 	float		m_linearSlop;
 	float		m_warmstartingFactor;
 	float		m_maxGyroscopicForce;
 	float		m_singleAxisRollingFrictionThreshold;
 	int			m_numIterations;
 	int			m_solverMode;
 	int			m_restingContactRestitutionThreshold;
 	int			m_minimumSolverBatchSize;
 	int			m_splitImpulse;
 	//explicit trailing bytes; presumably keeps the struct size a multiple of 8 bytes - TODO confirm
 	char		m_padding[4];
 };
 #endif //BT_CONTACT_SOLVER_INFO
--- a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.h
+++ b/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.h
@@ -1,40 +0,0 @@
 #ifndef BT_GPU_BATCHING_PGS_SOLVER_H
 #define BT_GPU_BATCHING_PGS_SOLVER_H
 #include "../../basic_initialize/btOpenCLInclude.h"
 #include "../../parallel_primitives/host/btOpenCLArray.h"
 #include "../../gpu_narrowphase/host/btRigidBodyCL.h"
 #include "../../gpu_narrowphase/host/btContact4.h"
 #include "btGpuConstraint4.h"
 /// Declaration of the batching PGS contact solver that works on OpenCL buffers.
 /// Only the interface is visible here; all state lives behind the m_data pointer.
 class btGpuBatchingPgsSolver
 {
 protected:
 	//opaque implementation data; the struct is only forward-declared in this header
 	struct btGpuBatchingPgsSolverInternalData*		m_data;
 	//takes the contact array, its element count, two unsigned-int arrays (n, offsets) and a static body index
 	void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
 	//three variants with identical signatures operating on a host-side contact array; each returns an int
 	//NOTE(review): presumably the returned int is the number of batches - confirm against the implementation
 	inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	inline int sortConstraintByBatch2( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	inline int sortConstraintByBatch3( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
 	//takes body and inertia buffers, the constraint buffer, an untyped extra-data pointer, and count/batch/iteration limits
 	void solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
 			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
 public:
 	//constructed from an OpenCL context, device and command queue plus a pair capacity
 	btGpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity);
 	virtual ~btGpuBatchingPgsSolver();
 	//public entry point: bodies, inertias and contacts are passed as raw cl_mem buffers with their counts
 	void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct btConfig& config);
 };
 #endif //BT_GPU_BATCHING_PGS_SOLVER_H
--- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp
+++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp
--- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h
+++ b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h
@@ -1,53 +0,0 @@
 #ifndef BT_GPU_JACOBI_SOLVER_H
 #define BT_GPU_JACOBI_SOLVER_H
 #include "../../basic_initialize/btOpenCLUtils.h"
 #include "../../gpu_narrowphase/host/btRigidBodyCL.h"
 #include "../../gpu_narrowphase/host/btContact4.h"
 #include "../../parallel_primitives/host/btOpenCLArray.h"
 class btTypedConstraint;
 /// Settings passed to the btGpuJacobiSolver solveGroup* methods.
 /// The constructor assigns a default to every field.
 struct btJacobiSolverInfo
 {
 	int m_fixedBodyIndex;
 	float m_deltaTime;
 	float m_positionDrift;
 	float m_positionConstraintCoeff;
 	int	m_numIterations;
 	/// Defaults: fixed body index 0, a 1/60 time step, drift 0.005, coefficient 0.99, 14 iterations.
 	btJacobiSolverInfo()
 		: m_fixedBodyIndex(0)
 		, m_deltaTime(1./60.f)
 		, m_positionDrift( 0.005f )
 		, m_positionConstraintCoeff( 0.99f )
 		, m_numIterations(14)
 	{
 	}
 };
 /// Declaration of the Jacobi contact solver with host, OpenCL-array and "mixed" entry points.
 /// Only the interface is visible here; implementation state lives behind m_data.
 class btGpuJacobiSolver
 {
 protected:
 	//opaque implementation data; the struct is only forward-declared in this header
 	struct btGpuJacobiSolverInternalData* m_data;
 	//OpenCL handles; presumably the ones passed to the constructor - confirm in the .cpp
 	cl_context m_context;
 	cl_device_id m_device;
 	cl_command_queue m_queue;
 public:
 	btGpuJacobiSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
 	virtual ~btGpuJacobiSolver();
 	//variant taking raw host pointers for bodies, inertias, contacts and constraints
 	void  solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo);
 	//variant taking btOpenCLArray buffers for bodies, inertias and contacts
 	void  solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo);
 	//same signature as solveGroup; NOTE(review): how it differs is not visible from this header
 	void  solveGroupMixed(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo);
 };
 #endif //BT_GPU_JACOBI_SOLVER_H
--- a/opencl/gpu_rigidbody/host/btJacobianEntry.h
+++ b/opencl/gpu_rigidbody/host/btJacobianEntry.h
@@ -1,155 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_JACOBIAN_ENTRY_H
 #define BT_JACOBIAN_ENTRY_H
 #include "BulletCommon/btMatrix3x3.h"
 //notes:
 // Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
 // which makes the btJacobianEntry memory layout 16 bytes
 // if you only are interested in angular part, just feed massInvA and massInvB zero
 /// Jacobian entry is an abstraction that allows describing constraints.
 /// It can be used in combination with a constraint solver, and
 /// can be used to relate the effect of an impulse to the constraint error.
 /// Every non-default constructor fills m_aJ, m_bJ, m_0MinvJt, m_1MinvJt and the
 /// diagonal term m_Adiag, and asserts that m_Adiag is strictly positive.
 ATTRIBUTE_ALIGNED16(class) btJacobianEntry
 {
 public:
 	/// Default constructor; assigns no member.
 	btJacobianEntry() {}
 	/// Constraint between two different rigidbodies.
 	/// m_aJ / m_bJ are world2A / world2B applied to the cross product of the
 	/// relative position with the joint axis (negated for body B).
 	/// m_Adiag adds both inverse masses to the two angular terms.
 	btJacobianEntry(
 		const btMatrix3x3& world2A,
 		const btMatrix3x3& world2B,
 		const btVector3& rel_pos1,const btVector3& rel_pos2,
 		const btVector3& jointAxis,
 		const btVector3& inertiaInvA, 
 		const btScalar massInvA,
 		const btVector3& inertiaInvB,
 		const btScalar massInvB)
 		:m_linearJointAxis(jointAxis)
 	{
 		m_aJ = world2A*(rel_pos1.cross(m_linearJointAxis));
 		m_bJ = world2B*(rel_pos2.cross(-m_linearJointAxis));
 		m_0MinvJt	= inertiaInvA * m_aJ;
 		m_1MinvJt = inertiaInvB * m_bJ;
 		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);
 		btAssert(m_Adiag > btScalar(0.0));
 	}
 	/// Angular constraint between two different rigidbodies.
 	/// The joint axis is transformed by world2A / world2B; the linear axis is set to zero
 	/// and no mass term enters m_Adiag.
 	btJacobianEntry(const btVector3& jointAxis,
 		const btMatrix3x3& world2A,
 		const btMatrix3x3& world2B,
 		const btVector3& inertiaInvA,
 		const btVector3& inertiaInvB)
 		:m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
 	{
 		m_aJ= world2A*jointAxis;
 		m_bJ = world2B*-jointAxis;
 		m_0MinvJt	= inertiaInvA * m_aJ;
 		m_1MinvJt = inertiaInvB * m_bJ;
 		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
 		btAssert(m_Adiag > btScalar(0.0));
 	}
 	/// Angular constraint between two different rigidbodies, with the axes supplied per body.
 	/// axisInA and -axisInB are stored directly as m_aJ and m_bJ (no matrix is applied here).
 	btJacobianEntry(const btVector3& axisInA,
 		const btVector3& axisInB,
 		const btVector3& inertiaInvA,
 		const btVector3& inertiaInvB)
 		: m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
 		, m_aJ(axisInA)
 		, m_bJ(-axisInB)
 	{
 		m_0MinvJt	= inertiaInvA * m_aJ;
 		m_1MinvJt = inertiaInvB * m_bJ;
 		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
 		btAssert(m_Adiag > btScalar(0.0));
 	}
 	/// Constraint on one rigidbody.
 	/// Both m_aJ and m_bJ are computed with world2A; m_1MinvJt is set to zero, so
 	/// only body A's inverse mass and angular term contribute to m_Adiag.
 	btJacobianEntry(
 		const btMatrix3x3& world2A,
 		const btVector3& rel_pos1,const btVector3& rel_pos2,
 		const btVector3& jointAxis,
 		const btVector3& inertiaInvA, 
 		const btScalar massInvA)
 		:m_linearJointAxis(jointAxis)
 	{
 		m_aJ= world2A*(rel_pos1.cross(jointAxis));
 		m_bJ = world2A*(rel_pos2.cross(-jointAxis));
 		m_0MinvJt	= inertiaInvA * m_aJ;
 		m_1MinvJt = btVector3(btScalar(0.),btScalar(0.),btScalar(0.));
 		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);
 		btAssert(m_Adiag > btScalar(0.0));
 	}
 	/// Returns the precomputed diagonal term m_Adiag.
 	btScalar	getDiagonal() const { return m_Adiag; }
 	/// Off-diagonal term for two constraints on the same rigidbody (for example vehicle friction).
 	/// Returns massInvA * (linear axes dot product) + m_0MinvJt . jacB.m_aJ.
 	btScalar	getNonDiagonal(const btJacobianEntry& jacB, const btScalar massInvA) const
 	{
 		const btJacobianEntry& jacA = *this;
 		btScalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
 		btScalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
 		return lin + ang;
 	}
 	/// Off-diagonal term for two constraints sharing the same two rigidbodies
 	/// (for example two contact points between two rigidbodies).
 	/// The products below are summed over their three components at the end;
 	/// this assumes btVector3 operator* is component-wise - TODO confirm.
 	btScalar	getNonDiagonal(const btJacobianEntry& jacB,const btScalar massInvA,const btScalar massInvB) const
 	{
 		const btJacobianEntry& jacA = *this;
 		btVector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis;
 		btVector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
 		btVector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
 		btVector3 lin0 = massInvA * lin ;
 		btVector3 lin1 = massInvB * lin;
 		btVector3 sum = ang0+ang1+lin0+lin1;
 		return sum[0]+sum[1]+sum[2];
 	}
 	/// Relative velocity along this Jacobian row for the given linear and angular velocities.
 	/// Only reads members, hence const (callable on const entries like the other getters).
 	/// SIMD_EPSILON is added to the summed components before returning.
 	btScalar getRelativeVelocity(const btVector3& linvelA,const btVector3& angvelA,const btVector3& linvelB,const btVector3& angvelB) const
 	{
 		btVector3 linrel = linvelA - linvelB;
 		btVector3 angvela  = angvelA * m_aJ;
 		btVector3 angvelb  = angvelB * m_bJ;
 		linrel *= m_linearJointAxis;
 		angvela += angvelb;
 		angvela += linrel;
 		btScalar rel_vel2 = angvela[0]+angvela[1]+angvela[2];
 		return rel_vel2 + SIMD_EPSILON;
 	}
 //private:
 	btVector3	m_linearJointAxis;
 	btVector3	m_aJ;
 	btVector3	m_bJ;
 	btVector3	m_0MinvJt;
 	btVector3	m_1MinvJt;
 	//Optimization: can be stored in the w/last component of one of the vectors
 	btScalar	m_Adiag;
 };
 #endif //BT_JACOBIAN_ENTRY_H
--- a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.cpp
+++ b/opencl/gpu_rigidbody/host/btPgsJacobiSolver.cpp
--- a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.h
+++ b/opencl/gpu_rigidbody/host/btPgsJacobiSolver.h
@@ -1,145 +0,0 @@
 #ifndef BT_PGS_JACOBI_SOLVER
 #define BT_PGS_JACOBI_SOLVER
 struct btContact4;
 struct btContactPoint;
 class btDispatcher;
 #include "btTypedConstraint.h"
 #include "btContactSolverInfo.h"
 #include "btSolverBody.h"
 #include "btSolverConstraint.h"
 #include "btConstraintSolver.h"
 struct btRigidBodyCL;
 struct btInertiaCL;
 /// Declaration of a host-side contact/constraint solver that operates on
 /// btRigidBodyCL / btInertiaCL / btContact4 arrays.
 /// Note that, as declared here, the class has no base class even though it has virtual methods.
 class btPgsJacobiSolver
 {
 protected:
 	//temporary pools of solver bodies and solver constraint rows
 	btAlignedObjectArray<btSolverBody>      m_tmpSolverBodyPool;
 	btConstraintArray			m_tmpSolverContactConstraintPool;
 	btConstraintArray			m_tmpSolverNonContactConstraintPool;
 	btConstraintArray			m_tmpSolverContactFrictionConstraintPool;
 	btConstraintArray			m_tmpSolverContactRollingFrictionConstraintPool;
 	//integer index arrays; presumably the (re)ordering of the pools above - TODO confirm in the .cpp
 	btAlignedObjectArray<int>	m_orderTmpConstraintPool;
 	btAlignedObjectArray<int>	m_orderNonContactConstraintPool;
 	btAlignedObjectArray<int>	m_orderFrictionConstraintPool;
 	btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;
 	btAlignedObjectArray<int>		m_bodyCount;
 	btAlignedObjectArray<int>		m_bodyCountCheck;
 	btAlignedObjectArray<btVector3>	m_deltaLinearVelocities;
 	btAlignedObjectArray<btVector3>	m_deltaAngularVelocities;
 	bool						m_usePgs;
 	void						averageVelocities();
 	int							m_maxOverrideNumSolverIterations;
 	//returns the constant 0.02f; the contact argument is not used
 	btScalar	getContactProcessingThreshold(btContact4* contact)
 	{
 		return 0.02f;
 	}
 	void setupFrictionConstraint(	btRigidBodyCL* bodies,btInertiaCL* inertias, btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
 									btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
 									btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, 
 									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
 	void setupRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,	btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
 									btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
 									btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, 
 									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
 	//the add* methods return a reference to a btSolverConstraint
 	btSolverConstraint&	addFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity=0., btScalar cfmSlip=0.);
 	btSolverConstraint&	addRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity=0, btScalar cfmSlip=0.f);
 	//vel, rel_vel, relaxation, rel_pos1 and rel_pos2 are non-const references, i.e. potential outputs
 	void setupContactConstraint(btRigidBodyCL* bodies, btInertiaCL* inertias,
 								btSolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, btContactPoint& cp, 
 								const btContactSolverInfo& infoGlobal, btVector3& vel, btScalar& rel_vel, btScalar& relaxation, 
 								btVector3& rel_pos1, btVector3& rel_pos2);
 	void setFrictionConstraintImpulse( btRigidBodyCL* bodies, btInertiaCL* inertias,btSolverConstraint& solverConstraint, int solverBodyIdA,int solverBodyIdB, 
 										 btContactPoint& cp, const btContactSolverInfo& infoGlobal);
 	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
 	unsigned long	m_btSeed2;
 	btScalar restitutionCurve(btScalar rel_vel, btScalar restitution);
 	void	convertContact(btRigidBodyCL* bodies, btInertiaCL* inertias,btContact4* manifold,const btContactSolverInfo& infoGlobal);
 	void	resolveSplitPenetrationSIMD(
     btSolverBody& bodyA,btSolverBody& bodyB,
        const btSolverConstraint& contactConstraint);
 	void	resolveSplitPenetrationImpulseCacheFriendly(
       btSolverBody& bodyA,btSolverBody& bodyB,
        const btSolverConstraint& contactConstraint);
 	//internal method
 	int		getOrInitSolverBody(int bodyIndex, btRigidBodyCL* bodies,btInertiaCL* inertias);
 	void	initSolverBody(int bodyIndex, btSolverBody* solverBody, btRigidBodyCL* collisionObject);
 	//single-row resolvers: "Generic" and "LowerLimit" flavours, each with a SIMD-named twin of identical signature
 	void	resolveSingleConstraintRowGeneric(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	void	resolveSingleConstraintRowGenericSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	void	resolveSingleConstraintRowLowerLimit(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	void	resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	//this access specifier repeats the one already in effect
 protected:
 	//virtual stages of a solve: setup, iterations, split-impulse iterations, finish
 	virtual btScalar solveGroupCacheFriendlySetup(btRigidBodyCL* bodies, btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
 	virtual btScalar solveGroupCacheFriendlyIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
 	virtual void solveGroupCacheFriendlySplitImpulseIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
 	btScalar solveSingleIteration(int iteration, btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
 	virtual btScalar solveGroupCacheFriendlyFinish(btRigidBodyCL* bodies, btInertiaCL* inertias,int numBodies,const btContactSolverInfo& infoGlobal);
 public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 	btPgsJacobiSolver();
 	virtual ~btPgsJacobiSolver();
 	//public entry points taking host arrays of bodies, inertias and contacts
 	void	solveContacts(int numBodies, btRigidBodyCL* bodies, btInertiaCL* inertias, int numContacts, btContact4* contacts);
 	btScalar solveGroup(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
 	///clear internal cached data and reset random seed
 	virtual	void	reset();
 	unsigned long btRand2();
 	int btRandInt2 (int n);
 	//stores the seed in m_btSeed2
 	void	setRandSeed(unsigned long seed)
 	{
 		m_btSeed2 = seed;
 	}
 	//returns the current value of m_btSeed2
 	unsigned long	getRandSeed() const
 	{
 		return m_btSeed2;
 	}
 };
 #endif //BT_PGS_JACOBI_SOLVER
--- a/opencl/gpu_rigidbody/host/btRigidBody.h
+++ b/opencl/gpu_rigidbody/host/btRigidBody.h
@@ -1,594 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_RIGIDBODY_H
 #define BT_RIGIDBODY_H
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btTransform.h"
 #include "btBroadphaseProxy.h"
 #include "btCollisionObject.h"
 class btCollisionShape;
 class btMotionState;
 class btTypedConstraint;
 extern btScalar gDeactivationTime;
 extern bool gDisableDeactivation;
 #ifdef BT_USE_DOUBLE_PRECISION
 #define btRigidBodyData	btRigidBodyDoubleData
 #define btRigidBodyDataName	"btRigidBodyDoubleData"
 #else
 #define btRigidBodyData	btRigidBodyFloatData
 #define btRigidBodyDataName	"btRigidBodyFloatData"
 #endif //BT_USE_DOUBLE_PRECISION
 /// Flags for a rigid body; each enumerator occupies a single bit.
 enum btRigidBodyFlags
 {
 	BT_DISABLE_WORLD_GRAVITY = 1 << 0,
 	///BT_ENABLE_GYROPSCOPIC_FORCE can easily introduce instability,
 	///so generally it is best not to enable it.
 	///If really needed, run at a high frequency like 1000 Hertz.
 	///See Demos/GyroscopicDemo for an example use.
 	BT_ENABLE_GYROPSCOPIC_FORCE = 1 << 1
 };
 ///The btRigidBody is the main class for rigid body objects. It is derived from btCollisionObject, so it keeps a pointer to a btCollisionShape.
 ///It is recommended for performance and memory use to share btCollisionShape objects whenever possible.
 ///There are 3 types of rigid bodies: 
 ///- A) Dynamic rigid bodies, with positive mass. Motion is controlled by rigid body dynamics.
 ///- B) Fixed objects with zero mass. They are not moving (basically collision objects)
 ///- C) Kinematic objects, which are objects without mass, but the user can move them. There is one-way interaction, and Bullet calculates a velocity based on the timestep and previous and current world transform.
 ///Bullet automatically deactivates dynamic rigid bodies, when the velocity is below a threshold for a given time.
 ///Deactivated (sleeping) rigid bodies don't take any processing time, except a minor broadphase collision detection impact (to allow active objects to activate/wake up sleeping objects)
 class btRigidBody  : public btCollisionObject
 {
 	btMatrix3x3	m_invInertiaTensorWorld;
 	btVector3		m_linearVelocity;
 	btVector3		m_angularVelocity;
 	btScalar		m_inverseMass;
 	btVector3		m_linearFactor;
 	btVector3		m_gravity;	
 	btVector3		m_gravity_acceleration;
 	btVector3		m_invInertiaLocal;
 	btVector3		m_totalForce;
 	btVector3		m_totalTorque;
 	btScalar		m_linearDamping;
 	btScalar		m_angularDamping;
 	bool			m_additionalDamping;
 	btScalar		m_additionalDampingFactor;
 	btScalar		m_additionalLinearDampingThresholdSqr;
 	btScalar		m_additionalAngularDampingThresholdSqr;
 	btScalar		m_additionalAngularDampingFactor;
 	btScalar		m_linearSleepingThreshold;
 	btScalar		m_angularSleepingThreshold;
 	//m_optionalMotionState allows to automatic synchronize the world transform for active objects
 	btMotionState*	m_optionalMotionState;
 	//keep track of typed constraints referencing this rigid body
 	btAlignedObjectArray<btTypedConstraint*> m_constraintRefs;
 	int				m_rigidbodyFlags;
 	int				m_debugBodyId;
 protected:
 	btVector3		m_angularFactor;
 public:
 	///The btRigidBodyConstructionInfo structure provides information to create a rigid body. Setting mass to zero creates a fixed (non-dynamic) rigid body.
 	///For dynamic objects, you can use the collision shape to approximate the local inertia tensor, otherwise use the zero vector (default argument)
 	///You can use the motion state to synchronize the world transform between physics and graphics objects. 
 	///And if the motion state is provided, the rigid body will initialize its initial world transform from the motion state,
 	///m_startWorldTransform is only used when you don't provide a motion state.
 	struct	btRigidBodyConstructionInfo
 	{
 		btScalar			m_mass;
 		///When a motionState is provided, the rigid body will initialize its world transform from the motion state
 		///In this case, m_startWorldTransform is ignored.
 		btMotionState*		m_motionState;
 		btTransform	m_startWorldTransform;
 		btCollisionShape*	m_collisionShape;
 		btVector3			m_localInertia;
 		btScalar			m_linearDamping;
 		btScalar			m_angularDamping;
 		///best simulation results when friction is non-zero
 		btScalar			m_friction;
 		///the m_rollingFriction prevents rounded shapes, such as spheres, cylinders and capsules from rolling forever.
 		///See Bullet/Demos/RollingFrictionDemo for usage
 		btScalar			m_rollingFriction;
 		///best simulation results using zero restitution.
 		btScalar			m_restitution;
 		btScalar			m_linearSleepingThreshold;
 		btScalar			m_angularSleepingThreshold;
 		//Additional damping can help avoiding lowpass jitter motion, help stability for ragdolls etc.
 		//Such damping is undesirable, so once the overall simulation quality of the rigid body dynamics system has improved, this should become obsolete
 		bool				m_additionalDamping;
 		btScalar			m_additionalDampingFactor;
 		btScalar			m_additionalLinearDampingThresholdSqr;
 		btScalar			m_additionalAngularDampingThresholdSqr;
 		btScalar			m_additionalAngularDampingFactor;
 		btRigidBodyConstructionInfo(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0)):
 		m_mass(mass),
 			m_motionState(motionState),
 			m_collisionShape(collisionShape),
 			m_localInertia(localInertia),
 			m_linearDamping(btScalar(0.)),
 			m_angularDamping(btScalar(0.)),
 			m_friction(btScalar(0.5)),
 			m_rollingFriction(btScalar(0)),
 			m_restitution(btScalar(0.)),
 			m_linearSleepingThreshold(btScalar(0.8)),
 			m_angularSleepingThreshold(btScalar(1.f)),
 			m_additionalDamping(false),
 			m_additionalDampingFactor(btScalar(0.005)),
 			m_additionalLinearDampingThresholdSqr(btScalar(0.01)),
 			m_additionalAngularDampingThresholdSqr(btScalar(0.01)),
 			m_additionalAngularDampingFactor(btScalar(0.01))
 		{
 			m_startWorldTransform.setIdentity();
 		}
 	};
 	///btRigidBody constructor using construction info
 	btRigidBody(	const btRigidBodyConstructionInfo& constructionInfo);
 	///btRigidBody constructor for backwards compatibility. 
 	///To specify friction (etc) during rigid body construction, please use the other constructor (using btRigidBodyConstructionInfo)
 	btRigidBody(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0));
 	virtual ~btRigidBody()
        { 
                //No constraints should point to this rigidbody
 		//Remove constraints from the dynamics world before you delete the related rigidbodies. 
                btAssert(m_constraintRefs.size()==0); 
        }
 protected:
 	///setupRigidBody is only used internally by the constructor
 	void	setupRigidBody(const btRigidBodyConstructionInfo& constructionInfo);
 public:
 	void			proceedToTransform(const btTransform& newTrans); 
 	///to keep collision detection and dynamics separate we don't store a rigidbody pointer
 	///but a rigidbody is derived from btCollisionObject, so we can safely perform an upcast
 	static const btRigidBody*	upcast(const btCollisionObject* colObj)
 	{
 		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
 			return (const btRigidBody*)colObj;
 		return 0;
 	}
 	static btRigidBody*	upcast(btCollisionObject* colObj)
 	{
 		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
 			return (btRigidBody*)colObj;
 		return 0;
 	}
 	/// continuous collision detection needs prediction
 	void			predictIntegratedTransform(btScalar step, btTransform& predictedTransform) ;
 	void			saveKinematicState(btScalar step);
 	void			applyGravity();
 	void			setGravity(const btVector3& acceleration);  
 	const btVector3&	getGravity() const
 	{
 		return m_gravity_acceleration;
 	}
 	void			setDamping(btScalar lin_damping, btScalar ang_damping);
 	btScalar getLinearDamping() const
 	{
 		return m_linearDamping;
 	}
 	btScalar getAngularDamping() const
 	{
 		return m_angularDamping;
 	}
 	btScalar getLinearSleepingThreshold() const
 	{
 		return m_linearSleepingThreshold;
 	}
 	btScalar getAngularSleepingThreshold() const
 	{
 		return m_angularSleepingThreshold;
 	}
 	void			applyDamping(btScalar timeStep);
 	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const {
 		return m_collisionShape;
 	}
 	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape() {
 			return m_collisionShape;
 	}
 	void			setMassProps(btScalar mass, const btVector3& inertia);
 	const btVector3& getLinearFactor() const
 	{
 		return m_linearFactor;
 	}
 	void setLinearFactor(const btVector3& linearFactor)
 	{
 		m_linearFactor = linearFactor;
 	}
 	btScalar		getInvMass() const { return m_inverseMass; }
 	const btMatrix3x3& getInvInertiaTensorWorld() const { 
 		return m_invInertiaTensorWorld; 
 	}
 	void			integrateVelocities(btScalar step);
 	void			setCenterOfMassTransform(const btTransform& xform);
 	void			applyCentralForce(const btVector3& force)
 	{
 		m_totalForce += force*m_linearFactor;
 	}
 	const btVector3& getTotalForce() const
 	{
 		return m_totalForce;
 	};
 	const btVector3& getTotalTorque() const
 	{
 		return m_totalTorque;
 	};
 	const btVector3& getInvInertiaDiagLocal() const
 	{
 		return m_invInertiaLocal;
 	};
 	void	setInvInertiaDiagLocal(const btVector3& diagInvInertia)
 	{
 		m_invInertiaLocal = diagInvInertia;
 	}
 	void	setSleepingThresholds(btScalar linear,btScalar angular)
 	{
 		m_linearSleepingThreshold = linear;
 		m_angularSleepingThreshold = angular;
 	}
 	void	applyTorque(const btVector3& torque)
 	{
 		m_totalTorque += torque*m_angularFactor;
 	}
 	void	applyForce(const btVector3& force, const btVector3& rel_pos) 
 	{
 		applyCentralForce(force);
 		applyTorque(rel_pos.cross(force*m_linearFactor));
 	}
 	void applyCentralImpulse(const btVector3& impulse)
 	{
 		m_linearVelocity += impulse *m_linearFactor * m_inverseMass;
 	}
  	void applyTorqueImpulse(const btVector3& torque)
 	{
 			m_angularVelocity += m_invInertiaTensorWorld * torque * m_angularFactor;
 	}
 	/// Applies `impulse` at the offset `rel_pos`.
 	/// Does nothing when the inverse mass is exactly zero; otherwise forwards the
 	/// impulse to applyCentralImpulse() and its moment to applyTorqueImpulse().
 	void applyImpulse(const btVector3& impulse, const btVector3& rel_pos) 
 	{
 		if (m_inverseMass == btScalar(0.))
 			return;
 		applyCentralImpulse(impulse);
 		// A former `if (m_angularFactor)` guard was dropped here: it tested a
 		// btVector3 in boolean context, which presumably resolves through the
 		// vector's pointer conversion and is therefore always true.
 		// NOTE(review): confirm btVector3 has no other bool-like conversion.
 		// applyTorqueImpulse() already scales by m_angularFactor, so a zero
 		// factor still produces no angular change.
 		applyTorqueImpulse(rel_pos.cross(impulse*m_linearFactor));
 	}
 	void clearForces() 
 	{
 		m_totalForce.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
 		m_totalTorque.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
 	}
 	void updateInertiaTensor();    
 	/// Returns the origin of m_worldTransform.
 	const btVector3&     getCenterOfMassPosition() const
 	{
 		return m_worldTransform.getOrigin();
 	}
 	btQuaternion getOrientation() const;
 	/// Returns the body's world transform (m_worldTransform).
 	const btTransform&  getCenterOfMassTransform() const
 	{
 		return m_worldTransform;
 	}
 	/// Returns the current linear velocity.
 	const btVector3&   getLinearVelocity() const
 	{
 		return m_linearVelocity;
 	}
 	/// Returns the current angular velocity.
 	const btVector3&    getAngularVelocity() const
 	{
 		return m_angularVelocity;
 	}
 	/// Overwrites the linear velocity with `lin_vel`.
 	inline void setLinearVelocity(const btVector3& lin_vel) { m_linearVelocity = lin_vel; }
 	/// Overwrites the angular velocity with `ang_vel`.
 	inline void setAngularVelocity(const btVector3& ang_vel) { m_angularVelocity = ang_vel; }
 	/// Returns the velocity of the point at offset `rel_pos`: the linear velocity
 	/// plus the cross product of the angular velocity with `rel_pos`.
 	btVector3 getVelocityInLocalPoint(const btVector3& rel_pos) const
 	{
 		// Linear/angular velocity is computed this way for kinematic objects too.
 		// For kinematic objects an alternative would be:
 		//		return 	(m_worldTransform(rel_pos) - m_interpolationWorldTransform(rel_pos)) / m_kinematicTimeStep;
 		const btVector3 rotationalPart = m_angularVelocity.cross(rel_pos);
 		return m_linearVelocity + rotationalPart;
 	}
 	void translate(const btVector3& v) 
 	{
 		m_worldTransform.getOrigin() += v; 
 	}
 	void	getAabb(btVector3& aabbMin,btVector3& aabbMax) const;
 	/// Computes the inverse mass plus normal . (((r x normal) * invInertiaWorld) x r),
 	/// where r is `pos` minus the centre-of-mass position.
 	/// @param pos    point in the same frame as getCenterOfMassPosition()
 	/// @param normal direction along which the denominator is evaluated
 	SIMD_FORCE_INLINE btScalar computeImpulseDenominator(const btVector3& pos, const btVector3& normal) const
 	{
 		const btVector3 arm = pos - getCenterOfMassPosition();
 		const btVector3 armCrossNormal = arm.cross(normal);
 		const btVector3 angularTerm = (armCrossNormal * getInvInertiaTensorWorld()).cross(arm);
 		return m_inverseMass + normal.dot(angularTerm);
 	}
 	/// Returns axis . (axis * invInertiaWorld).
 	SIMD_FORCE_INLINE btScalar computeAngularImpulseDenominator(const btVector3& axis) const
 	{
 		const btVector3 transformedAxis = axis * getInvInertiaTensorWorld();
 		return axis.dot(transformedAxis);
 	}
 	/// Advances or resets the deactivation timer for one step of length `timeStep`.
 	/// Bodies in state ISLAND_SLEEPING or DISABLE_DEACTIVATION are left untouched.
 	/// While both squared velocities are below their squared thresholds the timer
 	/// accumulates; otherwise it is cleared and the activation state is set to 0.
 	SIMD_FORCE_INLINE void	updateDeactivation(btScalar timeStep)
 	{
 		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION))
 		{
 			return;
 		}
 		const bool belowThresholds =
 			(getLinearVelocity().length2() < m_linearSleepingThreshold*m_linearSleepingThreshold) &&
 			(getAngularVelocity().length2() < m_angularSleepingThreshold*m_angularSleepingThreshold);
 		if (!belowThresholds)
 		{
 			m_deactivationTime=btScalar(0.);
 			setActivationState(0);
 			return;
 		}
 		m_deactivationTime += timeStep;
 	}
 	/// Reports whether this body should be put to sleep.
 	/// Checks, in order: per-body DISABLE_DEACTIVATION (false), the global
 	/// gDisableDeactivation flag or a zero gDeactivationTime (false), an already
 	/// sleeping / wants-deactivation state (true), and finally whether the
 	/// accumulated deactivation time exceeds gDeactivationTime.
 	SIMD_FORCE_INLINE bool	wantsSleeping()
 	{
 		if (getActivationState() == DISABLE_DEACTIVATION)
 		{
 			return false;
 		}
 		// Deactivation is switched off globally.
 		if (gDisableDeactivation || (gDeactivationTime == btScalar(0.)))
 		{
 			return false;
 		}
 		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION))
 		{
 			return true;
 		}
 		return m_deactivationTime > gDeactivationTime;
 	}
 	/// Read-only access to the broadphase handle pointer.
 	const btBroadphaseProxy*	getBroadphaseProxy() const { return m_broadphaseHandle; }
 	/// Mutable access to the broadphase handle pointer.
 	btBroadphaseProxy*	getBroadphaseProxy() { return m_broadphaseHandle; }
 	/// Stores `broadphaseProxy` as the broadphase handle.
 	void	setNewBroadphaseProxy(btBroadphaseProxy* broadphaseProxy) { m_broadphaseHandle = broadphaseProxy; }
 	//btMotionState allows automatic synchronization of the world transform for active objects
 	/// Mutable access to the optional motion state pointer (may be null).
 	btMotionState*	getMotionState() { return m_optionalMotionState; }
 	/// Read-only access to the optional motion state pointer (may be null).
 	const btMotionState*	getMotionState() const { return m_optionalMotionState; }
 	/// Stores `motionState`; when it is non-null, asks it to fill in
 	/// m_worldTransform via getWorldTransform().
 	void	setMotionState(btMotionState* motionState)
 	{
 		m_optionalMotionState = motionState;
 		if (!m_optionalMotionState)
 		{
 			return;
 		}
 		m_optionalMotionState->getWorldTransform(m_worldTransform);
 	}
 	//for experimental overriding of friction/contact solver func
 	int	m_contactSolverType;
 	int	m_frictionSolverType;
 	/// Replaces the per-axis angular factor with `angFac`.
 	void	setAngularFactor(const btVector3& angFac) { m_angularFactor = angFac; }
 	/// Sets all three components of the angular factor to the same scalar `angFac`.
 	void	setAngularFactor(btScalar angFac) { m_angularFactor.setValue(angFac,angFac,angFac); }
 	/// Returns the per-axis factor that scales torques and torque impulses.
 	const btVector3&	getAngularFactor() const { return m_angularFactor; }
 	//is this rigidbody added to a btCollisionWorld/btDynamicsWorld/btBroadphase?
 	bool	isInWorld() const
 	{
 		return (getBroadphaseProxy() != 0);
 	}
 	virtual bool checkCollideWithOverride(const  btCollisionObject* co) const;
 	void addConstraintRef(btTypedConstraint* c);
 	void removeConstraintRef(btTypedConstraint* c);
 	/// Returns the constraint reference stored at `index`; the index is not range-checked here.
 	btTypedConstraint* getConstraintRef(int index) { return m_constraintRefs[index]; }
 	/// Returns how many constraint references are currently stored.
 	int getNumConstraintRefs() const { return m_constraintRefs.size(); }
 	/// Overwrites the rigid-body flag word with `flags`.
 	void	setFlags(int flags) { m_rigidbodyFlags = flags; }
 	/// Returns the rigid-body flag word.
 	int getFlags() const { return m_rigidbodyFlags; }
 	btVector3 computeGyroscopicForce(btScalar maxGyroscopicForce) const;
 	///////////////////////////////////////////////
 	virtual	int	calculateSerializeBufferSize()	const;
 	///fills the dataBuffer and returns the struct name (and 0 on failure)
 	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
 	virtual void serializeSingleObject(class btSerializer* serializer) const;
 };
 //@todo add m_optionalMotionState and m_constraintRefs to btRigidBodyData
 ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
 struct	btRigidBodyFloatData
 {
 	// Single-precision serialization record for a rigid body.
 	// Field order and types must stay exactly as they are: the preceding note
 	// ties this layout to sBulletDNAstr/sBulletDNAstr64.
 	btCollisionObjectFloatData	m_collisionObjectData;	// embedded collision-object record, stored first
 	btMatrix3x3FloatData		m_invInertiaTensorWorld;
 	btVector3FloatData		m_linearVelocity;
 	btVector3FloatData		m_angularVelocity;
 	btVector3FloatData		m_angularFactor;
 	btVector3FloatData		m_linearFactor;
 	btVector3FloatData		m_gravity;	
 	btVector3FloatData		m_gravity_acceleration;
 	btVector3FloatData		m_invInertiaLocal;
 	btVector3FloatData		m_totalForce;
 	btVector3FloatData		m_totalTorque;
 	float					m_inverseMass;
 	float					m_linearDamping;
 	float					m_angularDamping;
 	float					m_additionalDampingFactor;
 	float					m_additionalLinearDampingThresholdSqr;
 	float					m_additionalAngularDampingThresholdSqr;
 	float					m_additionalAngularDampingFactor;
 	float					m_linearSleepingThreshold;
 	float					m_angularSleepingThreshold;
 	int						m_additionalDamping;	// stored as int rather than bool
 };
 ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
 struct	btRigidBodyDoubleData
 {
 	// Double-precision counterpart of btRigidBodyFloatData, with the same fields
 	// in the same order. Field order and types must stay exactly as they are:
 	// the preceding note ties this layout to sBulletDNAstr/sBulletDNAstr64.
 	btCollisionObjectDoubleData	m_collisionObjectData;	// embedded collision-object record, stored first
 	btMatrix3x3DoubleData		m_invInertiaTensorWorld;
 	btVector3DoubleData		m_linearVelocity;
 	btVector3DoubleData		m_angularVelocity;
 	btVector3DoubleData		m_angularFactor;
 	btVector3DoubleData		m_linearFactor;
 	btVector3DoubleData		m_gravity;	
 	btVector3DoubleData		m_gravity_acceleration;
 	btVector3DoubleData		m_invInertiaLocal;
 	btVector3DoubleData		m_totalForce;
 	btVector3DoubleData		m_totalTorque;
 	double					m_inverseMass;
 	double					m_linearDamping;
 	double					m_angularDamping;
 	double					m_additionalDampingFactor;
 	double					m_additionalLinearDampingThresholdSqr;
 	double					m_additionalAngularDampingThresholdSqr;
 	double					m_additionalAngularDampingFactor;
 	double					m_linearSleepingThreshold;
 	double					m_angularSleepingThreshold;
 	int						m_additionalDamping;	// stored as int rather than bool
 	char	m_padding[4];	// explicit trailing padding after the int field
 };
 #endif //BT_RIGIDBODY_H
--- a/opencl/gpu_rigidbody/host/btSolverBody.h
+++ b/opencl/gpu_rigidbody/host/btSolverBody.h
@@ -1,299 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_SOLVER_BODY_H
 #define BT_SOLVER_BODY_H
 class	btRigidBody;
 #include "BulletCommon/btVector3.h"
 #include "BulletCommon/btMatrix3x3.h"
 #include "BulletCommon/btAlignedAllocator.h"
 #include "BulletCommon/btTransformUtil.h"
 ///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
 #ifdef BT_USE_SSE
 #define USE_SIMD 1
 #endif //
 #ifdef USE_SIMD
 struct	btSimdScalar
 {
 	/// Default construction leaves the stored value unset.
 	SIMD_FORCE_INLINE	btSimdScalar() {}
 	/// Broadcasts `fl` into the 128-bit value via _mm_set1_ps.
 	SIMD_FORCE_INLINE	btSimdScalar(float	fl) : m_vec128(_mm_set1_ps(fl)) {}
 	/// Wraps an existing 128-bit value.
 	SIMD_FORCE_INLINE	btSimdScalar(__m128 v128) : m_vec128(v128) {}
 	/// The same storage viewed as a SIMD register, four floats, four ints, or a btScalar.
 	union
 	{
 		__m128		m_vec128;
 		float		m_floats[4];
 		int			m_ints[4];
 		btScalar	m_unusedPadding;
 	};
 	/// Returns the stored 128-bit value.
 	SIMD_FORCE_INLINE	__m128	get128() { return m_vec128; }
 	/// Returns the stored 128-bit value (const overload).
 	SIMD_FORCE_INLINE	const __m128	get128() const { return m_vec128; }
 	/// Replaces the stored 128-bit value.
 	SIMD_FORCE_INLINE	void	set128(__m128 v128) { m_vec128 = v128; }
 	/// Implicit conversion to the stored 128-bit value.
 	SIMD_FORCE_INLINE	operator       __m128() { return m_vec128; }
 	/// Implicit conversion to the stored 128-bit value (const overload).
 	SIMD_FORCE_INLINE	operator const __m128() const { return m_vec128; }
 	/// Implicit conversion to float: yields element 0 of the float view.
 	SIMD_FORCE_INLINE	operator float() const { return m_floats[0]; }
 };
 ///@brief Return the elementwise product of two btSimdScalar
 /// Multiplies the two operands lane by lane using _mm_mul_ps.
 SIMD_FORCE_INLINE btSimdScalar 
 operator*(const btSimdScalar& v1, const btSimdScalar& v2) 
 {
 	const __m128 product = _mm_mul_ps(v1.get128(),v2.get128());
 	return btSimdScalar(product);
 }
 ///@brief Return the elementwise product of two btSimdScalar
 /// Adds the two operands lane by lane using _mm_add_ps.
 /// This is a sum, not a product: the returned value wraps _mm_add_ps(v1, v2).
 SIMD_FORCE_INLINE btSimdScalar 
 operator+(const btSimdScalar& v1, const btSimdScalar& v2) 
 {
 	const __m128 sum = _mm_add_ps(v1.get128(),v2.get128());
 	return btSimdScalar(sum);
 }
 #else
 #define btSimdScalar btScalar
 #endif
 ///The btSolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
 ATTRIBUTE_ALIGNED64 (struct)	btSolverBody
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 	// Data members: order is kept exactly as declared originally.
 	btTransform		m_worldTransform;
 	btVector3		m_deltaLinearVelocity;
 	btVector3		m_deltaAngularVelocity;
 	btVector3		m_angularFactor;
 	btVector3		m_linearFactor;
 	btVector3		m_invMass;
 	btVector3		m_pushVelocity;
 	btVector3		m_turnVelocity;
 	btVector3		m_linearVelocity;
 	btVector3		m_angularVelocity;
 	// Back-reference to the source body, either as a pointer or as an index.
 	// The methods below treat a zero/null value as "no original body".
 	union 
 	{
 		void*	m_originalBody;
 		int		m_originalBodyIndex;
 	};
 	/// Overwrites the stored world transform.
 	void	setWorldTransform(const btTransform& worldTransform)
 	{
 		m_worldTransform = worldTransform;
 	}
 	/// Returns the stored world transform.
 	const btTransform& getWorldTransform() const
 	{
 		return m_worldTransform;
 	}
 	/// Writes the velocity (including pending deltas) of the point at `rel_pos`
 	/// into `velocity`, or a zero vector when there is no original body.
 	SIMD_FORCE_INLINE void	getVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
 	{
 		if (m_originalBody)
 			velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
 		else
 			velocity.setValue(0,0,0);
 	}
 	/// Writes the angular velocity (including the pending delta) into `angVel`,
 	/// or a zero vector when there is no original body.
 	SIMD_FORCE_INLINE void	getAngularVelocity(btVector3& angVel) const
 	{
 		if (m_originalBody)
 			angVel =m_angularVelocity+m_deltaAngularVelocity;
 		else
 			angVel.setValue(0,0,0);
 	}
 	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
 	/// Same operation as internalApplyImpulse(); forwards to it so the two cannot drift apart.
 	SIMD_FORCE_INLINE void applyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
 	{
 		internalApplyImpulse(linearComponent, angularComponent, impulseMagnitude);
 	}
 	/// Accumulates a scaled impulse into the push/turn velocities; no-op without an original body.
 	SIMD_FORCE_INLINE void internalApplyPushImpulse(const btVector3& linearComponent, const btVector3& angularComponent,btScalar impulseMagnitude)
 	{
 		if (m_originalBody)
 		{
 			m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor;
 			m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
 		}
 	}
 	const btVector3& getDeltaLinearVelocity() const
 	{
 		return m_deltaLinearVelocity;
 	}
 	const btVector3& getDeltaAngularVelocity() const
 	{
 		return m_deltaAngularVelocity;
 	}
 	const btVector3& getPushVelocity() const 
 	{
 		return m_pushVelocity;
 	}
 	const btVector3& getTurnVelocity() const 
 	{
 		return m_turnVelocity;
 	}
 	////////////////////////////////////////////////
 	///some internal methods, don't use them
 	btVector3& internalGetDeltaLinearVelocity()
 	{
 		return m_deltaLinearVelocity;
 	}
 	btVector3& internalGetDeltaAngularVelocity()
 	{
 		return m_deltaAngularVelocity;
 	}
 	const btVector3& internalGetAngularFactor() const
 	{
 		return m_angularFactor;
 	}
 	const btVector3& internalGetInvMass() const
 	{
 		return m_invMass;
 	}
 	void internalSetInvMass(const btVector3& invMass)
 	{
 		m_invMass = invMass;
 	}
 	btVector3& internalGetPushVelocity()
 	{
 		return m_pushVelocity;
 	}
 	btVector3& internalGetTurnVelocity()
 	{
 		return m_turnVelocity;
 	}
 	/// Like getVelocityInLocalPointObsolete() but without the original-body check.
 	SIMD_FORCE_INLINE void	internalGetVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
 	{
 		velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
 	}
 	/// Like getAngularVelocity() but without the original-body check.
 	SIMD_FORCE_INLINE void	internalGetAngularVelocity(btVector3& angVel) const
 	{
 		angVel = m_angularVelocity+m_deltaAngularVelocity;
 	}
 	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
 	/// Accumulates a scaled impulse into the delta velocities; no-op without an original body.
 	SIMD_FORCE_INLINE void internalApplyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
 	{
 		if (m_originalBody)
 		{
 			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
 			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
 		}
 	}
 	/// Folds the pending delta velocities into the stored velocities; no-op without an original body.
 	void	writebackVelocity()
 	{
 		if (m_originalBody)
 		{
 			m_linearVelocity +=m_deltaLinearVelocity;
 			m_angularVelocity += m_deltaAngularVelocity;
 		}
 	}
 	/// Folds the pending delta velocities into the stored velocities and, when any
 	/// push or turn velocity component is non-zero, integrates the world transform
 	/// over `timeStep` using the push velocity and the turn velocity scaled by
 	/// `splitImpulseTurnErp`. No-op without an original body.
 	void	writebackVelocityAndTransform(btScalar timeStep, btScalar splitImpulseTurnErp)
 	{
 		if (!m_originalBody)
 			return;
 		m_linearVelocity += m_deltaLinearVelocity;
 		m_angularVelocity += m_deltaAngularVelocity;
 		//correct the position/orientation based on push/turn recovery
 		const btScalar zero = btScalar(0);
 		const bool hasPush = m_pushVelocity[0]!=zero || m_pushVelocity[1]!=zero || m_pushVelocity[2]!=zero;
 		const bool hasTurn = m_turnVelocity[0]!=zero || m_turnVelocity[1]!=zero || m_turnVelocity[2]!=zero;
 		if (hasPush || hasTurn)
 		{
 			btTransform newTransform;
 			btTransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform);
 			m_worldTransform = newTransform;
 		}
 	}
 };
 #endif //BT_SOLVER_BODY_H
--- a/opencl/gpu_rigidbody/host/btSolverConstraint.h
+++ b/opencl/gpu_rigidbody/host/btSolverConstraint.h
@@ -1,79 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_SOLVER_CONSTRAINT_H
 #define BT_SOLVER_CONSTRAINT_H
 class	btRigidBody;
 #include "BulletCommon/btVector3.h"
 #include "BulletCommon/btMatrix3x3.h"
 #include "btJacobianEntry.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 //#define NO_FRICTION_TANGENTIALS 1
 #include "btSolverBody.h"
 ///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
 ATTRIBUTE_ALIGNED16 (struct)	btSolverConstraint
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 	// Data-only record; there is no logic in this struct.
 	// NOTE(review): member order is presumably relied on by solver code — confirm before reordering.
 	btVector3		m_relpos1CrossNormal;
 	btVector3		m_contactNormal;
 	btVector3		m_relpos2CrossNormal;
 	//btVector3		m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
 	btVector3		m_angularComponentA;
 	btVector3		m_angularComponentB;
 	// Declared mutable, so these two can be written through a const reference.
 	mutable btSimdScalar	m_appliedPushImpulse;
 	mutable btSimdScalar	m_appliedImpulse;
 	btScalar	m_friction;
 	btScalar	m_jacDiagABInv;
 	btScalar		m_rhs;
 	btScalar		m_cfm;
    btScalar		m_lowerLimit;
 	btScalar		m_upperLimit;
 	btScalar		m_rhsPenetration;
 	// Pointer to the originating contact point, overlaid with a scalar padding field.
    union
 	{
 		void*		m_originalContactPoint;
 		btScalar	m_unusedPadding4;
 	};
 	int	m_overrideNumSolverIterations;
    int			m_frictionIndex;
 	// Indices identifying the two solver bodies this row refers to.
 	int m_solverBodyIdA;
 	int m_solverBodyIdB;
 	// Row kinds: contact = 0, friction = 1.
 	enum		btSolverConstraintType
 	{
 		BT_SOLVER_CONTACT_1D = 0,
 		BT_SOLVER_FRICTION_1D
 	};
 };
 typedef btAlignedObjectArray<btSolverConstraint>	btConstraintArray;
 #endif //BT_SOLVER_CONSTRAINT_H
--- a/opencl/gpu_rigidbody/host/btTypedConstraint.h
+++ b/opencl/gpu_rigidbody/host/btTypedConstraint.h
@@ -1,482 +0,0 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
 Permission is granted to anyone to use this software for any purpose, 
 including commercial applications, and to alter it and redistribute it freely, 
 subject to the following restrictions:
 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
 #ifndef BT_TYPED_CONSTRAINT_H
 #define BT_TYPED_CONSTRAINT_H
 #include "BulletCommon/btScalar.h"
 #include "btSolverConstraint.h"
 #include "btRigidBody.h"
 class btSerializer;
 //Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
 enum btTypedConstraintType
 {
 	// Numbering starts at 3 and increments by one per entry; per the note above,
 	// existing values must not change, so new types go just before MAX_CONSTRAINT_TYPE.
 	POINT2POINT_CONSTRAINT_TYPE=3,
 	HINGE_CONSTRAINT_TYPE,			// 4
 	CONETWIST_CONSTRAINT_TYPE,		// 5
 	D6_CONSTRAINT_TYPE,				// 6
 	SLIDER_CONSTRAINT_TYPE,			// 7
 	CONTACT_CONSTRAINT_TYPE,		// 8
 	D6_SPRING_CONSTRAINT_TYPE,		// 9
 	GEAR_CONSTRAINT_TYPE,			// 10
 	MAX_CONSTRAINT_TYPE				// 11, one past the last real type
 };
 // Parameter identifiers; numbering starts at 1.
 // NOTE(review): presumably these are the `num` values taken by
 // btTypedConstraint::setParam()/getParam() — confirm against the implementations.
 enum btConstraintParams
 {
 	BT_CONSTRAINT_ERP=1,
 	BT_CONSTRAINT_STOP_ERP,		// 2
 	BT_CONSTRAINT_CFM,			// 3
 	BT_CONSTRAINT_STOP_CFM		// 4
 };
 #if 1
 	#define btAssertConstrParams(_par) btAssert(_par) 
 #else
 	#define btAssertConstrParams(_par)
 #endif
 // Feedback record attached to a constraint through
 // btTypedConstraint::setJointFeedback(): one force and one torque vector per body.
 // NOTE(review): which code fills these in is not visible in this header.
 ATTRIBUTE_ALIGNED16(struct)	btJointFeedback
 {
 	btVector3	m_appliedForceBodyA;
 	btVector3	m_appliedTorqueBodyA;
 	btVector3	m_appliedForceBodyB;
 	btVector3	m_appliedTorqueBodyB;
 };
 ///TypedConstraint is the baseclass for Bullet constraints and vehicles
 ATTRIBUTE_ALIGNED16(class) btTypedConstraint : public btTypedObject
 {
 	int	m_userConstraintType;
 	// User id and user pointer share storage: setting one overwrites the other.
 	union
 	{
 		int	m_userConstraintId;
 		void* m_userConstraintPtr;
 	};
 	btScalar	m_breakingImpulseThreshold;
 	bool		m_isEnabled;
 	bool		m_needsFeedback;
 	int			m_overrideNumSolverIterations;
 	// Assignment is private and asserts when reached, so constraints are not assignable.
 	btTypedConstraint&	operator=(btTypedConstraint&	other)
 	{
 		btAssert(0);
 		(void) other;
 		return *this;
 	}
 protected:
 	btRigidBody&	m_rbA;
 	btRigidBody&	m_rbB;
 	btScalar	m_appliedImpulse;
 	btScalar	m_dbgDrawSize;
 	btJointFeedback*	m_jointFeedback;
 	///internal method used by the constraint solver, don't use them directly
 	btScalar getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact);
 public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 	virtual ~btTypedConstraint() {}
 	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA);
 	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA,btRigidBody& rbB);
 	struct btConstraintInfo1 {
 		int m_numConstraintRows,nub;
 	};
 	static btRigidBody& getFixedBody();
 	struct btConstraintInfo2 {
 		// integrator parameters: frames per second (1/stepsize), default error
 		// reduction parameter (0..1).
 		btScalar fps,erp;
 		// for the first and second body, pointers to two (linear and angular)
 		// n*3 jacobian sub matrices, stored by rows. these matrices will have
 		// been initialized to 0 on entry. if the second body is zero then the
 		// J2xx pointers may be 0.
 		btScalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis;
 		// elements to jump from one row to the next in J's
 		int rowskip;
 		// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
 		// "constraint force mixing" vector. c is set to zero on entry, cfm is
 		// set to a constant value (typically very small or zero) value on entry.
 		btScalar *m_constraintError,*cfm;
 		// lo and hi limits for variables (set to -/+ infinity on entry).
 		btScalar *m_lowerLimit,*m_upperLimit;
 		// findex vector for variables. see the LCP solver interface for a
 		// description of what this does. this is set to -1 on entry.
 		// note that the returned indexes are relative to the first index of
 		// the constraint.
 		int *findex;
 		// number of solver iterations
 		int m_numIterations;
 		//damping of the velocity
 		btScalar	m_damping;
 	};
 	int	getOverrideNumSolverIterations() const
 	{
 		return m_overrideNumSolverIterations;
 	}
 	///override the number of constraint solver iterations used to solve this constraint
 	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
 	void setOverrideNumSolverIterations(int overideNumIterations)
 	{
 		m_overrideNumSolverIterations = overideNumIterations;
 	}
 	///internal method used by the constraint solver, don't use them directly
 	virtual void	buildJacobian() {}
 	///internal method used by the constraint solver, don't use them directly
 	virtual	void	setupSolverConstraint(btConstraintArray& ca, int solverBodyA,int solverBodyB, btScalar timeStep)
 	{
        (void)ca;
        (void)solverBodyA;
        (void)solverBodyB;
        (void)timeStep;
 	}
 	///internal method used by the constraint solver, don't use them directly
 	virtual void getInfo1 (btConstraintInfo1* info)=0;
 	///internal method used by the constraint solver, don't use them directly
 	virtual void getInfo2 (btConstraintInfo2* info)=0;
 	///internal method used by the constraint solver, don't use them directly
 	void	internalSetAppliedImpulse(btScalar appliedImpulse)
 	{
 		m_appliedImpulse = appliedImpulse;
 	}
 	///internal method used by the constraint solver, don't use them directly
 	///(read-only, hence const; unlike getAppliedImpulse() it does not assert on m_needsFeedback)
 	btScalar	internalGetAppliedImpulse() const
 	{
 		return m_appliedImpulse;
 	}
 	btScalar	getBreakingImpulseThreshold() const
 	{
 		return 	m_breakingImpulseThreshold;
 	}
 	void	setBreakingImpulseThreshold(btScalar threshold)
 	{
 		m_breakingImpulseThreshold = threshold;
 	}
 	bool	isEnabled() const
 	{
 		return m_isEnabled;
 	}
 	void	setEnabled(bool enabled)
 	{
 		m_isEnabled=enabled;
 	}
 	///internal method used by the constraint solver, don't use them directly
 	virtual	void	solveConstraintObsolete(btSolverBody& /*bodyA*/,btSolverBody& /*bodyB*/,btScalar	/*timeStep*/) {}
 	const btRigidBody& getRigidBodyA() const
 	{
 		return m_rbA;
 	}
 	const btRigidBody& getRigidBodyB() const
 	{
 		return m_rbB;
 	}
 	btRigidBody& getRigidBodyA() 
 	{
 		return m_rbA;
 	}
 	btRigidBody& getRigidBodyB()
 	{
 		return m_rbB;
 	}
 	int getUserConstraintType() const
 	{
 		return m_userConstraintType ;
 	}
 	void	setUserConstraintType(int userConstraintType)
 	{
 		m_userConstraintType = userConstraintType;
 	}
 	void	setUserConstraintId(int uid)
 	{
 		m_userConstraintId = uid;
 	}
 	int getUserConstraintId() const
 	{
 		return m_userConstraintId;
 	}
 	void	setUserConstraintPtr(void* ptr)
 	{
 		m_userConstraintPtr = ptr;
 	}
 	///returns the stored user pointer; read-only, hence const
 	void*	getUserConstraintPtr() const
 	{
 		return m_userConstraintPtr;
 	}
 	void	setJointFeedback(btJointFeedback* jointFeedback)
 	{
 		m_jointFeedback = jointFeedback;
 	}
 	const btJointFeedback* getJointFeedback() const
 	{
 		return m_jointFeedback;
 	}
 	btJointFeedback* getJointFeedback()
 	{
 		return m_jointFeedback;
 	}
 	///returns the same value as getUserConstraintId()
 	int getUid() const
 	{
 		return m_userConstraintId;   
 	} 
 	bool	needsFeedback() const
 	{
 		return m_needsFeedback;
 	}
 	///enableFeedback will allow to read the applied linear and angular impulse
 	///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
 	void	enableFeedback(bool needsFeedback)
 	{
 		m_needsFeedback = needsFeedback;
 	}
 	///getAppliedImpulse is an estimated total applied impulse. 
 	///This feedback could be used to determine breaking constraints or playing sounds.
 	///Asserts that feedback has been enabled.
 	btScalar	getAppliedImpulse() const
 	{
 		btAssert(m_needsFeedback);
 		return m_appliedImpulse;
 	}
 	btTypedConstraintType getConstraintType () const
 	{
 		return btTypedConstraintType(m_objectType);
 	}
 	void setDbgDrawSize(btScalar dbgDrawSize)
 	{
 		m_dbgDrawSize = dbgDrawSize;
 	}
 	///returns the stored debug draw size; read-only, hence const
 	btScalar getDbgDrawSize() const
 	{
 		return m_dbgDrawSize;
 	}
 	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
 	///If no axis is provided, it uses the default axis for this constraint.
 	virtual	void	setParam(int num, btScalar value, int axis = -1) = 0;
 	///return the local value of parameter
 	virtual	btScalar getParam(int num, int axis = -1) const = 0;
 	virtual	int	calculateSerializeBufferSize() const=0;
 	///fills the dataBuffer and returns the struct name (and 0 on failure)
 	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const=0;
 };
 // returns angle in range [-SIMD_2_PI, SIMD_2_PI], closest to one of the limits 
 // all arguments should be normalized angles (i.e. in range [-SIMD_PI, SIMD_PI])
 /// Shifts an angle by a full turn when that brings it closer to the violated limit.
 /// - Returns the angle unchanged when lower >= upper, or when it lies within [lower, upper].
 /// - Below the lower limit: returns the angle as-is if it is nearer (after
 ///   normalisation) to the lower limit, otherwise the angle plus SIMD_2_PI.
 /// - Above the upper limit: returns the angle minus SIMD_2_PI if it is nearer to
 ///   the lower limit, otherwise the angle as-is.
 SIMD_FORCE_INLINE btScalar btAdjustAngleToLimits(btScalar angleInRadians, btScalar angleLowerLimitInRadians, btScalar angleUpperLimitInRadians)
 {
 	if(angleLowerLimitInRadians >= angleUpperLimitInRadians)
 	{
 		return angleInRadians;
 	}
 	if(angleInRadians < angleLowerLimitInRadians)
 	{
 		const btScalar distToLower = btFabs(btNormalizeAngle(angleLowerLimitInRadians - angleInRadians));
 		const btScalar distToUpper = btFabs(btNormalizeAngle(angleUpperLimitInRadians - angleInRadians));
 		if (distToLower < distToUpper)
 		{
 			return angleInRadians;
 		}
 		return angleInRadians + SIMD_2_PI;
 	}
 	if(angleInRadians > angleUpperLimitInRadians)
 	{
 		const btScalar distToUpper = btFabs(btNormalizeAngle(angleInRadians - angleUpperLimitInRadians));
 		const btScalar distToLower = btFabs(btNormalizeAngle(angleInRadians - angleLowerLimitInRadians));
 		if (distToLower < distToUpper)
 		{
 			return angleInRadians - SIMD_2_PI;
 		}
 		return angleInRadians;
 	}
 	return angleInRadians;
 }
 ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
 struct	btTypedConstraintData
 {
 	// Serialization record for a constraint. Its size is what
 	// btTypedConstraint::calculateSerializeBufferSize() reports.
 	// Field order and types must stay exactly as they are: the preceding note
 	// ties this layout to sBulletDNAstr/sBulletDNAstr64.
 	btRigidBodyData		*m_rbA;
 	btRigidBodyData		*m_rbB;
 	char	*m_name;
 	int	m_objectType;
 	int	m_userConstraintType;
 	int	m_userConstraintId;
 	int	m_needsFeedback;	// stored as int rather than bool
 	float	m_appliedImpulse;
 	float	m_dbgDrawSize;
 	int	m_disableCollisionsBetweenLinkedBodies;
 	int	m_overrideNumSolverIterations;
 	float	m_breakingImpulseThreshold;
 	int		m_isEnabled;	// stored as int rather than bool
 };
 /// Reports the byte size of the serialized form, i.e. sizeof(btTypedConstraintData).
 SIMD_FORCE_INLINE	int	btTypedConstraint::calculateSerializeBufferSize() const
 {
 	const int recordSize = sizeof(btTypedConstraintData);
 	return recordSize;
 }
 class btAngularLimit
 {
 private:
 	btScalar m_center;
 	btScalar m_halfRange;
 	btScalar m_softness;
 	btScalar m_biasFactor;
 	btScalar m_relaxationFactor;
 	btScalar m_correction;
 	btScalar m_sign;
 	bool m_solveLimit;
 public:
 	/// Default constructor initializes the limit as inactive, allowing free constraint movement.
 	/// Defaults: center 0, half range -1, softness 0.9, bias factor 0.3,
 	/// relaxation factor 1, correction 0, sign 0, no limit being solved.
 	btAngularLimit()
 		: m_center(0.0f)
 		, m_halfRange(-1.0f)
 		, m_softness(0.9f)
 		, m_biasFactor(0.3f)
 		, m_relaxationFactor(1.0f)
 		, m_correction(0.0f)
 		, m_sign(0.0f)
 		, m_solveLimit(false)
 	{
 	}
 	/// Sets all of the limit's parameters.
 	/// When low > high the limit becomes inactive.
 	/// When high - low > 2PI the limit is ineffective too, because no angle can exceed it.
 	void set(btScalar low, btScalar high, btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f);
 	/// Checks the constraint angle against the limit. If the limit is active and the
 	/// angle violates it, a correction is calculated.
 	void test(const btScalar angle);
 	/// Returns the limit's softness.
 	inline btScalar getSoftness() const { return m_softness; }
 	/// Returns the limit's bias factor.
 	inline btScalar getBiasFactor() const { return m_biasFactor; }
 	/// Returns the limit's relaxation factor.
 	inline btScalar getRelaxationFactor() const { return m_relaxationFactor; }
 	/// Returns the correction value evaluated when test() was invoked.
 	inline btScalar getCorrection() const { return m_correction; }
 	/// Returns the sign value evaluated when test() was invoked.
 	inline btScalar getSign() const { return m_sign; }
 	/// Gives half of the distance between the min and max limit angles.
 	inline btScalar getHalfRange() const { return m_halfRange; }
 	/// Returns true when the last test() invocation recognized a limit violation.
 	inline bool isLimit() const { return m_solveLimit; }
 	/// Checks the given angle against the limit. If the limit is active and the angle
 	/// does not fit it, the angle is modified to equal the limit closest to it.
 	void fit(btScalar& angle) const;
 	/// Returns the correction value multiplied by the sign value.
 	btScalar getError() const;
 	btScalar getLow() const;
 	btScalar getHigh() const;
 };
 #endif //BT_TYPED_CONSTRAINT_H
--- a/opencl/lds_bank_conflict/main.cpp
+++ b/opencl/lds_bank_conflict/main.cpp
@@ -16,7 +16,7 @@
 // limitations under the License.
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include "BulletCommon/btQuickprof.h"
@@ -124,11 +124,11 @@ int main(int argc, char **argv)
 	cl_kernel			transposeNoBankConflictsKernel= 0;
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	device = btOpenCLUtils::getDevice(ctx,0);
+	device = b3OpenCLUtils::getDevice(ctx,0);
-	btOpenCLUtils::printDeviceInfo(device);
+	b3OpenCLUtils::printDeviceInfo(device);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
 	const char* cSourceFile = "opencl/lds_bank_conflict/lds_kernels.cl";
@@ -166,11 +166,11 @@ char flags[1024]={0};
 #endif//CL_PLATFORM_INTEL
-	copyKernel  = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copyKernel",&ciErrNum,0,flags);
+	copyKernel  = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copyKernel",&ciErrNum,0,flags);
-	copySharedMemKernel  = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copySharedMemKernel",&ciErrNum,0,flags);
+	copySharedMemKernel  = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copySharedMemKernel",&ciErrNum,0,flags);
-	transposeNaiveKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNaiveKernel",&ciErrNum,0,flags);
+	transposeNaiveKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNaiveKernel",&ciErrNum,0,flags);
-	transposeCoalescedKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeCoalescedKernel",&ciErrNum,0,flags);
+	transposeCoalescedKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeCoalescedKernel",&ciErrNum,0,flags);
-	transposeNoBankConflictsKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNoBankConflictsKernel",&ciErrNum,0,flags);
+	transposeNoBankConflictsKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNoBankConflictsKernel",&ciErrNum,0,flags);
 	btFillCL clMemSet(ctx,device,queue);
--- a/opencl/lds_bank_conflict/premake4.lua
+++ b/opencl/lds_bank_conflict/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
--- a/opencl/parallel_primitives/benchmark/premake4.lua
+++ b/opencl/parallel_primitives/benchmark/premake4.lua
@@ -19,8 +19,8 @@ function createProject(vendor)
 		files {
 			"test_large_problem_sorting.cpp",
-			"../../basic_initialize/btOpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/btFillCL.cpp",
 			"../host/btPrefixScanCL.cpp",
 			"../host/btRadixSort32CL.cpp",
--- a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp
+++ b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp
@@ -65,7 +65,7 @@
 */
 #include "../host/btRadixSort32CL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "BulletCommon/btQuickprof.h"
 cl_context g_cxMainContext;
@@ -78,7 +78,7 @@ cl_command_queue g_cqCommandQueue;
 bool g_verbose;
 ///Preferred OpenCL device/platform. When < 0 then no preference is used. 
-///Note that btOpenCLUtils might still use the preference of using a platform vendor that matches the SDK vendor used to build the application.
+///Note that b3OpenCLUtils might still use the preference of using a platform vendor that matches the SDK vendor used to build the application.
 ///Preferred device/platform take priority over this platform-vendor match
 int gPreferredDeviceId = -1;
 int gPreferredPlatformId = -1;
--- a/opencl/parallel_primitives/host/btBoundSearchCL.cpp
+++ b/opencl/parallel_primitives/host/btBoundSearchCL.cpp
@@ -21,7 +21,7 @@ subject to the following restrictions:
 #include "btBoundSearchCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btLauncherCL.h"
 #include "../kernels/BoundSearchKernelsCL.h"
@@ -37,20 +37,20 @@ btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command
 	cl_int pErrNum;
 	const char* kernelSource = boundSearchKernelsCL;
-	cl_program boundSearchProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
+	cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
 	btAssert(boundSearchProg);
-	m_lowerSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
+	m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
 	btAssert(m_lowerSortDataKernel );
-	m_upperSortDataKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
+	m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
 	btAssert(m_upperSortDataKernel);
 	m_subtractKernel = 0;
 	if( maxSize )
 	{
-		m_subtractKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
+		m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
 		btAssert(m_subtractKernel);
 	}
--- a/opencl/parallel_primitives/host/btFillCL.cpp
+++ b/opencl/parallel_primitives/host/btFillCL.cpp
@@ -1,5 +1,5 @@
 #include "btFillCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btBufferInfoCL.h"
 #include "btLauncherCL.h"
@@ -14,21 +14,21 @@ btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
 	cl_int pErrNum;
 	const char* additionalMacros = "";
-	cl_program fillProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
+	cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
 	btAssert(fillProg);
-	m_fillIntKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillIntKernel);
-	m_fillUnsignedIntKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillIntKernel);
-	m_fillFloatKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillFloatKernel);
-	m_fillKernelInt2 = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
+	m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillKernelInt2);
 }
--- a/opencl/parallel_primitives/host/btOpenCLArray.h
+++ b/opencl/parallel_primitives/host/btOpenCLArray.h
@@ -2,7 +2,7 @@
 #define BT_OPENCL_ARRAY_H
 #include "BulletCommon/btAlignedObjectArray.h"
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
 template <typename T> 
 class btOpenCLArray
--- a/opencl/parallel_primitives/host/btPrefixScanCL.cpp
+++ b/opencl/parallel_primitives/host/btPrefixScanCL.cpp
@@ -3,7 +3,7 @@
 #define BT_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
 #include "btLauncherCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "../kernels/PrefixScanKernelsCL.h"
 btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
@@ -14,14 +14,14 @@ btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_q
 	char* additionalMacros=0;
 	m_workBuffer = new btOpenCLArray<unsigned int>(ctx,queue,size);
-	cl_program scanProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
+	cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
 	btAssert(scanProg);
-	m_localScanKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
+	m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_localScanKernel );
-	m_blockSumKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
+	m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_blockSumKernel );
-	m_propagationKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
+	m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_propagationKernel );
 }
--- a/opencl/parallel_primitives/host/btRadixSort32CL.cpp
+++ b/opencl/parallel_primitives/host/btRadixSort32CL.cpp
@@ -1,7 +1,7 @@
 #include "btRadixSort32CL.h"
 #include "btLauncherCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btPrefixScanCL.h"
 #include "btFillCL.h"
@@ -13,7 +13,7 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 :m_commandQueue(queue)
 {
 	btOpenCLDeviceInfo info;
-	btOpenCLUtils::getDeviceInfo(device,&info);
+	b3OpenCLUtils::getDeviceInfo(device,&info);
 	m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU)!=0;
 	m_workBuffer1 = new btOpenCLArray<unsigned int>(ctx,queue);
@@ -42,15 +42,15 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 	cl_int pErrNum;
 	const char* kernelSource = radixSort32KernelsCL;
-	cl_program sortProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
+	cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
 	btAssert(sortProg);
-	m_streamCountSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
+	m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_streamCountSortDataKernel );
-	m_streamCountKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
+	m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_streamCountKernel);
@@ -58,19 +58,19 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 	if (m_deviceCPU)
 	{
-		m_sortAndScatterSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterSortDataKernel);
-		m_sortAndScatterKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterKernel);
 	} else
 	{
-		m_sortAndScatterSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterSortDataKernel);
-		m_sortAndScatterKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterKernel);
 	}
-	m_prefixScanKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
+	m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_prefixScanKernel);
 }
--- a/opencl/parallel_primitives/test/main.cpp
+++ b/opencl/parallel_primitives/test/main.cpp
@@ -14,7 +14,7 @@ subject to the following restrictions:
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
 #include "../host/btFillCL.h"
 #include "../host/btBoundSearchCL.h"
 #include "../host/btRadixSort32CL.h"
@@ -45,17 +45,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
+        b3OpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
--- a/opencl/parallel_primitives/test/premake4.lua
+++ b/opencl/parallel_primitives/test/premake4.lua
@@ -16,9 +16,9 @@ function createProject(vendor)
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/btFillCL.cpp",
 			"../host/btFillCL.h",
 			"../host/btBoundSearchCL.cpp",
--- a/opencl/reduce/main.cpp
+++ b/opencl/reduce/main.cpp
@@ -1,5 +1,5 @@
 ///original author: Erwin Coumans
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include <stdio.h>
@@ -45,17 +45,17 @@ int main(int argc, char* argv[])
 	cl_command_queue	queue;
 	cl_device_id		device;
 	cl_kernel			addKernel;
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	if (!ctx) {
 		printf("No OpenCL capable GPU found!");
 		return 0;
 	}
-	device = btOpenCLUtils::getDevice(ctx,0);
+	device = b3OpenCLUtils::getDevice(ctx,0);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
-	addKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
+	addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	int numElements = 1024*1024;
 	btOpenCLArray<int> a(ctx,queue);
--- a/opencl/reduce/premake4.lua
+++ b/opencl/reduce/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
--- a/opencl/vector_add/premake4.lua
+++ b/opencl/vector_add/premake4.lua
@@ -15,8 +15,8 @@ function createProject(vendor)
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h"
+			"../basic_initialize/b3OpenCLUtils.h"
 		}
 	end
--- a/opencl/vector_add_simplified/main.cpp
+++ b/opencl/vector_add_simplified/main.cpp
@@ -1,5 +1,5 @@
 ///original author: Erwin Coumans
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include <stdio.h>
@@ -29,17 +29,17 @@ int main(int argc, char* argv[])
 	cl_command_queue	queue;
 	cl_device_id		device;
 	cl_kernel			addKernel;
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	if (!ctx) {
 		printf("No OpenCL capable GPU found!");
 		return 0;
 	}
-	device = btOpenCLUtils::getDevice(ctx,0);
+	device = b3OpenCLUtils::getDevice(ctx,0);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
-	addKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum);
+	addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	int numElements = 32;
 	btOpenCLArray<float> a(ctx,queue);
--- a/opencl/vector_add_simplified/premake4.lua
+++ b/opencl/vector_add_simplified/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
--- a/readme.txt
+++ b/readme.txt
@@ -0,0 +1,6 @@
 Bullet 3.x GPU rigid body pipeline.
 See docs folder for information, 
 including how to build the project.