Add bitonic sort, for comparison.

Fix stringify.bat for Windows (the Mac/Linux version still needs to be fixed too).
2013-04-30 11:40:09 -07:00
parent c5f488fe6d
commit 92f0938af3
24 changed files with 1857 additions and 177 deletions
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h
@@ -30,12 +30,12 @@ static const char* sapFastCL= \
 "		float   m_maxElems[4];\n"
 "		int			m_maxIndices[4];\n"
 "	};\n"
-"} b3AabbCL;\n"
+"} btAabbCL;\n"
 "\n"
 "\n"
 "/// conservative test for overlap between two aabbs\n"
-"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2);\n"
-"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2)\n"
+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
 "{\n"
 "//skip pairs between static (mass=0) objects\n"
 "	if ((aabb1->m_maxIndices[3]==0) && (aabb2->m_maxIndices[3] == 0))\n"
@@ -50,18 +50,18 @@ static const char* sapFastCL= \
 "\n"
 "\n"
 "//computePairsKernelBatchWrite\n"
-"__kernel void   computePairsKernel( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
+"__kernel void   computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	int localId = get_local_id(0);\n"
 "\n"
 "	__local int numActiveWgItems[1];\n"
 "	__local int breakRequest[1];\n"
-"	__local b3AabbCL localAabbs[128];// = aabbs[i];\n"
+"	__local btAabbCL localAabbs[128];// = aabbs[i];\n"
 "	\n"
 "	int2 myPairs[64];\n"
 "	\n"
-"	b3AabbCL myAabb;\n"
+"	btAabbCL myAabb;\n"
 "	\n"
 "	myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"
 "	float testValue = 	myAabb.m_maxElems[axis];\n"
--- a/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
+++ b/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
@@ -30,12 +30,12 @@ static const char* sapCL= \
 "		float   m_maxElems[4];\n"
 "		int			m_maxIndices[4];\n"
 "	};\n"
-"} b3AabbCL;\n"
+"} btAabbCL;\n"
 "\n"
 "\n"
 "/// conservative test for overlap between two aabbs\n"
-"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2);\n"
-"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, __local const b3AabbCL* aabb2)\n"
+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
 "{\n"
 "	bool overlap = true;\n"
 "	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
@@ -43,8 +43,8 @@ static const char* sapCL= \
 "	overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
 "	return overlap;\n"
 "}\n"
-"bool TestAabbAgainstAabb2GlobalGlobal(__global const b3AabbCL* aabb1, __global const b3AabbCL* aabb2);\n"
-"bool TestAabbAgainstAabb2GlobalGlobal(__global const b3AabbCL* aabb1, __global const b3AabbCL* aabb2)\n"
+"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
+"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
 "{\n"
 "	bool overlap = true;\n"
 "	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
@@ -53,8 +53,8 @@ static const char* sapCL= \
 "	return overlap;\n"
 "}\n"
 "\n"
-"bool TestAabbAgainstAabb2Global(const b3AabbCL* aabb1, __global const b3AabbCL* aabb2);\n"
-"bool TestAabbAgainstAabb2Global(const b3AabbCL* aabb1, __global const b3AabbCL* aabb2)\n"
+"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
+"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
 "{\n"
 "	bool overlap = true;\n"
 "	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
@@ -64,7 +64,7 @@ static const char* sapCL= \
 "}\n"
 "\n"
 "\n"
-"__kernel void   computePairsKernelTwoArrays( __global const b3AabbCL* unsortedAabbs, __global const b3AabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
+"__kernel void   computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	if (i>=numUnsortedAabbs)\n"
@@ -89,7 +89,7 @@ static const char* sapCL= \
 "	}\n"
 "}\n"
 "\n"
-"__kernel void   computePairsKernelOriginal( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
+"__kernel void   computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	if (i>=numObjects)\n"
@@ -117,7 +117,7 @@ static const char* sapCL= \
 "\n"
 "\n"
 "\n"
-"__kernel void   computePairsKernelBarrier( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
+"__kernel void   computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	int localId = get_local_id(0);\n"
@@ -181,16 +181,16 @@ static const char* sapCL= \
 "}\n"
 "\n"
 "\n"
-"__kernel void   computePairsKernelLocalSharedMemory( __global const b3AabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
+"__kernel void   computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	int localId = get_local_id(0);\n"
 "\n"
 "	__local int numActiveWgItems[1];\n"
 "	__local int breakRequest[1];\n"
-"	__local b3AabbCL localAabbs[128];// = aabbs[i];\n"
+"	__local btAabbCL localAabbs[128];// = aabbs[i];\n"
 "	\n"
-"	b3AabbCL myAabb;\n"
+"	btAabbCL myAabb;\n"
 "	\n"
 "	myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"
 "	float testValue = 	myAabb.m_maxElems[axis];\n"
@@ -289,7 +289,7 @@ static const char* sapCL= \
 "\n"
 "\n"
 "\n"
-"__kernel void   copyAabbsKernel( __global const b3AabbCL* allAabbs, __global b3AabbCL* destAabbs, int numObjects)\n"
+"__kernel void   copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	if (i>=numObjects)\n"
@@ -300,7 +300,7 @@ static const char* sapCL= \
 "}\n"
 "\n"
 "\n"
-"__kernel void   flipFloatKernel( __global const b3AabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
+"__kernel void   flipFloatKernel( __global const btAabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	if (i>=numObjects)\n"
@@ -312,7 +312,7 @@ static const char* sapCL= \
 "}\n"
 "\n"
 "\n"
-"__kernel void   scatterKernel( __global const b3AabbCL* aabbs, volatile __global const int2* sortData, __global b3AabbCL* sortedAabbs, int numObjects)\n"
+"__kernel void   scatterKernel( __global const btAabbCL* aabbs, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
 "{\n"
 "	int i = get_global_id(0);\n"
 "	if (i>=numObjects)\n"
--- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h
@@ -1,6 +1,6 @@
 //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
 static const char* bvhTraversalKernelCL= \
-"//keep this enum in sync with the CPU version (in b3Collidable.h)\n"
+"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
 "//written by Erwin Coumans\n"
 "\n"
 "#define SHAPE_CONVEX_HULL 3\n"
@@ -13,7 +13,7 @@ static const char* bvhTraversalKernelCL= \
 "\n"
 "#define MAX_NUM_PARTS_IN_BITS 10\n"
 "\n"
-"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n"
+"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n"
 "///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n"
 "typedef struct\n"
 "{\n"
@@ -22,7 +22,7 @@ static const char* bvhTraversalKernelCL= \
 "	unsigned short int	m_quantizedAabbMax[3];\n"
 "	//4 bytes\n"
 "	int	m_escapeIndexOrTriangleIndex;\n"
-"} b3QuantizedBvhNode;\n"
+"} btQuantizedBvhNode;\n"
 "\n"
 "typedef struct\n"
 "{\n"
@@ -44,12 +44,12 @@ static const char* bvhTraversalKernelCL= \
 "	}\n"
 "	int getEscapeIndex() const\n"
 "	{\n"
-"		b3Assert(!isLeafNode());\n"
+"		btAssert(!isLeafNode());\n"
 "		return -m_escapeIndexOrTriangleIndex;\n"
 "	}\n"
 "	int	getTriangleIndex() const\n"
 "	{\n"
-"		b3Assert(isLeafNode());\n"
+"		btAssert(isLeafNode());\n"
 "		unsigned int x=0;\n"
 "		unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
 "		// Get only the lower bits where the triangle index is stored\n"
@@ -57,13 +57,13 @@ static const char* bvhTraversalKernelCL= \
 "	}\n"
 "	int	getPartId() const\n"
 "	{\n"
-"		b3Assert(isLeafNode());\n"
+"		btAssert(isLeafNode());\n"
 "		// Get only the highest bits where the part index is stored\n"
 "		return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n"
 "	}\n"
 "*/\n"
 "\n"
-"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n"
+"int	getTriangleIndex(const btQuantizedBvhNode* rootNode)\n"
 "{\n"
 "	unsigned int x=0;\n"
 "	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
@@ -71,13 +71,13 @@ static const char* bvhTraversalKernelCL= \
 "	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n"
 "}\n"
 "\n"
-"int isLeaf(const b3QuantizedBvhNode* rootNode)\n"
+"int isLeaf(const btQuantizedBvhNode* rootNode)\n"
 "{\n"
 "	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n"
 "	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n"
 "}\n"
 "	\n"
-"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n"
+"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n"
 "{\n"
 "	return -rootNode->m_escapeIndexOrTriangleIndex;\n"
 "}\n"
@@ -92,9 +92,9 @@ static const char* bvhTraversalKernelCL= \
 "	//4 bytes\n"
 "	int			m_subtreeSize;\n"
 "	int			m_padding[3];\n"
-"} b3BvhSubtreeInfo;\n"
+"} btBvhSubtreeInfo;\n"
 "\n"
-"///keep this in sync with b3Collidable.h\n"
+"///keep this in sync with btCollidable.h\n"
 "typedef struct\n"
 "{\n"
 "	int m_numChildShapes;\n"
@@ -102,7 +102,7 @@ static const char* bvhTraversalKernelCL= \
 "	int m_shapeType;\n"
 "	int m_shapeIndex;\n"
 "	\n"
-"} b3CollidableGpu;\n"
+"} btCollidableGpu;\n"
 "\n"
 "typedef struct\n"
 "{\n"
@@ -112,7 +112,7 @@ static const char* bvhTraversalKernelCL= \
 "	int m_unused0;\n"
 "	int m_unused1;\n"
 "	int m_unused2;\n"
-"} b3GpuChildShape;\n"
+"} btGpuChildShape;\n"
 "\n"
 "\n"
 "typedef struct\n"
@@ -142,7 +142,7 @@ static const char* bvhTraversalKernelCL= \
 "		float   m_maxElems[4];\n"
 "		int			m_maxIndices[4];\n"
 "	};\n"
-"} b3AabbCL;\n"
+"} btAabbCL;\n"
 "\n"
 "\n"
 "int testQuantizedAabbAgainstQuantizedAabb(\n"
@@ -196,12 +196,12 @@ static const char* bvhTraversalKernelCL= \
 "// work-in-progress\n"
 "__kernel void   bvhTraversalKernel( __global const int2* pairs, \n"
 "									__global const BodyData* rigidBodies, \n"
-"									__global const b3CollidableGpu* collidables,\n"
-"									__global b3AabbCL* aabbs,\n"
+"									__global const btCollidableGpu* collidables,\n"
+"									__global btAabbCL* aabbs,\n"
 "									__global int4* concavePairsOut,\n"
 "									__global volatile int* numConcavePairsOut,\n"
-"									__global const b3BvhSubtreeInfo* subtreeHeadersRoot,\n"
-"									__global const b3QuantizedBvhNode* quantizedNodesRoot,\n"
+"									__global const btBvhSubtreeInfo* subtreeHeadersRoot,\n"
+"									__global const btQuantizedBvhNode* quantizedNodesRoot,\n"
 "									__global const b3BvhInfo* bvhInfos,\n"
 "									int numPairs,\n"
 "									int maxNumConcavePairsCapacity)\n"
@@ -238,8 +238,8 @@ static const char* bvhTraversalKernelCL= \
 "	float4 bvhAabbMax = bvhInfo.m_aabbMax;\n"
 "	float4 bvhQuantization = bvhInfo.m_quantization;\n"
 "	int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n"
-"	__global const b3BvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n"
-"	__global const b3QuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n"
+"	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n"
+"	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n"
 "	\n"
 "\n"
 "	unsigned short int quantizedQueryAabbMin[3];\n"
@@ -249,7 +249,7 @@ static const char* bvhTraversalKernelCL= \
 "	\n"
 "	for (int i=0;i<numSubtreeHeaders;i++)\n"
 "	{\n"
-"		b3BvhSubtreeInfo subtree = subtreeHeaders[i];\n"
+"		btBvhSubtreeInfo subtree = subtreeHeaders[i];\n"
 "				\n"
 "		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n"
 "		if (overlap != 0)\n"
@@ -262,7 +262,7 @@ static const char* bvhTraversalKernelCL= \
 "			int aabbOverlap;\n"
 "			while (curIndex < endNodeIndex)\n"
 "			{\n"
-"				b3QuantizedBvhNode rootNode = quantizedNodes[curIndex];\n"
+"				btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n"
 "				aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n"
 "				isLeafNode = isLeaf(&rootNode);\n"
 "				if (aabbOverlap)\n"
--- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h
@@ -67,9 +67,9 @@ static const char* primitiveContactsKernelsCL= \
 "		float   m_maxElems[4];\n"
 "		int			m_maxIndices[4];\n"
 "	};\n"
-"} b3AabbCL;\n"
+"} btAabbCL;\n"
 "\n"
-"///keep this in sync with b3Collidable.h\n"
+"///keep this in sync with btCollidable.h\n"
 "typedef struct\n"
 "{\n"
 "	int m_numChildShapes;\n"
@@ -77,7 +77,7 @@ static const char* primitiveContactsKernelsCL= \
 "	int m_shapeType;\n"
 "	int m_shapeIndex;\n"
 "	\n"
-"} b3CollidableGpu;\n"
+"} btCollidableGpu;\n"
 "\n"
 "typedef struct\n"
 "{\n"
@@ -87,7 +87,7 @@ static const char* primitiveContactsKernelsCL= \
 "	int m_unused0;\n"
 "	int m_unused1;\n"
 "	int m_unused2;\n"
-"} b3GpuChildShape;\n"
+"} btGpuChildShape;\n"
 "\n"
 "#define GET_NPOINTS(x) (x).m_worldNormal.w\n"
 "\n"
@@ -129,7 +129,7 @@ static const char* primitiveContactsKernelsCL= \
 "	float4 m_plane;\n"
 "	int m_indexOffset;\n"
 "	int m_numIndices;\n"
-"} b3GpuFace;\n"
+"} btGpuFace;\n"
 "\n"
 "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
 "\n"
@@ -290,7 +290,7 @@ static const char* primitiveContactsKernelsCL= \
 "\n"
 "\n"
 "inline bool IsPointInPolygon(float4 p, \n"
-"							const b3GpuFace* face,\n"
+"							const btGpuFace* face,\n"
 "							__global const float4* baseVertex,\n"
 "							__global const  int* convexIndices,\n"
 "							float4* out)\n"
@@ -352,11 +352,11 @@ static const char* primitiveContactsKernelsCL= \
 "																int bodyIndexA, int bodyIndexB, \n"
 "																int collidableIndexA, int collidableIndexB, \n"
 "																__global const BodyData* rigidBodies, \n"
-"																__global const b3CollidableGpu* collidables,\n"
+"																__global const btCollidableGpu* collidables,\n"
 "																__global const ConvexPolyhedronCL* convexShapes,\n"
 "																__global const float4* convexVertices,\n"
 "																__global const int* convexIndices,\n"
-"																__global const b3GpuFace* faces,\n"
+"																__global const btGpuFace* faces,\n"
 "																__global Contact4* restrict globalContactsOut,\n"
 "																counter32_t nGlobalContactsOut,\n"
 "																int maxContactCapacity,\n"
@@ -383,7 +383,7 @@ static const char* primitiveContactsKernelsCL= \
 "\n"
 "	for ( int f = 0; f < numFaces; f++ )\n"
 "	{\n"
-"		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
+"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n"
 "\n"
 "		// set up a plane equation \n"
 "		float4 planeEqn;\n"
@@ -594,11 +594,11 @@ static const char* primitiveContactsKernelsCL= \
 "								int bodyIndexA, int bodyIndexB, \n"
 "								int collidableIndexA, int collidableIndexB, \n"
 "								__global const BodyData* rigidBodies, \n"
-"								__global const b3CollidableGpu*collidables,\n"
+"								__global const btCollidableGpu*collidables,\n"
 "								__global const ConvexPolyhedronCL* convexShapes,\n"
 "								__global const float4* convexVertices,\n"
 "								__global const int* convexIndices,\n"
-"								__global const b3GpuFace* faces,\n"
+"								__global const btGpuFace* faces,\n"
 "								__global Contact4* restrict globalContactsOut,\n"
 "								counter32_t nGlobalContactsOut,\n"
 "								int maxContactCapacity,\n"
@@ -733,8 +733,8 @@ static const char* primitiveContactsKernelsCL= \
 "																int bodyIndexA, int bodyIndexB, \n"
 "																int collidableIndexA, int collidableIndexB, \n"
 "																__global const BodyData* rigidBodies, \n"
-"																__global const b3CollidableGpu* collidables,\n"
-"																__global const b3GpuFace* faces,\n"
+"																__global const btCollidableGpu* collidables,\n"
+"																__global const btGpuFace* faces,\n"
 "																__global Contact4* restrict globalContactsOut,\n"
 "																counter32_t nGlobalContactsOut,\n"
 "																int maxContactCapacity)\n"
@@ -793,11 +793,11 @@ static const char* primitiveContactsKernelsCL= \
 "\n"
 "__kernel void   primitiveContactsKernel( __global const int2* pairs, \n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
 "																					__global Contact4* restrict globalContactsOut,\n"
 "																					counter32_t nGlobalContactsOut,\n"
@@ -972,14 +972,14 @@ static const char* primitiveContactsKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n"
 "													__global const BodyData* rigidBodies, \n"
-"													__global const b3CollidableGpu* collidables,\n"
+"													__global const btCollidableGpu* collidables,\n"
 "													__global const ConvexPolyhedronCL* convexShapes, \n"
 "													__global const float4* vertices,\n"
 "													__global const float4* uniqueEdges,\n"
-"													__global const b3GpuFace* faces,\n"
+"													__global const btGpuFace* faces,\n"
 "													__global const int* indices,\n"
-"													__global b3AabbCL* aabbs,\n"
-"													__global const b3GpuChildShape* gpuChildShapes,\n"
+"													__global btAabbCL* aabbs,\n"
+"													__global const btGpuChildShape* gpuChildShapes,\n"
 "													__global Contact4* restrict globalContactsOut,\n"
 "													counter32_t nGlobalContactsOut,\n"
 "													int numCompoundPairs, int maxContactCapacity\n"
@@ -1157,7 +1157,7 @@ static const char* primitiveContactsKernelsCL= \
 "									int bodyIndexA, int bodyIndexB,\n"
 "									int collidableIndexA, int collidableIndexB, \n"
 "									__global const BodyData* rigidBodies, \n"
-"									__global const b3CollidableGpu* collidables,\n"
+"									__global const btCollidableGpu* collidables,\n"
 "									const float4* triangleVertices,\n"
 "									__global Contact4* restrict globalContactsOut,\n"
 "									counter32_t nGlobalContactsOut,\n"
@@ -1299,13 +1299,13 @@ static const char* primitiveContactsKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n"
 "												__global const BodyData* rigidBodies,\n"
-"												__global const b3CollidableGpu* collidables,\n"
+"												__global const btCollidableGpu* collidables,\n"
 "												__global const ConvexPolyhedronCL* convexShapes, \n"
 "												__global const float4* vertices,\n"
 "												__global const float4* uniqueEdges,\n"
-"												__global const b3GpuFace* faces,\n"
+"												__global const btGpuFace* faces,\n"
 "												__global const int* indices,\n"
-"												__global b3AabbCL* aabbs,\n"
+"												__global btAabbCL* aabbs,\n"
 "												__global Contact4* restrict globalContactsOut,\n"
 "												counter32_t nGlobalContactsOut,\n"
 "													int numConcavePairs, int maxContactCapacity\n"
@@ -1329,7 +1329,7 @@ static const char* primitiveContactsKernelsCL= \
 "	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n"
 "	{\n"
 "		int f = concavePairs[i].z;\n"
-"		b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
+"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
 "		\n"
 "		float4 verticesA[3];\n"
 "		for (int i=0;i<3;i++)\n"
--- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h
@@ -55,7 +55,7 @@ static const char* satClipKernelsCL= \
 "} Contact4;\n"
 "\n"
 "\n"
-"///keep this in sync with b3Collidable.h\n"
+"///keep this in sync with btCollidable.h\n"
 "typedef struct\n"
 "{\n"
 "	int m_numChildShapes;\n"
@@ -63,7 +63,7 @@ static const char* satClipKernelsCL= \
 "	int m_shapeType;\n"
 "	int m_shapeIndex;\n"
 "	\n"
-"} b3CollidableGpu;\n"
+"} btCollidableGpu;\n"
 "\n"
 "typedef struct\n"
 "{\n"
@@ -73,7 +73,7 @@ static const char* satClipKernelsCL= \
 "	int m_unused0;\n"
 "	int m_unused1;\n"
 "	int m_unused2;\n"
-"} b3GpuChildShape;\n"
+"} btGpuChildShape;\n"
 "\n"
 "#define GET_NPOINTS(x) (x).m_worldNormal.w\n"
 "\n"
@@ -115,7 +115,7 @@ static const char* satClipKernelsCL= \
 "	float4 m_plane;\n"
 "	int m_indexOffset;\n"
 "	int m_numIndices;\n"
-"} b3GpuFace;\n"
+"} btGpuFace;\n"
 "\n"
 "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
 "\n"
@@ -357,7 +357,7 @@ static const char* satClipKernelsCL= \
 "	float4* worldVertsB2, int capacityWorldVertsB2,\n"
 "	const float minDist, float maxDist,\n"
 "	__global const float4* vertices,\n"
-"	__global const b3GpuFace* faces,\n"
+"	__global const btGpuFace* faces,\n"
 "	__global const int* indices,\n"
 "	float4* contactsOut,\n"
 "	int contactCapacity)\n"
@@ -392,7 +392,7 @@ static const char* satClipKernelsCL= \
 "	if (closestFaceA<0)\n"
 "		return numContactsOut;\n"
 "\n"
-"	b3GpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
+"	btGpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n"
 "\n"
 "	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
 "	int numVerticesA = polyA.m_numIndices;\n"
@@ -416,7 +416,7 @@ static const char* satClipKernelsCL= \
 "		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
 "		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
 "\n"
-"		//b3Swap(pVtxIn,pVtxOut);\n"
+"		//btSwap(pVtxIn,pVtxOut);\n"
 "		float4* tmp = pVtxOut;\n"
 "		pVtxOut = pVtxIn;\n"
 "		pVtxIn = tmp;\n"
@@ -458,10 +458,10 @@ static const char* satClipKernelsCL= \
 "	float4* worldVertsB2, int capacityWorldVertsB2,\n"
 "	const float minDist, float maxDist,\n"
 "	const float4* verticesA,\n"
-"	const b3GpuFace* facesA,\n"
+"	const btGpuFace* facesA,\n"
 "	const int* indicesA,\n"
 "	__global const float4* verticesB,\n"
-"	__global const b3GpuFace* facesB,\n"
+"	__global const btGpuFace* facesB,\n"
 "	__global const int* indicesB,\n"
 "	float4* contactsOut,\n"
 "	int contactCapacity)\n"
@@ -496,7 +496,7 @@ static const char* satClipKernelsCL= \
 "	if (closestFaceA<0)\n"
 "		return numContactsOut;\n"
 "\n"
-"	b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
+"	btGpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];\n"
 "\n"
 "	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n"
 "	int numVerticesA = polyA.m_numIndices;\n"
@@ -520,7 +520,7 @@ static const char* satClipKernelsCL= \
 "		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n"
 "		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n"
 "\n"
-"		//b3Swap(pVtxIn,pVtxOut);\n"
+"		//btSwap(pVtxIn,pVtxOut);\n"
 "		float4* tmp = pVtxOut;\n"
 "		pVtxOut = pVtxIn;\n"
 "		pVtxIn = tmp;\n"
@@ -561,7 +561,7 @@ static const char* satClipKernelsCL= \
 "	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
 "	const float minDist, float maxDist,\n"
 "	__global const float4* vertices,\n"
-"	__global const b3GpuFace* faces,\n"
+"	__global const btGpuFace* faces,\n"
 "	__global const int* indices,\n"
 "	float4*	localContactsOut,\n"
 "	int localContactCapacity)\n"
@@ -589,7 +589,7 @@ static const char* satClipKernelsCL= \
 "	}\n"
 "\n"
 "	{\n"
-"		const b3GpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
+"		const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
 "		const int numVertices = polyB.m_numIndices;\n"
 "		for(int e0=0;e0<numVertices;e0++)\n"
 "		{\n"
@@ -617,10 +617,10 @@ static const char* satClipKernelsCL= \
 "	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n"
 "	const float minDist, float maxDist,\n"
 "	const float4* verticesA,\n"
-"	const b3GpuFace* facesA,\n"
+"	const btGpuFace* facesA,\n"
 "	const int* indicesA,\n"
 "	__global const float4* verticesB,\n"
-"	__global const b3GpuFace* facesB,\n"
+"	__global const btGpuFace* facesB,\n"
 "	__global const int* indicesB,\n"
 "	float4*	localContactsOut,\n"
 "	int localContactCapacity)\n"
@@ -648,7 +648,7 @@ static const char* satClipKernelsCL= \
 "	}\n"
 "\n"
 "	{\n"
-"		const b3GpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
+"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
 "		const int numVertices = polyB.m_numIndices;\n"
 "		for(int e0=0;e0<numVertices;e0++)\n"
 "		{\n"
@@ -956,11 +956,11 @@ static const char* satClipKernelsCL= \
 "\n"
 "__kernel void   clipHullHullKernel( __global const int2* pairs, \n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
 "																					__global const float4* separatingNormals,\n"
 "																					__global const int* hasSeparatingAxis,\n"
@@ -1053,13 +1053,13 @@ static const char* satClipKernelsCL= \
 "\n"
 "__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
-"																					__global const b3GpuChildShape* gpuChildShapes,\n"
+"																					__global const btGpuChildShape* gpuChildShapes,\n"
 "																					__global const float4* gpuCompoundSepNormalsOut,\n"
 "																					__global const int* gpuHasCompoundSepNormalsOut,\n"
 "																					__global Contact4* restrict globalContactsOut,\n"
@@ -1185,7 +1185,7 @@ static const char* satClipKernelsCL= \
 "\n"
 "__kernel void   sphereSphereCollisionKernel( __global const int2* pairs, \n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const float4* separatingNormals,\n"
 "																					__global const int* hasSeparatingAxis,\n"
 "																					__global Contact4* restrict globalContactsOut,\n"
@@ -1252,13 +1252,13 @@ static const char* satClipKernelsCL= \
 "\n"
 "__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
-"																					__global const b3GpuChildShape* gpuChildShapes,\n"
+"																					__global const btGpuChildShape* gpuChildShapes,\n"
 "																					__global const float4* separatingNormals,\n"
 "																					__global Contact4* restrict globalContactsOut,\n"
 "																					counter32_t nGlobalContactsOut,\n"
@@ -1306,7 +1306,7 @@ static const char* satClipKernelsCL= \
 "		convexPolyhedronA.m_vertexOffset = 0;\n"
 "		float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
 "\n"
-"		b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
+"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
 "		\n"
 "		float4 verticesA[3];\n"
 "		for (int i=0;i<3;i++)\n"
@@ -1335,7 +1335,7 @@ static const char* satClipKernelsCL= \
 "                                  \n"
 "		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
 "                             \n"
-"		b3GpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
+"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
 "		int indicesA[3+3+2+2+2];\n"
 "		int curUsedIndices=0;\n"
 "		int fidx=0;\n"
@@ -1496,7 +1496,7 @@ static const char* satClipKernelsCL= \
 "                      int capacityWorldVerts,\n"
 "                      const float minDist, float maxDist,\n"
 "                      __global const float4* vertices,\n"
-"                      __global const b3GpuFace* faces,\n"
+"                      __global const btGpuFace* faces,\n"
 "                      __global const int* indices,\n"
 "                      __global int4* clippingFaces, int pairIndex)\n"
 "{\n"
@@ -1523,7 +1523,7 @@ static const char* satClipKernelsCL= \
 "	}\n"
 "    \n"
 "	{\n"
-"		const b3GpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
+"		const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n"
 "		const int numVertices = polyB.m_numIndices;\n"
 "		for(int e0=0;e0<numVertices;e0++)\n"
 "		{\n"
@@ -1664,11 +1664,11 @@ static const char* satClipKernelsCL= \
 "\n"
 "__kernel void   findClippingFacesKernel(  __global const int2* pairs,\n"
 "                                        __global const BodyData* rigidBodies,\n"
-"                                        __global const b3CollidableGpu* collidables,\n"
+"                                        __global const btCollidableGpu* collidables,\n"
 "                                        __global const ConvexPolyhedronCL* convexShapes,\n"
 "                                        __global const float4* vertices,\n"
 "                                        __global const float4* uniqueEdges,\n"
-"                                        __global const b3GpuFace* faces,\n"
+"                                        __global const btGpuFace* faces,\n"
 "                                        __global const int* indices,\n"
 "                                        __global const float4* separatingNormals,\n"
 "                                        __global const int* hasSeparatingAxis,\n"
--- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h
+++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h
@@ -1,6 +1,6 @@
 //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
 static const char* satKernelsCL= \
-"//keep this enum in sync with the CPU version (in b3Collidable.h)\n"
+"//keep this enum in sync with the CPU version (in btCollidable.h)\n"
 "//written by Erwin Coumans\n"
 "\n"
 "\n"
@@ -13,7 +13,7 @@ static const char* satKernelsCL= \
 "\n"
 "typedef unsigned int u32;\n"
 "\n"
-"///keep this in sync with b3Collidable.h\n"
+"///keep this in sync with btCollidable.h\n"
 "typedef struct\n"
 "{\n"
 "	int m_numChildShapes;\n"
@@ -21,7 +21,7 @@ static const char* satKernelsCL= \
 "	int m_shapeType;\n"
 "	int m_shapeIndex;\n"
 "	\n"
-"} b3CollidableGpu;\n"
+"} btCollidableGpu;\n"
 "\n"
 "typedef struct\n"
 "{\n"
@@ -31,7 +31,7 @@ static const char* satKernelsCL= \
 "	int m_unused0;\n"
 "	int m_unused1;\n"
 "	int m_unused2;\n"
-"} b3GpuChildShape;\n"
+"} btGpuChildShape;\n"
 "\n"
 "\n"
 "typedef struct\n"
@@ -80,14 +80,14 @@ static const char* satKernelsCL= \
 "		float   m_maxElems[4];\n"
 "		int			m_maxIndices[4];\n"
 "	};\n"
-"} b3AabbCL;\n"
+"} btAabbCL;\n"
 "\n"
 "typedef struct\n"
 "{\n"
 "	float4 m_plane;\n"
 "	int m_indexOffset;\n"
 "	int m_numIndices;\n"
-"} b3GpuFace;\n"
+"} btGpuFace;\n"
 "\n"
 "#define make_float4 (float4)\n"
 "\n"
@@ -296,12 +296,12 @@ static const char* satKernelsCL= \
 "	\n"
 "	const float4* verticesA, \n"
 "	const float4* uniqueEdgesA, \n"
-"	const b3GpuFace* facesA,\n"
+"	const btGpuFace* facesA,\n"
 "	const int*  indicesA,\n"
 "\n"
 "	__global const float4* verticesB, \n"
 "	__global const float4* uniqueEdgesB, \n"
-"	__global const b3GpuFace* facesB,\n"
+"	__global const btGpuFace* facesB,\n"
 "	__global const int*  indicesB,\n"
 "	float4* sep,\n"
 "	float* dmin)\n"
@@ -348,11 +348,11 @@ static const char* satKernelsCL= \
 "	const float4 DeltaC2,\n"
 "	__global const float4* verticesA, \n"
 "	__global const float4* uniqueEdgesA, \n"
-"	__global const b3GpuFace* facesA,\n"
+"	__global const btGpuFace* facesA,\n"
 "	__global const int*  indicesA,\n"
 "	const float4* verticesB,\n"
 "	const float4* uniqueEdgesB, \n"
-"	const b3GpuFace* facesB,\n"
+"	const btGpuFace* facesB,\n"
 "	const int*  indicesB,\n"
 "	float4* sep,\n"
 "	float* dmin)\n"
@@ -401,11 +401,11 @@ static const char* satKernelsCL= \
 "	const float4 DeltaC2,\n"
 "	const float4* verticesA, \n"
 "	const float4* uniqueEdgesA, \n"
-"	const b3GpuFace* facesA,\n"
+"	const btGpuFace* facesA,\n"
 "	const int*  indicesA,\n"
 "	__global const float4* verticesB, \n"
 "	__global const float4* uniqueEdgesB, \n"
-"	__global const b3GpuFace* facesB,\n"
+"	__global const btGpuFace* facesB,\n"
 "	__global const int*  indicesB,\n"
 "		float4* sep,\n"
 "	float* dmin)\n"
@@ -507,7 +507,7 @@ static const char* satKernelsCL= \
 "	const float4 DeltaC2,\n"
 "	__global const float4* vertices, \n"
 "	__global const float4* uniqueEdges, \n"
-"	__global const b3GpuFace* faces,\n"
+"	__global const btGpuFace* faces,\n"
 "	__global const int*  indices,\n"
 "	float4* sep,\n"
 "	float* dmin)\n"
@@ -566,7 +566,7 @@ static const char* satKernelsCL= \
 "	const float4 DeltaC2,\n"
 "	__global const float4* vertices, \n"
 "	__global const float4* uniqueEdges, \n"
-"	__global const b3GpuFace* faces,\n"
+"	__global const btGpuFace* faces,\n"
 "	__global const int*  indices,\n"
 "	float4* sep,\n"
 "	float* dmin)\n"
@@ -643,14 +643,14 @@ static const char* satKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
-"																					__global b3AabbCL* aabbs,\n"
-"																					__global const b3GpuChildShape* gpuChildShapes,\n"
+"																					__global btAabbCL* aabbs,\n"
+"																					__global const btGpuChildShape* gpuChildShapes,\n"
 "																					__global volatile float4* gpuCompoundSepNormalsOut,\n"
 "																					__global volatile int* gpuHasCompoundSepNormalsOut,\n"
 "																					int numCompoundPairs\n"
@@ -760,14 +760,14 @@ static const char* satKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   findCompoundPairsKernel( __global const int2* pairs, \n"
 "	__global const BodyData* rigidBodies, \n"
-"	__global const b3CollidableGpu* collidables,\n"
+"	__global const btCollidableGpu* collidables,\n"
 "	__global const ConvexPolyhedronCL* convexShapes, \n"
 "	__global const float4* vertices,\n"
 "	__global const float4* uniqueEdges,\n"
-"	__global const b3GpuFace* faces,\n"
+"	__global const btGpuFace* faces,\n"
 "	__global const int* indices,\n"
-"	__global b3AabbCL* aabbs,\n"
-"	__global const b3GpuChildShape* gpuChildShapes,\n"
+"	__global btAabbCL* aabbs,\n"
+"	__global const btGpuChildShape* gpuChildShapes,\n"
 "	__global volatile int4* gpuCompoundPairsOut,\n"
 "	__global volatile int* numCompoundPairsOut,\n"
 "	int numPairs,\n"
@@ -942,13 +942,13 @@ static const char* satKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   findSeparatingAxisKernel( __global const int2* pairs, \n"
 "																					__global const BodyData* rigidBodies, \n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
-"																					__global b3AabbCL* aabbs,\n"
+"																					__global btAabbCL* aabbs,\n"
 "																					__global volatile float4* separatingNormals,\n"
 "																					__global volatile int* hasSeparatingAxis,\n"
 "																					int numPairs\n"
@@ -1056,14 +1056,14 @@ static const char* satKernelsCL= \
 "// work-in-progress\n"
 "__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n"
 "																					__global const BodyData* rigidBodies,\n"
-"																					__global const b3CollidableGpu* collidables,\n"
+"																					__global const btCollidableGpu* collidables,\n"
 "																					__global const ConvexPolyhedronCL* convexShapes, \n"
 "																					__global const float4* vertices,\n"
 "																					__global const float4* uniqueEdges,\n"
-"																					__global const b3GpuFace* faces,\n"
+"																					__global const btGpuFace* faces,\n"
 "																					__global const int* indices,\n"
-"																					__global const b3GpuChildShape* gpuChildShapes,\n"
-"																					__global b3AabbCL* aabbs,\n"
+"																					__global const btGpuChildShape* gpuChildShapes,\n"
+"																					__global btAabbCL* aabbs,\n"
 "																					__global float4* concaveSeparatingNormalsOut,\n"
 "																					int numConcavePairs\n"
 "																					)\n"
@@ -1106,9 +1106,9 @@ static const char* satKernelsCL= \
 "	convexPolyhedronA.m_vertexOffset = 0;\n"
 "	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
 "\n"
-"	b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
+"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
 "	float4 triMinAabb, triMaxAabb;\n"
-"	b3AabbCL triAabb;\n"
+"	btAabbCL triAabb;\n"
 "	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n"
 "	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n"
 "	\n"
@@ -1153,7 +1153,7 @@ static const char* satKernelsCL= \
 "                                  \n"
 "		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
 "                             \n"
-"		b3GpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
+"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
 "		int indicesA[3+3+2+2+2];\n"
 "		int curUsedIndices=0;\n"
 "		int fidx=0;\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h
@@ -210,7 +210,7 @@ static const char* batchingKernelsNewCL= \
 "\n"
 "					if (i!=numValidConstraints)\n"
 "					{\n"
-"						//b3Swap(cs[i],cs[numValidConstraints]);\n"
+"						//btSwap(cs[i],cs[numValidConstraints]);\n"
 "						\n"
 "						Contact4 tmp = cs[i];\n"
 "						cs[i] = cs[numValidConstraints];\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h
@@ -47,7 +47,7 @@ static const char* integrateKernelCL= \
 "  integrateTransformsKernel( __global Body* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n"
 "{\n"
 "	int nodeID = get_global_id(0);\n"
-"	float B3_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
+"	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n"
 "	if( nodeID < numNodes && (bodies[nodeID].m_invMass != 0.f))\n"
 "	{\n"
 "		//angular velocity\n"
@@ -61,9 +61,9 @@ static const char* integrateKernelCL= \
 "			float4 angvel = bodies[nodeID].m_angVel;\n"
 "			float fAngle = native_sqrt(dot(angvel, angvel));\n"
 "			//limit the angular motion\n"
-"			if(fAngle*timeStep > B3_GPU_ANGULAR_MOTION_THRESHOLD)\n"
+"			if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n"
 "			{\n"
-"				fAngle = B3_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n"
+"				fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n"
 "			}\n"
 "			if(fAngle < 0.001f)\n"
 "			{\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h
@@ -313,8 +313,8 @@ static const char* solveContactCL= \
 "	}\n"
 "}\n"
 "\n"
-"void b3PlaneSpace1 (const float4* n, float4* p, float4* q);\n"
-" void b3PlaneSpace1 (const float4* n, float4* p, float4* q)\n"
+"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n"
+" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n"
 "{\n"
 "  if (fabs(n[0].z) > 0.70710678f) {\n"
 "    // choose p in y-z plane\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h
@@ -265,8 +265,8 @@ static const char* solveFrictionCL= \
 "	float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n"
 "	return -1.f/(jmj0+jmj1+jmj2+jmj3);\n"
 "}\n"
-"void b3PlaneSpace1 (const float4* n, float4* p, float4* q);\n"
-" void b3PlaneSpace1 (const float4* n, float4* p, float4* q)\n"
+"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n"
+" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n"
 "{\n"
 "  if (fabs(n[0].z) > 0.70710678f) {\n"
 "    // choose p in y-z plane\n"
@@ -347,7 +347,7 @@ static const char* solveFrictionCL= \
 "			float4 n = -cs->m_linear;\n"
 "			\n"
 "			float4 tangent[2];\n"
-"			b3PlaneSpace1(&n,&tangent[0],&tangent[1]);\n"
+"			btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n"
 "			float4 angular0, angular1, linear;\n"
 "			float4 r0 = center - posA;\n"
 "			float4 r1 = center - posB;\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h
@@ -489,8 +489,8 @@ static const char* solverSetupCL= \
 "} ConstBufferSSD;\n"
 "\n"
 "\n"
-"void b3PlaneSpace1 (float4 n, float4* p, float4* q);\n"
-" void b3PlaneSpace1 (float4 n, float4* p, float4* q)\n"
+"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
+" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
 "{\n"
 "  if (fabs(n.z) > 0.70710678f) {\n"
 "    // choose p in y-z plane\n"
@@ -577,7 +577,7 @@ static const char* solverSetupCL= \
 "		center /= (float)src->m_worldNormal.w;\n"
 "\n"
 "		float4 tangent[2];\n"
-"		b3PlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
+"		btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
 "		\n"
 "		float4 r[2];\n"
 "		r[0] = center - posA;\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h
@@ -488,8 +488,8 @@ static const char* solverUtilsCL= \
 "}\n"
 "\n"
 "\n"
-"void b3PlaneSpace1 (float4 n, float4* p, float4* q);\n"
-" void b3PlaneSpace1 (float4 n, float4* p, float4* q)\n"
+"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n"
+" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n"
 "{\n"
 "  if (fabs(n.z) > 0.70710678f) {\n"
 "    // choose p in y-z plane\n"
@@ -739,7 +739,7 @@ static const char* solverUtilsCL= \
 "			float4 n = -cs->m_linear;\n"
 "			\n"
 "			float4 tangent[2];\n"
-"			b3PlaneSpace1(n,&tangent[0],&tangent[1]);\n"
+"			btPlaneSpace1(n,&tangent[0],&tangent[1]);\n"
 "			float4 angular0, angular1, linear;\n"
 "			float4 r0 = center - posA;\n"
 "			float4 r1 = center - posB;\n"
@@ -896,7 +896,7 @@ static const char* solverUtilsCL= \
 "		center /= (float)src->m_worldNormal.w;\n"
 "\n"
 "		float4 tangent[2];\n"
-"		b3PlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
+"		btPlaneSpace1(src->m_worldNormal,&tangent[0],&tangent[1]);\n"
 "		\n"
 "		float4 r[2];\n"
 "		r[0] = center - posA;\n"
--- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h
+++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h
@@ -120,7 +120,7 @@ static const char* updateAabbsKernelCL= \
 "	float			fy;\n"
 "	float			fz;\n"
 "	int	uw;\n"
-"} b3AABBCL;\n"
+"} btAABBCL;\n"
 "\n"
 "__inline\n"
 "Matrix3x3 mtTranspose(Matrix3x3 m)\n"
@@ -156,7 +156,7 @@ static const char* updateAabbsKernelCL= \
 "}\n"
 "\n"
 "\n"
-"__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)\n"
+"__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n"
 "{\n"
 "	int nodeID = get_global_id(0);\n"
 "		\n"
@@ -171,8 +171,8 @@ static const char* updateAabbsKernelCL= \
 "			\n"
 "		if (shapeIndex>=0)\n"
 "		{\n"
-"			b3AABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n"
-"			b3AABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n"
+"			btAABBCL minAabb = plocalShapeAABB[collidableIndex*2];\n"
+"			btAABBCL maxAabb = plocalShapeAABB[collidableIndex*2+1];\n"
 "				\n"
 "			float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;\n"
 "			float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;\n"