Support compound versus compound collision shape acceleration on GPU, using AABB tree versus AABB tree.

Remove the constructors from b3Vector3 to make it a POD type, so it can go into a union (and is more compatible with OpenCL float4). Use b3MakeVector3 instead of the constructor. Share some code between C++ and GPU in a shared file: see b3TransformAabb2 in src/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h. Improve PairBench a bit: show timings and the number of overlapping pairs. Increase the shadowmap default size to 8192x8192 (hope the GPU supports it).
2013-08-20 03:19:59 -07:00
parent 41ba48b10d
commit 677722bba3
62 changed files with 1827 additions and 564 deletions
--- a/src/Bullet3Common/b3Logging.cpp
+++ b/src/Bullet3Common/b3Logging.cpp
@@ -18,22 +18,44 @@ subject to the following restrictions:
 #include <stdio.h>
 #include <stdarg.h>

+#ifdef _WIN32
+#include <Windows.h>
+#endif //_WIN32
+

 void b3PrintfFuncDefault(const char* msg)
 {
+#ifdef _WIN32
+	OutputDebugStringA(msg);
+#else
 	printf("%s",msg);
+#endif
+
 }

 void b3WarningMessageFuncDefault(const char* msg)
 {
+#ifdef _WIN32
+	OutputDebugStringA(msg);
+#else
 	printf("%s",msg);
+#endif
+
 }

+
 void b3ErrorMessageFuncDefault(const char* msg)
 {
+#ifdef _WIN32
+	OutputDebugStringA(msg);
+#else
 	printf("%s",msg);
+#endif
+    
 }

+
+
 static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault;
 static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault;
 static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault;
--- a/src/Bullet3Common/b3Matrix3x3.h
+++ b/src/Bullet3Common/b3Matrix3x3.h
@@ -128,7 +128,7 @@ public:
 	*  @param i Column number 0 indexed */
 	B3_FORCE_INLINE b3Vector3 getColumn(int i) const
 	{
-		return b3Vector3(m_el[0][i],m_el[1][i],m_el[2][i]);
+		return b3MakeVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
 	}


@@ -1019,7 +1019,7 @@ b3Matrix3x3::adjoint() const
 B3_FORCE_INLINE b3Matrix3x3 
 b3Matrix3x3::inverse() const
 {
-	b3Vector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
+	b3Vector3 co = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
 	b3Scalar det = (*this)[0].dot(co);
 	b3FullAssert(det != b3Scalar(0.0));
 	b3Scalar s = b3Scalar(1.0) / det;
@@ -1143,7 +1143,7 @@ operator*(const b3Matrix3x3& m, const b3Vector3& v)
 #if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
    return v.dot3(m[0], m[1], m[2]);
 #else
-	return b3Vector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
+	return b3MakeVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
 #endif
 }

@@ -1184,7 +1184,7 @@ operator*(const b3Vector3& v, const b3Matrix3x3& m)
    
    return b3Vector3(c0);
 #else
-	return b3Vector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
+	return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
 #endif
 }

--- a/src/Bullet3Common/b3Quaternion.h
+++ b/src/Bullet3Common/b3Quaternion.h
@@ -407,9 +407,9 @@ public:
 		b3Scalar s_squared = 1.f-m_floats[3]*m_floats[3];
 		
 		if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
-			return b3Vector3(1.0, 0.0, 0.0);  // Arbitrary
+			return b3MakeVector3(1.0, 0.0, 0.0);  // Arbitrary
 		b3Scalar s = 1.f/b3Sqrt(s_squared);
-		return b3Vector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
+		return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 	}

 	/**@brief Return the inverse of this quaternion */
@@ -848,7 +848,7 @@ b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
 #elif defined(B3_USE_NEON)
    return b3Vector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
 #else	
-	return b3Vector3(q.getX(),q.getY(),q.getZ());
+	return b3MakeVector3(q.getX(),q.getY(),q.getZ());
 #endif
 }

--- a/src/Bullet3Common/b3Transform.h
+++ b/src/Bullet3Common/b3Transform.h
@@ -45,7 +45,7 @@ public:
   * @param q Rotation from quaternion 
   * @param c Translation from Vector (default 0,0,0) */
 	explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q, 
-		const b3Vector3& c = b3Vector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) 
+		const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) 
 		: m_basis(q),
 		m_origin(c)
 	{}
@@ -54,7 +54,7 @@ public:
   * @param b Rotation from Matrix 
   * @param c Translation from Vector default (0,0,0)*/
 	explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b, 
-		const b3Vector3& c = b3Vector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
+		const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
 		: m_basis(b),
 		m_origin(c)
 	{}
--- a/src/Bullet3Common/b3TransformUtil.h
+++ b/src/Bullet3Common/b3TransformUtil.h
@@ -24,7 +24,7 @@ subject to the following restrictions:

 B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents,const b3Vector3& supportDir)
 {
-	return b3Vector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(),
+	return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(),
      supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(),
      supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ()); 
 }
@@ -99,12 +99,12 @@ public:
 		b3Quaternion orn1 = orn0.nearest(orn1a);
 		b3Quaternion dorn = orn1 * orn0.inverse();
 		angle = dorn.getAngle();
-		axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ());
+		axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
 		axis[3] = b3Scalar(0.);
 		//check for axis length
 		b3Scalar len = axis.length2();
 		if (len < B3_EPSILON*B3_EPSILON)
-			axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
+			axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
 		else
 			axis /= b3Sqrt(len);
 	}
@@ -128,12 +128,12 @@ public:
 		dorn.normalize();
 		
 		angle = dorn.getAngle();
-		axis = b3Vector3(dorn.getX(),dorn.getY(),dorn.getZ());
+		axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
 		axis[3] = b3Scalar(0.);
 		//check for axis length
 		b3Scalar len = axis.length2();
 		if (len < B3_EPSILON*B3_EPSILON)
-			axis = b3Vector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
+			axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
 		else
 			axis /= b3Sqrt(len);
 	}
--- a/src/Bullet3Common/b3Vector3.h
+++ b/src/Bullet3Common/b3Vector3.h
@@ -22,6 +22,8 @@ subject to the following restrictions:
 #include "b3MinMax.h"
 #include "b3AlignedAllocator.h"

+
+
 #ifdef B3_USE_DOUBLE_PRECISION
 #define b3Vector3Data b3Vector3DoubleData
 #define b3Vector3DataName "b3Vector3DoubleData"
@@ -71,84 +73,78 @@ const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x

 #endif

+class b3Vector3;
+class b3Vector4;
+inline b3Vector3 b3MakeVector3( b3SimdFloat4 v);
+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z);
+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w);
+inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
+inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w);
+
+
 /**@brief b3Vector3 can be used to represent 3D points and vectors.
 * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
 * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
 */
 B3_ATTRIBUTE_ALIGNED16(class) b3Vector3
 {
+public:
+#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
+        union {
+            b3SimdFloat4      mVec128;
+            float	m_floats[4];
+			struct {float x,y,z,w;};
+			
+        };
+#else
+	union
+	{
+        	float	m_floats[4];
+			struct {float	x,y,z,w;};
+	};
+#endif
+
+
 public:

 	B3_DECLARE_ALIGNED_ALLOCATOR();

-#if defined (__SPU__) && defined (__CELLOS_LV2__)
-		b3Scalar	m_floats[4];
-public:
-	B3_FORCE_INLINE const vec_float4&	get128() const
+#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
+
+	/*B3_FORCE_INLINE		b3Vector3()
 	{
-		return *((const vec_float4*)&m_floats[0]);
 	}
-public:
-#else //__CELLOS_LV2__ __SPU__
-    #if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
-        union {
-            b3SimdFloat4      mVec128;
-            b3Scalar	m_floats[4];
-			struct {b3Scalar x,y,z,w;};
-			
-        };
-        B3_FORCE_INLINE	b3SimdFloat4	get128() const
-        {
-            return mVec128;
-        }
-        B3_FORCE_INLINE	void	set128(b3SimdFloat4 v128)
-        {
-            mVec128 = v128;
-        }
-    #else
-	union
-	{
-        	b3Scalar	m_floats[4];
-		struct {b3Scalar x,y,z,w;};
-	};
-    #endif
-#endif //__CELLOS_LV2__ __SPU__
+	*/
+
+    B3_FORCE_INLINE	b3SimdFloat4	get128() const
+    {
+        return mVec128;
+    }
+    B3_FORCE_INLINE	void	set128(b3SimdFloat4 v128)
+    {
+        mVec128 = v128;
+    }
+#endif

 	public:

-  /**@brief No initialization constructor */
-	B3_FORCE_INLINE b3Vector3() 
-	{
-
-	}
-
- 
-	
-  /**@brief Constructor from scalars 
-   * @param x X value
-   * @param y Y value 
-   * @param z Z value 
-   */
-	B3_FORCE_INLINE b3Vector3(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
-	{
-		m_floats[0] = _x;
-		m_floats[1] = _y;
-		m_floats[2] = _z;
-		m_floats[3] = b3Scalar(0.f);
-	}
-
+  
 #if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE) )|| defined (B3_USE_NEON)
-	// Set Vector 
+	/*
+	
 	B3_FORCE_INLINE b3Vector3( b3SimdFloat4 v)
 	{
 		mVec128 = v;
 	}
-
-	// Copy constructor
+		
 	B3_FORCE_INLINE b3Vector3(const b3Vector3& rhs)
 	{
 		mVec128 = rhs.mVec128;
 	}
+	*/
+
+
+

 	// Assignment Operator
 	B3_FORCE_INLINE b3Vector3& 
@@ -158,6 +154,12 @@ public:
 		
 		return *this;
 	}
+
+#else
+
+
+
+
 #endif // #if defined (B3_USE_SSE_IN_API) || defined (B3_USE_NEON) 
    
 /**@brief Add a vector to this one 
@@ -352,11 +354,11 @@ public:
 	B3_FORCE_INLINE b3Vector3 absolute() const 
 	{
 #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
-		return b3Vector3(_mm_and_ps(mVec128, b3v3AbsfMask));
+		return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
 #elif defined(B3_USE_NEON)
 		return b3Vector3(vabsq_f32(mVec128));
 #else	
-		return b3Vector3(
+		return b3MakeVector3(
 			b3Fabs(m_floats[0]), 
 			b3Fabs(m_floats[1]), 
 			b3Fabs(m_floats[2]));
@@ -378,7 +380,7 @@ public:
 		V = _mm_sub_ps(V, T);
 		
 		V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
-		return b3Vector3(V);
+		return b3MakeVector3(V);
 #elif defined(B3_USE_NEON)
 		float32x4_t T, V;
 		// form (Y, Z, X, _) of mVec128 and v.mVec128
@@ -397,7 +399,7 @@ public:
 		
 		return b3Vector3(V);
 #else
-		return b3Vector3(
+		return b3MakeVector3(
 			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
 			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
 			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
@@ -517,7 +519,7 @@ public:
 		vl = _mm_mul_ps(vl, vt);
 		vl = _mm_add_ps(vl, mVec128);
 		
-		return b3Vector3(vl);
+		return b3MakeVector3(vl);
 #elif defined(B3_USE_NEON)
 		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
 		vl = vmulq_n_f32(vl, t);
@@ -526,7 +528,7 @@ public:
 		return b3Vector3(vl);
 #else	
 		return 
-			b3Vector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
+			b3MakeVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
 						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
 						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
 #endif
@@ -715,7 +717,7 @@ public:
        r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
        a2 = _mm_and_ps( a2, b3vxyzMaskf);
        r = _mm_add_ps( r, b3CastdTo128f (_mm_move_sd( b3CastfTo128d(a2), b3CastfTo128d(b1) )));
-        return b3Vector3(r);
+        return b3MakeVector3(r);
        
 #elif defined(B3_USE_NEON)
        static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
@@ -728,7 +730,7 @@ public:
        float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
        return b3Vector3( vcombine_f32(b0, b1) );
 #else	
-		return b3Vector3( dot(v0), dot(v1), dot(v2));
+		return b3MakeVector3( dot(v0), dot(v1), dot(v2));
 #endif
    }
 };
@@ -738,11 +740,11 @@ B3_FORCE_INLINE b3Vector3
 operator+(const b3Vector3& v1, const b3Vector3& v2) 
 {
 #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-	return b3Vector3(_mm_add_ps(v1.mVec128, v2.mVec128));
+	return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
 #elif defined(B3_USE_NEON)
-	return b3Vector3(vaddq_f32(v1.mVec128, v2.mVec128));
+	return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
 #else
-	return b3Vector3(
+	return b3MakeVector3(
 			v1.m_floats[0] + v2.m_floats[0], 
 			v1.m_floats[1] + v2.m_floats[1], 
 			v1.m_floats[2] + v2.m_floats[2]);
@@ -754,11 +756,11 @@ B3_FORCE_INLINE b3Vector3
 operator*(const b3Vector3& v1, const b3Vector3& v2) 
 {
 #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-	return b3Vector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
+	return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
 #elif defined(B3_USE_NEON)
-	return b3Vector3(vmulq_f32(v1.mVec128, v2.mVec128));
+	return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
 #else
-	return b3Vector3(
+	return b3MakeVector3(
 			v1.m_floats[0] * v2.m_floats[0], 
 			v1.m_floats[1] * v2.m_floats[1], 
 			v1.m_floats[2] * v2.m_floats[2]);
@@ -773,12 +775,12 @@ operator-(const b3Vector3& v1, const b3Vector3& v2)

 	//	without _mm_and_ps this code causes slowdown in Concave moving
 	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
-	return b3Vector3(_mm_and_ps(r, b3vFFF0fMask));
+	return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
 #elif defined(B3_USE_NEON)
 	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
-	return b3Vector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
 #else
-	return b3Vector3(
+	return b3MakeVector3(
 			v1.m_floats[0] - v2.m_floats[0], 
 			v1.m_floats[1] - v2.m_floats[1], 
 			v1.m_floats[2] - v2.m_floats[2]);
@@ -791,11 +793,11 @@ operator-(const b3Vector3& v)
 {
 #if (defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
 	__m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
-	return b3Vector3(_mm_and_ps(r, b3vFFF0fMask)); 
+	return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); 
 #elif defined(B3_USE_NEON)
-	return b3Vector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
+	return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
 #else	
-	return b3Vector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
+	return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
 #endif
 }

@@ -806,12 +808,12 @@ operator*(const b3Vector3& v, const b3Scalar& s)
 #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
 	__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
 	vs = b3_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
-	return b3Vector3(_mm_mul_ps(v.mVec128, vs));
+	return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
 #elif defined(B3_USE_NEON)
 	float32x4_t r = vmulq_n_f32(v.mVec128, s);
-	return b3Vector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
 #else
-	return b3Vector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
+	return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
 #endif
 }

@@ -846,7 +848,7 @@ operator/(const b3Vector3& v1, const b3Vector3& v2)
 #if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE))
 	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
 	vec = _mm_and_ps(vec, b3vFFF0fMask);
-	return b3Vector3(vec); 
+	return b3MakeVector3(vec); 
 #elif defined(B3_USE_NEON)
 	float32x4_t x, y, v, m;

@@ -862,7 +864,7 @@ operator/(const b3Vector3& v1, const b3Vector3& v2)

 	return b3Vector3(v);
 #else
-	return b3Vector3(
+	return b3MakeVector3(
 			v1.m_floats[0] / v2.m_floats[0], 
 			v1.m_floats[1] / v2.m_floats[1],
 			v1.m_floats[2] / v2.m_floats[2]);
@@ -953,7 +955,7 @@ B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Sca

    __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
 	b3Scalar ssin = b3Sin( _angle );
-    __m128 C = wAxis.cross( mVec128 ).mVec128;
+    __m128 C = wAxis.cross( b3MakeVector3(mVec128) ).mVec128;
 	O = _mm_and_ps(O, b3vFFF0fMask);
    b3Scalar scos = b3Cos( _angle );
 	
@@ -975,7 +977,7 @@ B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Sca
 	vcos = vcos * X;
 	O = O + vcos;	
 	
-	return b3Vector3(O);
+	return b3MakeVector3(O);
 #else
 	b3Vector3 o = wAxis * wAxis.dot( *this );
 	b3Vector3 _x = *this - o;
@@ -1069,25 +1071,12 @@ class b3Vector4 : public b3Vector3
 {
 public:

-	B3_FORCE_INLINE b3Vector4() {}
+	


-	B3_FORCE_INLINE b3Vector4(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w) 
-		: b3Vector3(_x,_y,_z)
-	{
-		m_floats[3] = _w;
-	}

 #if (defined (B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined (B3_USE_NEON) 
-	B3_FORCE_INLINE b3Vector4(const b3SimdFloat4 vec)
-	{
-		mVec128 = vec;
-	}
-
-	B3_FORCE_INLINE b3Vector4(const b3Vector3& rhs)
-	{
-		mVec128 = rhs.mVec128;
-	}
+	

 	B3_FORCE_INLINE b3Vector4& 
 	operator=(const b3Vector4& v) 
@@ -1100,11 +1089,11 @@ public:
 	B3_FORCE_INLINE b3Vector4 absolute4() const 
 	{
 #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
-		return b3Vector4(_mm_and_ps(mVec128, b3vAbsfMask));
+		return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
 #elif defined(B3_USE_NEON)
 		return b3Vector4(vabsq_f32(mVec128));
 #else	
-		return b3Vector4(
+		return b3MakeVector4(
 			b3Fabs(m_floats[0]), 
 			b3Fabs(m_floats[1]), 
 			b3Fabs(m_floats[2]),
@@ -1341,4 +1330,46 @@ B3_FORCE_INLINE void	b3Vector3::deSerialize(const struct	b3Vector3Data& dataIn)
 		m_floats[i] = dataIn.m_floats[i];
 }

+
+inline b3Vector3 b3MakeVector3( b3SimdFloat4 v)
+{
+	b3Vector3 tmp;
+	tmp.set128(v);
+	return tmp;
+}
+		
+
+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z)
+{
+	b3Vector3	tmp;
+	tmp.setValue(x,y,z);
+	return tmp;
+}
+
+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w)
+{	
+	b3Vector3	tmp;
+	tmp.setValue(x,y,z);
+	tmp.w = w;
+	return tmp;
+}
+
+inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w)
+{
+	b3Vector4	tmp;
+	tmp.setValue(x,y,z,w);
+	return tmp;
+}
+
+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
+inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
+{
+	b3Vector4	tmp;
+	tmp.set128(vec);
+	return tmp;
+}
+
+#endif
+
+
 #endif //B3_VECTOR3_H
--- a/src/Bullet3Common/premake4.lua
+++ b/src/Bullet3Common/premake4.lua
@@ -5,6 +5,8 @@
 	kind "StaticLib"
 		
 	targetdir "../../bin"
+	
+	includedirs {".."}

 	files {
 		"**.cpp",
--- a/src/Bullet3Common/shared/b3Float4.h
+++ b/src/Bullet3Common/shared/b3Float4.h
@@ -6,8 +6,26 @@
 #ifdef __cplusplus
 	#include "Bullet3Common/b3Vector3.h"
 	#define b3Float4 b3Vector3
+	#define b3Float4ConstArg const b3Vector3&
+	#define b3Dot3F4 b3Dot
+	#define b3Cross3 b3Cross
+	#define	b3MakeFloat4  b3MakeVector3
 #else
 	typedef float4	b3Float4;
+	#define b3Float4ConstArg const b3Float4
+	#define b3MakeFloat4 (float4)
+	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)
+	{
+		float4 a1 = b3MakeFloat4(v0.xyz,0.f);
+		float4 b1 = b3MakeFloat4(v1.xyz,0.f);
+		return dot(a1, b1);
+	}
+	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)
+	{
+		float4 a1 = b3MakeFloat4(v0.xyz,0.f);
+		float4 b1 = b3MakeFloat4(v1.xyz,0.f);
+		return cross(a1, b1);
+	}
 #endif 

 #endif //B3_FLOAT4_H
--- a/src/Bullet3Common/shared/b3Int2.h
+++ b/src/Bullet3Common/shared/b3Int2.h
@@ -16,6 +16,8 @@ subject to the following restrictions:
 #ifndef B3_INT2_H
 #define B3_INT2_H

+#ifdef __cplusplus
+
 struct b3UnsignedInt2
 {
 	union
@@ -52,5 +54,11 @@ inline b3Int2 b3MakeInt2(int x, int y)
 	v.s[0] = x; v.s[1] = y;
 	return v;
 }
+#else

+#define b3UnsignedInt2 uint2
+#define b3Int2 int2
+#define b3MakeInt2 (int2)
+
+#endif //__cplusplus
 #endif
--- a/src/Bullet3Common/shared/b3Mat3x3.h
+++ b/src/Bullet3Common/shared/b3Mat3x3.h
@@ -0,0 +1,75 @@
+
+#ifndef B3_MAT3x3_H
+#define B3_MAT3x3_H
+
+#include "Bullet3Common/shared/b3Quat.h"
+
+
+#ifdef __cplusplus
+
+#include "Bullet3Common/b3Matrix3x3.h"
+
+#define b3Mat3x3 b3Matrix3x3
+#define b3Mat3x3ConstArg const b3Matrix3x3&
+
+inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat)
+{
+	return b3Mat3x3(quat);
+}
+
+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
+{
+	return mat.absolute();
+}
+
+#define b3GetRow(m,row) m.getRow(row)
+
+#else
+
+typedef struct
+{
+	float4 m_row[3];
+}b3Mat3x3;
+
+#define b3Mat3x3ConstArg const b3Mat3x3
+#define b3GetRow(m,row) (m.m_row[row])
+
+inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
+{
+	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
+	b3Mat3x3 out;
+
+	out.m_row[0].x=1-2*quat2.y-2*quat2.z;
+	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
+	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
+	out.m_row[0].w = 0.f;
+
+	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
+	out.m_row[1].y=1-2*quat2.x-2*quat2.z;
+	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
+	out.m_row[1].w = 0.f;
+
+	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
+	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
+	out.m_row[2].z=1-2*quat2.x-2*quat2.y;
+	out.m_row[2].w = 0.f;
+
+	return out;
+}
+
+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
+{
+	b3Mat3x3 out;
+	out.m_row[0] = fabs(matIn.m_row[0]);
+	out.m_row[1] = fabs(matIn.m_row[1]);
+	out.m_row[2] = fabs(matIn.m_row[2]);
+	return out;
+}
+
+#endif
+
+
+
+
+
+#endif //B3_MAT3x3_H
--- a/src/Bullet3Common/shared/b3Quat.h
+++ b/src/Bullet3Common/shared/b3Quat.h
@@ -2,12 +2,76 @@
 #define B3_QUAT_H

 #include "Bullet3Common/shared/b3PlatformDefinitions.h"
+#include "Bullet3Common/shared/b3Float4.h"

 #ifdef __cplusplus
 	#include "Bullet3Common/b3Quaternion.h"
+	#include "Bullet3Common/b3Transform.h"
+
 	#define b3Quat b3Quaternion
+	#define b3QuatConstArg const b3Quaternion&
+
+	inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)
+	{
+		b3Transform tr;
+		tr.setOrigin(translation);
+		tr.setRotation(orientation);
+		return tr(point);
+	}
+
 #else
 	typedef float4	b3Quat;
+	#define b3QuatConstArg const b3Quat
+	
+	
+inline float4 b3FastNormalize4(float4 v)
+{
+	v = (float4)(v.xyz,0.f);
+	return fast_normalize(v);
+}
+	
+inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
+inline b3Quat b3QuatNormalize(b3QuatConstArg in);
+inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
+inline b3Quat b3QuatInvert(b3QuatConstArg q);
+inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
+{
+	b3Quat ans;
+	ans = b3Cross3( a, b );
+	ans += a.w*b+b.w*a;
+//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
+	ans.w = a.w*b.w - b3Dot3F4(a, b);
+	return ans;
+}
+
+inline b3Quat b3QuatNormalize(b3QuatConstArg in)
+{
+	return b3FastNormalize4(in);
+}
+inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
+{
+	b3Quat qInv = b3QuatInvert( q );
+	float4 vcpy = vec;
+	vcpy.w = 0.f;
+	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);
+	return out;
+}
+
+inline b3Quat b3QuatInvert(b3QuatConstArg q)
+{
+	return (b3Quat)(-q.xyz, q.w);
+}
+
+inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)
+{
+	return b3QuatRotate( b3QuatInvert( q ), vec );
+}
+
+inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)
+{
+	return b3QuatRotate( orientation, point ) + (translation);
+}
+	
 #endif 

 #endif //B3_QUAT_H