From d54423f9c04897bcb9073d648a92a1af01019b98 Mon Sep 17 00:00:00 2001
From: "erwin.coumans" <erwin.coumans@08e121b0-ae19-0410-a57b-3be3395fd4fd>
Date: Fri, 7 Dec 2012 18:16:04 +0000
Subject: [PATCH] Fix compile error in Visual Studio 2005 build: only enable
 SSE/SIMD intrinsics for btVector3 for Visual Studio 2008 and higher

---
 src/LinearMath/btPolarDecomposition.cpp |  4 +--
 src/LinearMath/btScalar.h               | 13 ++++++--
 src/LinearMath/btVector3.cpp            |  4 ++-
 src/LinearMath/btVector3.h              | 41 ++++++++++++-------------
 4 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/src/LinearMath/btPolarDecomposition.cpp b/src/LinearMath/btPolarDecomposition.cpp
index d7de20408..a4dca7fdd 100644
--- a/src/LinearMath/btPolarDecomposition.cpp
+++ b/src/LinearMath/btPolarDecomposition.cpp
@@ -60,10 +60,10 @@ unsigned int btPolarDecomposition::decompose(const btMatrix3x3& a, btMatrix3x3&
       break;
 
     const btScalar gamma = btPow(h_norm / u_norm, 0.25f);
-    const btScalar inv_gamma = 1.0 / gamma;
+    const btScalar inv_gamma = btScalar(1.0) / gamma;
 
     // Determine the delta to 'u'
-    const btMatrix3x3 delta = (u * (gamma - 2.0) + h.transpose() * inv_gamma) * 0.5;
+    const btMatrix3x3 delta = (u * (gamma - btScalar(2.0)) + h.transpose() * inv_gamma) * btScalar(0.5);
 
     // Update the matrices
     u += delta;
diff --git a/src/LinearMath/btScalar.h b/src/LinearMath/btScalar.h
index 3a94054e9..aaa1d6de6 100644
--- a/src/LinearMath/btScalar.h
+++ b/src/LinearMath/btScalar.h
@@ -68,6 +68,10 @@ inline int	btGetVersion()
 		#else
 
 #if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
+			#if _MSC_VER>1400
+				#define BT_USE_SIMD_VECTOR3
+			#endif
+
 			#define BT_USE_SSE
 			#ifdef BT_USE_SSE
 			//BT_USE_SSE_IN_API is disabled under Windows by default, because 
@@ -159,7 +163,8 @@ inline int	btGetVersion()
 
 #if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
     #if defined (__i386__) || defined (__x86_64__)
-        #define BT_USE_SSE
+		#define BT_USE_SIMD_VECTOR3
+		#define BT_USE_SSE
 		//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
 		//if apps run into issues, we will disable the next line
 		#define BT_USE_SSE_IN_API
@@ -178,7 +183,8 @@ inline int	btGetVersion()
     #elif defined( __armv7__ )
         #ifdef __clang__
             #define BT_USE_NEON 1
-
+			#define BT_USE_SIMD_VECTOR3
+		
             #if defined BT_USE_NEON && defined (__clang__)
                 #include <arm_neon.h>
             #endif//BT_USE_NEON
@@ -264,7 +270,8 @@ typedef float btScalar;
 typedef __m128 btSimdFloat4;
 #endif//BT_USE_SSE
 
-#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
+#if defined (BT_USE_SSE)
+//#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
 #ifdef _WIN32
 
 #ifndef BT_NAN
diff --git a/src/LinearMath/btVector3.cpp b/src/LinearMath/btVector3.cpp
index 24bd521a9..40d09c0c3 100644
--- a/src/LinearMath/btVector3.cpp
+++ b/src/LinearMath/btVector3.cpp
@@ -19,10 +19,12 @@
 #define BT_USE_SSE_IN_API
 #endif
 
+
 #include "btVector3.h"
 
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
 
+
+#if defined BT_USE_SIMD_VECTOR3
 #ifdef __APPLE__
 #include <stdint.h>
 typedef  float float4 __attribute__ ((vector_size(16)));
diff --git a/src/LinearMath/btVector3.h b/src/LinearMath/btVector3.h
index 5001dfa9f..b36b49de3 100644
--- a/src/LinearMath/btVector3.h
+++ b/src/LinearMath/btVector3.h
@@ -229,7 +229,7 @@ public:
    * @param v The other vector in the dot product */
 	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)		
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) //SSE-only path: BT_USE_SIMD_VECTOR3 is also defined for NEON builds
 		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
 		__m128 z = _mm_movehl_ps(vd, vd);
 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
@@ -345,7 +345,8 @@ public:
   /**@brief Return a vector will the absolute values of each element */
 	SIMD_FORCE_INLINE btVector3 absolute() const 
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
 		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
 #elif defined(BT_USE_NEON)
 		return btVector3(vabsq_f32(mVec128));
@@ -400,7 +401,7 @@ public:
 
 	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 		// cross:
 		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
 		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
@@ -632,7 +633,7 @@ public:
 
 	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
  
 		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
 		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
@@ -702,7 +703,7 @@ public:
     /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
     SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
     {
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 
         __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
         __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
@@ -768,7 +769,7 @@ operator*(const btVector3& v1, const btVector3& v2)
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v1, const btVector3& v2)
 {
-#if (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
 
 	//	without _mm_and_ps this code causes slowdown in Concave moving
 	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
@@ -788,7 +789,7 @@ operator-(const btVector3& v1, const btVector3& v2)
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v)
 {
-#if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
 	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
 	return btVector3(_mm_and_ps(r, btvFFF0fMask)); 
 #elif defined(BT_USE_NEON)
@@ -842,7 +843,7 @@ operator/(const btVector3& v, const btScalar& s)
 SIMD_FORCE_INLINE btVector3
 operator/(const btVector3& v1, const btVector3& v2)
 {
-#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
+#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
 	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
 	vec = _mm_and_ps(vec, btvFFF0fMask);
 	return btVector3(vec); 
@@ -948,7 +949,7 @@ SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btS
 {
 	// wAxis must be a unit lenght vector
 
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
 
     __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
 	btScalar ssin = btSin( _angle );
@@ -988,7 +989,7 @@ SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btS
 
 SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
 {
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if defined BT_USE_SIMD_VECTOR3
     #if defined _WIN32 || defined (BT_USE_SSE)
         const long scalar_cutoff = 10;
         long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
@@ -996,10 +997,8 @@ SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long arra
         const long scalar_cutoff = 4;
         extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
     #endif
-    if( array_count < scalar_cutoff )
-#else
-	
-#endif//BT_USE_SSE || BT_USE_NEON
+    if( array_count < scalar_cutoff )	
+#endif//BT_USE_SIMD_VECTOR3
     {
         btScalar maxDot = -SIMD_INFINITY;
         int i = 0;
@@ -1018,14 +1017,14 @@ SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long arra
         dotOut = maxDot;
         return ptIndex;
     }
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if defined BT_USE_SIMD_VECTOR3
     return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
-#endif
+#endif//BT_USE_SIMD_VECTOR3
 }
 
 SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
 {
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if defined BT_USE_SIMD_VECTOR3
     #if defined BT_USE_SSE
         const long scalar_cutoff = 10;
         long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
@@ -1037,7 +1036,7 @@ SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long arra
     #endif
     
     if( array_count < scalar_cutoff )
-#endif//BT_USE_SSE || BT_USE_NEON
+#endif//BT_USE_SIMD_VECTOR3
     {
         btScalar  minDot = SIMD_INFINITY;
         int i = 0;
@@ -1058,9 +1057,9 @@ SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long arra
         
         return ptIndex;
     }
-#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+#if defined BT_USE_SIMD_VECTOR3
     return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
-#endif
+#endif//BT_USE_SIMD_VECTOR3
 }
 
 
@@ -1098,7 +1097,7 @@ public:
 
 	SIMD_FORCE_INLINE btVector4 absolute4() const 
 	{
-#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
 		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
 #elif defined(BT_USE_NEON)
 		return btVector4(vabsq_f32(mVec128));