diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h index 92cbdc45a..120eac502 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h @@ -2060,7 +2060,10 @@ inline Quat::Quat( const Matrix3 & tfrm ) diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) ); - invSqrt = rsqrtf4( radicand ); + // invSqrt = rsqrtf4( radicand ); + invSqrt = newtonrapson_rsqrt4( radicand ); + + zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03 //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX ); diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h index 9ebecacff..4fe957928 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h @@ -595,11 +595,17 @@ inline const floatInVec length( const Vector3 &vec ) return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 ); } -inline const Vector3 normalize( const Vector3 &vec ) + +inline const Vector3 normalizeApprox( const Vector3 &vec ) { return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); } +inline const Vector3 normalize( const Vector3 &vec ) +{ + return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); +} + inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) ); @@ -1021,11 +1027,16 @@ inline const floatInVec length( const Vector4 &vec ) return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 ); } -inline const Vector4 normalize( const Vector4 &vec ) +inline const Vector4 normalizeApprox( const Vector4 &vec ) { return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); } +inline const Vector4 normalize( const Vector4 &vec ) +{ + return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); +} + inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ) { return select( vec0, vec1, boolInVec(select1) ); diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h index c07fa3acc..ebca2094d 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h @@ -153,6 +153,15 @@ static inline __m128 vec_ctf(__m128 x, int a) #define recipf4(x) _mm_rcp_ps( x ) #define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) +static __forceinline __m128 newtonrapson_rsqrt4( const __m128 v ) +{ +#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) +#define _three _mm_setr_ps(3.f,3.f,3.f,3.f) +const __m128 approx = _mm_rsqrt_ps( v ); +const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx); +return _mm_mul_ps(_mm_mul_ps(_half4, approx), _mm_sub_ps(_three, muls) ); +} + static inline __m128 acosf4(__m128 x) { __m128 xabs = fabsf4(x);