use newton-rapson iteration for more precise normalize() method
This commit is contained in:
@@ -2060,7 +2060,10 @@ inline Quat::Quat( const Matrix3 & tfrm )
|
||||
diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
|
||||
diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
|
||||
radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
|
||||
invSqrt = rsqrtf4( radicand );
|
||||
// invSqrt = rsqrtf4( radicand );
|
||||
invSqrt = newtonrapson_rsqrt4( radicand );
|
||||
|
||||
|
||||
|
||||
zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03
|
||||
//zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
|
||||
|
||||
@@ -595,11 +595,17 @@ inline const floatInVec length( const Vector3 &vec )
|
||||
return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
|
||||
}
|
||||
|
||||
inline const Vector3 normalize( const Vector3 &vec )
|
||||
|
||||
inline const Vector3 normalizeApprox( const Vector3 &vec )
|
||||
{
|
||||
return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
|
||||
}
|
||||
|
||||
inline const Vector3 normalize( const Vector3 &vec )
|
||||
{
|
||||
return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
|
||||
}
|
||||
|
||||
inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
|
||||
{
|
||||
return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
|
||||
@@ -1021,11 +1027,16 @@ inline const floatInVec length( const Vector4 &vec )
|
||||
return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
|
||||
}
|
||||
|
||||
inline const Vector4 normalize( const Vector4 &vec )
|
||||
inline const Vector4 normalizeApprox( const Vector4 &vec )
|
||||
{
|
||||
return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
|
||||
}
|
||||
|
||||
inline const Vector4 normalize( const Vector4 &vec )
|
||||
{
|
||||
return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
|
||||
}
|
||||
|
||||
inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
|
||||
{
|
||||
return select( vec0, vec1, boolInVec(select1) );
|
||||
|
||||
@@ -153,6 +153,15 @@ static inline __m128 vec_ctf(__m128 x, int a)
|
||||
#define recipf4(x) _mm_rcp_ps( x )
|
||||
#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
|
||||
|
||||
static __forceinline __m128 newtonrapson_rsqrt4( const __m128 v )
|
||||
{
|
||||
#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f)
|
||||
#define _three _mm_setr_ps(3.f,3.f,3.f,3.f)
|
||||
const __m128 approx = _mm_rsqrt_ps( v );
|
||||
const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx);
|
||||
return _mm_mul_ps(_mm_mul_ps(_half4, approx), _mm_sub_ps(_three, muls) );
|
||||
}
|
||||
|
||||
static inline __m128 acosf4(__m128 x)
|
||||
{
|
||||
__m128 xabs = fabsf4(x);
|
||||
|
||||
Reference in New Issue
Block a user