diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h index d21d25cbb..a11f72a54 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. Redistribution and use in source and binary forms, diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h index e8ac5959e..4c9682410 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. Redistribution and use in source and binary forms, diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h index 120eac502..5b2b71410 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. 
Redistribution and use in source and binary forms, @@ -62,28 +62,28 @@ namespace Aos { //----------------------------------------------------------------------------- // Definitions -inline Matrix3::Matrix3( const Matrix3 & mat ) +__forceinline Matrix3::Matrix3( const Matrix3 & mat ) { mCol0 = mat.mCol0; mCol1 = mat.mCol1; mCol2 = mat.mCol2; } -inline Matrix3::Matrix3( float scalar ) +__forceinline Matrix3::Matrix3( float scalar ) { mCol0 = Vector3( scalar ); mCol1 = Vector3( scalar ); mCol2 = Vector3( scalar ); } -inline Matrix3::Matrix3( const floatInVec &scalar ) +__forceinline Matrix3::Matrix3( const floatInVec &scalar ) { mCol0 = Vector3( scalar ); mCol1 = Vector3( scalar ); mCol2 = Vector3( scalar ); } -inline Matrix3::Matrix3( const Quat &unitQuat ) +__forceinline Matrix3::Matrix3( const Quat &unitQuat ) { __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2; __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; @@ -114,38 +114,38 @@ inline Matrix3::Matrix3( const Quat &unitQuat ) mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) ); } -inline Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 ) +__forceinline Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 ) { mCol0 = _col0; mCol1 = _col1; mCol2 = _col2; } -inline Matrix3 & Matrix3::setCol0( const Vector3 &_col0 ) +__forceinline Matrix3 & Matrix3::setCol0( const Vector3 &_col0 ) { mCol0 = _col0; return *this; } -inline Matrix3 & Matrix3::setCol1( const Vector3 &_col1 ) +__forceinline Matrix3 & Matrix3::setCol1( const Vector3 &_col1 ) { mCol1 = _col1; return *this; } -inline Matrix3 & Matrix3::setCol2( const Vector3 &_col2 ) +__forceinline Matrix3 & Matrix3::setCol2( const Vector3 &_col2 ) { mCol2 = _col2; return *this; } -inline Matrix3 & Matrix3::setCol( int col, const Vector3 &vec ) +__forceinline Matrix3 & Matrix3::setCol( int col, const Vector3 &vec ) { *(&mCol0 + col) = vec; return *this; } -inline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) 
+__forceinline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) { mCol0.setElem( row, vec.getElem( 0 ) ); mCol1.setElem( row, vec.getElem( 1 ) ); @@ -153,13 +153,13 @@ inline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) return *this; } -inline Matrix3 & Matrix3::setElem( int col, int row, float val ) +__forceinline Matrix3 & Matrix3::setElem( int col, int row, float val ) { (*this)[col].setElem(row, val); return *this; } -inline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) +__forceinline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) { Vector3 tmpV3_0; tmpV3_0 = this->getCol( col ); @@ -168,47 +168,47 @@ inline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) return *this; } -inline const floatInVec Matrix3::getElem( int col, int row ) const +__forceinline const floatInVec Matrix3::getElem( int col, int row ) const { return this->getCol( col ).getElem( row ); } -inline const Vector3 Matrix3::getCol0( ) const +__forceinline const Vector3 Matrix3::getCol0( ) const { return mCol0; } -inline const Vector3 Matrix3::getCol1( ) const +__forceinline const Vector3 Matrix3::getCol1( ) const { return mCol1; } -inline const Vector3 Matrix3::getCol2( ) const +__forceinline const Vector3 Matrix3::getCol2( ) const { return mCol2; } -inline const Vector3 Matrix3::getCol( int col ) const +__forceinline const Vector3 Matrix3::getCol( int col ) const { return *(&mCol0 + col); } -inline const Vector3 Matrix3::getRow( int row ) const +__forceinline const Vector3 Matrix3::getRow( int row ) const { return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); } -inline Vector3 & Matrix3::operator []( int col ) +__forceinline Vector3 & Matrix3::operator []( int col ) { return *(&mCol0 + col); } -inline const Vector3 Matrix3::operator []( int col ) const +__forceinline const Vector3 Matrix3::operator []( int col ) const { return *(&mCol0 + col); } -inline Matrix3 & 
Matrix3::operator =( const Matrix3 & mat ) +__forceinline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) { mCol0 = mat.mCol0; mCol1 = mat.mCol1; @@ -216,7 +216,7 @@ inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) return *this; } -inline const Matrix3 transpose( const Matrix3 & mat ) +__forceinline const Matrix3 transpose( const Matrix3 & mat ) { __m128 tmp0, tmp1, res0, res1, res2; tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); @@ -236,7 +236,7 @@ inline const Matrix3 transpose( const Matrix3 & mat ) ); } -inline const Matrix3 inverse( const Matrix3 & mat ) +__forceinline const Matrix3 inverse( const Matrix3 & mat ) { __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2; tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() ); @@ -265,12 +265,12 @@ inline const Matrix3 inverse( const Matrix3 & mat ) ); } -inline const floatInVec determinant( const Matrix3 & mat ) +__forceinline const floatInVec determinant( const Matrix3 & mat ) { return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); } -inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +__forceinline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const { return Matrix3( ( mCol0 + mat.mCol0 ), @@ -279,7 +279,7 @@ inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const ); } -inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +__forceinline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const { return Matrix3( ( mCol0 - mat.mCol0 ), @@ -288,19 +288,19 @@ inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const ); } -inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +__forceinline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) { *this = *this + mat; return *this; } -inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +__forceinline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) { *this = *this - mat; return *this; } 
-inline const Matrix3 Matrix3::operator -( ) const +__forceinline const Matrix3 Matrix3::operator -( ) const { return Matrix3( ( -mCol0 ), @@ -309,7 +309,7 @@ inline const Matrix3 Matrix3::operator -( ) const ); } -inline const Matrix3 absPerElem( const Matrix3 & mat ) +__forceinline const Matrix3 absPerElem( const Matrix3 & mat ) { return Matrix3( absPerElem( mat.getCol0() ), @@ -318,12 +318,12 @@ inline const Matrix3 absPerElem( const Matrix3 & mat ) ); } -inline const Matrix3 Matrix3::operator *( float scalar ) const +__forceinline const Matrix3 Matrix3::operator *( float scalar ) const { return *this * floatInVec(scalar); } -inline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const +__forceinline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const { return Matrix3( ( mCol0 * scalar ), @@ -332,28 +332,28 @@ inline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const ); } -inline Matrix3 & Matrix3::operator *=( float scalar ) +__forceinline Matrix3 & Matrix3::operator *=( float scalar ) { return *this *= floatInVec(scalar); } -inline Matrix3 & Matrix3::operator *=( const floatInVec &scalar ) +__forceinline Matrix3 & Matrix3::operator *=( const floatInVec &scalar ) { *this = *this * scalar; return *this; } -inline const Matrix3 operator *( float scalar, const Matrix3 & mat ) +__forceinline const Matrix3 operator *( float scalar, const Matrix3 & mat ) { return floatInVec(scalar) * mat; } -inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ) +__forceinline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ) { return mat * scalar; } -inline const Vector3 Matrix3::operator *( const Vector3 &vec ) const +__forceinline const Vector3 Matrix3::operator *( const Vector3 &vec ) const { __m128 res; __m128 xxxx, yyyy, zzzz; @@ -366,7 +366,7 @@ inline const Vector3 Matrix3::operator *( const Vector3 &vec ) const return Vector3( res ); } -inline const Matrix3 Matrix3::operator *( 
const Matrix3 & mat ) const +__forceinline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const { return Matrix3( ( *this * mat.mCol0 ), @@ -375,13 +375,13 @@ inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const ); } -inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +__forceinline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) { *this = *this * mat; return *this; } -inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +__forceinline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) { return Matrix3( mulPerElem( mat0.getCol0(), mat1.getCol0() ), @@ -390,7 +390,7 @@ inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) ); } -inline const Matrix3 Matrix3::identity( ) +__forceinline const Matrix3 Matrix3::identity( ) { return Matrix3( Vector3::xAxis( ), @@ -399,12 +399,12 @@ inline const Matrix3 Matrix3::identity( ) ); } -inline const Matrix3 Matrix3::rotationX( float radians ) +__forceinline const Matrix3 Matrix3::rotationX( float radians ) { return rotationX( floatInVec(radians) ); } -inline const Matrix3 Matrix3::rotationX( const floatInVec &radians ) +__forceinline const Matrix3 Matrix3::rotationX( const floatInVec &radians ) { __m128 s, c, res1, res2; __m128 zero; @@ -423,12 +423,12 @@ inline const Matrix3 Matrix3::rotationX( const floatInVec &radians ) ); } -inline const Matrix3 Matrix3::rotationY( float radians ) +__forceinline const Matrix3 Matrix3::rotationY( float radians ) { return rotationY( floatInVec(radians) ); } -inline const Matrix3 Matrix3::rotationY( const floatInVec &radians ) +__forceinline const Matrix3 Matrix3::rotationY( const floatInVec &radians ) { __m128 s, c, res0, res2; __m128 zero; @@ -447,12 +447,12 @@ inline const Matrix3 Matrix3::rotationY( const floatInVec &radians ) ); } -inline const Matrix3 Matrix3::rotationZ( float radians ) +__forceinline const Matrix3 Matrix3::rotationZ( float radians ) { return rotationZ( 
floatInVec(radians) ); } -inline const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) +__forceinline const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) { __m128 s, c, res0, res1; __m128 zero; @@ -471,7 +471,7 @@ inline const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) ); } -inline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ ) +__forceinline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ ) { __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; angles = Vector4( radiansXYZ, 0.0f ).get128(); @@ -493,12 +493,12 @@ inline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ ) ); } -inline const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec ) +__forceinline const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec ) { return rotation( floatInVec(radians), unitVec ); } -inline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +__forceinline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec ) { __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; axis = unitVec.get128(); @@ -530,12 +530,12 @@ inline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 ); } -inline const Matrix3 Matrix3::rotation( const Quat &unitQuat ) +__forceinline const Matrix3 Matrix3::rotation( const Quat &unitQuat ) { return Matrix3( unitQuat ); } -inline const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) +__forceinline const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) { __m128 zero = _mm_setzero_ps(); __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; @@ -548,7 +548,7 @@ inline const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) ); } -inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) +__forceinline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) { return Matrix3( ( mat.getCol0() * 
scaleVec.getX( ) ), @@ -557,7 +557,7 @@ inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) ); } -inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ) +__forceinline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ) { return Matrix3( mulPerElem( mat.getCol0(), scaleVec ), @@ -566,7 +566,7 @@ inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ); } -inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +__forceinline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) { return Matrix3( select( mat0.getCol0(), mat1.getCol0(), select1 ), @@ -575,7 +575,7 @@ inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool se ); } -inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ) +__forceinline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ) { return Matrix3( select( mat0.getCol0(), mat1.getCol0(), select1 ), @@ -586,14 +586,14 @@ inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const b #ifdef _VECTORMATH_DEBUG -inline void print( const Matrix3 & mat ) +__forceinline void print( const Matrix3 & mat ) { print( mat.getRow( 0 ) ); print( mat.getRow( 1 ) ); print( mat.getRow( 2 ) ); } -inline void print( const Matrix3 & mat, const char * name ) +__forceinline void print( const Matrix3 & mat, const char * name ) { printf("%s:\n", name); print( mat ); @@ -601,7 +601,7 @@ inline void print( const Matrix3 & mat, const char * name ) #endif -inline Matrix4::Matrix4( const Matrix4 & mat ) +__forceinline Matrix4::Matrix4( const Matrix4 & mat ) { mCol0 = mat.mCol0; mCol1 = mat.mCol1; @@ -609,7 +609,7 @@ inline Matrix4::Matrix4( const Matrix4 & mat ) mCol3 = mat.mCol3; } -inline Matrix4::Matrix4( float scalar ) +__forceinline Matrix4::Matrix4( float scalar ) { mCol0 = Vector4( 
scalar ); mCol1 = Vector4( scalar ); @@ -617,7 +617,7 @@ inline Matrix4::Matrix4( float scalar ) mCol3 = Vector4( scalar ); } -inline Matrix4::Matrix4( const floatInVec &scalar ) +__forceinline Matrix4::Matrix4( const floatInVec &scalar ) { mCol0 = Vector4( scalar ); mCol1 = Vector4( scalar ); @@ -625,7 +625,7 @@ inline Matrix4::Matrix4( const floatInVec &scalar ) mCol3 = Vector4( scalar ); } -inline Matrix4::Matrix4( const Transform3 & mat ) +__forceinline Matrix4::Matrix4( const Transform3 & mat ) { mCol0 = Vector4( mat.getCol0(), 0.0f ); mCol1 = Vector4( mat.getCol1(), 0.0f ); @@ -633,7 +633,7 @@ inline Matrix4::Matrix4( const Transform3 & mat ) mCol3 = Vector4( mat.getCol3(), 1.0f ); } -inline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 ) +__forceinline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 ) { mCol0 = _col0; mCol1 = _col1; @@ -641,7 +641,7 @@ inline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vecto mCol3 = _col3; } -inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) +__forceinline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) { mCol0 = Vector4( mat.getCol0(), 0.0f ); mCol1 = Vector4( mat.getCol1(), 0.0f ); @@ -649,7 +649,7 @@ inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) mCol3 = Vector4( translateVec, 1.0f ); } -inline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) +__forceinline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) { Matrix3 mat; mat = Matrix3( unitQuat ); @@ -659,37 +659,37 @@ inline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) mCol3 = Vector4( translateVec, 1.0f ); } -inline Matrix4 & Matrix4::setCol0( const Vector4 &_col0 ) +__forceinline Matrix4 & Matrix4::setCol0( const Vector4 &_col0 ) { mCol0 = _col0; return *this; } -inline Matrix4 & 
Matrix4::setCol1( const Vector4 &_col1 ) +__forceinline Matrix4 & Matrix4::setCol1( const Vector4 &_col1 ) { mCol1 = _col1; return *this; } -inline Matrix4 & Matrix4::setCol2( const Vector4 &_col2 ) +__forceinline Matrix4 & Matrix4::setCol2( const Vector4 &_col2 ) { mCol2 = _col2; return *this; } -inline Matrix4 & Matrix4::setCol3( const Vector4 &_col3 ) +__forceinline Matrix4 & Matrix4::setCol3( const Vector4 &_col3 ) { mCol3 = _col3; return *this; } -inline Matrix4 & Matrix4::setCol( int col, const Vector4 &vec ) +__forceinline Matrix4 & Matrix4::setCol( int col, const Vector4 &vec ) { *(&mCol0 + col) = vec; return *this; } -inline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) +__forceinline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) { mCol0.setElem( row, vec.getElem( 0 ) ); mCol1.setElem( row, vec.getElem( 1 ) ); @@ -698,13 +698,13 @@ inline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) return *this; } -inline Matrix4 & Matrix4::setElem( int col, int row, float val ) +__forceinline Matrix4 & Matrix4::setElem( int col, int row, float val ) { (*this)[col].setElem(row, val); return *this; } -inline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) +__forceinline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) { Vector4 tmpV3_0; tmpV3_0 = this->getCol( col ); @@ -713,52 +713,52 @@ inline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) return *this; } -inline const floatInVec Matrix4::getElem( int col, int row ) const +__forceinline const floatInVec Matrix4::getElem( int col, int row ) const { return this->getCol( col ).getElem( row ); } -inline const Vector4 Matrix4::getCol0( ) const +__forceinline const Vector4 Matrix4::getCol0( ) const { return mCol0; } -inline const Vector4 Matrix4::getCol1( ) const +__forceinline const Vector4 Matrix4::getCol1( ) const { return mCol1; } -inline const Vector4 Matrix4::getCol2( ) const +__forceinline const Vector4 Matrix4::getCol2( 
) const { return mCol2; } -inline const Vector4 Matrix4::getCol3( ) const +__forceinline const Vector4 Matrix4::getCol3( ) const { return mCol3; } -inline const Vector4 Matrix4::getCol( int col ) const +__forceinline const Vector4 Matrix4::getCol( int col ) const { return *(&mCol0 + col); } -inline const Vector4 Matrix4::getRow( int row ) const +__forceinline const Vector4 Matrix4::getRow( int row ) const { return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); } -inline Vector4 & Matrix4::operator []( int col ) +__forceinline Vector4 & Matrix4::operator []( int col ) { return *(&mCol0 + col); } -inline const Vector4 Matrix4::operator []( int col ) const +__forceinline const Vector4 Matrix4::operator []( int col ) const { return *(&mCol0 + col); } -inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +__forceinline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) { mCol0 = mat.mCol0; mCol1 = mat.mCol1; @@ -767,7 +767,7 @@ inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) return *this; } -inline const Matrix4 transpose( const Matrix4 & mat ) +__forceinline const Matrix4 transpose( const Matrix4 & mat ) { __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3; tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); @@ -791,7 +791,7 @@ static __declspec(align(16)) const unsigned int _vmathPNPN[4] = {0x00000000, 0x8 static __declspec(align(16)) const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000}; static __declspec(align(16)) const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f}; -inline const Matrix4 inverse( const Matrix4 & mat ) +__forceinline const Matrix4 inverse( const Matrix4 & mat ) { __m128 Va,Vb,Vc; __m128 r1,r2,r3,tt,tt2; @@ -886,7 +886,7 @@ inline const Matrix4 inverse( const Matrix4 & mat ) ); } -inline const Matrix4 affineInverse( const Matrix4 & mat ) +__forceinline const Matrix4 affineInverse( const Matrix4 & mat ) { Transform3 affineMat; 
affineMat.setCol0( mat.getCol0().getXYZ( ) ); @@ -896,7 +896,7 @@ inline const Matrix4 affineInverse( const Matrix4 & mat ) return Matrix4( inverse( affineMat ) ); } -inline const Matrix4 orthoInverse( const Matrix4 & mat ) +__forceinline const Matrix4 orthoInverse( const Matrix4 & mat ) { Transform3 affineMat; affineMat.setCol0( mat.getCol0().getXYZ( ) ); @@ -906,7 +906,7 @@ inline const Matrix4 orthoInverse( const Matrix4 & mat ) return Matrix4( orthoInverse( affineMat ) ); } -inline const floatInVec determinant( const Matrix4 & mat ) +__forceinline const floatInVec determinant( const Matrix4 & mat ) { __m128 Va,Vb,Vc; __m128 r1,r2,r3,tt,tt2; @@ -947,7 +947,7 @@ inline const floatInVec determinant( const Matrix4 & mat ) return floatInVec(Det, 0); } -inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +__forceinline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const { return Matrix4( ( mCol0 + mat.mCol0 ), @@ -957,7 +957,7 @@ inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const ); } -inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +__forceinline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const { return Matrix4( ( mCol0 - mat.mCol0 ), @@ -967,19 +967,19 @@ inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const ); } -inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +__forceinline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) { *this = *this + mat; return *this; } -inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +__forceinline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) { *this = *this - mat; return *this; } -inline const Matrix4 Matrix4::operator -( ) const +__forceinline const Matrix4 Matrix4::operator -( ) const { return Matrix4( ( -mCol0 ), @@ -989,7 +989,7 @@ inline const Matrix4 Matrix4::operator -( ) const ); } -inline const Matrix4 absPerElem( const Matrix4 & mat ) +__forceinline const Matrix4 absPerElem( const Matrix4 & 
mat ) { return Matrix4( absPerElem( mat.getCol0() ), @@ -999,12 +999,12 @@ inline const Matrix4 absPerElem( const Matrix4 & mat ) ); } -inline const Matrix4 Matrix4::operator *( float scalar ) const +__forceinline const Matrix4 Matrix4::operator *( float scalar ) const { return *this * floatInVec(scalar); } -inline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const +__forceinline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const { return Matrix4( ( mCol0 * scalar ), @@ -1014,28 +1014,28 @@ inline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const ); } -inline Matrix4 & Matrix4::operator *=( float scalar ) +__forceinline Matrix4 & Matrix4::operator *=( float scalar ) { return *this *= floatInVec(scalar); } -inline Matrix4 & Matrix4::operator *=( const floatInVec &scalar ) +__forceinline Matrix4 & Matrix4::operator *=( const floatInVec &scalar ) { *this = *this * scalar; return *this; } -inline const Matrix4 operator *( float scalar, const Matrix4 & mat ) +__forceinline const Matrix4 operator *( float scalar, const Matrix4 & mat ) { return floatInVec(scalar) * mat; } -inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ) +__forceinline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ) { return mat * scalar; } -inline const Vector4 Matrix4::operator *( const Vector4 &vec ) const +__forceinline const Vector4 Matrix4::operator *( const Vector4 &vec ) const { return Vector4( _mm_add_ps( @@ -1044,7 +1044,7 @@ inline const Vector4 Matrix4::operator *( const Vector4 &vec ) const ); } -inline const Vector4 Matrix4::operator *( const Vector3 &vec ) const +__forceinline const Vector4 Matrix4::operator *( const Vector3 &vec ) const { return Vector4( _mm_add_ps( @@ -1053,7 +1053,7 @@ inline const Vector4 Matrix4::operator *( const Vector3 &vec ) const ); } -inline const Vector4 Matrix4::operator *( const Point3 &pnt ) const +__forceinline const Vector4 Matrix4::operator *( 
const Point3 &pnt ) const { return Vector4( _mm_add_ps( @@ -1062,7 +1062,7 @@ inline const Vector4 Matrix4::operator *( const Point3 &pnt ) const ); } -inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +__forceinline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const { return Matrix4( ( *this * mat.mCol0 ), @@ -1072,13 +1072,13 @@ inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const ); } -inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +__forceinline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) { *this = *this * mat; return *this; } -inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +__forceinline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const { return Matrix4( ( *this * tfrm.getCol0() ), @@ -1088,13 +1088,13 @@ inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const ); } -inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +__forceinline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) { *this = *this * tfrm; return *this; } -inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +__forceinline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) { return Matrix4( mulPerElem( mat0.getCol0(), mat1.getCol0() ), @@ -1104,7 +1104,7 @@ inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) ); } -inline const Matrix4 Matrix4::identity( ) +__forceinline const Matrix4 Matrix4::identity( ) { return Matrix4( Vector4::xAxis( ), @@ -1114,7 +1114,7 @@ inline const Matrix4 Matrix4::identity( ) ); } -inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +__forceinline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) { mCol0.setXYZ( mat3.getCol0() ); mCol1.setXYZ( mat3.getCol1() ); @@ -1122,7 +1122,7 @@ inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) return *this; } -inline const Matrix3 Matrix4::getUpper3x3( ) const +__forceinline const 
Matrix3 Matrix4::getUpper3x3( ) const { return Matrix3( mCol0.getXYZ( ), @@ -1131,23 +1131,23 @@ inline const Matrix3 Matrix4::getUpper3x3( ) const ); } -inline Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec ) +__forceinline Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec ) { mCol3.setXYZ( translateVec ); return *this; } -inline const Vector3 Matrix4::getTranslation( ) const +__forceinline const Vector3 Matrix4::getTranslation( ) const { return mCol3.getXYZ( ); } -inline const Matrix4 Matrix4::rotationX( float radians ) +__forceinline const Matrix4 Matrix4::rotationX( float radians ) { return rotationX( floatInVec(radians) ); } -inline const Matrix4 Matrix4::rotationX( const floatInVec &radians ) +__forceinline const Matrix4 Matrix4::rotationX( const floatInVec &radians ) { __m128 s, c, res1, res2; __m128 zero; @@ -1167,12 +1167,12 @@ inline const Matrix4 Matrix4::rotationX( const floatInVec &radians ) ); } -inline const Matrix4 Matrix4::rotationY( float radians ) +__forceinline const Matrix4 Matrix4::rotationY( float radians ) { return rotationY( floatInVec(radians) ); } -inline const Matrix4 Matrix4::rotationY( const floatInVec &radians ) +__forceinline const Matrix4 Matrix4::rotationY( const floatInVec &radians ) { __m128 s, c, res0, res2; __m128 zero; @@ -1192,12 +1192,12 @@ inline const Matrix4 Matrix4::rotationY( const floatInVec &radians ) ); } -inline const Matrix4 Matrix4::rotationZ( float radians ) +__forceinline const Matrix4 Matrix4::rotationZ( float radians ) { return rotationZ( floatInVec(radians) ); } -inline const Matrix4 Matrix4::rotationZ( const floatInVec &radians ) +__forceinline const Matrix4 Matrix4::rotationZ( const floatInVec &radians ) { __m128 s, c, res0, res1; __m128 zero; @@ -1217,7 +1217,7 @@ inline const Matrix4 Matrix4::rotationZ( const floatInVec &radians ) ); } -inline const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ ) +__forceinline const Matrix4 Matrix4::rotationZYX( const Vector3 
&radiansXYZ ) { __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; angles = Vector4( radiansXYZ, 0.0f ).get128(); @@ -1240,12 +1240,12 @@ inline const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ ) ); } -inline const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec ) +__forceinline const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec ) { return rotation( floatInVec(radians), unitVec ); } -inline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec ) +__forceinline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec ) { __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; axis = unitVec.get128(); @@ -1283,12 +1283,12 @@ inline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 ); } -inline const Matrix4 Matrix4::rotation( const Quat &unitQuat ) +__forceinline const Matrix4 Matrix4::rotation( const Quat &unitQuat ) { return Matrix4( Transform3::rotation( unitQuat ) ); } -inline const Matrix4 Matrix4::scale( const Vector3 &scaleVec ) +__forceinline const Matrix4 Matrix4::scale( const Vector3 &scaleVec ) { __m128 zero = _mm_setzero_ps(); __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; @@ -1302,7 +1302,7 @@ inline const Matrix4 Matrix4::scale( const Vector3 &scaleVec ) ); } -inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ) +__forceinline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ) { return Matrix4( ( mat.getCol0() * scaleVec.getX( ) ), @@ -1312,7 +1312,7 @@ inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ) ); } -inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ) +__forceinline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ) { Vector4 scale4; scale4 = Vector4( scaleVec, 1.0f ); @@ -1324,7 +1324,7 @@ inline const Matrix4 prependScale( 
const Vector3 &scaleVec, const Matrix4 & mat ); } -inline const Matrix4 Matrix4::translation( const Vector3 &translateVec ) +__forceinline const Matrix4 Matrix4::translation( const Vector3 &translateVec ) { return Matrix4( Vector4::xAxis( ), @@ -1334,7 +1334,7 @@ inline const Matrix4 Matrix4::translation( const Vector3 &translateVec ) ); } -inline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ) +__forceinline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ) { Matrix4 m4EyeFrame; Vector3 v3X, v3Y, v3Z; @@ -1346,7 +1346,7 @@ inline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAt return orthoInverse( m4EyeFrame ); } -inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +__forceinline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) { float f, rangeInv; __m128 zero, col0, col1, col2, col3; @@ -1375,7 +1375,7 @@ inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, floa ); } -inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +__forceinline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) { /* function implementation based on code from STIDC SDK: */ /* -------------------------------------------------------------- */ @@ -1423,7 +1423,7 @@ inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, fl ); } -inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +__forceinline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) { /* function implementation based on code from STIDC SDK: */ /* -------------------------------------------------------------- */ @@ -1470,7 
+1470,7 @@ inline const Matrix4 Matrix4::orthographic( float left, float right, float botto ); } -inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +__forceinline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) { return Matrix4( select( mat0.getCol0(), mat1.getCol0(), select1 ), @@ -1480,7 +1480,7 @@ inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool se ); } -inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ) +__forceinline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ) { return Matrix4( select( mat0.getCol0(), mat1.getCol0(), select1 ), @@ -1492,7 +1492,7 @@ inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const b #ifdef _VECTORMATH_DEBUG -inline void print( const Matrix4 & mat ) +__forceinline void print( const Matrix4 & mat ) { print( mat.getRow( 0 ) ); print( mat.getRow( 1 ) ); @@ -1500,7 +1500,7 @@ inline void print( const Matrix4 & mat ) print( mat.getRow( 3 ) ); } -inline void print( const Matrix4 & mat, const char * name ) +__forceinline void print( const Matrix4 & mat, const char * name ) { printf("%s:\n", name); print( mat ); @@ -1508,7 +1508,7 @@ inline void print( const Matrix4 & mat, const char * name ) #endif -inline Transform3::Transform3( const Transform3 & tfrm ) +__forceinline Transform3::Transform3( const Transform3 & tfrm ) { mCol0 = tfrm.mCol0; mCol1 = tfrm.mCol1; @@ -1516,7 +1516,7 @@ inline Transform3::Transform3( const Transform3 & tfrm ) mCol3 = tfrm.mCol3; } -inline Transform3::Transform3( float scalar ) +__forceinline Transform3::Transform3( float scalar ) { mCol0 = Vector3( scalar ); mCol1 = Vector3( scalar ); @@ -1524,7 +1524,7 @@ inline Transform3::Transform3( float scalar ) mCol3 = Vector3( scalar ); } -inline Transform3::Transform3( const floatInVec &scalar ) +__forceinline Transform3::Transform3( const floatInVec 
&scalar ) { mCol0 = Vector3( scalar ); mCol1 = Vector3( scalar ); @@ -1532,7 +1532,7 @@ inline Transform3::Transform3( const floatInVec &scalar ) mCol3 = Vector3( scalar ); } -inline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 ) +__forceinline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 ) { mCol0 = _col0; mCol1 = _col1; @@ -1540,49 +1540,49 @@ inline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const mCol3 = _col3; } -inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ) +__forceinline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ) { this->setUpper3x3( tfrm ); this->setTranslation( translateVec ); } -inline Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec ) +__forceinline Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec ) { this->setUpper3x3( Matrix3( unitQuat ) ); this->setTranslation( translateVec ); } -inline Transform3 & Transform3::setCol0( const Vector3 &_col0 ) +__forceinline Transform3 & Transform3::setCol0( const Vector3 &_col0 ) { mCol0 = _col0; return *this; } -inline Transform3 & Transform3::setCol1( const Vector3 &_col1 ) +__forceinline Transform3 & Transform3::setCol1( const Vector3 &_col1 ) { mCol1 = _col1; return *this; } -inline Transform3 & Transform3::setCol2( const Vector3 &_col2 ) +__forceinline Transform3 & Transform3::setCol2( const Vector3 &_col2 ) { mCol2 = _col2; return *this; } -inline Transform3 & Transform3::setCol3( const Vector3 &_col3 ) +__forceinline Transform3 & Transform3::setCol3( const Vector3 &_col3 ) { mCol3 = _col3; return *this; } -inline Transform3 & Transform3::setCol( int col, const Vector3 &vec ) +__forceinline Transform3 & Transform3::setCol( int col, const Vector3 &vec ) { *(&mCol0 + col) = vec; return *this; } -inline Transform3 & Transform3::setRow( int 
row, const Vector4 &vec ) +__forceinline Transform3 & Transform3::setRow( int row, const Vector4 &vec ) { mCol0.setElem( row, vec.getElem( 0 ) ); mCol1.setElem( row, vec.getElem( 1 ) ); @@ -1591,13 +1591,13 @@ inline Transform3 & Transform3::setRow( int row, const Vector4 &vec ) return *this; } -inline Transform3 & Transform3::setElem( int col, int row, float val ) +__forceinline Transform3 & Transform3::setElem( int col, int row, float val ) { (*this)[col].setElem(row, val); return *this; } -inline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val ) +__forceinline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val ) { Vector3 tmpV3_0; tmpV3_0 = this->getCol( col ); @@ -1606,52 +1606,52 @@ inline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val return *this; } -inline const floatInVec Transform3::getElem( int col, int row ) const +__forceinline const floatInVec Transform3::getElem( int col, int row ) const { return this->getCol( col ).getElem( row ); } -inline const Vector3 Transform3::getCol0( ) const +__forceinline const Vector3 Transform3::getCol0( ) const { return mCol0; } -inline const Vector3 Transform3::getCol1( ) const +__forceinline const Vector3 Transform3::getCol1( ) const { return mCol1; } -inline const Vector3 Transform3::getCol2( ) const +__forceinline const Vector3 Transform3::getCol2( ) const { return mCol2; } -inline const Vector3 Transform3::getCol3( ) const +__forceinline const Vector3 Transform3::getCol3( ) const { return mCol3; } -inline const Vector3 Transform3::getCol( int col ) const +__forceinline const Vector3 Transform3::getCol( int col ) const { return *(&mCol0 + col); } -inline const Vector4 Transform3::getRow( int row ) const +__forceinline const Vector4 Transform3::getRow( int row ) const { return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); } -inline Vector3 & Transform3::operator []( int col ) +__forceinline 
Vector3 & Transform3::operator []( int col ) { return *(&mCol0 + col); } -inline const Vector3 Transform3::operator []( int col ) const +__forceinline const Vector3 Transform3::operator []( int col ) const { return *(&mCol0 + col); } -inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) +__forceinline Transform3 & Transform3::operator =( const Transform3 & tfrm ) { mCol0 = tfrm.mCol0; mCol1 = tfrm.mCol1; @@ -1660,7 +1660,7 @@ inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) return *this; } -inline const Transform3 inverse( const Transform3 & tfrm ) +__forceinline const Transform3 inverse( const Transform3 & tfrm ) { __m128 inv0, inv1, inv2, inv3; __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet; @@ -1700,7 +1700,7 @@ inline const Transform3 inverse( const Transform3 & tfrm ) ); } -inline const Transform3 orthoInverse( const Transform3 & tfrm ) +__forceinline const Transform3 orthoInverse( const Transform3 & tfrm ) { __m128 inv0, inv1, inv2, inv3; __m128 tmp0, tmp1; @@ -1730,7 +1730,7 @@ inline const Transform3 orthoInverse( const Transform3 & tfrm ) ); } -inline const Transform3 absPerElem( const Transform3 & tfrm ) +__forceinline const Transform3 absPerElem( const Transform3 & tfrm ) { return Transform3( absPerElem( tfrm.getCol0() ), @@ -1740,7 +1740,7 @@ inline const Transform3 absPerElem( const Transform3 & tfrm ) ); } -inline const Vector3 Transform3::operator *( const Vector3 &vec ) const +__forceinline const Vector3 Transform3::operator *( const Vector3 &vec ) const { __m128 res; __m128 xxxx, yyyy, zzzz; @@ -1753,7 +1753,7 @@ inline const Vector3 Transform3::operator *( const Vector3 &vec ) const return Vector3( res ); } -inline const Point3 Transform3::operator *( const Point3 &pnt ) const +__forceinline const Point3 Transform3::operator *( const Point3 &pnt ) const { __m128 tmp0, tmp1, res; __m128 xxxx, yyyy, zzzz; @@ -1768,7 +1768,7 @@ inline const Point3 Transform3::operator *( const Point3 &pnt ) const return Point3( 
res ); } -inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +__forceinline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const { return Transform3( ( *this * tfrm.mCol0 ), @@ -1778,13 +1778,13 @@ inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const ); } -inline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +__forceinline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) { *this = *this * tfrm; return *this; } -inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +__forceinline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) { return Transform3( mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), @@ -1794,7 +1794,7 @@ inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & ); } -inline const Transform3 Transform3::identity( ) +__forceinline const Transform3 Transform3::identity( ) { return Transform3( Vector3::xAxis( ), @@ -1804,7 +1804,7 @@ inline const Transform3 Transform3::identity( ) ); } -inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +__forceinline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) { mCol0 = tfrm.getCol0(); mCol1 = tfrm.getCol1(); @@ -1812,28 +1812,28 @@ inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) return *this; } -inline const Matrix3 Transform3::getUpper3x3( ) const +__forceinline const Matrix3 Transform3::getUpper3x3( ) const { return Matrix3( mCol0, mCol1, mCol2 ); } -inline Transform3 & Transform3::setTranslation( const Vector3 &translateVec ) +__forceinline Transform3 & Transform3::setTranslation( const Vector3 &translateVec ) { mCol3 = translateVec; return *this; } -inline const Vector3 Transform3::getTranslation( ) const +__forceinline const Vector3 Transform3::getTranslation( ) const { return mCol3; } -inline const Transform3 Transform3::rotationX( float radians ) +__forceinline 
const Transform3 Transform3::rotationX( float radians ) { return rotationX( floatInVec(radians) ); } -inline const Transform3 Transform3::rotationX( const floatInVec &radians ) +__forceinline const Transform3 Transform3::rotationX( const floatInVec &radians ) { __m128 s, c, res1, res2; __m128 zero; @@ -1853,12 +1853,12 @@ inline const Transform3 Transform3::rotationX( const floatInVec &radians ) ); } -inline const Transform3 Transform3::rotationY( float radians ) +__forceinline const Transform3 Transform3::rotationY( float radians ) { return rotationY( floatInVec(radians) ); } -inline const Transform3 Transform3::rotationY( const floatInVec &radians ) +__forceinline const Transform3 Transform3::rotationY( const floatInVec &radians ) { __m128 s, c, res0, res2; __m128 zero; @@ -1878,12 +1878,12 @@ inline const Transform3 Transform3::rotationY( const floatInVec &radians ) ); } -inline const Transform3 Transform3::rotationZ( float radians ) +__forceinline const Transform3 Transform3::rotationZ( float radians ) { return rotationZ( floatInVec(radians) ); } -inline const Transform3 Transform3::rotationZ( const floatInVec &radians ) +__forceinline const Transform3 Transform3::rotationZ( const floatInVec &radians ) { __m128 s, c, res0, res1; __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; @@ -1902,7 +1902,7 @@ inline const Transform3 Transform3::rotationZ( const floatInVec &radians ) ); } -inline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ ) +__forceinline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ ) { __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; angles = Vector4( radiansXYZ, 0.0f ).get128(); @@ -1925,22 +1925,22 @@ inline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ ) ); } -inline const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec ) +__forceinline const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec ) { return 
rotation( floatInVec(radians), unitVec ); } -inline const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +__forceinline const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec ) { return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); } -inline const Transform3 Transform3::rotation( const Quat &unitQuat ) +__forceinline const Transform3 Transform3::rotation( const Quat &unitQuat ) { return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); } -inline const Transform3 Transform3::scale( const Vector3 &scaleVec ) +__forceinline const Transform3 Transform3::scale( const Vector3 &scaleVec ) { __m128 zero = _mm_setzero_ps(); __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; @@ -1954,7 +1954,7 @@ inline const Transform3 Transform3::scale( const Vector3 &scaleVec ) ); } -inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ) +__forceinline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ) { return Transform3( ( tfrm.getCol0() * scaleVec.getX( ) ), @@ -1964,7 +1964,7 @@ inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &sca ); } -inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ) +__forceinline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ) { return Transform3( mulPerElem( tfrm.getCol0(), scaleVec ), @@ -1974,7 +1974,7 @@ inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 ); } -inline const Transform3 Transform3::translation( const Vector3 &translateVec ) +__forceinline const Transform3 Transform3::translation( const Vector3 &translateVec ) { return Transform3( Vector3::xAxis( ), @@ -1984,7 +1984,7 @@ inline const Transform3 Transform3::translation( const Vector3 &translateVec ) ); } -inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool 
select1 ) +__forceinline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) { return Transform3( select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), @@ -1994,7 +1994,7 @@ inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfr ); } -inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ) +__forceinline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ) { return Transform3( select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), @@ -2006,14 +2006,14 @@ inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfr #ifdef _VECTORMATH_DEBUG -inline void print( const Transform3 & tfrm ) +__forceinline void print( const Transform3 & tfrm ) { print( tfrm.getRow( 0 ) ); print( tfrm.getRow( 1 ) ); print( tfrm.getRow( 2 ) ); } -inline void print( const Transform3 & tfrm, const char * name ) +__forceinline void print( const Transform3 & tfrm, const char * name ) { printf("%s:\n", name); print( tfrm ); @@ -2021,7 +2021,7 @@ inline void print( const Transform3 & tfrm, const char * name ) #endif -inline Quat::Quat( const Matrix3 & tfrm ) +__forceinline Quat::Quat( const Matrix3 & tfrm ) { __m128 res; __m128 col0, col1, col2; @@ -2109,7 +2109,7 @@ inline Quat::Quat( const Matrix3 & tfrm ) mVec128 = res; } -inline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 ) +__forceinline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 ) { return Matrix3( ( tfrm0 * tfrm1.getX( ) ), @@ -2118,7 +2118,7 @@ inline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 ) ); } -inline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 ) +__forceinline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 ) { return Matrix4( ( tfrm0 * tfrm1.getX( ) ), @@ -2128,7 +2128,7 @@ inline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 ) ); 
} -inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ) +__forceinline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ) { __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res; __m128 xxxx, yyyy, zzzz; @@ -2151,7 +2151,7 @@ inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ) return Vector3( res ); } -inline const Matrix3 crossMatrix( const Vector3 &vec ) +__forceinline const Matrix3 crossMatrix( const Vector3 &vec ) { __m128 neg, res0, res1, res2; neg = negatef4( vec.get128() ); @@ -2179,7 +2179,7 @@ inline const Matrix3 crossMatrix( const Vector3 &vec ) ); } -inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ) +__forceinline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ) { return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); } diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h index a7cd4e145..081cb3a4d 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. 
Redistribution and use in source and binary forms, @@ -42,80 +42,96 @@ namespace Vectormath { namespace Aos { -inline Quat::Quat( float _x, float _y, float _z, float _w ) +__forceinline void Quat::set128(vec_float4 vec) { - mVec128 = _mm_setr_ps(_x, _y, _z, _w); + mVec128 = vec; } -inline Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) +__forceinline Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) { mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); } -inline Quat::Quat( const Vector3 &xyz, float _w ) +__forceinline Quat::Quat( const Vector3 &xyz, float _w ) { mVec128 = xyz.get128(); _vmathVfSetElement(mVec128, _w, 3); } -inline Quat::Quat( const Vector3 &xyz, const floatInVec &_w ) + + +__forceinline Quat::Quat(const Quat& quat) +{ + mVec128 = quat.get128(); +} + +__forceinline Quat::Quat( float _x, float _y, float _z, float _w ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, _w); +} + + + + + +__forceinline Quat::Quat( const Vector3 &xyz, const floatInVec &_w ) { mVec128 = xyz.get128(); mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); } -inline Quat::Quat( const Vector4 &vec ) +__forceinline Quat::Quat( const Vector4 &vec ) { mVec128 = vec.get128(); } -inline Quat::Quat( float scalar ) +__forceinline Quat::Quat( float scalar ) { mVec128 = floatInVec(scalar).get128(); } -inline Quat::Quat( const floatInVec &scalar ) +__forceinline Quat::Quat( const floatInVec &scalar ) { mVec128 = scalar.get128(); } -inline Quat::Quat( __m128 vf4 ) +__forceinline Quat::Quat( __m128 vf4 ) { mVec128 = vf4; } -inline const Quat Quat::identity( ) +__forceinline const Quat Quat::identity( ) { return Quat( _VECTORMATH_UNIT_0001 ); } -inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ) +__forceinline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ) { return lerp( floatInVec(t), quat0, 
quat1 ); } -inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ) +__forceinline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ) { return ( quat0 + ( ( quat1 - quat0 ) * t ) ); } -inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ) +__forceinline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ) { return slerp( floatInVec(t), unitQuat0, unitQuat1 ); } -inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ) +__forceinline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ) { Quat start; vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; - vec_uint4 selectMask; + __m128 selectMask; cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() ); - selectMask = (vec_uint4)vec_cmpgt( _mm_setzero_ps(), cosAngle ); + selectMask = (__m128)vec_cmpgt( _mm_setzero_ps(), cosAngle ); cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask ); start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); - selectMask = (vec_uint4)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + selectMask = (__m128)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); angle = acosf4( cosAngle ); tttt = t.get128(); oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt ); @@ -129,236 +145,239 @@ inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) ); } -inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +__forceinline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) { return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 ); } -inline const Quat squad( const 
floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +__forceinline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) { return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) ); } -inline __m128 Quat::get128( ) const +__forceinline __m128 Quat::get128( ) const { return mVec128; } -inline Quat & Quat::operator =( const Quat &quat ) +__forceinline Quat & Quat::operator =( const Quat &quat ) { mVec128 = quat.mVec128; return *this; } -inline Quat & Quat::setXYZ( const Vector3 &vec ) +__forceinline Quat & Quat::setXYZ( const Vector3 &vec ) { __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; mVec128 = vec_sel( vec.get128(), mVec128, sw ); return *this; } -inline const Vector3 Quat::getXYZ( ) const +__forceinline const Vector3 Quat::getXYZ( ) const { return Vector3( mVec128 ); } -inline Quat & Quat::setX( float _x ) +__forceinline Quat & Quat::setX( float _x ) { _vmathVfSetElement(mVec128, _x, 0); return *this; } -inline Quat & Quat::setX( const floatInVec &_x ) +__forceinline Quat & Quat::setX( const floatInVec &_x ) { mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); return *this; } -inline const floatInVec Quat::getX( ) const +__forceinline const floatInVec Quat::getX( ) const { return floatInVec( mVec128, 0 ); } -inline Quat & Quat::setY( float _y ) +__forceinline Quat & Quat::setY( float _y ) { _vmathVfSetElement(mVec128, _y, 1); return *this; } -inline Quat & Quat::setY( const floatInVec &_y ) +__forceinline Quat & Quat::setY( const floatInVec &_y ) { mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); return *this; } -inline const floatInVec Quat::getY( ) const +__forceinline const floatInVec Quat::getY( ) const { return floatInVec( mVec128, 1 ); } -inline Quat & Quat::setZ( float _z ) +__forceinline Quat & Quat::setZ( float _z ) { 
_vmathVfSetElement(mVec128, _z, 2); return *this; } -inline Quat & Quat::setZ( const floatInVec &_z ) +__forceinline Quat & Quat::setZ( const floatInVec &_z ) { mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); return *this; } -inline const floatInVec Quat::getZ( ) const +__forceinline const floatInVec Quat::getZ( ) const { return floatInVec( mVec128, 2 ); } -inline Quat & Quat::setW( float _w ) +__forceinline Quat & Quat::setW( float _w ) { _vmathVfSetElement(mVec128, _w, 3); return *this; } -inline Quat & Quat::setW( const floatInVec &_w ) +__forceinline Quat & Quat::setW( const floatInVec &_w ) { mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); return *this; } -inline const floatInVec Quat::getW( ) const +__forceinline const floatInVec Quat::getW( ) const { return floatInVec( mVec128, 3 ); } -inline Quat & Quat::setElem( int idx, float value ) +__forceinline Quat & Quat::setElem( int idx, float value ) { _vmathVfSetElement(mVec128, value, idx); return *this; } -inline Quat & Quat::setElem( int idx, const floatInVec &value ) +__forceinline Quat & Quat::setElem( int idx, const floatInVec &value ) { mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); return *this; } -inline const floatInVec Quat::getElem( int idx ) const +__forceinline const floatInVec Quat::getElem( int idx ) const { return floatInVec( mVec128, idx ); } -inline VecIdx Quat::operator []( int idx ) +__forceinline VecIdx Quat::operator []( int idx ) { return VecIdx( mVec128, idx ); } -inline const floatInVec Quat::operator []( int idx ) const +__forceinline const floatInVec Quat::operator []( int idx ) const { return floatInVec( mVec128, idx ); } -inline const Quat Quat::operator +( const Quat &quat ) const +__forceinline const Quat Quat::operator +( const Quat &quat ) const { return Quat( _mm_add_ps( mVec128, quat.mVec128 ) ); } -inline const Quat Quat::operator -( const Quat &quat ) const + +__forceinline const Quat Quat::operator -( const Quat &quat ) const { return Quat( _mm_sub_ps( 
mVec128, quat.mVec128 ) ); } -inline const Quat Quat::operator *( float scalar ) const +__forceinline const Quat Quat::operator *( float scalar ) const { return *this * floatInVec(scalar); } -inline const Quat Quat::operator *( const floatInVec &scalar ) const +__forceinline const Quat Quat::operator *( const floatInVec &scalar ) const { return Quat( _mm_mul_ps( mVec128, scalar.get128() ) ); } -inline Quat & Quat::operator +=( const Quat &quat ) +__forceinline Quat & Quat::operator +=( const Quat &quat ) { *this = *this + quat; return *this; } -inline Quat & Quat::operator -=( const Quat &quat ) +__forceinline Quat & Quat::operator -=( const Quat &quat ) { *this = *this - quat; return *this; } -inline Quat & Quat::operator *=( float scalar ) +__forceinline Quat & Quat::operator *=( float scalar ) { *this = *this * scalar; return *this; } -inline Quat & Quat::operator *=( const floatInVec &scalar ) +__forceinline Quat & Quat::operator *=( const floatInVec &scalar ) { *this = *this * scalar; return *this; } -inline const Quat Quat::operator /( float scalar ) const +__forceinline const Quat Quat::operator /( float scalar ) const { return *this / floatInVec(scalar); } -inline const Quat Quat::operator /( const floatInVec &scalar ) const +__forceinline const Quat Quat::operator /( const floatInVec &scalar ) const { return Quat( _mm_div_ps( mVec128, scalar.get128() ) ); } -inline Quat & Quat::operator /=( float scalar ) +__forceinline Quat & Quat::operator /=( float scalar ) { *this = *this / scalar; return *this; } -inline Quat & Quat::operator /=( const floatInVec &scalar ) +__forceinline Quat & Quat::operator /=( const floatInVec &scalar ) { *this = *this / scalar; return *this; } -inline const Quat Quat::operator -( ) const +__forceinline const Quat Quat::operator -( ) const { return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); } -inline const Quat operator *( float scalar, const Quat &quat ) +__forceinline const Quat operator *( float scalar, const Quat &quat ) { 
return floatInVec(scalar) * quat; } -inline const Quat operator *( const floatInVec &scalar, const Quat &quat ) +__forceinline const Quat operator *( const floatInVec &scalar, const Quat &quat ) { return quat * scalar; } -inline const floatInVec dot( const Quat &quat0, const Quat &quat1 ) +__forceinline const floatInVec dot( const Quat &quat0, const Quat &quat1 ) { return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); } -inline const floatInVec norm( const Quat &quat ) +__forceinline const floatInVec norm( const Quat &quat ) { return floatInVec( _vmathVfDot4( quat.get128(), quat.get128() ), 0 ); } -inline const floatInVec length( const Quat &quat ) +__forceinline const floatInVec length( const Quat &quat ) { return floatInVec( _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 ); } -inline const Quat normalize( const Quat &quat ) +__forceinline const Quat normalize( const Quat &quat ) { - return Quat( _mm_mul_ps( quat.get128(), _mm_rsqrt_ps( _vmathVfDot4( quat.get128(), quat.get128() ) ) ) ); + vec_float4 dot =_vmathVfDot4( quat.get128(), quat.get128()); + return Quat( _mm_mul_ps( quat.get128(), newtonrapson_rsqrt4( dot ) ) ); } -inline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ) + +__forceinline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ) { Vector3 crossVec; __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res; @@ -373,12 +392,12 @@ inline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVe return Quat( res ); } -inline const Quat Quat::rotation( float radians, const Vector3 &unitVec ) +__forceinline const Quat Quat::rotation( float radians, const Vector3 &unitVec ) { return rotation( floatInVec(radians), unitVec ); } -inline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec ) +__forceinline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec ) { __m128 s, c, angle, res; 
angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); @@ -388,12 +407,12 @@ inline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unit return Quat( res ); } -inline const Quat Quat::rotationX( float radians ) +__forceinline const Quat Quat::rotationX( float radians ) { return rotationX( floatInVec(radians) ); } -inline const Quat Quat::rotationX( const floatInVec &radians ) +__forceinline const Quat Quat::rotationX( const floatInVec &radians ) { __m128 s, c, angle, res; angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); @@ -405,12 +424,12 @@ inline const Quat Quat::rotationX( const floatInVec &radians ) return Quat( res ); } -inline const Quat Quat::rotationY( float radians ) +__forceinline const Quat Quat::rotationY( float radians ) { return rotationY( floatInVec(radians) ); } -inline const Quat Quat::rotationY( const floatInVec &radians ) +__forceinline const Quat Quat::rotationY( const floatInVec &radians ) { __m128 s, c, angle, res; angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); @@ -422,12 +441,12 @@ inline const Quat Quat::rotationY( const floatInVec &radians ) return Quat( res ); } -inline const Quat Quat::rotationZ( float radians ) +__forceinline const Quat Quat::rotationZ( float radians ) { return rotationZ( floatInVec(radians) ); } -inline const Quat Quat::rotationZ( const floatInVec &radians ) +__forceinline const Quat Quat::rotationZ( const floatInVec &radians ) { __m128 s, c, angle, res; angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); @@ -439,7 +458,7 @@ inline const Quat Quat::rotationZ( const floatInVec &radians ) return Quat( res ); } -inline const Quat Quat::operator *( const Quat &quat ) const +__forceinline const Quat Quat::operator *( const Quat &quat ) const { __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3; __m128 product, l_wxyz, r_wxyz, xy, qw; @@ -463,13 +482,13 @@ inline const Quat Quat::operator *( const Quat &quat ) const return Quat( vec_sel( qv, qw, sw ) ); } -inline Quat & Quat::operator 
*=( const Quat &quat ) +__forceinline Quat & Quat::operator *=( const Quat &quat ) { *this = *this * quat; return *this; } -inline const Vector3 rotate( const Quat &quat, const Vector3 &vec ) +__forceinline const Vector3 rotate( const Quat &quat, const Vector3 &vec ) { __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res; qdata = quat.get128(); vdata = vec.get128(); @@ -493,32 +512,48 @@ inline const Vector3 rotate( const Quat &quat, const Vector3 &vec ) return Vector3( res ); } -inline const Quat conj( const Quat &quat ) +__forceinline const Quat conj( const Quat &quat ) { __declspec(align(16)) unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0}; return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) ); } -inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ) +__forceinline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ) { return select( quat0, quat1, boolInVec(select1) ); } -inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ) +//__forceinline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ) +//{ +// return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) ); +//} + +__forceinline void loadXYZW(Quat& quat, const float* fptr) { - return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) ); +#ifdef USE_SSE2_LDDQU + quat = Quat( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 ); +#else + SSEFloat fl; + fl.f[0] = fptr[0]; + fl.f[1] = fptr[1]; + fl.f[2] = fptr[2]; + fl.f[3] = fptr[3]; + quat = Quat( fl.m128); +#endif + + } #ifdef _VECTORMATH_DEBUG -inline void print( const Quat &quat ) +__forceinline void print( const Quat &quat ) { union { __m128 v; float s[4]; } tmp; tmp.v = quat.get128(); printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); } -inline void print( const Quat &quat, const char * name ) +__forceinline void print( const Quat &quat, 
const char * name ) { union { __m128 v; float s[4]; } tmp; tmp.v = quat.get128(); diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h index 4fe957928..98f560738 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. Redistribution and use in source and binary forms, @@ -58,6 +58,7 @@ #define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f } #define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f } #define _VECTORMATH_SLERP_TOL 0.999f +//_VECTORMATH_SLERP_TOLF //----------------------------------------------------------------------------- // Definitions @@ -65,13 +66,14 @@ #ifndef _VECTORMATH_INTERNAL_FUNCTIONS #define _VECTORMATH_INTERNAL_FUNCTIONS -static inline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 ) +#define _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx)) +static __forceinline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 ) { - __m128 result = _mm_mul_ps( vec0, vec1); + __m128 result = _mm_mul_ps( vec0, vec1); return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) ); } -static inline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 ) +static __forceinline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 ) { __m128 result = _mm_mul_ps(vec0, vec1); return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)), @@ -79,7 +81,7 @@ static inline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 ) _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3))))); } -static inline __m128 
_vmathVfCross( __m128 vec0, __m128 vec1 ) +static __forceinline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 ) { __m128 tmp0, tmp1, tmp2, tmp3, result; tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) ); @@ -91,7 +93,7 @@ static inline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 ) return result; } /* -static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v) +static __forceinline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v) { #if 0 vec_int4 bexp; @@ -123,7 +125,7 @@ static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v) #endif } -static inline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v) +static __forceinline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v) { #if 0 vec_uint4 hfloat_u, hfloat_v; @@ -138,7 +140,7 @@ static inline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v) } */ -static inline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot) +static __forceinline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot) { SSEFloat s; s.m128 = src; @@ -150,7 +152,7 @@ static inline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot) #define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar -static inline __m128 _vmathVfSplatScalar(float scalar) +static __forceinline __m128 _vmathVfSplatScalar(float scalar) { return _mm_set1_ps(scalar); } @@ -160,48 +162,49 @@ static inline __m128 _vmathVfSplatScalar(float scalar) namespace Vectormath { namespace Aos { + #ifdef _VECTORMATH_NO_SCALAR_CAST -inline VecIdx::operator floatInVec() const +__forceinline VecIdx::operator floatInVec() const { return floatInVec(ref, i); } -inline float VecIdx::getAsFloat() const +__forceinline float VecIdx::getAsFloat() const #else -inline VecIdx::operator float() const +__forceinline VecIdx::operator float() const #endif { return ((float *)&ref)[i]; } -inline float VecIdx::operator =( float scalar ) +__forceinline float VecIdx::operator =( float scalar ) { _vmathVfSetElement(ref, scalar, i); return scalar; } 
-inline floatInVec VecIdx::operator =( const floatInVec &scalar ) +__forceinline floatInVec VecIdx::operator =( const floatInVec &scalar ) { ref = _vmathVfInsert(ref, scalar.get128(), i); return scalar; } -inline floatInVec VecIdx::operator =( const VecIdx& scalar ) +__forceinline floatInVec VecIdx::operator =( const VecIdx& scalar ) { return *this = floatInVec(scalar.ref, scalar.i); } -inline floatInVec VecIdx::operator *=( float scalar ) +__forceinline floatInVec VecIdx::operator *=( float scalar ) { return *this *= floatInVec(scalar); } -inline floatInVec VecIdx::operator *=( const floatInVec &scalar ) +__forceinline floatInVec VecIdx::operator *=( const floatInVec &scalar ) { return *this = floatInVec(ref, i) * scalar; } -inline floatInVec VecIdx::operator /=( float scalar ) +__forceinline floatInVec VecIdx::operator /=( float scalar ) { return *this /= floatInVec(scalar); } @@ -211,88 +214,99 @@ inline floatInVec VecIdx::operator /=( const floatInVec &scalar ) return *this = floatInVec(ref, i) / scalar; } -inline floatInVec VecIdx::operator +=( float scalar ) +__forceinline floatInVec VecIdx::operator +=( float scalar ) { return *this += floatInVec(scalar); } -inline floatInVec VecIdx::operator +=( const floatInVec &scalar ) +__forceinline floatInVec VecIdx::operator +=( const floatInVec &scalar ) { return *this = floatInVec(ref, i) + scalar; } -inline floatInVec VecIdx::operator -=( float scalar ) +__forceinline floatInVec VecIdx::operator -=( float scalar ) { return *this -= floatInVec(scalar); } -inline floatInVec VecIdx::operator -=( const floatInVec &scalar ) +__forceinline floatInVec VecIdx::operator -=( const floatInVec &scalar ) { return *this = floatInVec(ref, i) - scalar; } -inline Vector3::Vector3( float _x, float _y, float _z ) +__forceinline Vector3::Vector3(const Vector3& vec) +{ + set128(vec.get128()); +} + +__forceinline void Vector3::set128(vec_float4 vec) +{ + mVec128 = vec; +} + + +__forceinline Vector3::Vector3( float _x, float _y, float _z 
) { mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); } -inline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +__forceinline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) { __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() ); mVec128 = _mm_unpacklo_ps( xz, _y.get128() ); } -inline Vector3::Vector3( const Point3 &pnt ) +__forceinline Vector3::Vector3( const Point3 &pnt ) { mVec128 = pnt.get128(); } -inline Vector3::Vector3( float scalar ) +__forceinline Vector3::Vector3( float scalar ) { mVec128 = floatInVec(scalar).get128(); } -inline Vector3::Vector3( const floatInVec &scalar ) +__forceinline Vector3::Vector3( const floatInVec &scalar ) { mVec128 = scalar.get128(); } -inline Vector3::Vector3( __m128 vf4 ) +__forceinline Vector3::Vector3( __m128 vf4 ) { mVec128 = vf4; } -inline const Vector3 Vector3::xAxis( ) +__forceinline const Vector3 Vector3::xAxis( ) { return Vector3( _VECTORMATH_UNIT_1000 ); } -inline const Vector3 Vector3::yAxis( ) +__forceinline const Vector3 Vector3::yAxis( ) { return Vector3( _VECTORMATH_UNIT_0100 ); } -inline const Vector3 Vector3::zAxis( ) +__forceinline const Vector3 Vector3::zAxis( ) { return Vector3( _VECTORMATH_UNIT_0010 ); } -inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ) { return lerp( floatInVec(t), vec0, vec1 ); } -inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ) { return ( vec0 + ( ( vec1 - vec0 ) * t ) ); } -inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) +__forceinline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) { return slerp( floatInVec(t), unitVec0, unitVec1 ); } -inline const Vector3 slerp( const floatInVec &t, const 
Vector3 &unitVec0, const Vector3 &unitVec1 ) +__forceinline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) { __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); @@ -310,12 +324,27 @@ inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); } -inline __m128 Vector3::get128( ) const +__forceinline __m128 Vector3::get128( ) const { return mVec128; } -inline void storeXYZ( const Vector3 &vec, __m128 * quad ) +__forceinline void loadXYZ(Vector3& vec, const float* fptr) +{ +#ifdef USE_SSE2_LDDQU + vec = Vector3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 ); +#else + SSEFloat fl; + fl.f[0] = fptr[0]; + fl.f[1] = fptr[1]; + fl.f[2] = fptr[2]; + fl.f[3] = fptr[3]; + vec = Vector3( fl.m128); +#endif //USE_SSE2_LDDQU + +} + +__forceinline void storeXYZ( const Vector3 &vec, __m128 * quad ) { __m128 dstVec = *quad; __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize @@ -323,7 +352,15 @@ inline void storeXYZ( const Vector3 &vec, __m128 * quad ) *quad = dstVec; } -inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ) +__forceinline void storeXYZ(const Vector3& vec, float* fptr) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + + +__forceinline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ) { const float *quads = (float *)threeQuads; vec0 = Vector3( _mm_load_ps(quads) ); @@ -332,7 +369,7 @@ inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector vec3 = Vector3( _mm_loadu_ps(quads + 9) ); } -inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const 
Vector3 &vec3, __m128 * threeQuads ) +__forceinline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ) { __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); @@ -343,7 +380,7 @@ inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vecto threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); } /* -inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ) +__forceinline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ) { assert(0); #if 0 @@ -357,197 +394,201 @@ inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vec #endif } */ -inline Vector3 & Vector3::operator =( const Vector3 &vec ) +__forceinline Vector3 & Vector3::operator =( const Vector3 &vec ) { mVec128 = vec.mVec128; return *this; } -inline Vector3 & Vector3::setX( float _x ) +__forceinline Vector3 & Vector3::setX( float _x ) { _vmathVfSetElement(mVec128, _x, 0); return *this; } -inline Vector3 & Vector3::setX( const floatInVec &_x ) +__forceinline Vector3 & Vector3::setX( const floatInVec &_x ) { mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); return *this; } -inline const floatInVec Vector3::getX( ) const +__forceinline const floatInVec Vector3::getX( ) const { return floatInVec( mVec128, 0 ); } -inline Vector3 & Vector3::setY( float _y ) +__forceinline Vector3 & Vector3::setY( float _y ) { _vmathVfSetElement(mVec128, _y, 1); return *this; } -inline Vector3 & Vector3::setY( const 
floatInVec &_y ) +__forceinline Vector3 & Vector3::setY( const floatInVec &_y ) { mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); return *this; } -inline const floatInVec Vector3::getY( ) const +__forceinline const floatInVec Vector3::getY( ) const { return floatInVec( mVec128, 1 ); } -inline Vector3 & Vector3::setZ( float _z ) +__forceinline Vector3 & Vector3::setZ( float _z ) { _vmathVfSetElement(mVec128, _z, 2); return *this; } -inline Vector3 & Vector3::setZ( const floatInVec &_z ) +__forceinline Vector3 & Vector3::setZ( const floatInVec &_z ) { mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); return *this; } -inline const floatInVec Vector3::getZ( ) const +__forceinline const floatInVec Vector3::getZ( ) const { return floatInVec( mVec128, 2 ); } -inline Vector3 & Vector3::setElem( int idx, float value ) +__forceinline Vector3 & Vector3::setElem( int idx, float value ) { _vmathVfSetElement(mVec128, value, idx); return *this; } -inline Vector3 & Vector3::setElem( int idx, const floatInVec &value ) +__forceinline Vector3 & Vector3::setElem( int idx, const floatInVec &value ) { mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); return *this; } -inline const floatInVec Vector3::getElem( int idx ) const +__forceinline const floatInVec Vector3::getElem( int idx ) const { return floatInVec( mVec128, idx ); } -inline VecIdx Vector3::operator []( int idx ) +__forceinline VecIdx Vector3::operator []( int idx ) { return VecIdx( mVec128, idx ); } -inline const floatInVec Vector3::operator []( int idx ) const +__forceinline const floatInVec Vector3::operator []( int idx ) const { return floatInVec( mVec128, idx ); } -inline const Vector3 Vector3::operator +( const Vector3 &vec ) const +__forceinline const Vector3 Vector3::operator +( const Vector3 &vec ) const { return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) ); } -inline const Vector3 Vector3::operator -( const Vector3 &vec ) const +__forceinline const Vector3 Vector3::operator -( const Vector3 &vec ) const { 
return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) ); } -inline const Point3 Vector3::operator +( const Point3 &pnt ) const +__forceinline const Point3 Vector3::operator +( const Point3 &pnt ) const { return Point3( _mm_add_ps( mVec128, pnt.get128() ) ); } -inline const Vector3 Vector3::operator *( float scalar ) const +__forceinline const Vector3 Vector3::operator *( float scalar ) const { return *this * floatInVec(scalar); } -inline const Vector3 Vector3::operator *( const floatInVec &scalar ) const +__forceinline const Vector3 Vector3::operator *( const floatInVec &scalar ) const { return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) ); } -inline Vector3 & Vector3::operator +=( const Vector3 &vec ) +__forceinline Vector3 & Vector3::operator +=( const Vector3 &vec ) { *this = *this + vec; return *this; } -inline Vector3 & Vector3::operator -=( const Vector3 &vec ) +__forceinline Vector3 & Vector3::operator -=( const Vector3 &vec ) { *this = *this - vec; return *this; } -inline Vector3 & Vector3::operator *=( float scalar ) +__forceinline Vector3 & Vector3::operator *=( float scalar ) { *this = *this * scalar; return *this; } -inline Vector3 & Vector3::operator *=( const floatInVec &scalar ) +__forceinline Vector3 & Vector3::operator *=( const floatInVec &scalar ) { *this = *this * scalar; return *this; } -inline const Vector3 Vector3::operator /( float scalar ) const +__forceinline const Vector3 Vector3::operator /( float scalar ) const { return *this / floatInVec(scalar); } -inline const Vector3 Vector3::operator /( const floatInVec &scalar ) const +__forceinline const Vector3 Vector3::operator /( const floatInVec &scalar ) const { return Vector3( _mm_div_ps( mVec128, scalar.get128() ) ); } -inline Vector3 & Vector3::operator /=( float scalar ) +__forceinline Vector3 & Vector3::operator /=( float scalar ) { *this = *this / scalar; return *this; } -inline Vector3 & Vector3::operator /=( const floatInVec &scalar ) +__forceinline Vector3 & Vector3::operator /=( 
const floatInVec &scalar ) { *this = *this / scalar; return *this; } -inline const Vector3 Vector3::operator -( ) const +__forceinline const Vector3 Vector3::operator -( ) const { - return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); + //return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); + + __declspec(align(16)) static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; + __m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf; + return Vector3(_mm_xor_ps(get128(),NEG_MASK)); } -inline const Vector3 operator *( float scalar, const Vector3 &vec ) +__forceinline const Vector3 operator *( float scalar, const Vector3 &vec ) { return floatInVec(scalar) * vec; } -inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ) +__forceinline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ) { return vec * scalar; } -inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); } -inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) ); } -inline const Vector3 recipPerElem( const Vector3 &vec ) +__forceinline const Vector3 recipPerElem( const Vector3 &vec ) { return Vector3( _mm_rcp_ps( vec.get128() ) ); } -inline const Vector3 absPerElem( const Vector3 &vec ) +__forceinline const Vector3 absPerElem( const Vector3 &vec ) { return Vector3( fabsf4( vec.get128() ) ); } -inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ) { __m128 vmask = toM128(0x7fffffff); return Vector3( _mm_or_ps( @@ -555,82 +596,83 @@ inline const Vector3 copySignPerElem( const Vector3 &vec0, 
const Vector3 &vec1 ) _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs } -inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) ); } -inline const floatInVec maxElem( const Vector3 &vec ) +__forceinline const floatInVec maxElem( const Vector3 &vec ) { return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); } -inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) ); } -inline const floatInVec minElem( const Vector3 &vec ) +__forceinline const floatInVec minElem( const Vector3 &vec ) { return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); } -inline const floatInVec sum( const Vector3 &vec ) +__forceinline const floatInVec sum( const Vector3 &vec ) { return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); } -inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ) { return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 ); } -inline const floatInVec lengthSqr( const Vector3 &vec ) +__forceinline const floatInVec lengthSqr( const Vector3 &vec ) { return floatInVec( _vmathVfDot3( vec.get128(), vec.get128() ), 0 ); } -inline const floatInVec length( const Vector3 &vec ) +__forceinline const floatInVec length( const Vector3 &vec ) { return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 ); } -inline const Vector3 normalizeApprox( const Vector3 &vec ) +__forceinline 
const Vector3 normalizeApprox( const Vector3 &vec ) { return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); } -inline const Vector3 normalize( const Vector3 &vec ) +__forceinline const Vector3 normalize( const Vector3 &vec ) { return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); } -inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ) +__forceinline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ) { return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) ); } -inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ) +__forceinline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ) { return select( vec0, vec1, boolInVec(select1) ); } -inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ) + +__forceinline const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1) { - return Vector3(vec_sel( vec0.get128(), vec1.get128(), select1.get128() )); + return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128())); } #ifdef _VECTORMATH_DEBUG -inline void print( const Vector3 &vec ) +__forceinline void print( const Vector3 &vec ) { union { __m128 v; float s[4]; } tmp; tmp.v = vec.get128(); printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); } -inline void print( const Vector3 &vec, const char * name ) +__forceinline void print( const Vector3 &vec, const char * name ) { union { __m128 v; float s[4]; } tmp; tmp.v = vec.get128(); @@ -639,98 +681,98 @@ inline void print( const Vector3 &vec, const char * name ) #endif -inline Vector4::Vector4( float _x, float _y, float _z, float _w ) +__forceinline Vector4::Vector4( float _x, float _y, float _z, float _w ) { mVec128 = _mm_setr_ps(_x, _y, _z, _w); } -inline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, 
const floatInVec &_w ) +__forceinline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) { mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); } -inline Vector4::Vector4( const Vector3 &xyz, float _w ) +__forceinline Vector4::Vector4( const Vector3 &xyz, float _w ) { mVec128 = xyz.get128(); _vmathVfSetElement(mVec128, _w, 3); } -inline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w ) +__forceinline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w ) { mVec128 = xyz.get128(); mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); } -inline Vector4::Vector4( const Vector3 &vec ) +__forceinline Vector4::Vector4( const Vector3 &vec ) { mVec128 = vec.get128(); mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3); } -inline Vector4::Vector4( const Point3 &pnt ) +__forceinline Vector4::Vector4( const Point3 &pnt ) { mVec128 = pnt.get128(); mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3); } -inline Vector4::Vector4( const Quat &quat ) +__forceinline Vector4::Vector4( const Quat &quat ) { mVec128 = quat.get128(); } -inline Vector4::Vector4( float scalar ) +__forceinline Vector4::Vector4( float scalar ) { mVec128 = floatInVec(scalar).get128(); } -inline Vector4::Vector4( const floatInVec &scalar ) +__forceinline Vector4::Vector4( const floatInVec &scalar ) { mVec128 = scalar.get128(); } -inline Vector4::Vector4( __m128 vf4 ) +__forceinline Vector4::Vector4( __m128 vf4 ) { mVec128 = vf4; } -inline const Vector4 Vector4::xAxis( ) +__forceinline const Vector4 Vector4::xAxis( ) { return Vector4( _VECTORMATH_UNIT_1000 ); } -inline const Vector4 Vector4::yAxis( ) +__forceinline const Vector4 Vector4::yAxis( ) { return Vector4( _VECTORMATH_UNIT_0100 ); } -inline const Vector4 Vector4::zAxis( ) +__forceinline const Vector4 Vector4::zAxis( ) { return Vector4( _VECTORMATH_UNIT_0010 ); } -inline const Vector4 Vector4::wAxis( ) 
+__forceinline const Vector4 Vector4::wAxis( ) { return Vector4( _VECTORMATH_UNIT_0001 ); } -inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ) { return lerp( floatInVec(t), vec0, vec1 ); } -inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ) { return ( vec0 + ( ( vec1 - vec0 ) * t ) ); } -inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +__forceinline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) { return slerp( floatInVec(t), unitVec0, unitVec1 ); } -inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +__forceinline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) { __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() ); @@ -748,232 +790,232 @@ inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); } -inline __m128 Vector4::get128( ) const +__forceinline __m128 Vector4::get128( ) const { return mVec128; } /* -inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ) +__forceinline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ) { twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128()); twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128()); } */ -inline Vector4 & Vector4::operator =( const Vector4 &vec ) +__forceinline Vector4 & Vector4::operator =( const Vector4 
&vec ) { mVec128 = vec.mVec128; return *this; } -inline Vector4 & Vector4::setXYZ( const Vector3 &vec ) +__forceinline Vector4 & Vector4::setXYZ( const Vector3 &vec ) { __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; mVec128 = vec_sel( vec.get128(), mVec128, sw ); return *this; } -inline const Vector3 Vector4::getXYZ( ) const +__forceinline const Vector3 Vector4::getXYZ( ) const { return Vector3( mVec128 ); } -inline Vector4 & Vector4::setX( float _x ) +__forceinline Vector4 & Vector4::setX( float _x ) { _vmathVfSetElement(mVec128, _x, 0); return *this; } -inline Vector4 & Vector4::setX( const floatInVec &_x ) +__forceinline Vector4 & Vector4::setX( const floatInVec &_x ) { mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); return *this; } -inline const floatInVec Vector4::getX( ) const +__forceinline const floatInVec Vector4::getX( ) const { return floatInVec( mVec128, 0 ); } -inline Vector4 & Vector4::setY( float _y ) +__forceinline Vector4 & Vector4::setY( float _y ) { _vmathVfSetElement(mVec128, _y, 1); return *this; } -inline Vector4 & Vector4::setY( const floatInVec &_y ) +__forceinline Vector4 & Vector4::setY( const floatInVec &_y ) { mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); return *this; } -inline const floatInVec Vector4::getY( ) const +__forceinline const floatInVec Vector4::getY( ) const { return floatInVec( mVec128, 1 ); } -inline Vector4 & Vector4::setZ( float _z ) +__forceinline Vector4 & Vector4::setZ( float _z ) { _vmathVfSetElement(mVec128, _z, 2); return *this; } -inline Vector4 & Vector4::setZ( const floatInVec &_z ) +__forceinline Vector4 & Vector4::setZ( const floatInVec &_z ) { mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); return *this; } -inline const floatInVec Vector4::getZ( ) const +__forceinline const floatInVec Vector4::getZ( ) const { return floatInVec( mVec128, 2 ); } -inline Vector4 & Vector4::setW( float _w ) +__forceinline Vector4 & Vector4::setW( float _w ) { _vmathVfSetElement(mVec128, _w, 3); 
return *this; } -inline Vector4 & Vector4::setW( const floatInVec &_w ) +__forceinline Vector4 & Vector4::setW( const floatInVec &_w ) { mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); return *this; } -inline const floatInVec Vector4::getW( ) const +__forceinline const floatInVec Vector4::getW( ) const { return floatInVec( mVec128, 3 ); } -inline Vector4 & Vector4::setElem( int idx, float value ) +__forceinline Vector4 & Vector4::setElem( int idx, float value ) { _vmathVfSetElement(mVec128, value, idx); return *this; } -inline Vector4 & Vector4::setElem( int idx, const floatInVec &value ) +__forceinline Vector4 & Vector4::setElem( int idx, const floatInVec &value ) { mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); return *this; } -inline const floatInVec Vector4::getElem( int idx ) const +__forceinline const floatInVec Vector4::getElem( int idx ) const { return floatInVec( mVec128, idx ); } -inline VecIdx Vector4::operator []( int idx ) +__forceinline VecIdx Vector4::operator []( int idx ) { return VecIdx( mVec128, idx ); } -inline const floatInVec Vector4::operator []( int idx ) const +__forceinline const floatInVec Vector4::operator []( int idx ) const { return floatInVec( mVec128, idx ); } -inline const Vector4 Vector4::operator +( const Vector4 &vec ) const +__forceinline const Vector4 Vector4::operator +( const Vector4 &vec ) const { return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) ); } -inline const Vector4 Vector4::operator -( const Vector4 &vec ) const +__forceinline const Vector4 Vector4::operator -( const Vector4 &vec ) const { return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) ); } -inline const Vector4 Vector4::operator *( float scalar ) const +__forceinline const Vector4 Vector4::operator *( float scalar ) const { return *this * floatInVec(scalar); } -inline const Vector4 Vector4::operator *( const floatInVec &scalar ) const +__forceinline const Vector4 Vector4::operator *( const floatInVec &scalar ) const { return Vector4( _mm_mul_ps( 
mVec128, scalar.get128() ) ); } -inline Vector4 & Vector4::operator +=( const Vector4 &vec ) +__forceinline Vector4 & Vector4::operator +=( const Vector4 &vec ) { *this = *this + vec; return *this; } -inline Vector4 & Vector4::operator -=( const Vector4 &vec ) +__forceinline Vector4 & Vector4::operator -=( const Vector4 &vec ) { *this = *this - vec; return *this; } -inline Vector4 & Vector4::operator *=( float scalar ) +__forceinline Vector4 & Vector4::operator *=( float scalar ) { *this = *this * scalar; return *this; } -inline Vector4 & Vector4::operator *=( const floatInVec &scalar ) +__forceinline Vector4 & Vector4::operator *=( const floatInVec &scalar ) { *this = *this * scalar; return *this; } -inline const Vector4 Vector4::operator /( float scalar ) const +__forceinline const Vector4 Vector4::operator /( float scalar ) const { return *this / floatInVec(scalar); } -inline const Vector4 Vector4::operator /( const floatInVec &scalar ) const +__forceinline const Vector4 Vector4::operator /( const floatInVec &scalar ) const { return Vector4( _mm_div_ps( mVec128, scalar.get128() ) ); } -inline Vector4 & Vector4::operator /=( float scalar ) +__forceinline Vector4 & Vector4::operator /=( float scalar ) { *this = *this / scalar; return *this; } -inline Vector4 & Vector4::operator /=( const floatInVec &scalar ) +__forceinline Vector4 & Vector4::operator /=( const floatInVec &scalar ) { *this = *this / scalar; return *this; } -inline const Vector4 Vector4::operator -( ) const +__forceinline const Vector4 Vector4::operator -( ) const { return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); } -inline const Vector4 operator *( float scalar, const Vector4 &vec ) +__forceinline const Vector4 operator *( float scalar, const Vector4 &vec ) { return floatInVec(scalar) * vec; } -inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ) +__forceinline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ) { return vec * scalar; } 
-inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ) { return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); } -inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ) { return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) ); } -inline const Vector4 recipPerElem( const Vector4 &vec ) +__forceinline const Vector4 recipPerElem( const Vector4 &vec ) { return Vector4( _mm_rcp_ps( vec.get128() ) ); } -inline const Vector4 absPerElem( const Vector4 &vec ) +__forceinline const Vector4 absPerElem( const Vector4 &vec ) { return Vector4( fabsf4( vec.get128() ) ); } -inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ) { __m128 vmask = toM128(0x7fffffff); return Vector4( _mm_or_ps( @@ -981,82 +1023,78 @@ inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ) _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs } -inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ) { return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) ); } -inline const floatInVec maxElem( const Vector4 &vec ) +__forceinline const floatInVec maxElem( const Vector4 &vec ) { return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); } -inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ) { return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) ); } -inline const floatInVec minElem( const Vector4 &vec ) 
+__forceinline const floatInVec minElem( const Vector4 &vec ) { return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); } -inline const floatInVec sum( const Vector4 &vec ) +__forceinline const floatInVec sum( const Vector4 &vec ) { return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); } -inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ) +__forceinline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ) { return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 ); } -inline const floatInVec lengthSqr( const Vector4 &vec ) +__forceinline const floatInVec lengthSqr( const Vector4 &vec ) { return floatInVec( _vmathVfDot4( vec.get128(), vec.get128() ), 0 ); } -inline const floatInVec length( const Vector4 &vec ) +__forceinline const floatInVec length( const Vector4 &vec ) { return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 ); } -inline const Vector4 normalizeApprox( const Vector4 &vec ) +__forceinline const Vector4 normalizeApprox( const Vector4 &vec ) { return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); } -inline const Vector4 normalize( const Vector4 &vec ) +__forceinline const Vector4 normalize( const Vector4 &vec ) { return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); } -inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ) +__forceinline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ) { return select( vec0, vec1, boolInVec(select1) ); } -inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ) -{ - return Vector4( vec_sel( 
vec0.get128(), vec1.get128(), select1.get128() ) ); -} #ifdef _VECTORMATH_DEBUG -inline void print( const Vector4 &vec ) +__forceinline void print( const Vector4 &vec ) { union { __m128 v; float s[4]; } tmp; tmp.v = vec.get128(); printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); } -inline void print( const Vector4 &vec, const char * name ) +__forceinline void print( const Vector4 &vec, const char * name ) { union { __m128 v; float s[4]; } tmp; tmp.v = vec.get128(); @@ -1065,52 +1103,52 @@ inline void print( const Vector4 &vec, const char * name ) #endif -inline Point3::Point3( float _x, float _y, float _z ) +__forceinline Point3::Point3( float _x, float _y, float _z ) { mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); } -inline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +__forceinline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) { mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() ); } -inline Point3::Point3( const Vector3 &vec ) +__forceinline Point3::Point3( const Vector3 &vec ) { mVec128 = vec.get128(); } -inline Point3::Point3( float scalar ) +__forceinline Point3::Point3( float scalar ) { mVec128 = floatInVec(scalar).get128(); } -inline Point3::Point3( const floatInVec &scalar ) +__forceinline Point3::Point3( const floatInVec &scalar ) { mVec128 = scalar.get128(); } -inline Point3::Point3( __m128 vf4 ) +__forceinline Point3::Point3( __m128 vf4 ) { mVec128 = vf4; } -inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ) { return lerp( floatInVec(t), pnt0, pnt1 ); } -inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ) { return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); } -inline __m128 Point3::get128( ) const 
+__forceinline __m128 Point3::get128( ) const { return mVec128; } -inline void storeXYZ( const Point3 &pnt, __m128 * quad ) +__forceinline void storeXYZ( const Point3 &pnt, __m128 * quad ) { __m128 dstVec = *quad; __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize @@ -1118,7 +1156,7 @@ inline void storeXYZ( const Point3 &pnt, __m128 * quad ) *quad = dstVec; } -inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ) +__forceinline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ) { const float *quads = (float *)threeQuads; pnt0 = Point3( _mm_load_ps(quads) ); @@ -1127,7 +1165,7 @@ inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3 = Point3( _mm_loadu_ps(quads + 9) ); } -inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ) +__forceinline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ) { __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); @@ -1138,7 +1176,7 @@ inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); } /* -inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ) +__forceinline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ) { #if 0 __m128 xyz0[3]; @@ -1153,138 
+1191,138 @@ inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point #endif } */ -inline Point3 & Point3::operator =( const Point3 &pnt ) +__forceinline Point3 & Point3::operator =( const Point3 &pnt ) { mVec128 = pnt.mVec128; return *this; } -inline Point3 & Point3::setX( float _x ) +__forceinline Point3 & Point3::setX( float _x ) { _vmathVfSetElement(mVec128, _x, 0); return *this; } -inline Point3 & Point3::setX( const floatInVec &_x ) +__forceinline Point3 & Point3::setX( const floatInVec &_x ) { mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); return *this; } -inline const floatInVec Point3::getX( ) const +__forceinline const floatInVec Point3::getX( ) const { return floatInVec( mVec128, 0 ); } -inline Point3 & Point3::setY( float _y ) +__forceinline Point3 & Point3::setY( float _y ) { _vmathVfSetElement(mVec128, _y, 1); return *this; } -inline Point3 & Point3::setY( const floatInVec &_y ) +__forceinline Point3 & Point3::setY( const floatInVec &_y ) { mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); return *this; } -inline const floatInVec Point3::getY( ) const +__forceinline const floatInVec Point3::getY( ) const { return floatInVec( mVec128, 1 ); } -inline Point3 & Point3::setZ( float _z ) +__forceinline Point3 & Point3::setZ( float _z ) { _vmathVfSetElement(mVec128, _z, 2); return *this; } -inline Point3 & Point3::setZ( const floatInVec &_z ) +__forceinline Point3 & Point3::setZ( const floatInVec &_z ) { mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); return *this; } -inline const floatInVec Point3::getZ( ) const +__forceinline const floatInVec Point3::getZ( ) const { return floatInVec( mVec128, 2 ); } -inline Point3 & Point3::setElem( int idx, float value ) +__forceinline Point3 & Point3::setElem( int idx, float value ) { _vmathVfSetElement(mVec128, value, idx); return *this; } -inline Point3 & Point3::setElem( int idx, const floatInVec &value ) +__forceinline Point3 & Point3::setElem( int idx, const floatInVec &value ) { 
mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); return *this; } -inline const floatInVec Point3::getElem( int idx ) const +__forceinline const floatInVec Point3::getElem( int idx ) const { return floatInVec( mVec128, idx ); } -inline VecIdx Point3::operator []( int idx ) +__forceinline VecIdx Point3::operator []( int idx ) { return VecIdx( mVec128, idx ); } -inline const floatInVec Point3::operator []( int idx ) const +__forceinline const floatInVec Point3::operator []( int idx ) const { return floatInVec( mVec128, idx ); } -inline const Vector3 Point3::operator -( const Point3 &pnt ) const +__forceinline const Vector3 Point3::operator -( const Point3 &pnt ) const { return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) ); } -inline const Point3 Point3::operator +( const Vector3 &vec ) const +__forceinline const Point3 Point3::operator +( const Vector3 &vec ) const { return Point3( _mm_add_ps( mVec128, vec.get128() ) ); } -inline const Point3 Point3::operator -( const Vector3 &vec ) const +__forceinline const Point3 Point3::operator -( const Vector3 &vec ) const { return Point3( _mm_sub_ps( mVec128, vec.get128() ) ); } -inline Point3 & Point3::operator +=( const Vector3 &vec ) +__forceinline Point3 & Point3::operator +=( const Vector3 &vec ) { *this = *this + vec; return *this; } -inline Point3 & Point3::operator -=( const Vector3 &vec ) +__forceinline Point3 & Point3::operator -=( const Vector3 &vec ) { *this = *this - vec; return *this; } -inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ) { return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) ); } -inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ) { return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) ); } -inline const Point3 recipPerElem( const Point3 &pnt ) +__forceinline const Point3 
recipPerElem( const Point3 &pnt ) { return Point3( _mm_rcp_ps( pnt.get128() ) ); } -inline const Point3 absPerElem( const Point3 &pnt ) +__forceinline const Point3 absPerElem( const Point3 &pnt ) { return Point3( fabsf4( pnt.get128() ) ); } -inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ) { __m128 vmask = toM128(0x7fffffff); return Point3( _mm_or_ps( @@ -1292,91 +1330,93 @@ inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ) _mm_andnot_ps( vmask, pnt1.get128() ) ) ); // Signs } -inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ) { return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) ); } -inline const floatInVec maxElem( const Point3 &pnt ) +__forceinline const floatInVec maxElem( const Point3 &pnt ) { return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); } -inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ) { return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) ); } -inline const floatInVec minElem( const Point3 &pnt ) +__forceinline const floatInVec minElem( const Point3 &pnt ) { return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); } -inline const floatInVec sum( const Point3 &pnt ) +__forceinline const floatInVec sum( const Point3 &pnt ) { return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); } -inline const Point3 scale( const Point3 &pnt, float scaleVal ) +__forceinline const Point3 scale( const Point3 &pnt, float scaleVal ) { return scale( pnt, floatInVec( 
scaleVal ) ); } -inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ) +__forceinline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ) { return mulPerElem( pnt, Point3( scaleVal ) ); } -inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ) +__forceinline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ) { return mulPerElem( pnt, Point3( scaleVec ) ); } -inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ) +__forceinline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ) { return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); } -inline const floatInVec distSqrFromOrigin( const Point3 &pnt ) +__forceinline const floatInVec distSqrFromOrigin( const Point3 &pnt ) { return lengthSqr( Vector3( pnt ) ); } -inline const floatInVec distFromOrigin( const Point3 &pnt ) +__forceinline const floatInVec distFromOrigin( const Point3 &pnt ) { return length( Vector3( pnt ) ); } -inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ) { return lengthSqr( ( pnt1 - pnt0 ) ); } -inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ) +__forceinline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ) { return length( ( pnt1 - pnt0 ) ); } -inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ) +__forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ) { return select( pnt0, pnt1, boolInVec(select1) ); } -inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ) +__forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ) { return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) ); } + + #ifdef _VECTORMATH_DEBUG -inline void print( const Point3 &pnt ) +__forceinline void 
print( const Point3 &pnt ) { union { __m128 v; float s[4]; } tmp; tmp.v = pnt.get128(); printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); } -inline void print( const Point3 &pnt, const char * name ) +__forceinline void print( const Point3 &pnt, const char * name ) { union { __m128 v; float s[4]; } tmp; tmp.v = pnt.get128(); diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h index 14dc408d0..703790f96 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. All rights reserved. Redistribution and use in source and binary forms, diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h index ebca2094d..131c754a6 100644 --- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h @@ -1,2500 +1,2527 @@ -/* - Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. - All rights reserved. - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided that the - following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- * Neither the name of the Sony Computer Entertainment Inc nor the names - of its contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _VECTORMATH_AOS_CPP_SSE_H -#define _VECTORMATH_AOS_CPP_SSE_H - -#include -#include -#include -#include - -// TODO: Tidy -typedef __m128 vec_float4; -typedef __m128 vec_uint4; -typedef __m128 vec_int4; -typedef __m128i vec_uchar16; -typedef __m128i vec_ushort8; - -#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e)) - -#define _mm_ror_ps(vec,i) \ - (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(i+3)%4,(unsigned char)(i+2)%4,(unsigned char)(i+1)%4,(unsigned char)(i+0)%4))) : (vec)) -#define _mm_rol_ps(vec,i) \ - (((i)%4) ? 
(_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-i)%4,(unsigned char)(6-i)%4,(unsigned char)(5-i)%4,(unsigned char)(4-i)%4))) : (vec)) - -#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4)) - -#define _mm_abs_ps(vec) _mm_andnot_ps(_MASKSIGN_,vec) -#define _mm_neg_ps(vec) _mm_xor_ps(_MASKSIGN_,vec) - -#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) ) - -union SSEFloat -{ - __m128 m128; - float f[4]; -}; - -static inline __m128 vec_sel(__m128 a, __m128 b, __m128 mask) -{ - return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); -} -static inline __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask) -{ - return vec_sel(a, b, _mm_load_ps((float *)_mask)); -} -static inline __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask) -{ - return vec_sel(a, b, _mm_set1_ps(*(float *)&_mask)); -} - -static inline __m128 toM128(unsigned int x) -{ - return _mm_set1_ps( *(float *)&x ); -} - -static inline __m128 fabsf4(__m128 x) -{ - return _mm_and_ps( x, toM128( 0x7fffffff ) ); -} -/* -union SSE64 -{ - __m128 m128; - struct - { - __m64 m01; - __m64 m23; - } m64; -}; - -static inline __m128 vec_cts(__m128 x, int a) -{ - assert(a == 0); // Only 2^0 supported - (void)a; - SSE64 sse64; - sse64.m64.m01 = _mm_cvttps_pi32(x); - sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2)); - _mm_empty(); - return sse64.m128; -} - -static inline __m128 vec_ctf(__m128 x, int a) -{ - assert(a == 0); // Only 2^0 supported - (void)a; - SSE64 sse64; - sse64.m128 = x; - __m128 result =_mm_movelh_ps( - _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01), - _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23)); - _mm_empty(); - return result; -} -*/ -static inline __m128 vec_cts(__m128 x, int a) -{ - assert(a == 0); // Only 2^0 supported - (void)a; - __m128i result = _mm_cvtps_epi32(x); - return (__m128 &)result; -} - -static inline __m128 vec_ctf(__m128 x, int a) -{ - assert(a == 0); // Only 2^0 supported - (void)a; - return _mm_cvtepi32_ps((__m128i &)x); -} - -#define vec_nmsub(a,b,c) 
_mm_sub_ps( c, _mm_mul_ps( a, b ) ) -#define vec_sub(a,b) _mm_sub_ps( a, b ) -#define vec_add(a,b) _mm_add_ps( a, b ) -#define vec_mul(a,b) _mm_mul_ps( a, b ) -#define vec_xor(a,b) _mm_xor_ps( a, b ) -#define vec_and(a,b) _mm_and_ps( a, b ) -#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b ) -#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b ) - -#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b ) -#define vec_mergel(a,b) _mm_unpackhi_ps( a, b ) - -#define vec_andc(a,b) _mm_andnot_ps( b, a ) - -#define sqrtf4(x) _mm_sqrt_ps( x ) -#define rsqrtf4(x) _mm_rsqrt_ps( x ) -#define recipf4(x) _mm_rcp_ps( x ) -#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) - +/* + Copyright (C) 2006, 2010 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_AOS_CPP_SSE_H +#define _VECTORMATH_AOS_CPP_SSE_H + +#include +#include +#include +#include + +#define USE_SSE2_LDDQU +#ifdef USE_SSE2_LDDQU +#include //used for _mm_lddqu_si128 +#endif //USE_SSE2_LDDQU + +// TODO: Tidy +typedef __m128 vec_float4; +typedef __m128 vec_uint4; +typedef __m128 vec_int4; +typedef __m128i vec_uchar16; +typedef __m128i vec_ushort8; + +#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e)) + +#define _mm_ror_ps(vec,i) \ + (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(i+3)%4,(unsigned char)(i+2)%4,(unsigned char)(i+1)%4,(unsigned char)(i+0)%4))) : (vec)) +#define _mm_rol_ps(vec,i) \ + (((i)%4) ? 
(_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-i)%4,(unsigned char)(6-i)%4,(unsigned char)(5-i)%4,(unsigned char)(4-i)%4))) : (vec)) + +#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4)) + +#define _mm_abs_ps(vec) _mm_andnot_ps(_MASKSIGN_,vec) +#define _mm_neg_ps(vec) _mm_xor_ps(_MASKSIGN_,vec) + +#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) ) + +union SSEFloat +{ + __m128i vi; + __m128 m128; + __m128 vf; + unsigned int ui[4]; + unsigned short s[8]; + float f[4]; + SSEFloat(__m128 v) : m128(v) {} + SSEFloat(__m128i v) : vi(v) {} + SSEFloat() {}//uninitialized +}; + +static __forceinline __m128 vec_sel(__m128 a, __m128 b, __m128 mask) +{ + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +} +static __forceinline __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask) +{ + return vec_sel(a, b, _mm_load_ps((float *)_mask)); +} +static __forceinline __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask) +{ + return vec_sel(a, b, _mm_set1_ps(*(float *)&_mask)); +} + +static __forceinline __m128 toM128(unsigned int x) +{ + return _mm_set1_ps( *(float *)&x ); +} + +static __forceinline __m128 fabsf4(__m128 x) +{ + return _mm_and_ps( x, toM128( 0x7fffffff ) ); +} +/* +union SSE64 +{ + __m128 m128; + struct + { + __m64 m01; + __m64 m23; + } m64; +}; + +static __forceinline __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m64.m01 = _mm_cvttps_pi32(x); + sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2)); + _mm_empty(); + return sse64.m128; +} + +static __forceinline __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m128 = x; + __m128 result =_mm_movelh_ps( + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01), + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23)); + _mm_empty(); + return result; +} +*/ +static __forceinline __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + __m128i result = 
_mm_cvtps_epi32(x); + return (__m128 &)result; +} + +static __forceinline __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + return _mm_cvtepi32_ps((__m128i &)x); +} + +#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) ) +#define vec_sub(a,b) _mm_sub_ps( a, b ) +#define vec_add(a,b) _mm_add_ps( a, b ) +#define vec_mul(a,b) _mm_mul_ps( a, b ) +#define vec_xor(a,b) _mm_xor_ps( a, b ) +#define vec_and(a,b) _mm_and_ps( a, b ) +#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b ) +#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b ) + +#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b ) +#define vec_mergel(a,b) _mm_unpackhi_ps( a, b ) + +#define vec_andc(a,b) _mm_andnot_ps( b, a ) + +#define sqrtf4(x) _mm_sqrt_ps( x ) +#define rsqrtf4(x) _mm_rsqrt_ps( x ) +#define recipf4(x) _mm_rcp_ps( x ) +#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) + static __forceinline __m128 newtonrapson_rsqrt4( const __m128 v ) { -#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) +#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) #define _three _mm_setr_ps(3.f,3.f,3.f,3.f) const __m128 approx = _mm_rsqrt_ps( v ); const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx); return _mm_mul_ps(_mm_mul_ps(_half4, approx), _mm_sub_ps(_three, muls) ); -} - -static inline __m128 acosf4(__m128 x) -{ - __m128 xabs = fabsf4(x); - __m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() ); - __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs)); - - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves (hi amd lo). 
- */ - __m128 xabs2 = _mm_mul_ps(xabs, xabs); - __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); - __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f), - xabs, _mm_set1_ps(0.0066700901f)), - xabs, _mm_set1_ps(-0.0170881256f)), - xabs, _mm_set1_ps( 0.0308918810f)); - __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f), - xabs, _mm_set1_ps(0.0889789874f)), - xabs, _mm_set1_ps(-0.2145988016f)), - xabs, _mm_set1_ps( 1.5707963050f)); - - __m128 result = vec_madd(hi, xabs4, lo); - - // Adjust the result if x is negactive. - return vec_sel( - vec_mul(t1, result), // Positive - vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative - select); -} - -static inline __m128 sinf4(vec_float4 x) -{ - -// -// Common constants used to evaluate sinf4/cosf4/tanf4 -// -#define _SINCOS_CC0 -0.0013602249f -#define _SINCOS_CC1 0.0416566950f -#define _SINCOS_CC2 -0.4999990225f -#define _SINCOS_SC0 -0.0001950727f -#define _SINCOS_SC1 0.0083320758f -#define _SINCOS_SC2 -0.1666665247f - -#define _SINCOS_KC1 1.57079625129f -#define _SINCOS_KC2 7.54978995489e-8f - - vec_float4 xl,xl2,xl3,res; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - vec_int4 q = vec_cts(xl,0); - - // Compute an offset based on the quadrant that the angle falls in - // - vec_int4 offset = _mm_and_ps(q,toM128(0x3)); - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = vec_ctf(q,0); - xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); - - // Compute x^2 and x^3 - // - xl2 = vec_mul(xl,xl); - xl3 = vec_mul(xl2,xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - - vec_float4 cx = - vec_madd( - vec_madd( - 
vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); - vec_float4 sx = - vec_madd( - vec_madd( - vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - res = vec_sel(cx,sx,vec_cmpeq(vec_and(offset, - toM128(0x1)), - _mm_setzero_ps())); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - return vec_sel( - vec_xor(toM128(0x80000000U), res), // Negative - res, // Positive - vec_cmpeq(vec_and(offset,toM128(0x2)),_mm_setzero_ps())); -} - -static inline void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c) -{ - vec_float4 xl,xl2,xl3; - vec_int4 offsetSin, offsetCos; - - // Range reduction using : xl = angle * TwoOverPi; - // - xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); - - // Find the quadrant the angle falls in - // using: q = (int) (ceil(abs(xl))*sign(xl)) - // - //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0); - vec_int4 q = vec_cts(xl,0); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. 
- // - offsetSin = vec_and(q,toM128((int)0x3)); - __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin); - offsetCos = (__m128 &)temp; - - // Remainder in range [-pi/4..pi/4] - // - vec_float4 qf = vec_ctf(q,0); - xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); - - // Compute x^2 and x^3 - // - xl2 = vec_mul(xl,xl); - xl3 = vec_mul(xl2,xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - // - vec_float4 cx = - vec_madd( - vec_madd( - vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); - vec_float4 sx = - vec_madd( - vec_madd( - vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - // - vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,toM128(0x1)),_mm_setzero_ps()); - vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,toM128(0x1)),_mm_setzero_ps()); - *s = vec_sel(cx,sx,sinMask); - *c = vec_sel(cx,sx,cosMask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - // - sinMask = vec_cmpeq(vec_and(offsetSin,toM128(0x2)),_mm_setzero_ps()); - cosMask = vec_cmpeq(vec_and(offsetCos,toM128(0x2)),_mm_setzero_ps()); - - *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask); - *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask); -} - -#include "vecidx_aos.h" -#include "floatInVec.h" -#include "boolInVec.h" - -#ifdef _VECTORMATH_DEBUG -#include -#endif -namespace Vectormath { - -namespace Aos { - -//----------------------------------------------------------------------------- -// Forward Declarations -// - -class Vector3; -class Vector4; -class Point3; -class Quat; -class Matrix3; -class Matrix4; 
-class Transform3; - -// A 3-D vector in array-of-structures format -// -class Vector3 -{ - __m128 mVec128; - -public: - // Default constructor; does no initialization - // - inline Vector3( ) { }; - - // Construct a 3-D vector from x, y, and z elements - // - inline Vector3( float x, float y, float z ); - - // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type) - // - inline Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); - - // Copy elements from a 3-D point into a 3-D vector - // - explicit inline Vector3( const Point3 &pnt ); - - // Set all elements of a 3-D vector to the same scalar value - // - explicit inline Vector3( float scalar ); - - // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type) - // - explicit inline Vector3( const floatInVec &scalar ); - - // Set vector float data in a 3-D vector - // - explicit inline Vector3( __m128 vf4 ); - - // Get vector float data from a 3-D vector - // - inline __m128 get128( ) const; - - // Assign one 3-D vector to another - // - inline Vector3 & operator =( const Vector3 &vec ); - - // Set the x element of a 3-D vector - // - inline Vector3 & setX( float x ); - - // Set the y element of a 3-D vector - // - inline Vector3 & setY( float y ); - - // Set the z element of a 3-D vector - // - inline Vector3 & setZ( float z ); - - // Set the x element of a 3-D vector (scalar data contained in vector data type) - // - inline Vector3 & setX( const floatInVec &x ); - - // Set the y element of a 3-D vector (scalar data contained in vector data type) - // - inline Vector3 & setY( const floatInVec &y ); - - // Set the z element of a 3-D vector (scalar data contained in vector data type) - // - inline Vector3 & setZ( const floatInVec &z ); - - // Get the x element of a 3-D vector - // - inline const floatInVec getX( ) const; - - // Get the y element of a 3-D vector - // - inline const floatInVec getY( ) const; - - // 
Get the z element of a 3-D vector - // - inline const floatInVec getZ( ) const; - - // Set an x, y, or z element of a 3-D vector by index - // - inline Vector3 & setElem( int idx, float value ); - - // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type) - // - inline Vector3 & setElem( int idx, const floatInVec &value ); - - // Get an x, y, or z element of a 3-D vector by index - // - inline const floatInVec getElem( int idx ) const; - - // Subscripting operator to set or get an element - // - inline VecIdx operator []( int idx ); - - // Subscripting operator to get an element - // - inline const floatInVec operator []( int idx ) const; - - // Add two 3-D vectors - // - inline const Vector3 operator +( const Vector3 &vec ) const; - - // Subtract a 3-D vector from another 3-D vector - // - inline const Vector3 operator -( const Vector3 &vec ) const; - - // Add a 3-D vector to a 3-D point - // - inline const Point3 operator +( const Point3 &pnt ) const; - - // Multiply a 3-D vector by a scalar - // - inline const Vector3 operator *( float scalar ) const; - - // Divide a 3-D vector by a scalar - // - inline const Vector3 operator /( float scalar ) const; - - // Multiply a 3-D vector by a scalar (scalar data contained in vector data type) - // - inline const Vector3 operator *( const floatInVec &scalar ) const; - - // Divide a 3-D vector by a scalar (scalar data contained in vector data type) - // - inline const Vector3 operator /( const floatInVec &scalar ) const; - - // Perform compound assignment and addition with a 3-D vector - // - inline Vector3 & operator +=( const Vector3 &vec ); - - // Perform compound assignment and subtraction by a 3-D vector - // - inline Vector3 & operator -=( const Vector3 &vec ); - - // Perform compound assignment and multiplication by a scalar - // - inline Vector3 & operator *=( float scalar ); - - // Perform compound assignment and division by a scalar - // - inline Vector3 & operator /=( 
float scalar ); - - // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) - // - inline Vector3 & operator *=( const floatInVec &scalar ); - - // Perform compound assignment and division by a scalar (scalar data contained in vector data type) - // - inline Vector3 & operator /=( const floatInVec &scalar ); - - // Negate all elements of a 3-D vector - // - inline const Vector3 operator -( ) const; - - // Construct x axis - // - static inline const Vector3 xAxis( ); - - // Construct y axis - // - static inline const Vector3 yAxis( ); - - // Construct z axis - // - static inline const Vector3 zAxis( ); - -}; - -// Multiply a 3-D vector by a scalar -// -inline const Vector3 operator *( float scalar, const Vector3 &vec ); - -// Multiply a 3-D vector by a scalar (scalar data contained in vector data type) -// -inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ); - -// Multiply two 3-D vectors per element -// -inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ); - -// Divide two 3-D vectors per element -// NOTE: -// Floating-point behavior matches standard library function divf4. -// -inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ); - -// Compute the reciprocal of a 3-D vector per element -// NOTE: -// Floating-point behavior matches standard library function recipf4. 
-// -inline const Vector3 recipPerElem( const Vector3 &vec ); - -// Compute the absolute value of a 3-D vector per element -// -inline const Vector3 absPerElem( const Vector3 &vec ); - -// Copy sign from one 3-D vector to another, per element -// -inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ); - -// Maximum of two 3-D vectors per element -// -inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ); - -// Minimum of two 3-D vectors per element -// -inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ); - -// Maximum element of a 3-D vector -// -inline const floatInVec maxElem( const Vector3 &vec ); - -// Minimum element of a 3-D vector -// -inline const floatInVec minElem( const Vector3 &vec ); - -// Compute the sum of all elements of a 3-D vector -// -inline const floatInVec sum( const Vector3 &vec ); - -// Compute the dot product of two 3-D vectors -// -inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ); - -// Compute the square of the length of a 3-D vector -// -inline const floatInVec lengthSqr( const Vector3 &vec ); - -// Compute the length of a 3-D vector -// -inline const floatInVec length( const Vector3 &vec ); - -// Normalize a 3-D vector -// NOTE: -// The result is unpredictable when all elements of vec are at or near zero. -// -inline const Vector3 normalize( const Vector3 &vec ); - -// Compute cross product of two 3-D vectors -// -inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ); - -// Outer product of two 3-D vectors -// -inline const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 ); - -// Pre-multiply a row vector by a 3x3 matrix -// NOTE: -// Slower than column post-multiply. 
-// -inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ); - -// Cross-product matrix of a 3-D vector -// -inline const Matrix3 crossMatrix( const Vector3 &vec ); - -// Create cross-product matrix and multiply -// NOTE: -// Faster than separately creating a cross-product matrix and multiplying. -// -inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ); - -// Linear interpolation between two 3-D vectors -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ); - -// Linear interpolation between two 3-D vectors (scalar data contained in vector data type) -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ); - -// Spherical linear interpolation between two 3-D vectors -// NOTE: -// The result is unpredictable if the vectors point in opposite directions. -// Does not clamp t between 0 and 1. -// -inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); - -// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type) -// NOTE: -// The result is unpredictable if the vectors point in opposite directions. -// Does not clamp t between 0 and 1. -// -inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); - -// Conditionally select between two 3-D vectors -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ); - -// Conditionally select between two 3-D vectors (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. 
-// -inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ); - -// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word -// -inline void storeXYZ( const Vector3 &vec, __m128 * quad ); - -// Load four three-float 3-D vectors, stored in three quadwords -// -inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ); - -// Store four 3-D vectors in three quadwords -// -inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ); - -// Store eight 3-D vectors as half-floats -// -inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 3-D vector -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Vector3 &vec ); - -// Print a 3-D vector and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. 
-// -inline void print( const Vector3 &vec, const char * name ); - -#endif - -// A 4-D vector in array-of-structures format -// -class Vector4 -{ - __m128 mVec128; - -public: - // Default constructor; does no initialization - // - inline Vector4( ) { }; - - // Construct a 4-D vector from x, y, z, and w elements - // - inline Vector4( float x, float y, float z, float w ); - - // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type) - // - inline Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); - - // Construct a 4-D vector from a 3-D vector and a scalar - // - inline Vector4( const Vector3 &xyz, float w ); - - // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type) - // - inline Vector4( const Vector3 &xyz, const floatInVec &w ); - - // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 - // - explicit inline Vector4( const Vector3 &vec ); - - // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 - // - explicit inline Vector4( const Point3 &pnt ); - - // Copy elements from a quaternion into a 4-D vector - // - explicit inline Vector4( const Quat &quat ); - - // Set all elements of a 4-D vector to the same scalar value - // - explicit inline Vector4( float scalar ); - - // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type) - // - explicit inline Vector4( const floatInVec &scalar ); - - // Set vector float data in a 4-D vector - // - explicit inline Vector4( __m128 vf4 ); - - // Get vector float data from a 4-D vector - // - inline __m128 get128( ) const; - - // Assign one 4-D vector to another - // - inline Vector4 & operator =( const Vector4 &vec ); - - // Set the x, y, and z elements of a 4-D vector - // NOTE: - // This function does not change the w element. 
- // - inline Vector4 & setXYZ( const Vector3 &vec ); - - // Get the x, y, and z elements of a 4-D vector - // - inline const Vector3 getXYZ( ) const; - - // Set the x element of a 4-D vector - // - inline Vector4 & setX( float x ); - - // Set the y element of a 4-D vector - // - inline Vector4 & setY( float y ); - - // Set the z element of a 4-D vector - // - inline Vector4 & setZ( float z ); - - // Set the w element of a 4-D vector - // - inline Vector4 & setW( float w ); - - // Set the x element of a 4-D vector (scalar data contained in vector data type) - // - inline Vector4 & setX( const floatInVec &x ); - - // Set the y element of a 4-D vector (scalar data contained in vector data type) - // - inline Vector4 & setY( const floatInVec &y ); - - // Set the z element of a 4-D vector (scalar data contained in vector data type) - // - inline Vector4 & setZ( const floatInVec &z ); - - // Set the w element of a 4-D vector (scalar data contained in vector data type) - // - inline Vector4 & setW( const floatInVec &w ); - - // Get the x element of a 4-D vector - // - inline const floatInVec getX( ) const; - - // Get the y element of a 4-D vector - // - inline const floatInVec getY( ) const; - - // Get the z element of a 4-D vector - // - inline const floatInVec getZ( ) const; - - // Get the w element of a 4-D vector - // - inline const floatInVec getW( ) const; - - // Set an x, y, z, or w element of a 4-D vector by index - // - inline Vector4 & setElem( int idx, float value ); - - // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type) - // - inline Vector4 & setElem( int idx, const floatInVec &value ); - - // Get an x, y, z, or w element of a 4-D vector by index - // - inline const floatInVec getElem( int idx ) const; - - // Subscripting operator to set or get an element - // - inline VecIdx operator []( int idx ); - - // Subscripting operator to get an element - // - inline const floatInVec operator []( int idx ) const; - - 
// Add two 4-D vectors - // - inline const Vector4 operator +( const Vector4 &vec ) const; - - // Subtract a 4-D vector from another 4-D vector - // - inline const Vector4 operator -( const Vector4 &vec ) const; - - // Multiply a 4-D vector by a scalar - // - inline const Vector4 operator *( float scalar ) const; - - // Divide a 4-D vector by a scalar - // - inline const Vector4 operator /( float scalar ) const; - - // Multiply a 4-D vector by a scalar (scalar data contained in vector data type) - // - inline const Vector4 operator *( const floatInVec &scalar ) const; - - // Divide a 4-D vector by a scalar (scalar data contained in vector data type) - // - inline const Vector4 operator /( const floatInVec &scalar ) const; - - // Perform compound assignment and addition with a 4-D vector - // - inline Vector4 & operator +=( const Vector4 &vec ); - - // Perform compound assignment and subtraction by a 4-D vector - // - inline Vector4 & operator -=( const Vector4 &vec ); - - // Perform compound assignment and multiplication by a scalar - // - inline Vector4 & operator *=( float scalar ); - - // Perform compound assignment and division by a scalar - // - inline Vector4 & operator /=( float scalar ); - - // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) - // - inline Vector4 & operator *=( const floatInVec &scalar ); - - // Perform compound assignment and division by a scalar (scalar data contained in vector data type) - // - inline Vector4 & operator /=( const floatInVec &scalar ); - - // Negate all elements of a 4-D vector - // - inline const Vector4 operator -( ) const; - - // Construct x axis - // - static inline const Vector4 xAxis( ); - - // Construct y axis - // - static inline const Vector4 yAxis( ); - - // Construct z axis - // - static inline const Vector4 zAxis( ); - - // Construct w axis - // - static inline const Vector4 wAxis( ); - -}; - -// Multiply a 4-D vector by a scalar -// -inline const Vector4 
operator *( float scalar, const Vector4 &vec ); - -// Multiply a 4-D vector by a scalar (scalar data contained in vector data type) -// -inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ); - -// Multiply two 4-D vectors per element -// -inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ); - -// Divide two 4-D vectors per element -// NOTE: -// Floating-point behavior matches standard library function divf4. -// -inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ); - -// Compute the reciprocal of a 4-D vector per element -// NOTE: -// Floating-point behavior matches standard library function recipf4. -// -inline const Vector4 recipPerElem( const Vector4 &vec ); - -// Compute the absolute value of a 4-D vector per element -// -inline const Vector4 absPerElem( const Vector4 &vec ); - -// Copy sign from one 4-D vector to another, per element -// -inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ); - -// Maximum of two 4-D vectors per element -// -inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ); - -// Minimum of two 4-D vectors per element -// -inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ); - -// Maximum element of a 4-D vector -// -inline const floatInVec maxElem( const Vector4 &vec ); - -// Minimum element of a 4-D vector -// -inline const floatInVec minElem( const Vector4 &vec ); - -// Compute the sum of all elements of a 4-D vector -// -inline const floatInVec sum( const Vector4 &vec ); - -// Compute the dot product of two 4-D vectors -// -inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ); - -// Compute the square of the length of a 4-D vector -// -inline const floatInVec lengthSqr( const Vector4 &vec ); - -// Compute the length of a 4-D vector -// -inline const floatInVec length( const Vector4 &vec ); - -// Normalize a 4-D vector -// NOTE: -// The result is unpredictable when all 
elements of vec are at or near zero. -// -inline const Vector4 normalize( const Vector4 &vec ); - -// Outer product of two 4-D vectors -// -inline const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 ); - -// Linear interpolation between two 4-D vectors -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ); - -// Linear interpolation between two 4-D vectors (scalar data contained in vector data type) -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ); - -// Spherical linear interpolation between two 4-D vectors -// NOTE: -// The result is unpredictable if the vectors point in opposite directions. -// Does not clamp t between 0 and 1. -// -inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); - -// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type) -// NOTE: -// The result is unpredictable if the vectors point in opposite directions. -// Does not clamp t between 0 and 1. -// -inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); - -// Conditionally select between two 4-D vectors -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ); - -// Conditionally select between two 4-D vectors (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. 
-// -inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ); - -// Store four 4-D vectors as half-floats -// -inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 4-D vector -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Vector4 &vec ); - -// Print a 4-D vector and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Vector4 &vec, const char * name ); - -#endif - -// A 3-D point in array-of-structures format -// -class Point3 -{ - __m128 mVec128; - -public: - // Default constructor; does no initialization - // - inline Point3( ) { }; - - // Construct a 3-D point from x, y, and z elements - // - inline Point3( float x, float y, float z ); - - // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type) - // - inline Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); - - // Copy elements from a 3-D vector into a 3-D point - // - explicit inline Point3( const Vector3 &vec ); - - // Set all elements of a 3-D point to the same scalar value - // - explicit inline Point3( float scalar ); - - // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type) - // - explicit inline Point3( const floatInVec &scalar ); - - // Set vector float data in a 3-D point - // - explicit inline Point3( __m128 vf4 ); - - // Get vector float data from a 3-D point - // - inline __m128 get128( ) const; - - // Assign one 3-D point to another - // - inline Point3 & operator =( const Point3 &pnt ); - - // Set the x element of a 3-D point - // - inline Point3 & setX( float x ); - - // Set the y element of a 3-D point - // - inline Point3 & setY( float y ); - - // Set the z element of a 3-D point - 
// - inline Point3 & setZ( float z ); - - // Set the x element of a 3-D point (scalar data contained in vector data type) - // - inline Point3 & setX( const floatInVec &x ); - - // Set the y element of a 3-D point (scalar data contained in vector data type) - // - inline Point3 & setY( const floatInVec &y ); - - // Set the z element of a 3-D point (scalar data contained in vector data type) - // - inline Point3 & setZ( const floatInVec &z ); - - // Get the x element of a 3-D point - // - inline const floatInVec getX( ) const; - - // Get the y element of a 3-D point - // - inline const floatInVec getY( ) const; - - // Get the z element of a 3-D point - // - inline const floatInVec getZ( ) const; - - // Set an x, y, or z element of a 3-D point by index - // - inline Point3 & setElem( int idx, float value ); - - // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type) - // - inline Point3 & setElem( int idx, const floatInVec &value ); - - // Get an x, y, or z element of a 3-D point by index - // - inline const floatInVec getElem( int idx ) const; - - // Subscripting operator to set or get an element - // - inline VecIdx operator []( int idx ); - - // Subscripting operator to get an element - // - inline const floatInVec operator []( int idx ) const; - - // Subtract a 3-D point from another 3-D point - // - inline const Vector3 operator -( const Point3 &pnt ) const; - - // Add a 3-D point to a 3-D vector - // - inline const Point3 operator +( const Vector3 &vec ) const; - - // Subtract a 3-D vector from a 3-D point - // - inline const Point3 operator -( const Vector3 &vec ) const; - - // Perform compound assignment and addition with a 3-D vector - // - inline Point3 & operator +=( const Vector3 &vec ); - - // Perform compound assignment and subtraction by a 3-D vector - // - inline Point3 & operator -=( const Vector3 &vec ); - -}; - -// Multiply two 3-D points per element -// -inline const Point3 mulPerElem( const Point3 &pnt0, 
const Point3 &pnt1 ); - -// Divide two 3-D points per element -// NOTE: -// Floating-point behavior matches standard library function divf4. -// -inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ); - -// Compute the reciprocal of a 3-D point per element -// NOTE: -// Floating-point behavior matches standard library function recipf4. -// -inline const Point3 recipPerElem( const Point3 &pnt ); - -// Compute the absolute value of a 3-D point per element -// -inline const Point3 absPerElem( const Point3 &pnt ); - -// Copy sign from one 3-D point to another, per element -// -inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ); - -// Maximum of two 3-D points per element -// -inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ); - -// Minimum of two 3-D points per element -// -inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ); - -// Maximum element of a 3-D point -// -inline const floatInVec maxElem( const Point3 &pnt ); - -// Minimum element of a 3-D point -// -inline const floatInVec minElem( const Point3 &pnt ); - -// Compute the sum of all elements of a 3-D point -// -inline const floatInVec sum( const Point3 &pnt ); - -// Apply uniform scale to a 3-D point -// -inline const Point3 scale( const Point3 &pnt, float scaleVal ); - -// Apply uniform scale to a 3-D point (scalar data contained in vector data type) -// -inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ); - -// Apply non-uniform scale to a 3-D point -// -inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ); - -// Scalar projection of a 3-D point on a unit-length 3-D vector -// -inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ); - -// Compute the square of the distance of a 3-D point from the coordinate-system origin -// -inline const floatInVec distSqrFromOrigin( const Point3 &pnt ); - -// Compute the distance of a 3-D point from the 
coordinate-system origin -// -inline const floatInVec distFromOrigin( const Point3 &pnt ); - -// Compute the square of the distance between two 3-D points -// -inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ); - -// Compute the distance between two 3-D points -// -inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ); - -// Linear interpolation between two 3-D points -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ); - -// Linear interpolation between two 3-D points (scalar data contained in vector data type) -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ); - -// Conditionally select between two 3-D points -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ); - -// Conditionally select between two 3-D points (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. 
-// -inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ); - -// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word -// -inline void storeXYZ( const Point3 &pnt, __m128 * quad ); - -// Load four three-float 3-D points, stored in three quadwords -// -inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ); - -// Store four 3-D points in three quadwords -// -inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ); - -// Store eight 3-D points as half-floats -// -inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 3-D point -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Point3 &pnt ); - -// Print a 3-D point and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. 
-// -inline void print( const Point3 &pnt, const char * name ); - -#endif - -// A quaternion in array-of-structures format -// -class Quat -{ - __m128 mVec128; - -public: - // Default constructor; does no initialization - // - inline Quat( ) { }; - - // Construct a quaternion from x, y, z, and w elements - // - inline Quat( float x, float y, float z, float w ); - - // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type) - // - inline Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); - - // Construct a quaternion from a 3-D vector and a scalar - // - inline Quat( const Vector3 &xyz, float w ); - - // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type) - // - inline Quat( const Vector3 &xyz, const floatInVec &w ); - - // Copy elements from a 4-D vector into a quaternion - // - explicit inline Quat( const Vector4 &vec ); - - // Convert a rotation matrix to a unit-length quaternion - // - explicit inline Quat( const Matrix3 & rotMat ); - - // Set all elements of a quaternion to the same scalar value - // - explicit inline Quat( float scalar ); - - // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type) - // - explicit inline Quat( const floatInVec &scalar ); - - // Set vector float data in a quaternion - // - explicit inline Quat( __m128 vf4 ); - - // Get vector float data from a quaternion - // - inline __m128 get128( ) const; - - // Assign one quaternion to another - // - inline Quat & operator =( const Quat &quat ); - - // Set the x, y, and z elements of a quaternion - // NOTE: - // This function does not change the w element. 
- // - inline Quat & setXYZ( const Vector3 &vec ); - - // Get the x, y, and z elements of a quaternion - // - inline const Vector3 getXYZ( ) const; - - // Set the x element of a quaternion - // - inline Quat & setX( float x ); - - // Set the y element of a quaternion - // - inline Quat & setY( float y ); - - // Set the z element of a quaternion - // - inline Quat & setZ( float z ); - - // Set the w element of a quaternion - // - inline Quat & setW( float w ); - - // Set the x element of a quaternion (scalar data contained in vector data type) - // - inline Quat & setX( const floatInVec &x ); - - // Set the y element of a quaternion (scalar data contained in vector data type) - // - inline Quat & setY( const floatInVec &y ); - - // Set the z element of a quaternion (scalar data contained in vector data type) - // - inline Quat & setZ( const floatInVec &z ); - - // Set the w element of a quaternion (scalar data contained in vector data type) - // - inline Quat & setW( const floatInVec &w ); - - // Get the x element of a quaternion - // - inline const floatInVec getX( ) const; - - // Get the y element of a quaternion - // - inline const floatInVec getY( ) const; - - // Get the z element of a quaternion - // - inline const floatInVec getZ( ) const; - - // Get the w element of a quaternion - // - inline const floatInVec getW( ) const; - - // Set an x, y, z, or w element of a quaternion by index - // - inline Quat & setElem( int idx, float value ); - - // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type) - // - inline Quat & setElem( int idx, const floatInVec &value ); - - // Get an x, y, z, or w element of a quaternion by index - // - inline const floatInVec getElem( int idx ) const; - - // Subscripting operator to set or get an element - // - inline VecIdx operator []( int idx ); - - // Subscripting operator to get an element - // - inline const floatInVec operator []( int idx ) const; - - // Add two quaternions - // - 
inline const Quat operator +( const Quat &quat ) const; - - // Subtract a quaternion from another quaternion - // - inline const Quat operator -( const Quat &quat ) const; - - // Multiply two quaternions - // - inline const Quat operator *( const Quat &quat ) const; - - // Multiply a quaternion by a scalar - // - inline const Quat operator *( float scalar ) const; - - // Divide a quaternion by a scalar - // - inline const Quat operator /( float scalar ) const; - - // Multiply a quaternion by a scalar (scalar data contained in vector data type) - // - inline const Quat operator *( const floatInVec &scalar ) const; - - // Divide a quaternion by a scalar (scalar data contained in vector data type) - // - inline const Quat operator /( const floatInVec &scalar ) const; - - // Perform compound assignment and addition with a quaternion - // - inline Quat & operator +=( const Quat &quat ); - - // Perform compound assignment and subtraction by a quaternion - // - inline Quat & operator -=( const Quat &quat ); - - // Perform compound assignment and multiplication by a quaternion - // - inline Quat & operator *=( const Quat &quat ); - - // Perform compound assignment and multiplication by a scalar - // - inline Quat & operator *=( float scalar ); - - // Perform compound assignment and division by a scalar - // - inline Quat & operator /=( float scalar ); - - // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) - // - inline Quat & operator *=( const floatInVec &scalar ); - - // Perform compound assignment and division by a scalar (scalar data contained in vector data type) - // - inline Quat & operator /=( const floatInVec &scalar ); - - // Negate all elements of a quaternion - // - inline const Quat operator -( ) const; - - // Construct an identity quaternion - // - static inline const Quat identity( ); - - // Construct a quaternion to rotate between two unit-length 3-D vectors - // NOTE: - // The result is unpredictable 
if unitVec0 and unitVec1 point in opposite directions. - // - static inline const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ); - - // Construct a quaternion to rotate around a unit-length 3-D vector - // - static inline const Quat rotation( float radians, const Vector3 &unitVec ); - - // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type) - // - static inline const Quat rotation( const floatInVec &radians, const Vector3 &unitVec ); - - // Construct a quaternion to rotate around the x axis - // - static inline const Quat rotationX( float radians ); - - // Construct a quaternion to rotate around the y axis - // - static inline const Quat rotationY( float radians ); - - // Construct a quaternion to rotate around the z axis - // - static inline const Quat rotationZ( float radians ); - - // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type) - // - static inline const Quat rotationX( const floatInVec &radians ); - - // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type) - // - static inline const Quat rotationY( const floatInVec &radians ); - - // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type) - // - static inline const Quat rotationZ( const floatInVec &radians ); - -}; - -// Multiply a quaternion by a scalar -// -inline const Quat operator *( float scalar, const Quat &quat ); - -// Multiply a quaternion by a scalar (scalar data contained in vector data type) -// -inline const Quat operator *( const floatInVec &scalar, const Quat &quat ); - -// Compute the conjugate of a quaternion -// -inline const Quat conj( const Quat &quat ); - -// Use a unit-length quaternion to rotate a 3-D vector -// -inline const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec ); - -// Compute the dot product of two quaternions -// -inline const floatInVec dot( const Quat 
&quat0, const Quat &quat1 ); - -// Compute the norm of a quaternion -// -inline const floatInVec norm( const Quat &quat ); - -// Compute the length of a quaternion -// -inline const floatInVec length( const Quat &quat ); - -// Normalize a quaternion -// NOTE: -// The result is unpredictable when all elements of quat are at or near zero. -// -inline const Quat normalize( const Quat &quat ); - -// Linear interpolation between two quaternions -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ); - -// Linear interpolation between two quaternions (scalar data contained in vector data type) -// NOTE: -// Does not clamp t between 0 and 1. -// -inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ); - -// Spherical linear interpolation between two quaternions -// NOTE: -// Interpolates along the shortest path between orientations. -// Does not clamp t between 0 and 1. -// -inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ); - -// Spherical linear interpolation between two quaternions (scalar data contained in vector data type) -// NOTE: -// Interpolates along the shortest path between orientations. -// Does not clamp t between 0 and 1. -// -inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ); - -// Spherical quadrangle interpolation -// -inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); - -// Spherical quadrangle interpolation (scalar data contained in vector data type) -// -inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); - -// Conditionally select between two quaternions -// NOTE: -// This function uses a conditional select instruction to avoid a branch. 
-// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ); - -// Conditionally select between two quaternions (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// -inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ); - -#ifdef _VECTORMATH_DEBUG - -// Print a quaternion -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Quat &quat ); - -// Print a quaternion and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Quat &quat, const char * name ); - -#endif - -// A 3x3 matrix in array-of-structures format -// -class Matrix3 -{ - Vector3 mCol0; - Vector3 mCol1; - Vector3 mCol2; - -public: - // Default constructor; does no initialization - // - inline Matrix3( ) { }; - - // Copy a 3x3 matrix - // - inline Matrix3( const Matrix3 & mat ); - - // Construct a 3x3 matrix containing the specified columns - // - inline Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 ); - - // Construct a 3x3 rotation matrix from a unit-length quaternion - // - explicit inline Matrix3( const Quat &unitQuat ); - - // Set all elements of a 3x3 matrix to the same scalar value - // - explicit inline Matrix3( float scalar ); - - // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type) - // - explicit inline Matrix3( const floatInVec &scalar ); - - // Assign one 3x3 matrix to another - // - inline Matrix3 & operator =( const Matrix3 & mat ); - - // Set column 0 of a 3x3 matrix - // - inline Matrix3 & setCol0( const Vector3 &col0 ); - - // Set column 1 of a 3x3 matrix - // - inline Matrix3 & setCol1( 
const Vector3 &col1 ); - - // Set column 2 of a 3x3 matrix - // - inline Matrix3 & setCol2( const Vector3 &col2 ); - - // Get column 0 of a 3x3 matrix - // - inline const Vector3 getCol0( ) const; - - // Get column 1 of a 3x3 matrix - // - inline const Vector3 getCol1( ) const; - - // Get column 2 of a 3x3 matrix - // - inline const Vector3 getCol2( ) const; - - // Set the column of a 3x3 matrix referred to by the specified index - // - inline Matrix3 & setCol( int col, const Vector3 &vec ); - - // Set the row of a 3x3 matrix referred to by the specified index - // - inline Matrix3 & setRow( int row, const Vector3 &vec ); - - // Get the column of a 3x3 matrix referred to by the specified index - // - inline const Vector3 getCol( int col ) const; - - // Get the row of a 3x3 matrix referred to by the specified index - // - inline const Vector3 getRow( int row ) const; - - // Subscripting operator to set or get a column - // - inline Vector3 & operator []( int col ); - - // Subscripting operator to get a column - // - inline const Vector3 operator []( int col ) const; - - // Set the element of a 3x3 matrix referred to by column and row indices - // - inline Matrix3 & setElem( int col, int row, float val ); - - // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type) - // - inline Matrix3 & setElem( int col, int row, const floatInVec &val ); - - // Get the element of a 3x3 matrix referred to by column and row indices - // - inline const floatInVec getElem( int col, int row ) const; - - // Add two 3x3 matrices - // - inline const Matrix3 operator +( const Matrix3 & mat ) const; - - // Subtract a 3x3 matrix from another 3x3 matrix - // - inline const Matrix3 operator -( const Matrix3 & mat ) const; - - // Negate all elements of a 3x3 matrix - // - inline const Matrix3 operator -( ) const; - - // Multiply a 3x3 matrix by a scalar - // - inline const Matrix3 operator *( float scalar ) const; - - // Multiply a 3x3 
matrix by a scalar (scalar data contained in vector data type) - // - inline const Matrix3 operator *( const floatInVec &scalar ) const; - - // Multiply a 3x3 matrix by a 3-D vector - // - inline const Vector3 operator *( const Vector3 &vec ) const; - - // Multiply two 3x3 matrices - // - inline const Matrix3 operator *( const Matrix3 & mat ) const; - - // Perform compound assignment and addition with a 3x3 matrix - // - inline Matrix3 & operator +=( const Matrix3 & mat ); - - // Perform compound assignment and subtraction by a 3x3 matrix - // - inline Matrix3 & operator -=( const Matrix3 & mat ); - - // Perform compound assignment and multiplication by a scalar - // - inline Matrix3 & operator *=( float scalar ); - - // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) - // - inline Matrix3 & operator *=( const floatInVec &scalar ); - - // Perform compound assignment and multiplication by a 3x3 matrix - // - inline Matrix3 & operator *=( const Matrix3 & mat ); - - // Construct an identity 3x3 matrix - // - static inline const Matrix3 identity( ); - - // Construct a 3x3 matrix to rotate around the x axis - // - static inline const Matrix3 rotationX( float radians ); - - // Construct a 3x3 matrix to rotate around the y axis - // - static inline const Matrix3 rotationY( float radians ); - - // Construct a 3x3 matrix to rotate around the z axis - // - static inline const Matrix3 rotationZ( float radians ); - - // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type) - // - static inline const Matrix3 rotationX( const floatInVec &radians ); - - // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type) - // - static inline const Matrix3 rotationY( const floatInVec &radians ); - - // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type) - // - static inline const Matrix3 rotationZ( const floatInVec 
&radians ); - - // Construct a 3x3 matrix to rotate around the x, y, and z axes - // - static inline const Matrix3 rotationZYX( const Vector3 &radiansXYZ ); - - // Construct a 3x3 matrix to rotate around a unit-length 3-D vector - // - static inline const Matrix3 rotation( float radians, const Vector3 &unitVec ); - - // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) - // - static inline const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec ); - - // Construct a rotation matrix from a unit-length quaternion - // - static inline const Matrix3 rotation( const Quat &unitQuat ); - - // Construct a 3x3 matrix to perform scaling - // - static inline const Matrix3 scale( const Vector3 &scaleVec ); - -}; -// Multiply a 3x3 matrix by a scalar -// -inline const Matrix3 operator *( float scalar, const Matrix3 & mat ); - -// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) -// -inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ); - -// Append (post-multiply) a scale transformation to a 3x3 matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. -// -inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ); - -// Prepend (pre-multiply) a scale transformation to a 3x3 matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. 
-// -inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ); - -// Multiply two 3x3 matrices per element -// -inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); - -// Compute the absolute value of a 3x3 matrix per element -// -inline const Matrix3 absPerElem( const Matrix3 & mat ); - -// Transpose of a 3x3 matrix -// -inline const Matrix3 transpose( const Matrix3 & mat ); - -// Compute the inverse of a 3x3 matrix -// NOTE: -// Result is unpredictable when the determinant of mat is equal to or near 0. -// -inline const Matrix3 inverse( const Matrix3 & mat ); - -// Determinant of a 3x3 matrix -// -inline const floatInVec determinant( const Matrix3 & mat ); - -// Conditionally select between two 3x3 matrices -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); - -// Conditionally select between two 3x3 matrices (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// -inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 3x3 matrix -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Matrix3 & mat ); - -// Print a 3x3 matrix and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. 
-// -inline void print( const Matrix3 & mat, const char * name ); - -#endif - -// A 4x4 matrix in array-of-structures format -// -class Matrix4 -{ - Vector4 mCol0; - Vector4 mCol1; - Vector4 mCol2; - Vector4 mCol3; - -public: - // Default constructor; does no initialization - // - inline Matrix4( ) { }; - - // Copy a 4x4 matrix - // - inline Matrix4( const Matrix4 & mat ); - - // Construct a 4x4 matrix containing the specified columns - // - inline Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 ); - - // Construct a 4x4 matrix from a 3x4 transformation matrix - // - explicit inline Matrix4( const Transform3 & mat ); - - // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector - // - inline Matrix4( const Matrix3 & mat, const Vector3 &translateVec ); - - // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector - // - inline Matrix4( const Quat &unitQuat, const Vector3 &translateVec ); - - // Set all elements of a 4x4 matrix to the same scalar value - // - explicit inline Matrix4( float scalar ); - - // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type) - // - explicit inline Matrix4( const floatInVec &scalar ); - - // Assign one 4x4 matrix to another - // - inline Matrix4 & operator =( const Matrix4 & mat ); - - // Set the upper-left 3x3 submatrix - // NOTE: - // This function does not change the bottom row elements. - // - inline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); - - // Get the upper-left 3x3 submatrix of a 4x4 matrix - // - inline const Matrix3 getUpper3x3( ) const; - - // Set translation component - // NOTE: - // This function does not change the bottom row elements. 
- // - inline Matrix4 & setTranslation( const Vector3 &translateVec ); - - // Get the translation component of a 4x4 matrix - // - inline const Vector3 getTranslation( ) const; - - // Set column 0 of a 4x4 matrix - // - inline Matrix4 & setCol0( const Vector4 &col0 ); - - // Set column 1 of a 4x4 matrix - // - inline Matrix4 & setCol1( const Vector4 &col1 ); - - // Set column 2 of a 4x4 matrix - // - inline Matrix4 & setCol2( const Vector4 &col2 ); - - // Set column 3 of a 4x4 matrix - // - inline Matrix4 & setCol3( const Vector4 &col3 ); - - // Get column 0 of a 4x4 matrix - // - inline const Vector4 getCol0( ) const; - - // Get column 1 of a 4x4 matrix - // - inline const Vector4 getCol1( ) const; - - // Get column 2 of a 4x4 matrix - // - inline const Vector4 getCol2( ) const; - - // Get column 3 of a 4x4 matrix - // - inline const Vector4 getCol3( ) const; - - // Set the column of a 4x4 matrix referred to by the specified index - // - inline Matrix4 & setCol( int col, const Vector4 &vec ); - - // Set the row of a 4x4 matrix referred to by the specified index - // - inline Matrix4 & setRow( int row, const Vector4 &vec ); - - // Get the column of a 4x4 matrix referred to by the specified index - // - inline const Vector4 getCol( int col ) const; - - // Get the row of a 4x4 matrix referred to by the specified index - // - inline const Vector4 getRow( int row ) const; - - // Subscripting operator to set or get a column - // - inline Vector4 & operator []( int col ); - - // Subscripting operator to get a column - // - inline const Vector4 operator []( int col ) const; - - // Set the element of a 4x4 matrix referred to by column and row indices - // - inline Matrix4 & setElem( int col, int row, float val ); - - // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type) - // - inline Matrix4 & setElem( int col, int row, const floatInVec &val ); - - // Get the element of a 4x4 matrix referred to by column and 
row indices - // - inline const floatInVec getElem( int col, int row ) const; - - // Add two 4x4 matrices - // - inline const Matrix4 operator +( const Matrix4 & mat ) const; - - // Subtract a 4x4 matrix from another 4x4 matrix - // - inline const Matrix4 operator -( const Matrix4 & mat ) const; - - // Negate all elements of a 4x4 matrix - // - inline const Matrix4 operator -( ) const; - - // Multiply a 4x4 matrix by a scalar - // - inline const Matrix4 operator *( float scalar ) const; - - // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) - // - inline const Matrix4 operator *( const floatInVec &scalar ) const; - - // Multiply a 4x4 matrix by a 4-D vector - // - inline const Vector4 operator *( const Vector4 &vec ) const; - - // Multiply a 4x4 matrix by a 3-D vector - // - inline const Vector4 operator *( const Vector3 &vec ) const; - - // Multiply a 4x4 matrix by a 3-D point - // - inline const Vector4 operator *( const Point3 &pnt ) const; - - // Multiply two 4x4 matrices - // - inline const Matrix4 operator *( const Matrix4 & mat ) const; - - // Multiply a 4x4 matrix by a 3x4 transformation matrix - // - inline const Matrix4 operator *( const Transform3 & tfrm ) const; - - // Perform compound assignment and addition with a 4x4 matrix - // - inline Matrix4 & operator +=( const Matrix4 & mat ); - - // Perform compound assignment and subtraction by a 4x4 matrix - // - inline Matrix4 & operator -=( const Matrix4 & mat ); - - // Perform compound assignment and multiplication by a scalar - // - inline Matrix4 & operator *=( float scalar ); - - // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) - // - inline Matrix4 & operator *=( const floatInVec &scalar ); - - // Perform compound assignment and multiplication by a 4x4 matrix - // - inline Matrix4 & operator *=( const Matrix4 & mat ); - - // Perform compound assignment and multiplication by a 3x4 transformation matrix - // - inline 
Matrix4 & operator *=( const Transform3 & tfrm ); - - // Construct an identity 4x4 matrix - // - static inline const Matrix4 identity( ); - - // Construct a 4x4 matrix to rotate around the x axis - // - static inline const Matrix4 rotationX( float radians ); - - // Construct a 4x4 matrix to rotate around the y axis - // - static inline const Matrix4 rotationY( float radians ); - - // Construct a 4x4 matrix to rotate around the z axis - // - static inline const Matrix4 rotationZ( float radians ); - - // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type) - // - static inline const Matrix4 rotationX( const floatInVec &radians ); - - // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type) - // - static inline const Matrix4 rotationY( const floatInVec &radians ); - - // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type) - // - static inline const Matrix4 rotationZ( const floatInVec &radians ); - - // Construct a 4x4 matrix to rotate around the x, y, and z axes - // - static inline const Matrix4 rotationZYX( const Vector3 &radiansXYZ ); - - // Construct a 4x4 matrix to rotate around a unit-length 3-D vector - // - static inline const Matrix4 rotation( float radians, const Vector3 &unitVec ); - - // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) - // - static inline const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec ); - - // Construct a rotation matrix from a unit-length quaternion - // - static inline const Matrix4 rotation( const Quat &unitQuat ); - - // Construct a 4x4 matrix to perform scaling - // - static inline const Matrix4 scale( const Vector3 &scaleVec ); - - // Construct a 4x4 matrix to perform translation - // - static inline const Matrix4 translation( const Vector3 &translateVec ); - - // Construct viewing matrix based on eye, position looked at, 
and up direction - // - static inline const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ); - - // Construct a perspective projection matrix - // - static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); - - // Construct a perspective projection matrix based on frustum - // - static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); - - // Construct an orthographic projection matrix - // - static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); - -}; -// Multiply a 4x4 matrix by a scalar -// -inline const Matrix4 operator *( float scalar, const Matrix4 & mat ); - -// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) -// -inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ); - -// Append (post-multiply) a scale transformation to a 4x4 matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. -// -inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ); - -// Prepend (pre-multiply) a scale transformation to a 4x4 matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. -// -inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ); - -// Multiply two 4x4 matrices per element -// -inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); - -// Compute the absolute value of a 4x4 matrix per element -// -inline const Matrix4 absPerElem( const Matrix4 & mat ); - -// Transpose of a 4x4 matrix -// -inline const Matrix4 transpose( const Matrix4 & mat ); - -// Compute the inverse of a 4x4 matrix -// NOTE: -// Result is unpredictable when the determinant of mat is equal to or near 0. 
-// -inline const Matrix4 inverse( const Matrix4 & mat ); - -// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix -// NOTE: -// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. -// -inline const Matrix4 affineInverse( const Matrix4 & mat ); - -// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix -// NOTE: -// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. -// -inline const Matrix4 orthoInverse( const Matrix4 & mat ); - -// Determinant of a 4x4 matrix -// -inline const floatInVec determinant( const Matrix4 & mat ); - -// Conditionally select between two 4x4 matrices -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); - -// Conditionally select between two 4x4 matrices (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// -inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 4x4 matrix -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Matrix4 & mat ); - -// Print a 4x4 matrix and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. 
-// -inline void print( const Matrix4 & mat, const char * name ); - -#endif - -// A 3x4 transformation matrix in array-of-structures format -// -class Transform3 -{ - Vector3 mCol0; - Vector3 mCol1; - Vector3 mCol2; - Vector3 mCol3; - -public: - // Default constructor; does no initialization - // - inline Transform3( ) { }; - - // Copy a 3x4 transformation matrix - // - inline Transform3( const Transform3 & tfrm ); - - // Construct a 3x4 transformation matrix containing the specified columns - // - inline Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 ); - - // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector - // - inline Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ); - - // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector - // - inline Transform3( const Quat &unitQuat, const Vector3 &translateVec ); - - // Set all elements of a 3x4 transformation matrix to the same scalar value - // - explicit inline Transform3( float scalar ); - - // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type) - // - explicit inline Transform3( const floatInVec &scalar ); - - // Assign one 3x4 transformation matrix to another - // - inline Transform3 & operator =( const Transform3 & tfrm ); - - // Set the upper-left 3x3 submatrix - // - inline Transform3 & setUpper3x3( const Matrix3 & mat3 ); - - // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix - // - inline const Matrix3 getUpper3x3( ) const; - - // Set translation component - // - inline Transform3 & setTranslation( const Vector3 &translateVec ); - - // Get the translation component of a 3x4 transformation matrix - // - inline const Vector3 getTranslation( ) const; - - // Set column 0 of a 3x4 transformation matrix - // - inline Transform3 & setCol0( const Vector3 &col0 ); - - // Set column 1 of a 3x4 transformation matrix - // - 
inline Transform3 & setCol1( const Vector3 &col1 ); - - // Set column 2 of a 3x4 transformation matrix - // - inline Transform3 & setCol2( const Vector3 &col2 ); - - // Set column 3 of a 3x4 transformation matrix - // - inline Transform3 & setCol3( const Vector3 &col3 ); - - // Get column 0 of a 3x4 transformation matrix - // - inline const Vector3 getCol0( ) const; - - // Get column 1 of a 3x4 transformation matrix - // - inline const Vector3 getCol1( ) const; - - // Get column 2 of a 3x4 transformation matrix - // - inline const Vector3 getCol2( ) const; - - // Get column 3 of a 3x4 transformation matrix - // - inline const Vector3 getCol3( ) const; - - // Set the column of a 3x4 transformation matrix referred to by the specified index - // - inline Transform3 & setCol( int col, const Vector3 &vec ); - - // Set the row of a 3x4 transformation matrix referred to by the specified index - // - inline Transform3 & setRow( int row, const Vector4 &vec ); - - // Get the column of a 3x4 transformation matrix referred to by the specified index - // - inline const Vector3 getCol( int col ) const; - - // Get the row of a 3x4 transformation matrix referred to by the specified index - // - inline const Vector4 getRow( int row ) const; - - // Subscripting operator to set or get a column - // - inline Vector3 & operator []( int col ); - - // Subscripting operator to get a column - // - inline const Vector3 operator []( int col ) const; - - // Set the element of a 3x4 transformation matrix referred to by column and row indices - // - inline Transform3 & setElem( int col, int row, float val ); - - // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type) - // - inline Transform3 & setElem( int col, int row, const floatInVec &val ); - - // Get the element of a 3x4 transformation matrix referred to by column and row indices - // - inline const floatInVec getElem( int col, int row ) const; - - // Multiply a 
3x4 transformation matrix by a 3-D vector - // - inline const Vector3 operator *( const Vector3 &vec ) const; - - // Multiply a 3x4 transformation matrix by a 3-D point - // - inline const Point3 operator *( const Point3 &pnt ) const; - - // Multiply two 3x4 transformation matrices - // - inline const Transform3 operator *( const Transform3 & tfrm ) const; - - // Perform compound assignment and multiplication by a 3x4 transformation matrix - // - inline Transform3 & operator *=( const Transform3 & tfrm ); - - // Construct an identity 3x4 transformation matrix - // - static inline const Transform3 identity( ); - - // Construct a 3x4 transformation matrix to rotate around the x axis - // - static inline const Transform3 rotationX( float radians ); - - // Construct a 3x4 transformation matrix to rotate around the y axis - // - static inline const Transform3 rotationY( float radians ); - - // Construct a 3x4 transformation matrix to rotate around the z axis - // - static inline const Transform3 rotationZ( float radians ); - - // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type) - // - static inline const Transform3 rotationX( const floatInVec &radians ); - - // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type) - // - static inline const Transform3 rotationY( const floatInVec &radians ); - - // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type) - // - static inline const Transform3 rotationZ( const floatInVec &radians ); - - // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes - // - static inline const Transform3 rotationZYX( const Vector3 &radiansXYZ ); - - // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector - // - static inline const Transform3 rotation( float radians, const Vector3 &unitVec ); - - // Construct a 3x4 transformation 
matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) - // - static inline const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec ); - - // Construct a rotation matrix from a unit-length quaternion - // - static inline const Transform3 rotation( const Quat &unitQuat ); - - // Construct a 3x4 transformation matrix to perform scaling - // - static inline const Transform3 scale( const Vector3 &scaleVec ); - - // Construct a 3x4 transformation matrix to perform translation - // - static inline const Transform3 translation( const Vector3 &translateVec ); - -}; -// Append (post-multiply) a scale transformation to a 3x4 transformation matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. -// -inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ); - -// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix -// NOTE: -// Faster than creating and multiplying a scale transformation matrix. -// -inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ); - -// Multiply two 3x4 transformation matrices per element -// -inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); - -// Compute the absolute value of a 3x4 transformation matrix per element -// -inline const Transform3 absPerElem( const Transform3 & tfrm ); - -// Inverse of a 3x4 transformation matrix -// NOTE: -// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. -// -inline const Transform3 inverse( const Transform3 & tfrm ); - -// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix -// NOTE: -// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. 
-// -inline const Transform3 orthoInverse( const Transform3 & tfrm ); - -// Conditionally select between two 3x4 transformation matrices -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// However, the transfer of select1 to a VMX register may use more processing time than a branch. -// Use the boolInVec version for better performance. -// -inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); - -// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type) -// NOTE: -// This function uses a conditional select instruction to avoid a branch. -// -inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ); - -#ifdef _VECTORMATH_DEBUG - -// Print a 3x4 transformation matrix -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Transform3 & tfrm ); - -// Print a 3x4 transformation matrix and an associated string identifier -// NOTE: -// Function is only defined when _VECTORMATH_DEBUG is defined. -// -inline void print( const Transform3 & tfrm, const char * name ); - -#endif - -} // namespace Aos -} // namespace Vectormath - -#include "vec_aos.h" -#include "quat_aos.h" -#include "mat_aos.h" - -#endif +} + +static __forceinline __m128 acosf4(__m128 x) +{ + __m128 xabs = fabsf4(x); + __m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() ); + __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi amd lo). 
+ */ + __m128 xabs2 = _mm_mul_ps(xabs, xabs); + __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); + __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f), + xabs, _mm_set1_ps(0.0066700901f)), + xabs, _mm_set1_ps(-0.0170881256f)), + xabs, _mm_set1_ps( 0.0308918810f)); + __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f), + xabs, _mm_set1_ps(0.0889789874f)), + xabs, _mm_set1_ps(-0.2145988016f)), + xabs, _mm_set1_ps( 1.5707963050f)); + + __m128 result = vec_madd(hi, xabs4, lo); + + // Adjust the result if x is negative. + return vec_sel( + vec_mul(t1, result), // Positive + vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative + select); +} + +static __forceinline __m128 sinf4(vec_float4 x) +{ + +// +// Common constants used to evaluate sinf4/cosf4/tanf4 +// +#define _SINCOS_CC0 -0.0013602249f +#define _SINCOS_CC1 0.0416566950f +#define _SINCOS_CC2 -0.4999990225f +#define _SINCOS_SC0 -0.0001950727f +#define _SINCOS_SC1 0.0083320758f +#define _SINCOS_SC2 -0.1666665247f + +#define _SINCOS_KC1 1.57079625129f +#define _SINCOS_KC2 7.54978995489e-8f + + vec_float4 xl,xl2,xl3,res; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + vec_int4 q = vec_cts(xl,0); + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = _mm_and_ps(q,toM128(0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + + vec_float4 cx = + vec_madd( + vec_madd( +
vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + res = vec_sel(cx,sx,vec_cmpeq(vec_and(offset, + toM128(0x1)), + _mm_setzero_ps())); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + return vec_sel( + vec_xor(toM128(0x80000000U), res), // Negative + res, // Positive + vec_cmpeq(vec_and(offset,toM128(0x2)),_mm_setzero_ps())); +} + +static __forceinline void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c) +{ + vec_float4 xl,xl2,xl3; + vec_int4 offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0); + vec_int4 q = vec_cts(xl,0); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. 
+ // + offsetSin = vec_and(q,toM128((int)0x3)); + __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin); + offsetCos = (__m128 &)temp; + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 cx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,toM128(0x1)),_mm_setzero_ps()); + vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,toM128(0x1)),_mm_setzero_ps()); + *s = vec_sel(cx,sx,sinMask); + *c = vec_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + sinMask = vec_cmpeq(vec_and(offsetSin,toM128(0x2)),_mm_setzero_ps()); + cosMask = vec_cmpeq(vec_and(offsetCos,toM128(0x2)),_mm_setzero_ps()); + + *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask); + *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask); +} + +#include "vecidx_aos.h" +#include "floatInVec.h" +#include "boolInVec.h" + +#ifdef _VECTORMATH_DEBUG +#include +#endif +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; 
+class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + __m128 mVec128; + + __forceinline void set128(vec_float4 vec); + + __forceinline vec_float4& get128Ref(); + +public: + // Default constructor; does no initialization + // + __forceinline Vector3( ) { }; + + // Default copy constructor + // + __forceinline Vector3(const Vector3& vec); + + // Construct a 3-D vector from x, y, and z elements + // + __forceinline Vector3( float x, float y, float z ); + + // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type) + // + __forceinline Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit __forceinline Vector3( const Point3 &pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit __forceinline Vector3( float scalar ); + + // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Vector3( const floatInVec &scalar ); + + // Set vector float data in a 3-D vector + // + explicit __forceinline Vector3( __m128 vf4 ); + + // Get vector float data from a 3-D vector + // + __forceinline __m128 get128( ) const; + + // Assign one 3-D vector to another + // + __forceinline Vector3 & operator =( const Vector3 &vec ); + + // Set the x element of a 3-D vector + // + __forceinline Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + __forceinline Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + __forceinline Vector3 & setZ( float z ); + + // Set the x element of a 3-D vector (scalar data contained in vector data type) + // + __forceinline Vector3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D vector (scalar data contained in vector data type) + // + __forceinline Vector3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D vector (scalar 
data contained in vector data type) + // + __forceinline Vector3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D vector + // + __forceinline const floatInVec getX( ) const; + + // Get the y element of a 3-D vector + // + __forceinline const floatInVec getY( ) const; + + // Get the z element of a 3-D vector + // + __forceinline const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + __forceinline Vector3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type) + // + __forceinline Vector3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D vector by index + // + __forceinline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + __forceinline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + __forceinline const floatInVec operator []( int idx ) const; + + // Add two 3-D vectors + // + __forceinline const Vector3 operator +( const Vector3 &vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + __forceinline const Vector3 operator -( const Vector3 &vec ) const; + + // Add a 3-D vector to a 3-D point + // + __forceinline const Point3 operator +( const Point3 &pnt ) const; + + // Multiply a 3-D vector by a scalar + // + __forceinline const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + __forceinline const Vector3 operator /( float scalar ) const; + + // Multiply a 3-D vector by a scalar (scalar data contained in vector data type) + // + __forceinline const Vector3 operator *( const floatInVec &scalar ) const; + + // Divide a 3-D vector by a scalar (scalar data contained in vector data type) + // + __forceinline const Vector3 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + __forceinline 
Vector3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + __forceinline Vector3 & operator -=( const Vector3 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + __forceinline Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + __forceinline Vector3 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + __forceinline Vector3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + __forceinline Vector3 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 3-D vector + // + __forceinline const Vector3 operator -( ) const; + + // Construct x axis + // + static __forceinline const Vector3 xAxis( ); + + // Construct y axis + // + static __forceinline const Vector3 yAxis( ); + + // Construct z axis + // + static __forceinline const Vector3 zAxis( ); + +}; + +// Multiply a 3-D vector by a scalar +// +__forceinline const Vector3 operator *( float scalar, const Vector3 &vec ); + +// Multiply a 3-D vector by a scalar (scalar data contained in vector data type) +// +__forceinline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ); + +// Multiply two 3-D vectors per element +// +__forceinline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +__forceinline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. 
+// +__forceinline const Vector3 recipPerElem( const Vector3 &vec ); + +// Compute the absolute value of a 3-D vector per element +// +__forceinline const Vector3 absPerElem( const Vector3 &vec ); + +// Copy sign from one 3-D vector to another, per element +// +__forceinline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum of two 3-D vectors per element +// +__forceinline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Minimum of two 3-D vectors per element +// +__forceinline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum element of a 3-D vector +// +__forceinline const floatInVec maxElem( const Vector3 &vec ); + +// Minimum element of a 3-D vector +// +__forceinline const floatInVec minElem( const Vector3 &vec ); + +// Compute the sum of all elements of a 3-D vector +// +__forceinline const floatInVec sum( const Vector3 &vec ); + +// Compute the dot product of two 3-D vectors +// +__forceinline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the square of the length of a 3-D vector +// +__forceinline const floatInVec lengthSqr( const Vector3 &vec ); + +// Compute the length of a 3-D vector +// +__forceinline const floatInVec length( const Vector3 &vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +__forceinline const Vector3 normalize( const Vector3 &vec ); + +// Compute cross product of two 3-D vectors +// +__forceinline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ); + +// Outer product of two 3-D vectors +// +__forceinline const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// NOTE: +// Slower than column post-multiply. 
+// +__forceinline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +__forceinline const Matrix3 crossMatrix( const Vector3 &vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +__forceinline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +__forceinline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +__forceinline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Conditionally select between two 3-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. 
+// +__forceinline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ); + +// Conditionally select between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +__forceinline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word +// +__forceinline void storeXYZ( const Vector3 &vec, __m128 * quad ); + +// Load four three-float 3-D vectors, stored in three quadwords +// +__forceinline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ); + +// Store four 3-D vectors in three quadwords +// +__forceinline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ); + +// Store eight 3-D vectors as half-floats +// +__forceinline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Vector3 &vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. 
+// +__forceinline void print( const Vector3 &vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + __forceinline Vector4( ) { }; + + // Construct a 4-D vector from x, y, z, and w elements + // + __forceinline Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type) + // + __forceinline Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + __forceinline Vector4( const Vector3 &xyz, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type) + // + __forceinline Vector4( const Vector3 &xyz, const floatInVec &w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit __forceinline Vector4( const Vector3 &vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit __forceinline Vector4( const Point3 &pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit __forceinline Vector4( const Quat &quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit __forceinline Vector4( float scalar ); + + // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Vector4( const floatInVec &scalar ); + + // Set vector float data in a 4-D vector + // + explicit __forceinline Vector4( __m128 vf4 ); + + // Get vector float data from a 4-D vector + // + __forceinline __m128 get128( ) const; + + // Assign one 4-D vector to another + // + __forceinline Vector4 & operator =( const Vector4 &vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w 
element. + // + __forceinline Vector4 & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a 4-D vector + // + __forceinline const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + __forceinline Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + __forceinline Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + __forceinline Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + __forceinline Vector4 & setW( float w ); + + // Set the x element of a 4-D vector (scalar data contained in vector data type) + // + __forceinline Vector4 & setX( const floatInVec &x ); + + // Set the y element of a 4-D vector (scalar data contained in vector data type) + // + __forceinline Vector4 & setY( const floatInVec &y ); + + // Set the z element of a 4-D vector (scalar data contained in vector data type) + // + __forceinline Vector4 & setZ( const floatInVec &z ); + + // Set the w element of a 4-D vector (scalar data contained in vector data type) + // + __forceinline Vector4 & setW( const floatInVec &w ); + + // Get the x element of a 4-D vector + // + __forceinline const floatInVec getX( ) const; + + // Get the y element of a 4-D vector + // + __forceinline const floatInVec getY( ) const; + + // Get the z element of a 4-D vector + // + __forceinline const floatInVec getZ( ) const; + + // Get the w element of a 4-D vector + // + __forceinline const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + __forceinline Vector4 & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type) + // + __forceinline Vector4 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + __forceinline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + __forceinline VecIdx 
operator []( int idx ); + + // Subscripting operator to get an element + // + __forceinline const floatInVec operator []( int idx ) const; + + // Add two 4-D vectors + // + __forceinline const Vector4 operator +( const Vector4 &vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + __forceinline const Vector4 operator -( const Vector4 &vec ) const; + + // Multiply a 4-D vector by a scalar + // + __forceinline const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + __forceinline const Vector4 operator /( float scalar ) const; + + // Multiply a 4-D vector by a scalar (scalar data contained in vector data type) + // + __forceinline const Vector4 operator *( const floatInVec &scalar ) const; + + // Divide a 4-D vector by a scalar (scalar data contained in vector data type) + // + __forceinline const Vector4 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + __forceinline Vector4 & operator +=( const Vector4 &vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + __forceinline Vector4 & operator -=( const Vector4 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + __forceinline Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + __forceinline Vector4 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + __forceinline Vector4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + __forceinline Vector4 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 4-D vector + // + __forceinline const Vector4 operator -( ) const; + + // Construct x axis + // + static __forceinline const Vector4 xAxis( ); + + // Construct y axis + // + static 
__forceinline const Vector4 yAxis( ); + + // Construct z axis + // + static __forceinline const Vector4 zAxis( ); + + // Construct w axis + // + static __forceinline const Vector4 wAxis( ); + +}; + +// Multiply a 4-D vector by a scalar +// +__forceinline const Vector4 operator *( float scalar, const Vector4 &vec ); + +// Multiply a 4-D vector by a scalar (scalar data contained in vector data type) +// +__forceinline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ); + +// Multiply two 4-D vectors per element +// +__forceinline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +__forceinline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +__forceinline const Vector4 recipPerElem( const Vector4 &vec ); + +// Compute the absolute value of a 4-D vector per element +// +__forceinline const Vector4 absPerElem( const Vector4 &vec ); + +// Copy sign from one 4-D vector to another, per element +// +__forceinline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum of two 4-D vectors per element +// +__forceinline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Minimum of two 4-D vectors per element +// +__forceinline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum element of a 4-D vector +// +__forceinline const floatInVec maxElem( const Vector4 &vec ); + +// Minimum element of a 4-D vector +// +__forceinline const floatInVec minElem( const Vector4 &vec ); + +// Compute the sum of all elements of a 4-D vector +// +__forceinline const floatInVec sum( const Vector4 &vec ); + +// Compute the dot product of two 4-D vectors +// +__forceinline const 
floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the square of the length of a 4-D vector +// +__forceinline const floatInVec lengthSqr( const Vector4 &vec ); + +// Compute the length of a 4-D vector +// +__forceinline const floatInVec length( const Vector4 &vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +__forceinline const Vector4 normalize( const Vector4 &vec ); + +// Outer product of two 4-D vectors +// +__forceinline const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +__forceinline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +__forceinline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Conditionally select between two 4-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. 
+// +__forceinline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ); + +// Conditionally select between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +__forceinline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ); + +// Store four 4-D vectors as half-floats +// +__forceinline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Vector4 &vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Vector4 &vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + __forceinline Point3( ) { }; + + // Construct a 3-D point from x, y, and z elements + // + __forceinline Point3( float x, float y, float z ); + + // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type) + // + __forceinline Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit __forceinline Point3( const Vector3 &vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit __forceinline Point3( float scalar ); + + // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Point3( const floatInVec &scalar ); + + // Set vector float data in a 3-D point + // + explicit __forceinline Point3( __m128 vf4 ); + + // Get vector float 
data from a 3-D point + // + __forceinline __m128 get128( ) const; + + // Assign one 3-D point to another + // + __forceinline Point3 & operator =( const Point3 &pnt ); + + // Set the x element of a 3-D point + // + __forceinline Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + __forceinline Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + __forceinline Point3 & setZ( float z ); + + // Set the x element of a 3-D point (scalar data contained in vector data type) + // + __forceinline Point3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D point (scalar data contained in vector data type) + // + __forceinline Point3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D point (scalar data contained in vector data type) + // + __forceinline Point3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D point + // + __forceinline const floatInVec getX( ) const; + + // Get the y element of a 3-D point + // + __forceinline const floatInVec getY( ) const; + + // Get the z element of a 3-D point + // + __forceinline const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + __forceinline Point3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type) + // + __forceinline Point3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D point by index + // + __forceinline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + __forceinline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + __forceinline const floatInVec operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + __forceinline const Vector3 operator -( const Point3 &pnt ) const; + + // Add a 3-D point to a 3-D vector + // + __forceinline const Point3 operator +( const Vector3 
&vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + __forceinline const Point3 operator -( const Vector3 &vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + __forceinline Point3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + __forceinline Point3 & operator -=( const Vector3 &vec ); + +}; + +// Multiply two 3-D points per element +// +__forceinline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +__forceinline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +__forceinline const Point3 recipPerElem( const Point3 &pnt ); + +// Compute the absolute value of a 3-D point per element +// +__forceinline const Point3 absPerElem( const Point3 &pnt ); + +// Copy sign from one 3-D point to another, per element +// +__forceinline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum of two 3-D points per element +// +__forceinline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Minimum of two 3-D points per element +// +__forceinline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum element of a 3-D point +// +__forceinline const floatInVec maxElem( const Point3 &pnt ); + +// Minimum element of a 3-D point +// +__forceinline const floatInVec minElem( const Point3 &pnt ); + +// Compute the sum of all elements of a 3-D point +// +__forceinline const floatInVec sum( const Point3 &pnt ); + +// Apply uniform scale to a 3-D point +// +__forceinline const Point3 scale( const Point3 &pnt, float scaleVal ); + +// Apply uniform scale to a 3-D point (scalar data contained in vector data type) 
+// +__forceinline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +__forceinline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +__forceinline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +__forceinline const floatInVec distSqrFromOrigin( const Point3 &pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +__forceinline const floatInVec distFromOrigin( const Point3 &pnt ); + +// Compute the square of the distance between two 3-D points +// +__forceinline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the distance between two 3-D points +// +__forceinline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Conditionally select between two 3-D points +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +__forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ); + +// Conditionally select between two 3-D points (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. 
+// +__forceinline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word +// +__forceinline void storeXYZ( const Point3 &pnt, __m128 * quad ); + +// Load four three-float 3-D points, stored in three quadwords +// +__forceinline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ); + +// Store four 3-D points in three quadwords +// +__forceinline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ); + +// Store eight 3-D points as half-floats +// +__forceinline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Point3 &pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. 
+// +__forceinline void print( const Point3 &pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// Stored in one __m128 member (mVec128). +// +class Quat +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + __forceinline Quat( ) { }; + + // Copy a quaternion + // (declared unqualified: an extra Quat:: qualifier inside the class is rejected by conforming compilers) + // + __forceinline Quat( const Quat &quat ); + + // Construct a quaternion from x, y, z, and w elements + // + __forceinline Quat( float x, float y, float z, float w ); + + // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type) + // + __forceinline Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a quaternion from a 3-D vector and a scalar + // + __forceinline Quat( const Vector3 &xyz, float w ); + + // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type) + // + __forceinline Quat( const Vector3 &xyz, const floatInVec &w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit __forceinline Quat( const Vector4 &vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit __forceinline Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit __forceinline Quat( float scalar ); + + // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Quat( const floatInVec &scalar ); + + // Set vector float data in a quaternion + // + explicit __forceinline Quat( __m128 vf4 ); + + // Get vector float data from a quaternion + // + __forceinline __m128 get128( ) const; + + // Set a quaternion from vector float data + // + __forceinline void set128(vec_float4 vec); + + // Assign one quaternion to another + // + __forceinline Quat & operator =( const Quat &quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element.
+ // + __forceinline Quat & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a quaternion + // + __forceinline const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + __forceinline Quat & setX( float x ); + + // Set the y element of a quaternion + // + __forceinline Quat & setY( float y ); + + // Set the z element of a quaternion + // + __forceinline Quat & setZ( float z ); + + // Set the w element of a quaternion + // + __forceinline Quat & setW( float w ); + + // Set the x element of a quaternion (scalar data contained in vector data type) + // + __forceinline Quat & setX( const floatInVec &x ); + + // Set the y element of a quaternion (scalar data contained in vector data type) + // + __forceinline Quat & setY( const floatInVec &y ); + + // Set the z element of a quaternion (scalar data contained in vector data type) + // + __forceinline Quat & setZ( const floatInVec &z ); + + // Set the w element of a quaternion (scalar data contained in vector data type) + // + __forceinline Quat & setW( const floatInVec &w ); + + // Get the x element of a quaternion + // + __forceinline const floatInVec getX( ) const; + + // Get the y element of a quaternion + // + __forceinline const floatInVec getY( ) const; + + // Get the z element of a quaternion + // + __forceinline const floatInVec getZ( ) const; + + // Get the w element of a quaternion + // + __forceinline const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + __forceinline Quat & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type) + // + __forceinline Quat & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a quaternion by index + // + __forceinline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + __forceinline VecIdx operator []( int idx ); + + //
Subscripting operator to get an element + // + __forceinline const floatInVec operator []( int idx ) const; + + // Add two quaternions + // + __forceinline const Quat operator +( const Quat &quat ) const; + + // Subtract a quaternion from another quaternion + // + __forceinline const Quat operator -( const Quat &quat ) const; + + // Multiply two quaternions + // + __forceinline const Quat operator *( const Quat &quat ) const; + + // Multiply a quaternion by a scalar + // + __forceinline const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + __forceinline const Quat operator /( float scalar ) const; + + // Multiply a quaternion by a scalar (scalar data contained in vector data type) + // + __forceinline const Quat operator *( const floatInVec &scalar ) const; + + // Divide a quaternion by a scalar (scalar data contained in vector data type) + // + __forceinline const Quat operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + __forceinline Quat & operator +=( const Quat &quat ); + + // Perform compound assignment and subtraction by a quaternion + // + __forceinline Quat & operator -=( const Quat &quat ); + + // Perform compound assignment and multiplication by a quaternion + // + __forceinline Quat & operator *=( const Quat &quat ); + + // Perform compound assignment and multiplication by a scalar + // + __forceinline Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + __forceinline Quat & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + __forceinline Quat & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + __forceinline Quat & operator /=( const floatInVec &scalar ); + + // Negate all elements of a quaternion + // +
__forceinline const Quat operator -( ) const; + + // Construct an identity quaternion + // + static __forceinline const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static __forceinline const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static __forceinline const Quat rotation( float radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static __forceinline const Quat rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static __forceinline const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static __forceinline const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static __forceinline const Quat rotationZ( float radians ); + + // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type) + // + static __forceinline const Quat rotationX( const floatInVec &radians ); + + // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type) + // + static __forceinline const Quat rotationY( const floatInVec &radians ); + + // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type) + // + static __forceinline const Quat rotationZ( const floatInVec &radians ); + +}; + +// Multiply a quaternion by a scalar +// +__forceinline const Quat operator *( float scalar, const Quat &quat ); + +// Multiply a quaternion by a scalar (scalar data contained in vector data type) +// +__forceinline const Quat operator *( const floatInVec &scalar, const Quat
&quat ); + +// Compute the conjugate of a quaternion +// +__forceinline const Quat conj( const Quat &quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +__forceinline const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec ); + +// Compute the dot product of two quaternions +// +__forceinline const floatInVec dot( const Quat &quat0, const Quat &quat1 ); + +// Compute the norm of a quaternion +// +__forceinline const floatInVec norm( const Quat &quat ); + +// Compute the length of a quaternion +// +__forceinline const floatInVec length( const Quat &quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +__forceinline const Quat normalize( const Quat &quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ); + +// Linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +__forceinline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +__forceinline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1.
+// +__forceinline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical quadrangle interpolation +// +__forceinline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Spherical quadrangle interpolation (scalar data contained in vector data type) +// +__forceinline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Conditionally select between two quaternions +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a vector register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +__forceinline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ); + +// Conditionally select between two quaternions (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +__forceinline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Quat &quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined.
+// +__forceinline void print( const Quat &quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// Stored as three Vector3 columns: mCol0, mCol1 and mCol2. +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + __forceinline Matrix3( ) { }; + + // Copy a 3x3 matrix + // + __forceinline Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + __forceinline Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit __forceinline Matrix3( const Quat &unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit __forceinline Matrix3( float scalar ); + + // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Matrix3( const floatInVec &scalar ); + + // Assign one 3x3 matrix to another + // + __forceinline Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + __forceinline Matrix3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x3 matrix + // + __forceinline Matrix3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x3 matrix + // + __forceinline Matrix3 & setCol2( const Vector3 &col2 ); + + // Get column 0 of a 3x3 matrix + // + __forceinline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + __forceinline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + __forceinline const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + __forceinline Matrix3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + __forceinline Matrix3 & setRow( int row, const Vector3 &vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // +
__forceinline const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + __forceinline const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + __forceinline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + __forceinline const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + __forceinline Matrix3 & setElem( int col, int row, float val ); + + // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type) + // + __forceinline Matrix3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + __forceinline const floatInVec getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + __forceinline const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + __forceinline const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + __forceinline const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + __forceinline const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) + // + __forceinline const Matrix3 operator *( const floatInVec &scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + __forceinline const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply two 3x3 matrices + // + __forceinline const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + __forceinline Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + __forceinline Matrix3 & operator -=( const Matrix3 &
mat ); + + // Perform compound assignment and multiplication by a scalar + // + __forceinline Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + __forceinline Matrix3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + __forceinline Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static __forceinline const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static __forceinline const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static __forceinline const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static __forceinline const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static __forceinline const Matrix3 rotationX( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static __forceinline const Matrix3 rotationY( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static __forceinline const Matrix3 rotationZ( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static __forceinline const Matrix3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static __forceinline const Matrix3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static __forceinline const Matrix3 rotation( const floatInVec &radians,
const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static __forceinline const Matrix3 rotation( const Quat &unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static __forceinline const Matrix3 scale( const Vector3 &scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +__forceinline const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) +// +__forceinline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +__forceinline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +__forceinline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +__forceinline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +__forceinline const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +__forceinline const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +__forceinline const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +__forceinline const floatInVec determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance. +// +__forceinline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +// Conditionally select between two 3x3 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +__forceinline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + __forceinline Matrix4( ) { }; + + // Copy a 4x4 matrix + // + __forceinline Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + __forceinline Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit __forceinline Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + __forceinline Matrix4( const Matrix3 & mat, const Vector3 &translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + __forceinline Matrix4( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit __forceinline Matrix4( float scalar ); + + // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data 
type) + // + explicit __forceinline Matrix4( const floatInVec &scalar ); + + // Assign one 4x4 matrix to another + // + __forceinline Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + __forceinline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + __forceinline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + __forceinline Matrix4 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 4x4 matrix + // + __forceinline const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + __forceinline Matrix4 & setCol0( const Vector4 &col0 ); + + // Set column 1 of a 4x4 matrix + // + __forceinline Matrix4 & setCol1( const Vector4 &col1 ); + + // Set column 2 of a 4x4 matrix + // + __forceinline Matrix4 & setCol2( const Vector4 &col2 ); + + // Set column 3 of a 4x4 matrix + // + __forceinline Matrix4 & setCol3( const Vector4 &col3 ); + + // Get column 0 of a 4x4 matrix + // + __forceinline const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + __forceinline const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + __forceinline const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + __forceinline const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + __forceinline Matrix4 & setCol( int col, const Vector4 &vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + __forceinline Matrix4 & setRow( int row, const Vector4 &vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + __forceinline const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified 
index + // + __forceinline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column (returns a modifiable reference to the column) + // + __forceinline Vector4 & operator []( int col ); + + // Subscripting operator to get a column (const overload; returns the column by value) + // + __forceinline const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + __forceinline Matrix4 & setElem( int col, int row, float val ); + + // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type) + // + __forceinline Matrix4 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + __forceinline const floatInVec getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + __forceinline const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + __forceinline const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + __forceinline const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + __forceinline const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) + // + __forceinline const Matrix4 operator *( const floatInVec &scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + __forceinline const Vector4 operator *( const Vector4 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector (the result is a 4-D vector) + // + __forceinline const Vector4 operator *( const Vector3 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D point (the result is a 4-D vector) + // + __forceinline const Vector4 operator *( const Point3 &pnt ) const; + + // Multiply two 4x4 matrices + // + __forceinline const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + __forceinline const Matrix4 operator *( const Transform3 & tfrm ) const;
+ + // Perform compound assignment and addition with a 4x4 matrix + // + __forceinline Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + __forceinline Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + __forceinline Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + __forceinline Matrix4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + __forceinline Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + __forceinline Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static __forceinline const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static __forceinline const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static __forceinline const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static __forceinline const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static __forceinline const Matrix4 rotationX( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static __forceinline const Matrix4 rotationY( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static __forceinline const Matrix4 rotationZ( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes (the three angles are passed together as one 3-D vector) + // + static __forceinline
const Matrix4 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static __forceinline const Matrix4 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static __forceinline const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a 4x4 rotation matrix from a unit-length quaternion + // + static __forceinline const Matrix4 rotation( const Quat &unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static __forceinline const Matrix4 scale( const Vector3 &scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static __forceinline const Matrix4 translation( const Vector3 &translateVec ); + + // Construct a viewing matrix from the eye position, the position looked at, and the up direction + // + static __forceinline const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ); + + // Construct a perspective projection matrix + // + static __forceinline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on a frustum + // + static __forceinline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static __forceinline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar (the scalar is the left-hand operand) +// +__forceinline const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Multiply a 4x4 matrix by a scalar (the scalar is the left-hand operand; scalar data contained in vector data type) +// +__forceinline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating
and multiplying a scale transformation matrix. +// +__forceinline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +__forceinline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +__forceinline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +__forceinline const Matrix4 absPerElem( const Matrix4 & mat ); + +// Compute the transpose of a 4x4 matrix +// +__forceinline const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +__forceinline const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +__forceinline const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +__forceinline const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Compute the determinant of a 4x4 matrix (returned as scalar data contained in vector data type) +// +__forceinline const floatInVec determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 (a plain bool) to a vector register may use more processing time than a branch (NOTE(review): the original wording says VMX register, which is PowerPC terminology; if this is the SSE header, presumably an SSE register is meant - confirm). +// Use the boolInVec version for better performance. +// +__forceinline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +// Conditionally select between two 4x4 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +__forceinline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format, stored as four Vector3 columns (mCol0 through mCol3) +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + __forceinline Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + __forceinline Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + __forceinline Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D translation vector + // + __forceinline Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D translation vector + // + __forceinline Transform3( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit __forceinline Transform3( float scalar ); + + // Set all
elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type) + // + explicit __forceinline Transform3( const floatInVec &scalar ); + + // Assign one 3x4 transformation matrix to another + // + __forceinline Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + __forceinline Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + __forceinline const Matrix3 getUpper3x3( ) const; + + // Set the translation component of a 3x4 transformation matrix + // + __forceinline Transform3 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + __forceinline const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + __forceinline Transform3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + __forceinline Transform3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + __forceinline Transform3 & setCol2( const Vector3 &col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + __forceinline Transform3 & setCol3( const Vector3 &col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + __forceinline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + __forceinline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + __forceinline const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + __forceinline const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + __forceinline Transform3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + __forceinline Transform3 & setRow( int row, const Vector4 &vec ); + + //
Get the column of a 3x4 transformation matrix referred to by the specified index + // + __forceinline const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index (returned as a 4-D vector) + // + __forceinline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column (returns a modifiable reference to the column) + // + __forceinline Vector3 & operator []( int col ); + + // Subscripting operator to get a column (const overload; returns the column by value) + // + __forceinline const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + __forceinline Transform3 & setElem( int col, int row, float val ); + + // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type) + // + __forceinline Transform3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + __forceinline const floatInVec getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + __forceinline const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + __forceinline const Point3 operator *( const Point3 &pnt ) const; + + // Multiply two 3x4 transformation matrices + // + __forceinline const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + __forceinline Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static __forceinline const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static __forceinline const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static __forceinline const Transform3
rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static __forceinline const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type) + // + static __forceinline const Transform3 rotationX( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type) + // + static __forceinline const Transform3 rotationY( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type) + // + static __forceinline const Transform3 rotationZ( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes (the three angles are passed together as one 3-D vector) + // + static __forceinline const Transform3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static __forceinline const Transform3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static __forceinline const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a 3x4 rotation transformation matrix from a unit-length quaternion + // + static __forceinline const Transform3 rotation( const Quat &unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static __forceinline const Transform3 scale( const Vector3 &scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static __forceinline const Transform3 translation( const Vector3 &translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix.
+// +__forceinline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +__forceinline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +__forceinline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +__forceinline const Transform3 absPerElem( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0. +// +__forceinline const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +__forceinline const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 (a plain bool) to a vector register may use more processing time than a branch (NOTE(review): the original wording says VMX register, which is PowerPC terminology; if this is the SSE header, presumably an SSE register is meant - confirm). +// Use the boolInVec version for better performance. +// +__forceinline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch.
+// +__forceinline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +__forceinline void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif