From 074b869a9f463b14f309922d532be104d43f18ec Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Tue, 12 May 2015 09:17:27 -0700 Subject: [PATCH] enable premake4 option --ios and Test_LinearMath usage: ./premake4_osx --ios xcode4 open xcode4ios/0_Bullet3Solution.xcworkspace --- build3/premake4.lua | 51 +- .../Source/Tests/Test_quat_aos_neon.cpp | 2 +- test/Bullet2/premake4.lua | 7 +- test/Bullet2/vectormath/neon/boolInVec.h | 226 ++ test/Bullet2/vectormath/neon/floatInVec.h | 344 +++ test/Bullet2/vectormath/neon/mat_aos.h | 1631 +++++++++++ test/Bullet2/vectormath/neon/quat_aos.h | 413 +++ test/Bullet2/vectormath/neon/vec_aos.h | 1427 +++++++++ test/Bullet2/vectormath/neon/vectormath_aos.h | 1890 ++++++++++++ test/Bullet2/vectormath/scalar/boolInVec.h | 225 ++ test/Bullet2/vectormath/scalar/floatInVec.h | 343 +++ test/Bullet2/vectormath/scalar/mat_aos.h | 1630 +++++++++++ test/Bullet2/vectormath/scalar/quat_aos.h | 433 +++ test/Bullet2/vectormath/scalar/vec_aos.h | 1426 +++++++++ .../vectormath/scalar/vectormath_aos.h | 1872 ++++++++++++ test/Bullet2/vectormath/sse/boolInVec.h | 247 ++ test/Bullet2/vectormath/sse/floatInVec.h | 340 +++ test/Bullet2/vectormath/sse/mat_aos.h | 2190 ++++++++++++++ test/Bullet2/vectormath/sse/quat_aos.h | 579 ++++ test/Bullet2/vectormath/sse/vec_aos.h | 1455 ++++++++++ test/Bullet2/vectormath/sse/vecidx_aos.h | 80 + test/Bullet2/vectormath/sse/vectormath_aos.h | 2547 +++++++++++++++++ test/Bullet2/vectormath/vmInclude.h | 31 + test/collision/premake4.lua | 2 +- 24 files changed, 19370 insertions(+), 21 deletions(-) create mode 100644 test/Bullet2/vectormath/neon/boolInVec.h create mode 100644 test/Bullet2/vectormath/neon/floatInVec.h create mode 100644 test/Bullet2/vectormath/neon/mat_aos.h create mode 100644 test/Bullet2/vectormath/neon/quat_aos.h create mode 100644 test/Bullet2/vectormath/neon/vec_aos.h create mode 100644 test/Bullet2/vectormath/neon/vectormath_aos.h create mode 100644 test/Bullet2/vectormath/scalar/boolInVec.h create mode 100644 test/Bullet2/vectormath/scalar/floatInVec.h create mode 100644 test/Bullet2/vectormath/scalar/mat_aos.h create mode 100644 test/Bullet2/vectormath/scalar/quat_aos.h create mode 100644 test/Bullet2/vectormath/scalar/vec_aos.h create mode 100644 test/Bullet2/vectormath/scalar/vectormath_aos.h create mode 100644 test/Bullet2/vectormath/sse/boolInVec.h create mode 100644 test/Bullet2/vectormath/sse/floatInVec.h create mode 100644 test/Bullet2/vectormath/sse/mat_aos.h create mode 100644 test/Bullet2/vectormath/sse/quat_aos.h create mode 100644 test/Bullet2/vectormath/sse/vec_aos.h create mode 100644 test/Bullet2/vectormath/sse/vecidx_aos.h create mode 100644 test/Bullet2/vectormath/sse/vectormath_aos.h create mode 100644 test/Bullet2/vectormath/vmInclude.h diff --git a/build3/premake4.lua b/build3/premake4.lua index 53c087357..6b7a33056 100644 --- a/build3/premake4.lua +++ b/build3/premake4.lua @@ -18,6 +18,11 @@ act = _ACTION end + newoption { + trigger = "ios", + description = "Enable iOS target (requires xcode4)" + } + newoption { trigger = "force_dlopen_opengl", @@ -95,12 +100,25 @@ postfix="" if _ACTION == "xcode4" then + if _OPTIONS["ios"] then + postfix = "ios"; + xcodebuildsettings + { + 'INFOPLIST_FILE = "../../test/Bullet2/Info.plist"', + 'CODE_SIGN_IDENTITY = "iPhone Developer"', + "SDKROOT = iphoneos", + 'ARCHS = "armv7"', + 'TARGETED_DEVICE_FAMILY = "1,2"', + 'VALID_ARCHS = "armv7"', + } + else xcodebuildsettings { 'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"', 'VALID_ARCHS = "x86_64 i386"', 'SDKROOT = "macosx10.9"', } + end end -- comment-out for now, URDF reader needs exceptions @@ -123,6 +141,8 @@ language "C++" +if not _OPTIONS["ios"] then + include "../examples/ExampleBrowser" include "../examples/OpenGLWindow" @@ -137,26 +157,25 @@ include "../test/enet/server" end - if not _OPTIONS["without-gtest"] then - include "../test/gtest-1.7.0" --- include "../test/hello_gtest" - include "../test/collision" - include "../test/TestBullet3OpenCL" - include "../test/GwenOpenGLTest" - end - - - include "../src/BulletSoftBody" - include "../src/BulletDynamics" - include "../src/BulletCollision" - include "../src/LinearMath" - include "../src/Bullet3Common" include "../src/Bullet3Geometry" include "../src/Bullet3Collision" include "../src/Bullet3Dynamics" include "../src/Bullet3OpenCL" include "../src/Bullet3Serialize/Bullet2FileLoader" - - + + if not _OPTIONS["without-gtest"] then + include "../test/gtest-1.7.0" +-- include "../test/hello_gtest" + include "../test/collision" + include "../test/TestBullet3OpenCL" + include "../test/GwenOpenGLTest" + end +end + + include "../test/Bullet2" + include "../src/BulletSoftBody" + include "../src/BulletDynamics" + include "../src/BulletCollision" + include "../src/LinearMath" diff --git a/test/Bullet2/Source/Tests/Test_quat_aos_neon.cpp b/test/Bullet2/Source/Tests/Test_quat_aos_neon.cpp index c13e41aeb..0a8f69724 100644 --- a/test/Bullet2/Source/Tests/Test_quat_aos_neon.cpp +++ b/test/Bullet2/Source/Tests/Test_quat_aos_neon.cpp @@ -14,7 +14,7 @@ #include "Utils.h" #include "main.h" -#include +#include "../vectormath/vmInclude.h" //typedef Vectormath::Aos::Vector3 vmVector3; diff --git a/test/Bullet2/premake4.lua b/test/Bullet2/premake4.lua index bca0cb5dc..7d6a77111 100644 --- a/test/Bullet2/premake4.lua +++ b/test/Bullet2/premake4.lua @@ -1,14 +1,15 @@ -project "AppUnitTest" +project "Test_LinearMath" if _OPTIONS["ios"] then kind "WindowedApp" else kind "ConsoleApp" end -targetdir "bin" -includedirs {"../src","Source", "Source/Tests"} +targetdir "../../bin" + +includedirs {"../../src","Source", "Source/Tests"} links { "BulletDynamics","BulletCollision", "LinearMath" diff --git a/test/Bullet2/vectormath/neon/boolInVec.h b/test/Bullet2/vectormath/neon/boolInVec.h new file mode 100644 index 000000000..ba16838c0 --- /dev/null +++ b/test/Bullet2/vectormath/neon/boolInVec.h @@ -0,0 +1,226 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _BOOLINVEC_H +#define _BOOLINVEC_H + +#include +namespace Vectormath { + +class floatInVec; + +//-------------------------------------------------------------------------------------------------- +// boolInVec class +// + +class boolInVec +{ +private: + unsigned int mData; + +public: + // Default constructor; does no initialization + // + inline boolInVec( ) { }; + + // Construct from a value converted from float + // + inline boolInVec(floatInVec vec); + + // Explicit cast from bool + // + explicit inline boolInVec(bool scalar); + + // Explicit cast to bool + // + inline bool getAsBool() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to bool + // + inline operator bool() const; +#endif + + // Boolean negation operator + // + inline const boolInVec operator ! () const; + + // Assignment operator + // + inline boolInVec& operator = (boolInVec vec); + + // Boolean and assignment operator + // + inline boolInVec& operator &= (boolInVec vec); + + // Boolean exclusive or assignment operator + // + inline boolInVec& operator ^= (boolInVec vec); + + // Boolean or assignment operator + // + inline boolInVec& operator |= (boolInVec vec); + +}; + +// Equal operator +// +inline const boolInVec operator == (boolInVec vec0, boolInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (boolInVec vec0, boolInVec vec1); + +// And operator +// +inline const boolInVec operator & (boolInVec vec0, boolInVec vec1); + +// Exclusive or operator +// +inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1); + +// Or operator +// +inline const boolInVec operator | (boolInVec vec0, boolInVec vec1); + +// Conditionally select between two values +// +inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// boolInVec implementation +// + +#include "floatInVec.h" + +namespace Vectormath { + +inline +boolInVec::boolInVec(floatInVec vec) +{ + *this = (vec != floatInVec(0.0f)); +} + +inline +boolInVec::boolInVec(bool scalar) +{ + mData = -(int)scalar; +} + +inline +bool +boolInVec::getAsBool() const +{ + return (mData > 0); +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +boolInVec::operator bool() const +{ + return getAsBool(); +} +#endif + +inline +const boolInVec +boolInVec::operator ! () const +{ + return boolInVec(!mData); +} + +inline +boolInVec& +boolInVec::operator = (boolInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +boolInVec& +boolInVec::operator &= (boolInVec vec) +{ + *this = *this & vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator ^= (boolInVec vec) +{ + *this = *this ^ vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator |= (boolInVec vec) +{ + *this = *this | vec; + return *this; +} + +inline +const boolInVec +operator == (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() == vec1.getAsBool()); +} + +inline +const boolInVec +operator != (boolInVec vec0, boolInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const boolInVec +operator & (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() & vec1.getAsBool()); +} + +inline +const boolInVec +operator | (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() | vec1.getAsBool()); +} + +inline +const boolInVec +operator ^ (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() ^ vec1.getAsBool()); +} + +inline +const boolInVec +select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // boolInVec_h + diff --git a/test/Bullet2/vectormath/neon/floatInVec.h b/test/Bullet2/vectormath/neon/floatInVec.h new file mode 100644 index 000000000..26147d22b --- /dev/null +++ b/test/Bullet2/vectormath/neon/floatInVec.h @@ -0,0 +1,344 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ +#ifndef _FLOATINVEC_H +#define _FLOATINVEC_H + +#include +namespace Vectormath { + +class boolInVec; + +//-------------------------------------------------------------------------------------------------- +// floatInVec class +// + +// A class representing a scalar float value contained in a vector register +// This class does not support fastmath +class floatInVec +{ +private: + float mData; + +public: + // Default constructor; does no initialization + // + inline floatInVec( ) { }; + + // Construct from a value converted from bool + // + inline floatInVec(boolInVec vec); + + // Explicit cast from float + // + explicit inline floatInVec(float scalar); + + // Explicit cast to float + // + inline float getAsFloat() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to float + // + inline operator float() const; +#endif + + // Post increment (add 1.0f) + // + inline const floatInVec operator ++ (int); + + // Post decrement (subtract 1.0f) + // + inline const floatInVec operator -- (int); + + // Pre increment (add 1.0f) + // + inline floatInVec& operator ++ (); + + // Pre decrement (subtract 1.0f) + // + inline floatInVec& operator -- (); + + // Negation operator + // + inline const floatInVec operator - () const; + + // Assignment operator + // + inline floatInVec& operator = (floatInVec vec); + + // Multiplication assignment operator + // + inline floatInVec& operator *= (floatInVec vec); + + // Division assignment operator + // + inline floatInVec& operator /= (floatInVec vec); + + // Addition assignment operator + // + inline floatInVec& operator += (floatInVec vec); + + // Subtraction assignment operator + // + inline floatInVec& operator -= (floatInVec vec); + +}; + +// Multiplication operator +// +inline const floatInVec operator * (floatInVec vec0, floatInVec vec1); + +// Division operator +// +inline const floatInVec operator / (floatInVec vec0, floatInVec vec1); + +// Addition operator +// +inline const floatInVec operator + (floatInVec vec0, floatInVec vec1); + +// Subtraction operator +// +inline const floatInVec operator - (floatInVec vec0, floatInVec vec1); + +// Less than operator +// +inline const boolInVec operator < (floatInVec vec0, floatInVec vec1); + +// Less than or equal operator +// +inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1); + +// Greater than operator +// +inline const boolInVec operator > (floatInVec vec0, floatInVec vec1); + +// Greater than or equal operator +// +inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1); + +// Equal operator +// +inline const boolInVec operator == (floatInVec vec0, floatInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (floatInVec vec0, floatInVec vec1); + +// Conditionally select between two values +// +inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// floatInVec implementation +// + +#include "boolInVec.h" + +namespace Vectormath { + +inline +floatInVec::floatInVec(boolInVec vec) +{ + mData = float(vec.getAsBool()); +} + +inline +floatInVec::floatInVec(float scalar) +{ + mData = scalar; +} + +inline +float +floatInVec::getAsFloat() const +{ + return mData; +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +floatInVec::operator float() const +{ + return getAsFloat(); +} +#endif + +inline +const floatInVec +floatInVec::operator ++ (int) +{ + float olddata = mData; + operator ++(); + return floatInVec(olddata); +} + +inline +const floatInVec +floatInVec::operator -- (int) +{ + float olddata = mData; + operator --(); + return floatInVec(olddata); +} + +inline +floatInVec& +floatInVec::operator ++ () +{ + *this += floatInVec(1.0f); + return *this; +} + +inline +floatInVec& +floatInVec::operator -- () +{ + *this -= floatInVec(1.0f); + return *this; +} + +inline +const floatInVec +floatInVec::operator - () const +{ + return floatInVec(-mData); +} + +inline +floatInVec& +floatInVec::operator = (floatInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +floatInVec& +floatInVec::operator *= (floatInVec vec) +{ + *this = *this * vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator /= (floatInVec vec) +{ + *this = *this / vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator += (floatInVec vec) +{ + *this = *this + vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator -= (floatInVec vec) +{ + *this = *this - vec; + return *this; +} + +inline +const floatInVec +operator * (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() * vec1.getAsFloat()); +} + +inline +const floatInVec +operator / (floatInVec num, floatInVec den) +{ + return floatInVec(num.getAsFloat() / den.getAsFloat()); +} + +inline +const floatInVec +operator + (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() + vec1.getAsFloat()); +} + +inline +const floatInVec +operator - (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() - vec1.getAsFloat()); +} + +inline +const boolInVec +operator < (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() < vec1.getAsFloat()); +} + +inline +const boolInVec +operator <= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 > vec1); +} + +inline +const boolInVec +operator > (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() > vec1.getAsFloat()); +} + +inline +const boolInVec +operator >= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 < vec1); +} + +inline +const boolInVec +operator == (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() == vec1.getAsFloat()); +} + +inline +const boolInVec +operator != (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const floatInVec +select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // floatInVec_h + diff --git a/test/Bullet2/vectormath/neon/mat_aos.h b/test/Bullet2/vectormath/neon/mat_aos.h new file mode 100644 index 000000000..e61f601c3 --- /dev/null +++ b/test/Bullet2/vectormath/neon/mat_aos.h @@ -0,0 +1,1631 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_MAT_AOS_CPP_H +#define _VECTORMATH_MAT_AOS_CPP_H + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_PI_OVER_2 1.570796327f + +//----------------------------------------------------------------------------- +// Definitions + +inline Matrix3::Matrix3( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; +} + +inline Matrix3::Matrix3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +inline Matrix3::Matrix3( const Quat & unitQuat ) +{ + float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2; + qx = unitQuat.getX(); + qy = unitQuat.getY(); + qz = unitQuat.getZ(); + qw = unitQuat.getW(); + qx2 = ( qx + qx ); + qy2 = ( qy + qy ); + qz2 = ( qz + qz ); + qxqx2 = ( qx * qx2 ); + qxqy2 = ( qx * qy2 ); + qxqz2 = ( qx * qz2 ); + qxqw2 = ( qw * qx2 ); + qyqy2 = ( qy * qy2 ); + qyqz2 = ( qy * qz2 ); + qyqw2 = ( qw * qy2 ); + qzqz2 = ( qz * qz2 ); + qzqw2 = ( qw * qz2 ); + mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) ); + mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) ); + mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) ); +} + +inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; +} + +inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + return *this; +} + +inline Matrix3 & Matrix3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Matrix3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Matrix3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Matrix3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Matrix3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::getRow( int row ) const +{ + return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); +} + +inline Vector3 & Matrix3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + return *this; +} + +inline const Matrix3 transpose( const Matrix3 & mat ) +{ + return Matrix3( + Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ), + Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ), + Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() ) + ); +} + +inline const Matrix3 inverse( const Matrix3 & mat ) +{ + Vector3 tmp0, tmp1, tmp2; + float detinv; + tmp0 = cross( mat.getCol1(), mat.getCol2() ); + tmp1 = cross( mat.getCol2(), mat.getCol0() ); + tmp2 = cross( mat.getCol0(), mat.getCol1() ); + detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) ); + return Matrix3( + Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ), + Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ), + Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ) + ); +} + +inline float determinant( const Matrix3 & mat ) +{ + return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); +} + +inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ) + ); +} + +inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix3 Matrix3::operator -( ) const +{ + return Matrix3( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ) + ); +} + +inline const Matrix3 absPerElem( const Matrix3 & mat ) +{ + return Matrix3( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::operator *( float scalar ) const +{ + return Matrix3( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ) + ); +} + +inline Matrix3 & Matrix3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ) +{ + return mat * scalar; +} + +inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const +{ + return Matrix3( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +{ + return Matrix3( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::identity( ) +{ + return Matrix3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ) + ); +} + +inline const Matrix3 Matrix3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ) + ); +} + +inline const Matrix3 Matrix3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix3( + Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ), + Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ), + Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( const Quat & unitQuat ) +{ + return Matrix3( unitQuat ); +} + +inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) +{ + return Matrix3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ) + ); +} + +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) +{ + return Matrix3( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ) + ); +} + +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) +{ + return Matrix3( + mulPerElem( mat.getCol0(), scaleVec ), + mulPerElem( mat.getCol1(), scaleVec ), + mulPerElem( mat.getCol2(), scaleVec ) + ); +} + +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix3 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); +} + +inline void print( const Matrix3 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Matrix4::Matrix4( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; +} + +inline Matrix4::Matrix4( float scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +inline Matrix4::Matrix4( const Transform3 & mat ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( mat.getCol3(), 1.0f ); +} + +inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) +{ + Matrix3 mat; + mat = Matrix3( unitQuat ); + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Matrix4 & Matrix4::setElem( int col, int row, float val ) +{ + Vector4 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix4::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector4 Matrix4::getCol0( ) const +{ + return mCol0; +} + +inline const Vector4 Matrix4::getCol1( ) const +{ + return mCol1; +} + +inline const Vector4 Matrix4::getCol2( ) const +{ + return mCol2; +} + +inline const Vector4 Matrix4::getCol3( ) const +{ + return mCol3; +} + +inline const Vector4 Matrix4::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector4 & Matrix4::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; + return *this; +} + +inline const Matrix4 transpose( const Matrix4 & mat ) +{ + return Matrix4( + Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ), + Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ), + Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ), + Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() ) + ); +} + +inline const Matrix4 inverse( const Matrix4 & mat ) +{ + Vector4 res0, res1, res2, res3; + float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); + res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) ); + res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) ); + res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) ); + detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); + res1.setX( ( mI * tmp1 ) ); + res1.setY( ( mM * tmp0 ) ); + res1.setZ( ( mA * tmp1 ) ); + res1.setW( ( mE * tmp0 ) ); + res3.setX( ( mI * tmp3 ) ); + res3.setY( ( mM * tmp2 ) ); + res3.setZ( ( mA * tmp3 ) ); + res3.setW( ( mE * tmp2 ) ); + res2.setX( ( mI * tmp5 ) ); + res2.setY( ( mM * tmp4 ) ); + res2.setZ( ( mA * tmp5 ) ); + res2.setW( ( mE * tmp4 ) ); + tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); + tmp1 = ( ( mM * mF ) - ( mE * mN ) ); + tmp2 = ( ( mI * mD ) - ( mA * mL ) ); + tmp3 = ( ( mM * mH ) - ( mE * mP ) ); + tmp4 = ( ( mI * mC ) - ( mA * mK ) ); + tmp5 = ( ( mM * mG ) - ( mE * mO ) ); + res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) ); + res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) ); + res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) ); + res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) ); + res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) ); + res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) ); + res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) ); + res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) ); + res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) ); + res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) ); + res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) ); + res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) ); + return Matrix4( + ( res0 * detInv ), + ( res1 * detInv ), + ( res2 * detInv ), + ( res3 * detInv ) + ); +} + +inline const Matrix4 affineInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( inverse( affineMat ) ); +} + +inline const Matrix4 orthoInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( orthoInverse( affineMat ) ); +} + +inline float determinant( const Matrix4 & mat ) +{ + float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ); + dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ); + dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ); + dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ); + return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) ); +} + +inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ), + ( mCol3 + mat.mCol3 ) + ); +} + +inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ), + ( mCol3 - mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix4 Matrix4::operator -( ) const +{ + return Matrix4( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ), + ( -mCol3 ) + ); +} + +inline const Matrix4 absPerElem( const Matrix4 & mat ) +{ + return Matrix4( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ), + absPerElem( mat.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::operator *( float scalar ) const +{ + return Matrix4( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ), + ( mCol3 * scalar ) + ); +} + +inline Matrix4 & Matrix4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ) +{ + return mat * scalar; +} + +inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ), + ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ), + ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ), + ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const +{ + return Vector4( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ), + ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ), + ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() ) + ); +} + +inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +{ + return Matrix4( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ), + ( *this * mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +{ + return Matrix4( + ( *this * tfrm.getCol0() ), + ( *this * tfrm.getCol1() ), + ( *this * tfrm.getCol2() ), + ( *this * Point3( tfrm.getCol3() ) ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +{ + return Matrix4( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ), + mulPerElem( mat0.getCol3(), mat1.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::identity( ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +{ + mCol0.setXYZ( mat3.getCol0() ); + mCol1.setXYZ( mat3.getCol1() ); + mCol2.setXYZ( mat3.getCol2() ); + return *this; +} + +inline const Matrix3 Matrix4::getUpper3x3( ) const +{ + return Matrix3( + mCol0.getXYZ( ), + mCol1.getXYZ( ), + mCol2.getXYZ( ) + ); +} + +inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec ) +{ + mCol3.setXYZ( translateVec ); + return *this; +} + +inline const Vector3 Matrix4::getTranslation( ) const +{ + return mCol3.getXYZ( ); +} + +inline const Matrix4 Matrix4::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4::xAxis( ), + Vector4( 0.0f, c, s, 0.0f ), + Vector4( 0.0f, -s, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, 0.0f, -s, 0.0f ), + Vector4::yAxis( ), + Vector4( s, 0.0f, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, s, 0.0f, 0.0f ), + Vector4( -s, c, 0.0f, 0.0f ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix4( + Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ), + Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ), + Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix4( + Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ), + Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ), + Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( const Quat & unitQuat ) +{ + return Matrix4( Transform3::rotation( unitQuat ) ); +} + +inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec ) +{ + return Matrix4( + Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ) +{ + return Matrix4( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ), + mat.getCol3() + ); +} + +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ) +{ + Vector4 scale4; + scale4 = Vector4( scaleVec, 1.0f ); + return Matrix4( + mulPerElem( mat.getCol0(), scale4 ), + mulPerElem( mat.getCol1(), scale4 ), + mulPerElem( mat.getCol2(), scale4 ), + mulPerElem( mat.getCol3(), scale4 ) + ); +} + +inline const Matrix4 Matrix4::translation( const Vector3 & translateVec ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4( translateVec, 1.0f ) + ); +} + +inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ) +{ + Matrix4 m4EyeFrame; + Vector3 v3X, v3Y, v3Z; + v3Y = normalize( upVec ); + v3Z = normalize( ( eyePos - lookAtPos ) ); + v3X = normalize( cross( v3Y, v3Z ) ); + v3Y = cross( v3Z, v3X ); + m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) ); + return orthoInverse( m4EyeFrame ); +} + +inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +{ + float f, rangeInv; + f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) ); + rangeInv = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, f, 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + n2 = ( zNear + zNear ); + return Matrix4( + Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ), + Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ), + Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f ) + ); +} + +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix4 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); + print( mat.getRow( 3 ) ); +} + +inline void print( const Matrix4 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Transform3::Transform3( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; +} + +inline Transform3::Transform3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ) +{ + this->setUpper3x3( tfrm ); + this->setTranslation( translateVec ); +} + +inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec ) +{ + this->setUpper3x3( Matrix3( unitQuat ) ); + this->setTranslation( translateVec ); +} + +inline Transform3 & Transform3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Transform3 & Transform3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Transform3 & Transform3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Transform3 & Transform3::setCol3( const Vector3 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Transform3 & Transform3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Transform3 & Transform3::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Transform3 & Transform3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Transform3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Transform3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Transform3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Transform3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Transform3::getCol3( ) const +{ + return mCol3; +} + +inline const Vector3 Transform3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Transform3::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector3 & Transform3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Transform3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; + return *this; +} + +inline const Transform3 inverse( const Transform3 & tfrm ) +{ + Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2; + float detinv; + tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() ); + tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() ); + tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() ); + detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) ); + inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ); + inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ); + inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 orthoInverse( const Transform3 & tfrm ) +{ + Vector3 inv0, inv1, inv2; + inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() ); + inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() ); + inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 absPerElem( const Transform3 & tfrm ) +{ + return Transform3( + absPerElem( tfrm.getCol0() ), + absPerElem( tfrm.getCol1() ), + absPerElem( tfrm.getCol2() ), + absPerElem( tfrm.getCol3() ) + ); +} + +inline const Vector3 Transform3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Point3 Transform3::operator *( const Point3 & pnt ) const +{ + return Point3( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ) + ); +} + +inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +{ + return Transform3( + ( *this * tfrm.mCol0 ), + ( *this * tfrm.mCol1 ), + ( *this * tfrm.mCol2 ), + Vector3( ( *this * Point3( tfrm.mCol3 ) ) ) + ); +} + +inline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +{ + return Transform3( + mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), + mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ), + mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ), + mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() ) + ); +} + +inline const Transform3 Transform3::identity( ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +{ + mCol0 = tfrm.getCol0(); + mCol1 = tfrm.getCol1(); + mCol2 = tfrm.getCol2(); + return *this; +} + +inline const Matrix3 Transform3::getUpper3x3( ) const +{ + return Matrix3( mCol0, mCol1, mCol2 ); +} + +inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec ) +{ + mCol3 = translateVec; + return *this; +} + +inline const Vector3 Transform3::getTranslation( ) const +{ + return mCol3; +} + +inline const Transform3 Transform3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Transform3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec ) +{ + return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::rotation( const Quat & unitQuat ) +{ + return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::scale( const Vector3 & scaleVec ) +{ + return Transform3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ) +{ + return Transform3( + ( tfrm.getCol0() * scaleVec.getX( ) ), + ( tfrm.getCol1() * scaleVec.getY( ) ), + ( tfrm.getCol2() * scaleVec.getZ( ) ), + tfrm.getCol3() + ); +} + +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ) +{ + return Transform3( + mulPerElem( tfrm.getCol0(), scaleVec ), + mulPerElem( tfrm.getCol1(), scaleVec ), + mulPerElem( tfrm.getCol2(), scaleVec ), + mulPerElem( tfrm.getCol3(), scaleVec ) + ); +} + +inline const Transform3 Transform3::translation( const Vector3 & translateVec ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + translateVec + ); +} + +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Transform3 & tfrm ) +{ + print( tfrm.getRow( 0 ) ); + print( tfrm.getRow( 1 ) ); + print( tfrm.getRow( 2 ) ); +} + +inline void print( const Transform3 & tfrm, const char * name ) +{ + printf("%s:\n", name); + print( tfrm ); +} + +#endif + +inline Quat::Quat( const Matrix3 & tfrm ) +{ + float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw; + int negTrace, ZgtX, ZgtY, YgtX; + int largestXorY, largestYorZ, largestZorX; + + xx = tfrm.getCol0().getX(); + yx = tfrm.getCol0().getY(); + zx = tfrm.getCol0().getZ(); + xy = tfrm.getCol1().getX(); + yy = tfrm.getCol1().getY(); + zy = tfrm.getCol1().getZ(); + xz = tfrm.getCol2().getX(); + yz = tfrm.getCol2().getY(); + zz = tfrm.getCol2().getZ(); + + trace = ( ( xx + yy ) + zz ); + + negTrace = ( trace < 0.0f ); + ZgtX = zz > xx; + ZgtY = zz > yy; + YgtX = yy > xx; + largestXorY = ( !ZgtX || !ZgtY ) && negTrace; + largestYorZ = ( YgtX || ZgtX ) && negTrace; + largestZorX = ( ZgtY || !YgtX ) && negTrace; + + if ( largestXorY ) + { + zz = -zz; + xy = -xy; + } + if ( largestYorZ ) + { + xx = -xx; + yz = -yz; + } + if ( largestZorX ) + { + yy = -yy; + zx = -zx; + } + + radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); + scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) ); + + tmpx = ( ( zy - yz ) * scale ); + tmpy = ( ( xz - zx ) * scale ); + tmpz = ( ( yx - xy ) * scale ); + tmpw = ( radicand * scale ); + qx = tmpx; + qy = tmpy; + qz = tmpz; + qw = tmpw; + + if ( largestXorY ) + { + qx = tmpw; + qy = tmpz; + qz = tmpy; + qw = tmpx; + } + if ( largestYorZ ) + { + tmpx = qx; + tmpz = qz; + qx = qy; + qy = tmpx; + qz = qw; + qw = tmpz; + } + + mXYZW[0] = qx; + mXYZW[1] = qy; + mXYZW[2] = qz; + mXYZW[3] = qw; +} + +inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 ) +{ + return Matrix3( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ) + ); +} + +inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 ) +{ + return Matrix4( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ), + ( tfrm0 * tfrm1.getW( ) ) + ); +} + +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Vector3( + ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ), + ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ), + ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) ) + ); +} + +inline const Matrix3 crossMatrix( const Vector3 & vec ) +{ + return Matrix3( + Vector3( 0.0f, vec.getZ(), -vec.getY() ), + Vector3( -vec.getZ(), 0.0f, vec.getX() ), + Vector3( vec.getY(), -vec.getX(), 0.0f ) + ); +} + +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); +} + +} // namespace Aos +} // namespace Vectormath + +#endif + diff --git a/test/Bullet2/vectormath/neon/quat_aos.h b/test/Bullet2/vectormath/neon/quat_aos.h new file mode 100644 index 000000000..d06184603 --- /dev/null +++ b/test/Bullet2/vectormath/neon/quat_aos.h @@ -0,0 +1,413 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_QUAT_AOS_CPP_H +#define _VECTORMATH_QUAT_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + + inline Quat::Quat( const Quat & quat ) + { + vXYZW = quat.vXYZW; + } + + inline Quat::Quat( float _x, float _y, float _z, float _w ) + { + mXYZW[0] = _x; + mXYZW[1] = _y; + mXYZW[2] = _z; + mXYZW[3] = _w; + } + + inline Quat::Quat( float32x4_t fXYZW ) + { + vXYZW = fXYZW; + } + + inline Quat::Quat( const Vector3 & xyz, float _w ) + { + this->setXYZ( xyz ); + this->setW( _w ); + } + + inline Quat::Quat( const Vector4 & vec ) + { + mXYZW[0] = vec.getX(); + mXYZW[1] = vec.getY(); + mXYZW[2] = vec.getZ(); + mXYZW[3] = vec.getW(); + } + + inline Quat::Quat( float scalar ) + { + vXYZW = vdupq_n_f32(scalar); + } + + inline const Quat Quat::identity( ) + { + return Quat( 0.0f, 0.0f, 0.0f, 1.0f ); + } + + inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ) + { + return ( quat0 + ( ( quat1 - quat0 ) * t ) ); + } + + inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ) + { + Quat start; + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitQuat0, unitQuat1 ); + if ( cosAngle < 0.0f ) { + cosAngle = -cosAngle; + start = ( -unitQuat0 ); + } else { + start = unitQuat0; + } + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) ); + } + + inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ) + { + Quat tmp0, tmp1; + tmp0 = slerp( t, unitQuat0, unitQuat3 ); + tmp1 = slerp( t, unitQuat1, unitQuat2 ); + return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 ); + } + + inline void loadXYZW( Quat & quat, const float * fptr ) + { + quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] ); + } + + inline void storeXYZW( const Quat & quat, float * fptr ) + { + vst1q_f32(fptr, quat.getvXYZW()); + } + + inline Quat & Quat::operator =( const Quat & quat ) + { + vXYZW = quat.getvXYZW(); + return *this; + } + + inline Quat & Quat::setXYZ( const Vector3 & vec ) + { + mXYZW[0] = vec.getX(); + mXYZW[1] = vec.getY(); + mXYZW[2] = vec.getZ(); + return *this; + } + + inline const Vector3 Quat::getXYZ( ) const + { + return Vector3( mXYZW[0], mXYZW[1], mXYZW[2] ); + } + + inline float32x4_t Quat::getvXYZW( ) const + { + return vXYZW; + } + + inline Quat & Quat::setX( float _x ) + { + mXYZW[0] = _x; + return *this; + } + + inline float Quat::getX( ) const + { + return mXYZW[0]; + } + + inline Quat & Quat::setY( float _y ) + { + mXYZW[1] = _y; + return *this; + } + + inline float Quat::getY( ) const + { + return mXYZW[1]; + } + + inline Quat & Quat::setZ( float _z ) + { + mXYZW[2] = _z; + return *this; + } + + inline float Quat::getZ( ) const + { + return mXYZW[2]; + } + + inline Quat & Quat::setW( float _w ) + { + mXYZW[3] = _w; + return *this; + } + + inline float Quat::getW( ) const + { + return mXYZW[3]; + } + + inline Quat & Quat::setElem( int idx, float value ) + { + *(&mXYZW[0] + idx) = value; + return *this; + } + + inline float Quat::getElem( int idx ) const + { + return *(&mXYZW[0] + idx); + } + + inline float & Quat::operator []( int idx ) + { + return *(&mXYZW[0] + idx); + } + + inline float Quat::operator []( int idx ) const + { + return *(&mXYZW[0] + idx); + } + + inline const Quat Quat::operator +( const Quat & quat ) const + { + return Quat( vaddq_f32(vXYZW, quat.vXYZW) ); + } + + inline const Quat Quat::operator -( const Quat & quat ) const + { + return Quat( vsubq_f32(vXYZW, quat.vXYZW) ); + } + + inline const Quat Quat::operator *( float scalar ) const + { + float32x4_t v_scalar = vdupq_n_f32(scalar); + return Quat( vmulq_f32(vXYZW, v_scalar) ); + } + + inline Quat & Quat::operator +=( const Quat & quat ) + { + *this = *this + quat; + return *this; + } + + inline Quat & Quat::operator -=( const Quat & quat ) + { + *this = *this - quat; + return *this; + } + + inline Quat & Quat::operator *=( float scalar ) + { + *this = *this * scalar; + return *this; + } + + inline const Quat Quat::operator /( float scalar ) const + { + return Quat( + ( mXYZW[0] / scalar ), + ( mXYZW[1] / scalar ), + ( mXYZW[2] / scalar ), + ( mXYZW[3] / scalar ) + ); + } + + inline Quat & Quat::operator /=( float scalar ) + { + *this = *this / scalar; + return *this; + } + + inline const Quat Quat::operator -( ) const + { + return Quat( vnegq_f32(vXYZW) ); + } + + inline const Quat operator *( float scalar, const Quat & quat ) + { + return quat * scalar; + } + + inline float dot( const Quat & quat0, const Quat & quat1 ) + { + float result; + result = ( quat0.getX() * quat1.getX() ); + result = ( result + ( quat0.getY() * quat1.getY() ) ); + result = ( result + ( quat0.getZ() * quat1.getZ() ) ); + result = ( result + ( quat0.getW() * quat1.getW() ) ); + return result; + } + + inline float norm( const Quat & quat ) + { + float result; + result = ( quat.getX() * quat.getX() ); + result = ( result + ( quat.getY() * quat.getY() ) ); + result = ( result + ( quat.getZ() * quat.getZ() ) ); + result = ( result + ( quat.getW() * quat.getW() ) ); + return result; + } + + inline float length( const Quat & quat ) + { + return ::sqrtf( norm( quat ) ); + } + + inline const Quat normalize( const Quat & quat ) + { + float lenSqr, lenInv; + lenSqr = norm( quat ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Quat( + ( quat.getX() * lenInv ), + ( quat.getY() * lenInv ), + ( quat.getZ() * lenInv ), + ( quat.getW() * lenInv ) + ); + } + + inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ) + { + float cosHalfAngleX2, recipCosHalfAngleX2; + cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) ); + recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 ); + return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) ); + } + + inline const Quat Quat::rotation( float radians, const Vector3 & unitVec ) + { + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( ( unitVec * s ), c ); + } + + inline const Quat Quat::rotationX( float radians ) + { + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( s, 0.0f, 0.0f, c ); + } + + inline const Quat Quat::rotationY( float radians ) + { + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, s, 0.0f, c ); + } + + inline const Quat Quat::rotationZ( float radians ) + { + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, 0.0f, s, c ); + } + + inline const Quat Quat::operator *( const Quat & quat ) const + { + return Quat( + ( ( ( ( mXYZW[3] * quat.mXYZW[0] ) + ( mXYZW[0] * quat.mXYZW[3] ) ) + ( mXYZW[1] * quat.mXYZW[2] ) ) - ( mXYZW[2] * quat.mXYZW[1] ) ), + ( ( ( ( mXYZW[3] * quat.mXYZW[1] ) + ( mXYZW[1] * quat.mXYZW[3] ) ) + ( mXYZW[2] * quat.mXYZW[0] ) ) - ( mXYZW[0] * quat.mXYZW[2] ) ), + ( ( ( ( mXYZW[3] * quat.mXYZW[2] ) + ( mXYZW[2] * quat.mXYZW[3] ) ) + ( mXYZW[0] * quat.mXYZW[1] ) ) - ( mXYZW[1] * quat.mXYZW[0] ) ), + ( ( ( ( mXYZW[3] * quat.mXYZW[3] ) - ( mXYZW[0] * quat.mXYZW[0] ) ) - ( mXYZW[1] * quat.mXYZW[1] ) ) - ( mXYZW[2] * quat.mXYZW[2] ) ) + ); + } + + inline Quat & Quat::operator *=( const Quat & quat ) + { + *this = *this * quat; + return *this; + } + + inline const Vector3 rotate( const Quat & quat, const Vector3 & vec ) + { + float tmpX, tmpY, tmpZ, tmpW; + tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) ); + tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) ); + tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) ); + tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) ); + return Vector3( + ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ), + ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ), + ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) ) + ); + } + + inline const Quat conj( const Quat & quat ) + { + return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() ); + } + + inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ) + { + return Quat( + ( select1 )? quat1.getX() : quat0.getX(), + ( select1 )? quat1.getY() : quat0.getY(), + ( select1 )? quat1.getZ() : quat0.getZ(), + ( select1 )? quat1.getW() : quat0.getW() + ); + } + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Quat & quat ) +{ + printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +inline void print( const Quat & quat, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif + diff --git a/test/Bullet2/vectormath/neon/vec_aos.h b/test/Bullet2/vectormath/neon/vec_aos.h new file mode 100644 index 000000000..7bcf8dbec --- /dev/null +++ b/test/Bullet2/vectormath/neon/vec_aos.h @@ -0,0 +1,1427 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_VEC_AOS_CPP_H +#define _VECTORMATH_VEC_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_SLERP_TOL 0.999f + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Vector3::Vector3( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; +} + +inline Vector3::Vector3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Vector3::Vector3( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); +} + +inline Vector3::Vector3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Vector3 Vector3::xAxis( ) +{ + return Vector3( 1.0f, 0.0f, 0.0f ); +} + +inline const Vector3 Vector3::yAxis( ) +{ + return Vector3( 0.0f, 1.0f, 0.0f ); +} + +inline const Vector3 Vector3::zAxis( ) +{ + return Vector3( 0.0f, 0.0f, 1.0f ); +} + +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZ( Vector3 & vec, const float * fptr ) +{ + vec = Vector3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Vector3 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector3 & Vector3::operator =( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + return *this; +} + +inline Vector3 & Vector3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector3::getX( ) const +{ + return mX; +} + +inline Vector3 & Vector3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector3::getY( ) const +{ + return mY; +} + +inline Vector3 & Vector3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector3::getZ( ) const +{ + return mZ; +} + +inline Vector3 & Vector3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Vector3::operator +( const Vector3 & vec ) const +{ + return Vector3( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ) + ); +} + +inline const Vector3 Vector3::operator -( const Vector3 & vec ) const +{ + return Vector3( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ) + ); +} + +inline const Point3 Vector3::operator +( const Point3 & pnt ) const +{ + return Point3( + ( mX + pnt.getX() ), + ( mY + pnt.getY() ), + ( mZ + pnt.getZ() ) + ); +} + +inline const Vector3 Vector3::operator *( float scalar ) const +{ + return Vector3( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ) + ); +} + +inline Vector3 & Vector3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector3 & Vector3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector3 & Vector3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector3 Vector3::operator /( float scalar ) const +{ + return Vector3( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ) + ); +} + +inline Vector3 & Vector3::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector3 Vector3::operator -( ) const +{ + return Vector3( + -mX, + -mY, + -mZ + ); +} + +inline const Vector3 operator *( float scalar, const Vector3 & vec ) +{ + return vec * scalar; +} + +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ) + ); +} + +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ) + ); +} + +inline const Vector3 recipPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ) + ); +} + +inline const Vector3 sqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ) + ); +} + +inline const Vector3 rsqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ) + ); +} + +inline const Vector3 absPerElem( const Vector3 & vec ) +{ + return Vector3( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ) + ); +} + +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ) + ); +} + +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float maxElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + return result; +} + +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float minElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + return result; +} + +inline float sum( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + return result; +} + +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + return result; +} + +inline float lengthSqr( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + return result; +} + +inline float length( const Vector3 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector3 normalize( const Vector3 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector3( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ) + ); +} + +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ), + ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ), + ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) ) + ); +} + +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ) +{ + return Vector3( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector3 & vec ) +{ + printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() ); +} + +inline void print( const Vector3 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() ); +} + +#endif + +inline Vector4::Vector4( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; +} + +inline Vector4::Vector4( float _x, float _y, float _z, float _w ) +{ + mX = _x; + mY = _y; + mZ = _z; + mW = _w; +} + +inline Vector4::Vector4( const Vector3 & xyz, float _w ) +{ + this->setXYZ( xyz ); + this->setW( _w ); +} + +inline Vector4::Vector4( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + mW = 0.0f; +} + +inline Vector4::Vector4( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); + mW = 1.0f; +} + +inline Vector4::Vector4( const Quat & quat ) +{ + mX = quat.getX(); + mY = quat.getY(); + mZ = quat.getZ(); + mW = quat.getW(); +} + +inline Vector4::Vector4( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; + mW = scalar; +} + +inline const Vector4 Vector4::xAxis( ) +{ + return Vector4( 1.0f, 0.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::yAxis( ) +{ + return Vector4( 0.0f, 1.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::zAxis( ) +{ + return Vector4( 0.0f, 0.0f, 1.0f, 0.0f ); +} + +inline const Vector4 Vector4::wAxis( ) +{ + return Vector4( 0.0f, 0.0f, 0.0f, 1.0f ); +} + +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZW( Vector4 & vec, const float * fptr ) +{ + vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] ); +} + +inline void storeXYZW( const Vector4 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); + fptr[3] = vec.getW(); +} + +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector4 & Vector4::operator =( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; + return *this; +} + +inline Vector4 & Vector4::setXYZ( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + return *this; +} + +inline const Vector3 Vector4::getXYZ( ) const +{ + return Vector3( mX, mY, mZ ); +} + +inline Vector4 & Vector4::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector4::getX( ) const +{ + return mX; +} + +inline Vector4 & Vector4::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector4::getY( ) const +{ + return mY; +} + +inline Vector4 & Vector4::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector4::getZ( ) const +{ + return mZ; +} + +inline Vector4 & Vector4::setW( float _w ) +{ + mW = _w; + return *this; +} + +inline float Vector4::getW( ) const +{ + return mW; +} + +inline Vector4 & Vector4::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector4::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector4::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector4::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector4 Vector4::operator +( const Vector4 & vec ) const +{ + return Vector4( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ), + ( mW + vec.mW ) + ); +} + +inline const Vector4 Vector4::operator -( const Vector4 & vec ) const +{ + return Vector4( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ), + ( mW - vec.mW ) + ); +} + +inline const Vector4 Vector4::operator *( float scalar ) const +{ + return Vector4( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ), + ( mW * scalar ) + ); +} + +inline Vector4 & Vector4::operator +=( const Vector4 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector4 & Vector4::operator -=( const Vector4 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector4 & Vector4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector4 Vector4::operator /( float scalar ) const +{ + return Vector4( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ), + ( mW / scalar ) + ); +} + +inline Vector4 & Vector4::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector4 Vector4::operator -( ) const +{ + return Vector4( + -mX, + -mY, + -mZ, + -mW + ); +} + +inline const Vector4 operator *( float scalar, const Vector4 & vec ) +{ + return vec * scalar; +} + +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ), + ( vec0.getW() * vec1.getW() ) + ); +} + +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ), + ( vec0.getW() / vec1.getW() ) + ); +} + +inline const Vector4 recipPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ), + ( 1.0f / vec.getW() ) + ); +} + +inline const Vector4 sqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ), + sqrtf( vec.getW() ) + ); +} + +inline const Vector4 rsqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ), + ( 1.0f / sqrtf( vec.getW() ) ) + ); +} + +inline const Vector4 absPerElem( const Vector4 & vec ) +{ + return Vector4( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ), + fabsf( vec.getW() ) + ); +} + +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ), + ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() ) + ); +} + +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float maxElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + result = (vec.getW() > result)? vec.getW() : result; + return result; +} + +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float minElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + result = (vec.getW() < result)? vec.getW() : result; + return result; +} + +inline float sum( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + result = ( result + vec.getW() ); + return result; +} + +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + result = ( result + ( vec0.getW() * vec1.getW() ) ); + return result; +} + +inline float lengthSqr( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + result = ( result + ( vec.getW() * vec.getW() ) ); + return result; +} + +inline float length( const Vector4 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector4 normalize( const Vector4 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector4( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ), + ( vec.getW() * lenInv ) + ); +} + +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ) +{ + return Vector4( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ(), + ( select1 )? vec1.getW() : vec0.getW() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector4 & vec ) +{ + printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +inline void print( const Vector4 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +#endif + +inline Point3::Point3( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; +} + +inline Point3::Point3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Point3::Point3( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); +} + +inline Point3::Point3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ) +{ + return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); +} + +inline void loadXYZ( Point3 & pnt, const float * fptr ) +{ + pnt = Point3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Point3 & pnt, float * fptr ) +{ + fptr[0] = pnt.getX(); + fptr[1] = pnt.getY(); + fptr[2] = pnt.getZ(); +} + +inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Point3 & Point3::operator =( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; + return *this; +} + +inline Point3 & Point3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Point3::getX( ) const +{ + return mX; +} + +inline Point3 & Point3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Point3::getY( ) const +{ + return mY; +} + +inline Point3 & Point3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Point3::getZ( ) const +{ + return mZ; +} + +inline Point3 & Point3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Point3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Point3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Point3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Point3::operator -( const Point3 & pnt ) const +{ + return Vector3( + ( mX - pnt.mX ), + ( mY - pnt.mY ), + ( mZ - pnt.mZ ) + ); +} + +inline const Point3 Point3::operator +( const Vector3 & vec ) const +{ + return Point3( + ( mX + vec.getX() ), + ( mY + vec.getY() ), + ( mZ + vec.getZ() ) + ); +} + +inline const Point3 Point3::operator -( const Vector3 & vec ) const +{ + return Point3( + ( mX - vec.getX() ), + ( mY - vec.getY() ), + ( mZ - vec.getZ() ) + ); +} + +inline Point3 & Point3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Point3 & Point3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() * pnt1.getX() ), + ( pnt0.getY() * pnt1.getY() ), + ( pnt0.getZ() * pnt1.getZ() ) + ); +} + +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() / pnt1.getX() ), + ( pnt0.getY() / pnt1.getY() ), + ( pnt0.getZ() / pnt1.getZ() ) + ); +} + +inline const Point3 recipPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / pnt.getX() ), + ( 1.0f / pnt.getY() ), + ( 1.0f / pnt.getZ() ) + ); +} + +inline const Point3 sqrtPerElem( const Point3 & pnt ) +{ + return Point3( + sqrtf( pnt.getX() ), + sqrtf( pnt.getY() ), + sqrtf( pnt.getZ() ) + ); +} + +inline const Point3 rsqrtPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / sqrtf( pnt.getX() ) ), + ( 1.0f / sqrtf( pnt.getY() ) ), + ( 1.0f / sqrtf( pnt.getZ() ) ) + ); +} + +inline const Point3 absPerElem( const Point3 & pnt ) +{ + return Point3( + fabsf( pnt.getX() ), + fabsf( pnt.getY() ), + fabsf( pnt.getZ() ) + ); +} + +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ), + ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ), + ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() ) + ); +} + +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float maxElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() > result)? pnt.getZ() : result; + return result; +} + +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float minElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() < result)? pnt.getZ() : result; + return result; +} + +inline float sum( const Point3 & pnt ) +{ + float result; + result = ( pnt.getX() + pnt.getY() ); + result = ( result + pnt.getZ() ); + return result; +} + +inline const Point3 scale( const Point3 & pnt, float scaleVal ) +{ + return mulPerElem( pnt, Point3( scaleVal ) ); +} + +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) +{ + return mulPerElem( pnt, Point3( scaleVec ) ); +} + +inline float projection( const Point3 & pnt, const Vector3 & unitVec ) +{ + float result; + result = ( pnt.getX() * unitVec.getX() ); + result = ( result + ( pnt.getY() * unitVec.getY() ) ); + result = ( result + ( pnt.getZ() * unitVec.getZ() ) ); + return result; +} + +inline float distSqrFromOrigin( const Point3 & pnt ) +{ + return lengthSqr( Vector3( pnt ) ); +} + +inline float distFromOrigin( const Point3 & pnt ) +{ + return length( Vector3( pnt ) ); +} + +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return lengthSqr( ( pnt1 - pnt0 ) ); +} + +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return length( ( pnt1 - pnt0 ) ); +} + +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ) +{ + return Point3( + ( select1 )? pnt1.getX() : pnt0.getX(), + ( select1 )? pnt1.getY() : pnt0.getY(), + ( select1 )? pnt1.getZ() : pnt0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Point3 & pnt ) +{ + printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +inline void print( const Point3 & pnt, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif + diff --git a/test/Bullet2/vectormath/neon/vectormath_aos.h b/test/Bullet2/vectormath/neon/vectormath_aos.h new file mode 100644 index 000000000..97bdc278a --- /dev/null +++ b/test/Bullet2/vectormath/neon/vectormath_aos.h @@ -0,0 +1,1890 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +This source version has been altered. + +*/ + +#ifndef _VECTORMATH_AOS_CPP_H +#define _VECTORMATH_AOS_CPP_H + +#include + +#ifdef _VECTORMATH_DEBUG +#include +#endif + +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; +class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Vector3( ) { }; + + // Copy a 3-D vector + // + inline Vector3( const Vector3 & vec ); + + // Construct a 3-D vector from x, y, and z elements + // + inline Vector3( float x, float y, float z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit inline Vector3( const Point3 & pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit inline Vector3( float scalar ); + + // Assign one 3-D vector to another + // + inline Vector3 & operator =( const Vector3 & vec ); + + // Set the x element of a 3-D vector + // + inline Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + inline Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + inline Vector3 & setZ( float z ); + + // Get the x element of a 3-D vector + // + inline float getX( ) const; + + // Get the y element of a 3-D vector + // + inline float getY( ) const; + + // Get the z element of a 3-D vector + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + inline Vector3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 3-D vectors + // + inline const Vector3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + inline const Vector3 operator -( const Vector3 & vec ) const; + + // Add a 3-D vector to a 3-D point + // + inline const Point3 operator +( const Point3 & pnt ) const; + + // Multiply a 3-D vector by a scalar + // + inline const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + inline const Vector3 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Vector3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Vector3 & operator -=( const Vector3 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector3 & operator /=( float scalar ); + + // Negate all elements of a 3-D vector + // + inline const Vector3 operator -( ) const; + + // Construct x axis + // + static inline const Vector3 xAxis( ); + + // Construct y axis + // + static inline const Vector3 yAxis( ); + + // Construct z axis + // + static inline const Vector3 zAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 3-D vector by a scalar +// +inline const Vector3 operator *( float scalar, const Vector3 & vec ); + +// Multiply two 3-D vectors per element +// +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector3 recipPerElem( const Vector3 & vec ); + +// Compute the square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector3 sqrtPerElem( const Vector3 & vec ); + +// Compute the reciprocal square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector3 rsqrtPerElem( const Vector3 & vec ); + +// Compute the absolute value of a 3-D vector per element +// +inline const Vector3 absPerElem( const Vector3 & vec ); + +// Copy sign from one 3-D vector to another, per element +// +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum of two 3-D vectors per element +// +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Minimum of two 3-D vectors per element +// +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum element of a 3-D vector +// +inline float maxElem( const Vector3 & vec ); + +// Minimum element of a 3-D vector +// +inline float minElem( const Vector3 & vec ); + +// Compute the sum of all elements of a 3-D vector +// +inline float sum( const Vector3 & vec ); + +// Compute the dot product of two 3-D vectors +// +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the square of the length of a 3-D vector +// +inline float lengthSqr( const Vector3 & vec ); + +// Compute the length of a 3-D vector +// +inline float length( const Vector3 & vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector3 normalize( const Vector3 & vec ); + +// Compute cross product of two 3-D vectors +// +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ); + +// Outer product of two 3-D vectors +// +inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +inline const Matrix3 crossMatrix( const Vector3 & vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ); + +// Conditionally select between two 3-D vectors +// +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Vector3 & vec, const float * fptr ); + +// Store x, y, and z elements of a 3-D vector in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Vector3 & vec, float * fptr ); + +// Load three-half-floats as a 3-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ); + +// Store a 3-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + float mX; + float mY; + float mZ; + float mW; + +public: + // Default constructor; does no initialization + // + inline Vector4( ) { }; + + // Copy a 4-D vector + // + inline Vector4( const Vector4 & vec ); + + // Construct a 4-D vector from x, y, z, and w elements + // + inline Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + inline Vector4( const Vector3 & xyz, float w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit inline Vector4( const Vector3 & vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit inline Vector4( const Point3 & pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit inline Vector4( const Quat & quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit inline Vector4( float scalar ); + + // Assign one 4-D vector to another + // + inline Vector4 & operator =( const Vector4 & vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w element. + // + inline Vector4 & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a 4-D vector + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + inline Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + inline Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + inline Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + inline Vector4 & setW( float w ); + + // Get the x element of a 4-D vector + // + inline float getX( ) const; + + // Get the y element of a 4-D vector + // + inline float getY( ) const; + + // Get the z element of a 4-D vector + // + inline float getZ( ) const; + + // Get the w element of a 4-D vector + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + inline Vector4 & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 4-D vectors + // + inline const Vector4 operator +( const Vector4 & vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + inline const Vector4 operator -( const Vector4 & vec ) const; + + // Multiply a 4-D vector by a scalar + // + inline const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + inline const Vector4 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + inline Vector4 & operator +=( const Vector4 & vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + inline Vector4 & operator -=( const Vector4 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector4 & operator /=( float scalar ); + + // Negate all elements of a 4-D vector + // + inline const Vector4 operator -( ) const; + + // Construct x axis + // + static inline const Vector4 xAxis( ); + + // Construct y axis + // + static inline const Vector4 yAxis( ); + + // Construct z axis + // + static inline const Vector4 zAxis( ); + + // Construct w axis + // + static inline const Vector4 wAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 4-D vector by a scalar +// +inline const Vector4 operator *( float scalar, const Vector4 & vec ); + +// Multiply two 4-D vectors per element +// +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector4 recipPerElem( const Vector4 & vec ); + +// Compute the square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector4 sqrtPerElem( const Vector4 & vec ); + +// Compute the reciprocal square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector4 rsqrtPerElem( const Vector4 & vec ); + +// Compute the absolute value of a 4-D vector per element +// +inline const Vector4 absPerElem( const Vector4 & vec ); + +// Copy sign from one 4-D vector to another, per element +// +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum of two 4-D vectors per element +// +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Minimum of two 4-D vectors per element +// +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum element of a 4-D vector +// +inline float maxElem( const Vector4 & vec ); + +// Minimum element of a 4-D vector +// +inline float minElem( const Vector4 & vec ); + +// Compute the sum of all elements of a 4-D vector +// +inline float sum( const Vector4 & vec ); + +// Compute the dot product of two 4-D vectors +// +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the square of the length of a 4-D vector +// +inline float lengthSqr( const Vector4 & vec ); + +// Compute the length of a 4-D vector +// +inline float length( const Vector4 & vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector4 normalize( const Vector4 & vec ); + +// Outer product of two 4-D vectors +// +inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ); + +// Conditionally select between two 4-D vectors +// +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Vector4 & vec, const float * fptr ); + +// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Vector4 & vec, float * fptr ); + +// Load four-half-floats as a 4-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ); + +// Store a 4-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Point3( ) { }; + + // Copy a 3-D point + // + inline Point3( const Point3 & pnt ); + + // Construct a 3-D point from x, y, and z elements + // + inline Point3( float x, float y, float z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit inline Point3( const Vector3 & vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit inline Point3( float scalar ); + + // Assign one 3-D point to another + // + inline Point3 & operator =( const Point3 & pnt ); + + // Set the x element of a 3-D point + // + inline Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + inline Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + inline Point3 & setZ( float z ); + + // Get the x element of a 3-D point + // + inline float getX( ) const; + + // Get the y element of a 3-D point + // + inline float getY( ) const; + + // Get the z element of a 3-D point + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + inline Point3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D point by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + inline const Vector3 operator -( const Point3 & pnt ) const; + + // Add a 3-D point to a 3-D vector + // + inline const Point3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + inline const Point3 operator -( const Vector3 & vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Point3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Point3 & operator -=( const Vector3 & vec ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply two 3-D points per element +// +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Point3 recipPerElem( const Point3 & pnt ); + +// Compute the square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Point3 sqrtPerElem( const Point3 & pnt ); + +// Compute the reciprocal square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Point3 rsqrtPerElem( const Point3 & pnt ); + +// Compute the absolute value of a 3-D point per element +// +inline const Point3 absPerElem( const Point3 & pnt ); + +// Copy sign from one 3-D point to another, per element +// +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum of two 3-D points per element +// +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Minimum of two 3-D points per element +// +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum element of a 3-D point +// +inline float maxElem( const Point3 & pnt ); + +// Minimum element of a 3-D point +// +inline float minElem( const Point3 & pnt ); + +// Compute the sum of all elements of a 3-D point +// +inline float sum( const Point3 & pnt ); + +// Apply uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, float scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +inline float projection( const Point3 & pnt, const Vector3 & unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +inline float distSqrFromOrigin( const Point3 & pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +inline float distFromOrigin( const Point3 & pnt ); + +// Compute the square of the distance between two 3-D points +// +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the distance between two 3-D points +// +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ); + +// Conditionally select between two 3-D points +// +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Point3 & pnt, const float * fptr ); + +// Store x, y, and z elements of a 3-D point in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Point3 & pnt, float * fptr ); + +// Load three-half-floats as a 3-D point +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr ); + +// Store a 3-D point as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// +class Quat +{ +#if defined( __APPLE__ ) && defined( BT_USE_NEON ) + union{ + float32x4_t vXYZW; + float mXYZW[4]; + }; +#else + float mX; + float mY; + float mZ; + float mW; +#endif + +public: + // Default constructor; does no initialization + // + inline Quat( ) { }; + + // Copy a quaternion + // + inline Quat( const Quat & quat ); + + // Construct a quaternion from x, y, z, and w elements + // + inline Quat( float x, float y, float z, float w ); + + // Construct a quaternion from vector of x, y, z, and w elements + // + inline Quat( float32x4_t fXYZW ); + + // Construct a quaternion from a 3-D vector and a scalar + // + inline Quat( const Vector3 & xyz, float w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit inline Quat( const Vector4 & vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit inline Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit inline Quat( float scalar ); + + // Assign one quaternion to another + // + inline Quat & operator =( const Quat & quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element. + // + inline Quat & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a quaternion + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + inline Quat & setX( float x ); + + // Set the y element of a quaternion + // + inline Quat & setY( float y ); + + // Set the z element of a quaternion + // + inline Quat & setZ( float z ); + + // Set the w element of a quaternion + // + inline Quat & setW( float w ); + +#if defined( __APPLE__ ) && defined( BT_USE_NEON ) + inline float32x4_t getvXYZW( ) const; +#endif + + // Get the x element of a quaternion + // + inline float getX( ) const; + + // Get the y element of a quaternion + // + inline float getY( ) const; + + // Get the z element of a quaternion + // + inline float getZ( ) const; + + // Get the w element of a quaternion + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + inline Quat & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a quaternion by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two quaternions + // + inline const Quat operator +( const Quat & quat ) const; + + // Subtract a quaternion from another quaternion + // + inline const Quat operator -( const Quat & quat ) const; + + // Multiply two quaternions + // + inline const Quat operator *( const Quat & quat ) const; + + // Multiply a quaternion by a scalar + // + inline const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + inline const Quat operator /( float scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + inline Quat & operator +=( const Quat & quat ); + + // Perform compound assignment and subtraction by a quaternion + // + inline Quat & operator -=( const Quat & quat ); + + // Perform compound assignment and multiplication by a quaternion + // + inline Quat & operator *=( const Quat & quat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Quat & operator /=( float scalar ); + + // Negate all elements of a quaternion + // + inline const Quat operator -( ) const; + + // Construct an identity quaternion + // + static inline const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static inline const Quat rotation( float radians, const Vector3 & unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static inline const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static inline const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static inline const Quat rotationZ( float radians ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a quaternion by a scalar +// +inline const Quat operator *( float scalar, const Quat & quat ); + +// Compute the conjugate of a quaternion +// +inline const Quat conj( const Quat & quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec ); + +// Compute the dot product of two quaternions +// +inline float dot( const Quat & quat0, const Quat & quat1 ); + +// Compute the norm of a quaternion +// +inline float norm( const Quat & quat ); + +// Compute the length of a quaternion +// +inline float length( const Quat & quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +inline const Quat normalize( const Quat & quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ); + +// Spherical quadrangle interpolation +// +inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ); + +// Conditionally select between two quaternions +// +inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Quat & quat, const float * fptr ); + +// Store x, y, z, and w elements of a quaternion in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Quat & quat, float * fptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + inline Matrix3( ) { }; + + // Copy a 3x3 matrix + // + inline Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit inline Matrix3( const Quat & unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit inline Matrix3( float scalar ); + + // Assign one 3x3 matrix to another + // + inline Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + inline Matrix3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x3 matrix + // + inline Matrix3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x3 matrix + // + inline Matrix3 & setCol2( const Vector3 & col2 ); + + // Get column 0 of a 3x3 matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + inline const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setRow( int row, const Vector3 & vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + inline Matrix3 & setElem( int col, int row, float val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + inline const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + inline const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + inline const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + inline const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply two 3x3 matrices + // + inline const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + inline Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + inline Matrix3 & operator -=( const Matrix3 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + inline Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static inline const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static inline const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static inline const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static inline const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix3 rotation( const Quat & unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static inline const Matrix3 scale( const Vector3 & scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +inline const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +inline const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +inline float determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + inline Matrix4( ) { }; + + // Copy a 4x4 matrix + // + inline Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit inline Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit inline Matrix4( float scalar ); + + // Assign one 4x4 matrix to another + // + inline Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 4x4 matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + inline Matrix4 & setCol0( const Vector4 & col0 ); + + // Set column 1 of a 4x4 matrix + // + inline Matrix4 & setCol1( const Vector4 & col1 ); + + // Set column 2 of a 4x4 matrix + // + inline Matrix4 & setCol2( const Vector4 & col2 ); + + // Set column 3 of a 4x4 matrix + // + inline Matrix4 & setCol3( const Vector4 & col3 ); + + // Get column 0 of a 4x4 matrix + // + inline const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + inline const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + inline const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + inline const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setCol( int col, const Vector4 & vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector4 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + inline Matrix4 & setElem( int col, int row, float val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + inline const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + inline const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + inline const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + inline const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + inline const Vector4 operator *( const Vector4 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector + // + inline const Vector4 operator *( const Vector3 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D point + // + inline const Vector4 operator *( const Point3 & pnt ) const; + + // Multiply two 4x4 matrices + // + inline const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + inline const Matrix4 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and addition with a 4x4 matrix + // + inline Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + inline Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + inline Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static inline const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static inline const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static inline const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static inline const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes + // + static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix4 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix4 rotation( const Quat & unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static inline const Matrix4 scale( const Vector3 & scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static inline const Matrix4 translation( const Vector3 & translateVec ); + + // Construct viewing matrix based on eye position, position looked at, and up direction + // + static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ); + + // Construct a perspective projection matrix + // + static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on frustum + // + static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar +// +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +inline const Matrix4 absPerElem( const Matrix4 & mat ); + +// Transpose of a 4x4 matrix +// +inline const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +inline const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Determinant of a 4x4 matrix +// +inline float determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + inline Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + inline Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector + // + inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector + // + inline Transform3( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit inline Transform3( float scalar ); + + // Assign one 3x4 transformation matrix to another + // + inline Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix + // + inline Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // + inline Transform3 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + inline Transform3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + inline Transform3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + inline Transform3 & setCol2( const Vector3 & col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + inline Transform3 & setCol3( const Vector3 & col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + inline const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + inline const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + inline Transform3 & setElem( int col, int row, float val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + inline const Point3 operator *( const Point3 & pnt ) const; + + // Multiply two 3x4 transformation matrices + // + inline const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static inline const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static inline const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static inline const Transform3 rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static inline const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes + // + static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static inline const Transform3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Transform3 rotation( const Quat & unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static inline const Transform3 scale( const Vector3 & scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static inline const Transform3 translation( const Vector3 & translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +inline const Transform3 absPerElem( const Transform3 & tfrm ); + +// Inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. +// +inline const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +inline const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif + diff --git a/test/Bullet2/vectormath/scalar/boolInVec.h b/test/Bullet2/vectormath/scalar/boolInVec.h new file mode 100644 index 000000000..c5eeeebd7 --- /dev/null +++ b/test/Bullet2/vectormath/scalar/boolInVec.h @@ -0,0 +1,225 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _BOOLINVEC_H +#define _BOOLINVEC_H + +#include +namespace Vectormath { + +class floatInVec; + +//-------------------------------------------------------------------------------------------------- +// boolInVec class +// + +class boolInVec +{ +private: + unsigned int mData; + +public: + // Default constructor; does no initialization + // + inline boolInVec( ) { }; + + // Construct from a value converted from float + // + inline boolInVec(floatInVec vec); + + // Explicit cast from bool + // + explicit inline boolInVec(bool scalar); + + // Explicit cast to bool + // + inline bool getAsBool() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to bool + // + inline operator bool() const; +#endif + + // Boolean negation operator + // + inline const boolInVec operator ! () const; + + // Assignment operator + // + inline boolInVec& operator = (boolInVec vec); + + // Boolean and assignment operator + // + inline boolInVec& operator &= (boolInVec vec); + + // Boolean exclusive or assignment operator + // + inline boolInVec& operator ^= (boolInVec vec); + + // Boolean or assignment operator + // + inline boolInVec& operator |= (boolInVec vec); + +}; + +// Equal operator +// +inline const boolInVec operator == (boolInVec vec0, boolInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (boolInVec vec0, boolInVec vec1); + +// And operator +// +inline const boolInVec operator & (boolInVec vec0, boolInVec vec1); + +// Exclusive or operator +// +inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1); + +// Or operator +// +inline const boolInVec operator | (boolInVec vec0, boolInVec vec1); + +// Conditionally select between two values +// +inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// boolInVec implementation +// + +#include "floatInVec.h" + +namespace Vectormath { + +inline +boolInVec::boolInVec(floatInVec vec) +{ + *this = (vec != floatInVec(0.0f)); +} + +inline +boolInVec::boolInVec(bool scalar) +{ + mData = -(int)scalar; +} + +inline +bool +boolInVec::getAsBool() const +{ + return (mData > 0); +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +boolInVec::operator bool() const +{ + return getAsBool(); +} +#endif + +inline +const boolInVec +boolInVec::operator ! () const +{ + return boolInVec(!mData); +} + +inline +boolInVec& +boolInVec::operator = (boolInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +boolInVec& +boolInVec::operator &= (boolInVec vec) +{ + *this = *this & vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator ^= (boolInVec vec) +{ + *this = *this ^ vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator |= (boolInVec vec) +{ + *this = *this | vec; + return *this; +} + +inline +const boolInVec +operator == (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() == vec1.getAsBool()); +} + +inline +const boolInVec +operator != (boolInVec vec0, boolInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const boolInVec +operator & (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() & vec1.getAsBool()); +} + +inline +const boolInVec +operator | (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() | vec1.getAsBool()); +} + +inline +const boolInVec +operator ^ (boolInVec vec0, boolInVec vec1) +{ + return boolInVec(vec0.getAsBool() ^ vec1.getAsBool()); +} + +inline +const boolInVec +select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // boolInVec_h diff --git a/test/Bullet2/vectormath/scalar/floatInVec.h b/test/Bullet2/vectormath/scalar/floatInVec.h new file mode 100644 index 000000000..12d89e43d --- /dev/null +++ b/test/Bullet2/vectormath/scalar/floatInVec.h @@ -0,0 +1,343 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ +#ifndef _FLOATINVEC_H +#define _FLOATINVEC_H + +#include +namespace Vectormath { + +class boolInVec; + +//-------------------------------------------------------------------------------------------------- +// floatInVec class +// + +// A class representing a scalar float value contained in a vector register +// This class does not support fastmath +class floatInVec +{ +private: + float mData; + +public: + // Default constructor; does no initialization + // + inline floatInVec( ) { }; + + // Construct from a value converted from bool + // + inline floatInVec(boolInVec vec); + + // Explicit cast from float + // + explicit inline floatInVec(float scalar); + + // Explicit cast to float + // + inline float getAsFloat() const; + +#ifndef _VECTORMATH_NO_SCALAR_CAST + // Implicit cast to float + // + inline operator float() const; +#endif + + // Post increment (add 1.0f) + // + inline const floatInVec operator ++ (int); + + // Post decrement (subtract 1.0f) + // + inline const floatInVec operator -- (int); + + // Pre increment (add 1.0f) + // + inline floatInVec& operator ++ (); + + // Pre decrement (subtract 1.0f) + // + inline floatInVec& operator -- (); + + // Negation operator + // + inline const floatInVec operator - () const; + + // Assignment operator + // + inline floatInVec& operator = (floatInVec vec); + + // Multiplication assignment operator + // + inline floatInVec& operator *= (floatInVec vec); + + // Division assignment operator + // + inline floatInVec& operator /= (floatInVec vec); + + // Addition assignment operator + // + inline floatInVec& operator += (floatInVec vec); + + // Subtraction assignment operator + // + inline floatInVec& operator -= (floatInVec vec); + +}; + +// Multiplication operator +// +inline const floatInVec operator * (floatInVec vec0, floatInVec vec1); + +// Division operator +// +inline const floatInVec operator / (floatInVec vec0, floatInVec vec1); + +// Addition operator +// +inline const floatInVec operator + (floatInVec vec0, floatInVec vec1); + +// Subtraction operator +// +inline const floatInVec operator - (floatInVec vec0, floatInVec vec1); + +// Less than operator +// +inline const boolInVec operator < (floatInVec vec0, floatInVec vec1); + +// Less than or equal operator +// +inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1); + +// Greater than operator +// +inline const boolInVec operator > (floatInVec vec0, floatInVec vec1); + +// Greater than or equal operator +// +inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1); + +// Equal operator +// +inline const boolInVec operator == (floatInVec vec0, floatInVec vec1); + +// Not equal operator +// +inline const boolInVec operator != (floatInVec vec0, floatInVec vec1); + +// Conditionally select between two values +// +inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1); + + +} // namespace Vectormath + + +//-------------------------------------------------------------------------------------------------- +// floatInVec implementation +// + +#include "boolInVec.h" + +namespace Vectormath { + +inline +floatInVec::floatInVec(boolInVec vec) +{ + mData = float(vec.getAsBool()); +} + +inline +floatInVec::floatInVec(float scalar) +{ + mData = scalar; +} + +inline +float +floatInVec::getAsFloat() const +{ + return mData; +} + +#ifndef _VECTORMATH_NO_SCALAR_CAST +inline +floatInVec::operator float() const +{ + return getAsFloat(); +} +#endif + +inline +const floatInVec +floatInVec::operator ++ (int) +{ + float olddata = mData; + operator ++(); + return floatInVec(olddata); +} + +inline +const floatInVec +floatInVec::operator -- (int) +{ + float olddata = mData; + operator --(); + return floatInVec(olddata); +} + +inline +floatInVec& +floatInVec::operator ++ () +{ + *this += floatInVec(1.0f); + return *this; +} + +inline +floatInVec& +floatInVec::operator -- () +{ + *this -= floatInVec(1.0f); + return *this; +} + +inline +const floatInVec +floatInVec::operator - () const +{ + return floatInVec(-mData); +} + +inline +floatInVec& +floatInVec::operator = (floatInVec vec) +{ + mData = vec.mData; + return *this; +} + +inline +floatInVec& +floatInVec::operator *= (floatInVec vec) +{ + *this = *this * vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator /= (floatInVec vec) +{ + *this = *this / vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator += (floatInVec vec) +{ + *this = *this + vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator -= (floatInVec vec) +{ + *this = *this - vec; + return *this; +} + +inline +const floatInVec +operator * (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() * vec1.getAsFloat()); +} + +inline +const floatInVec +operator / (floatInVec num, floatInVec den) +{ + return floatInVec(num.getAsFloat() / den.getAsFloat()); +} + +inline +const floatInVec +operator + (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() + vec1.getAsFloat()); +} + +inline +const floatInVec +operator - (floatInVec vec0, floatInVec vec1) +{ + return floatInVec(vec0.getAsFloat() - vec1.getAsFloat()); +} + +inline +const boolInVec +operator < (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() < vec1.getAsFloat()); +} + +inline +const boolInVec +operator <= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 > vec1); +} + +inline +const boolInVec +operator > (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() > vec1.getAsFloat()); +} + +inline +const boolInVec +operator >= (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 < vec1); +} + +inline +const boolInVec +operator == (floatInVec vec0, floatInVec vec1) +{ + return boolInVec(vec0.getAsFloat() == vec1.getAsFloat()); +} + +inline +const boolInVec +operator != (floatInVec vec0, floatInVec vec1) +{ + return !(vec0 == vec1); +} + +inline +const floatInVec +select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1) +{ + return (select_vec1.getAsBool() == 0) ? vec0 : vec1; +} + +} // namespace Vectormath + +#endif // floatInVec_h diff --git a/test/Bullet2/vectormath/scalar/mat_aos.h b/test/Bullet2/vectormath/scalar/mat_aos.h new file mode 100644 index 000000000..e103243d1 --- /dev/null +++ b/test/Bullet2/vectormath/scalar/mat_aos.h @@ -0,0 +1,1630 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_MAT_AOS_CPP_H +#define _VECTORMATH_MAT_AOS_CPP_H + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_PI_OVER_2 1.570796327f + +//----------------------------------------------------------------------------- +// Definitions + +inline Matrix3::Matrix3( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; +} + +inline Matrix3::Matrix3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +inline Matrix3::Matrix3( const Quat & unitQuat ) +{ + float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2; + qx = unitQuat.getX(); + qy = unitQuat.getY(); + qz = unitQuat.getZ(); + qw = unitQuat.getW(); + qx2 = ( qx + qx ); + qy2 = ( qy + qy ); + qz2 = ( qz + qz ); + qxqx2 = ( qx * qx2 ); + qxqy2 = ( qx * qy2 ); + qxqz2 = ( qx * qz2 ); + qxqw2 = ( qw * qx2 ); + qyqy2 = ( qy * qy2 ); + qyqz2 = ( qy * qz2 ); + qyqw2 = ( qw * qy2 ); + qzqz2 = ( qz * qz2 ); + qzqw2 = ( qw * qz2 ); + mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) ); + mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) ); + mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) ); +} + +inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; +} + +inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + return *this; +} + +inline Matrix3 & Matrix3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Matrix3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Matrix3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Matrix3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Matrix3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::getRow( int row ) const +{ + return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); +} + +inline Vector3 & Matrix3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + return *this; +} + +inline const Matrix3 transpose( const Matrix3 & mat ) +{ + return Matrix3( + Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ), + Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ), + Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() ) + ); +} + +inline const Matrix3 inverse( const Matrix3 & mat ) +{ + Vector3 tmp0, tmp1, tmp2; + float detinv; + tmp0 = cross( mat.getCol1(), mat.getCol2() ); + tmp1 = cross( mat.getCol2(), mat.getCol0() ); + tmp2 = cross( mat.getCol0(), mat.getCol1() ); + detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) ); + return Matrix3( + Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ), + Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ), + Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ) + ); +} + +inline float determinant( const Matrix3 & mat ) +{ + return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); +} + +inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ) + ); +} + +inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix3 Matrix3::operator -( ) const +{ + return Matrix3( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ) + ); +} + +inline const Matrix3 absPerElem( const Matrix3 & mat ) +{ + return Matrix3( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::operator *( float scalar ) const +{ + return Matrix3( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ) + ); +} + +inline Matrix3 & Matrix3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ) +{ + return mat * scalar; +} + +inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const +{ + return Matrix3( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +{ + return Matrix3( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::identity( ) +{ + return Matrix3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ) + ); +} + +inline const Matrix3 Matrix3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ) + ); +} + +inline const Matrix3 Matrix3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix3( + Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ), + Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ), + Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( const Quat & unitQuat ) +{ + return Matrix3( unitQuat ); +} + +inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) +{ + return Matrix3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ) + ); +} + +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) +{ + return Matrix3( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ) + ); +} + +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) +{ + return Matrix3( + mulPerElem( mat.getCol0(), scaleVec ), + mulPerElem( mat.getCol1(), scaleVec ), + mulPerElem( mat.getCol2(), scaleVec ) + ); +} + +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix3 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); +} + +inline void print( const Matrix3 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Matrix4::Matrix4( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; +} + +inline Matrix4::Matrix4( float scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +inline Matrix4::Matrix4( const Transform3 & mat ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( mat.getCol3(), 1.0f ); +} + +inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) +{ + Matrix3 mat; + mat = Matrix3( unitQuat ); + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Matrix4 & Matrix4::setElem( int col, int row, float val ) +{ + Vector4 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Matrix4::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector4 Matrix4::getCol0( ) const +{ + return mCol0; +} + +inline const Vector4 Matrix4::getCol1( ) const +{ + return mCol1; +} + +inline const Vector4 Matrix4::getCol2( ) const +{ + return mCol2; +} + +inline const Vector4 Matrix4::getCol3( ) const +{ + return mCol3; +} + +inline const Vector4 Matrix4::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector4 & Matrix4::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; + return *this; +} + +inline const Matrix4 transpose( const Matrix4 & mat ) +{ + return Matrix4( + Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ), + Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ), + Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ), + Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() ) + ); +} + +inline const Matrix4 inverse( const Matrix4 & mat ) +{ + Vector4 res0, res1, res2, res3; + float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); + res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) ); + res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) ); + res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) ); + detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); + res1.setX( ( mI * tmp1 ) ); + res1.setY( ( mM * tmp0 ) ); + res1.setZ( ( mA * tmp1 ) ); + res1.setW( ( mE * tmp0 ) ); + res3.setX( ( mI * tmp3 ) ); + res3.setY( ( mM * tmp2 ) ); + res3.setZ( ( mA * tmp3 ) ); + res3.setW( ( mE * tmp2 ) ); + res2.setX( ( mI * tmp5 ) ); + res2.setY( ( mM * tmp4 ) ); + res2.setZ( ( mA * tmp5 ) ); + res2.setW( ( mE * tmp4 ) ); + tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); + tmp1 = ( ( mM * mF ) - ( mE * mN ) ); + tmp2 = ( ( mI * mD ) - ( mA * mL ) ); + tmp3 = ( ( mM * mH ) - ( mE * mP ) ); + tmp4 = ( ( mI * mC ) - ( mA * mK ) ); + tmp5 = ( ( mM * mG ) - ( mE * mO ) ); + res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) ); + res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) ); + res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) ); + res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) ); + res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) ); + res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) ); + res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) ); + res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) ); + res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) ); + res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) ); + res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) ); + res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) ); + return Matrix4( + ( res0 * detInv ), + ( res1 * detInv ), + ( res2 * detInv ), + ( res3 * detInv ) + ); +} + +inline const Matrix4 affineInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( inverse( affineMat ) ); +} + +inline const Matrix4 orthoInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( orthoInverse( affineMat ) ); +} + +inline float determinant( const Matrix4 & mat ) +{ + float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + mA = mat.getCol0().getX(); + mB = mat.getCol0().getY(); + mC = mat.getCol0().getZ(); + mD = mat.getCol0().getW(); + mE = mat.getCol1().getX(); + mF = mat.getCol1().getY(); + mG = mat.getCol1().getZ(); + mH = mat.getCol1().getW(); + mI = mat.getCol2().getX(); + mJ = mat.getCol2().getY(); + mK = mat.getCol2().getZ(); + mL = mat.getCol2().getW(); + mM = mat.getCol3().getX(); + mN = mat.getCol3().getY(); + mO = mat.getCol3().getZ(); + mP = mat.getCol3().getW(); + tmp0 = ( ( mK * mD ) - ( mC * mL ) ); + tmp1 = ( ( mO * mH ) - ( mG * mP ) ); + tmp2 = ( ( mB * mK ) - ( mJ * mC ) ); + tmp3 = ( ( mF * mO ) - ( mN * mG ) ); + tmp4 = ( ( mJ * mD ) - ( mB * mL ) ); + tmp5 = ( ( mN * mH ) - ( mF * mP ) ); + dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ); + dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ); + dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ); + dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ); + return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) ); +} + +inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ), + ( mCol3 + mat.mCol3 ) + ); +} + +inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ), + ( mCol3 - mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix4 Matrix4::operator -( ) const +{ + return Matrix4( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ), + ( -mCol3 ) + ); +} + +inline const Matrix4 absPerElem( const Matrix4 & mat ) +{ + return Matrix4( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ), + absPerElem( mat.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::operator *( float scalar ) const +{ + return Matrix4( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ), + ( mCol3 * scalar ) + ); +} + +inline Matrix4 & Matrix4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ) +{ + return mat * scalar; +} + +inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ), + ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ), + ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ), + ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const +{ + return Vector4( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ), + ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ); +} + +inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const +{ + return Vector4( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ), + ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() ) + ); +} + +inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +{ + return Matrix4( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ), + ( *this * mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +{ + return Matrix4( + ( *this * tfrm.getCol0() ), + ( *this * tfrm.getCol1() ), + ( *this * tfrm.getCol2() ), + ( *this * Point3( tfrm.getCol3() ) ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +{ + return Matrix4( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ), + mulPerElem( mat0.getCol3(), mat1.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::identity( ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +{ + mCol0.setXYZ( mat3.getCol0() ); + mCol1.setXYZ( mat3.getCol1() ); + mCol2.setXYZ( mat3.getCol2() ); + return *this; +} + +inline const Matrix3 Matrix4::getUpper3x3( ) const +{ + return Matrix3( + mCol0.getXYZ( ), + mCol1.getXYZ( ), + mCol2.getXYZ( ) + ); +} + +inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec ) +{ + mCol3.setXYZ( translateVec ); + return *this; +} + +inline const Vector3 Matrix4::getTranslation( ) const +{ + return mCol3.getXYZ( ); +} + +inline const Matrix4 Matrix4::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4::xAxis( ), + Vector4( 0.0f, c, s, 0.0f ), + Vector4( 0.0f, -s, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, 0.0f, -s, 0.0f ), + Vector4::yAxis( ), + Vector4( s, 0.0f, c, 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Matrix4( + Vector4( c, s, 0.0f, 0.0f ), + Vector4( -s, c, 0.0f, 0.0f ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Matrix4( + Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ), + Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ), + Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec ) +{ + float x, y, z, s, c, oneMinusC, xy, yz, zx; + s = sinf( radians ); + c = cosf( radians ); + x = unitVec.getX(); + y = unitVec.getY(); + z = unitVec.getZ(); + xy = ( x * y ); + yz = ( y * z ); + zx = ( z * x ); + oneMinusC = ( 1.0f - c ); + return Matrix4( + Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ), + Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ), + Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( const Quat & unitQuat ) +{ + return Matrix4( Transform3::rotation( unitQuat ) ); +} + +inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec ) +{ + return Matrix4( + Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ) +{ + return Matrix4( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ), + mat.getCol3() + ); +} + +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ) +{ + Vector4 scale4; + scale4 = Vector4( scaleVec, 1.0f ); + return Matrix4( + mulPerElem( mat.getCol0(), scale4 ), + mulPerElem( mat.getCol1(), scale4 ), + mulPerElem( mat.getCol2(), scale4 ), + mulPerElem( mat.getCol3(), scale4 ) + ); +} + +inline const Matrix4 Matrix4::translation( const Vector3 & translateVec ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4( translateVec, 1.0f ) + ); +} + +inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ) +{ + Matrix4 m4EyeFrame; + Vector3 v3X, v3Y, v3Z; + v3Y = normalize( upVec ); + v3Z = normalize( ( eyePos - lookAtPos ) ); + v3X = normalize( cross( v3Y, v3Z ) ); + v3Y = cross( v3Z, v3X ); + m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) ); + return orthoInverse( m4EyeFrame ); +} + +inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +{ + float f, rangeInv; + f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) ); + rangeInv = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, f, 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + n2 = ( zNear + zNear ); + return Matrix4( + Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ), + Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ), + Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f ) + ); +} + +inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf; + sum_rl = ( right + left ); + sum_tb = ( top + bottom ); + sum_nf = ( zNear + zFar ); + inv_rl = ( 1.0f / ( right - left ) ); + inv_tb = ( 1.0f / ( top - bottom ) ); + inv_nf = ( 1.0f / ( zNear - zFar ) ); + return Matrix4( + Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ), + Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ), + Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ), + Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f ) + ); +} + +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix4 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); + print( mat.getRow( 3 ) ); +} + +inline void print( const Matrix4 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Transform3::Transform3( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; +} + +inline Transform3::Transform3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ) +{ + this->setUpper3x3( tfrm ); + this->setTranslation( translateVec ); +} + +inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec ) +{ + this->setUpper3x3( Matrix3( unitQuat ) ); + this->setTranslation( translateVec ); +} + +inline Transform3 & Transform3::setCol0( const Vector3 & _col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Transform3 & Transform3::setCol1( const Vector3 & _col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Transform3 & Transform3::setCol2( const Vector3 & _col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Transform3 & Transform3::setCol3( const Vector3 & _col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Transform3 & Transform3::setCol( int col, const Vector3 & vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Transform3 & Transform3::setRow( int row, const Vector4 & vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Transform3 & Transform3::setElem( int col, int row, float val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline float Transform3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Transform3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Transform3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Transform3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Transform3::getCol3( ) const +{ + return mCol3; +} + +inline const Vector3 Transform3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Transform3::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector3 & Transform3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Transform3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; + return *this; +} + +inline const Transform3 inverse( const Transform3 & tfrm ) +{ + Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2; + float detinv; + tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() ); + tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() ); + tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() ); + detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) ); + inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ); + inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ); + inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 orthoInverse( const Transform3 & tfrm ) +{ + Vector3 inv0, inv1, inv2; + inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() ); + inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() ); + inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() ); + return Transform3( + inv0, + inv1, + inv2, + Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) + ); +} + +inline const Transform3 absPerElem( const Transform3 & tfrm ) +{ + return Transform3( + absPerElem( tfrm.getCol0() ), + absPerElem( tfrm.getCol1() ), + absPerElem( tfrm.getCol2() ), + absPerElem( tfrm.getCol3() ) + ); +} + +inline const Vector3 Transform3::operator *( const Vector3 & vec ) const +{ + return Vector3( + ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ), + ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ), + ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ); +} + +inline const Point3 Transform3::operator *( const Point3 & pnt ) const +{ + return Point3( + ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ), + ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ), + ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ) + ); +} + +inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +{ + return Transform3( + ( *this * tfrm.mCol0 ), + ( *this * tfrm.mCol1 ), + ( *this * tfrm.mCol2 ), + Vector3( ( *this * Point3( tfrm.mCol3 ) ) ) + ); +} + +inline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +{ + return Transform3( + mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), + mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ), + mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ), + mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() ) + ); +} + +inline const Transform3 Transform3::identity( ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +{ + mCol0 = tfrm.getCol0(); + mCol1 = tfrm.getCol1(); + mCol2 = tfrm.getCol2(); + return *this; +} + +inline const Matrix3 Transform3::getUpper3x3( ) const +{ + return Matrix3( mCol0, mCol1, mCol2 ); +} + +inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec ) +{ + mCol3 = translateVec; + return *this; +} + +inline const Vector3 Transform3::getTranslation( ) const +{ + return mCol3; +} + +inline const Transform3 Transform3::rotationX( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3::xAxis( ), + Vector3( 0.0f, c, s ), + Vector3( 0.0f, -s, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationY( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, 0.0f, -s ), + Vector3::yAxis( ), + Vector3( s, 0.0f, c ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZ( float radians ) +{ + float s, c; + s = sinf( radians ); + c = cosf( radians ); + return Transform3( + Vector3( c, s, 0.0f ), + Vector3( -s, c, 0.0f ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ ) +{ + float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1; + sX = sinf( radiansXYZ.getX() ); + cX = cosf( radiansXYZ.getX() ); + sY = sinf( radiansXYZ.getY() ); + cY = cosf( radiansXYZ.getY() ); + sZ = sinf( radiansXYZ.getZ() ); + cZ = cosf( radiansXYZ.getZ() ); + tmp0 = ( cZ * sY ); + tmp1 = ( sZ * sY ); + return Transform3( + Vector3( ( cZ * cY ), ( sZ * cY ), -sY ), + Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ), + Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec ) +{ + return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::rotation( const Quat & unitQuat ) +{ + return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::scale( const Vector3 & scaleVec ) +{ + return Transform3( + Vector3( scaleVec.getX(), 0.0f, 0.0f ), + Vector3( 0.0f, scaleVec.getY(), 0.0f ), + Vector3( 0.0f, 0.0f, scaleVec.getZ() ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ) +{ + return Transform3( + ( tfrm.getCol0() * scaleVec.getX( ) ), + ( tfrm.getCol1() * scaleVec.getY( ) ), + ( tfrm.getCol2() * scaleVec.getZ( ) ), + tfrm.getCol3() + ); +} + +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ) +{ + return Transform3( + mulPerElem( tfrm.getCol0(), scaleVec ), + mulPerElem( tfrm.getCol1(), scaleVec ), + mulPerElem( tfrm.getCol2(), scaleVec ), + mulPerElem( tfrm.getCol3(), scaleVec ) + ); +} + +inline const Transform3 Transform3::translation( const Vector3 & translateVec ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + translateVec + ); +} + +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Transform3 & tfrm ) +{ + print( tfrm.getRow( 0 ) ); + print( tfrm.getRow( 1 ) ); + print( tfrm.getRow( 2 ) ); +} + +inline void print( const Transform3 & tfrm, const char * name ) +{ + printf("%s:\n", name); + print( tfrm ); +} + +#endif + +inline Quat::Quat( const Matrix3 & tfrm ) +{ + float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw; + int negTrace, ZgtX, ZgtY, YgtX; + int largestXorY, largestYorZ, largestZorX; + + xx = tfrm.getCol0().getX(); + yx = tfrm.getCol0().getY(); + zx = tfrm.getCol0().getZ(); + xy = tfrm.getCol1().getX(); + yy = tfrm.getCol1().getY(); + zy = tfrm.getCol1().getZ(); + xz = tfrm.getCol2().getX(); + yz = tfrm.getCol2().getY(); + zz = tfrm.getCol2().getZ(); + + trace = ( ( xx + yy ) + zz ); + + negTrace = ( trace < 0.0f ); + ZgtX = zz > xx; + ZgtY = zz > yy; + YgtX = yy > xx; + largestXorY = ( !ZgtX || !ZgtY ) && negTrace; + largestYorZ = ( YgtX || ZgtX ) && negTrace; + largestZorX = ( ZgtY || !YgtX ) && negTrace; + + if ( largestXorY ) + { + zz = -zz; + xy = -xy; + } + if ( largestYorZ ) + { + xx = -xx; + yz = -yz; + } + if ( largestZorX ) + { + yy = -yy; + zx = -zx; + } + + radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); + scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) ); + + tmpx = ( ( zy - yz ) * scale ); + tmpy = ( ( xz - zx ) * scale ); + tmpz = ( ( yx - xy ) * scale ); + tmpw = ( radicand * scale ); + qx = tmpx; + qy = tmpy; + qz = tmpz; + qw = tmpw; + + if ( largestXorY ) + { + qx = tmpw; + qy = tmpz; + qz = tmpy; + qw = tmpx; + } + if ( largestYorZ ) + { + tmpx = qx; + tmpz = qz; + qx = qy; + qy = tmpx; + qz = qw; + qw = tmpz; + } + + mX = qx; + mY = qy; + mZ = qz; + mW = qw; +} + +inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 ) +{ + return Matrix3( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ) + ); +} + +inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 ) +{ + return Matrix4( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ), + ( tfrm0 * tfrm1.getW( ) ) + ); +} + +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Vector3( + ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ), + ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ), + ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) ) + ); +} + +inline const Matrix3 crossMatrix( const Vector3 & vec ) +{ + return Matrix3( + Vector3( 0.0f, vec.getZ(), -vec.getY() ), + Vector3( -vec.getZ(), 0.0f, vec.getX() ), + Vector3( vec.getY(), -vec.getX(), 0.0f ) + ); +} + +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ) +{ + return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); +} + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/scalar/quat_aos.h b/test/Bullet2/vectormath/scalar/quat_aos.h new file mode 100644 index 000000000..764e01708 --- /dev/null +++ b/test/Bullet2/vectormath/scalar/quat_aos.h @@ -0,0 +1,433 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_QUAT_AOS_CPP_H +#define _VECTORMATH_QUAT_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Quat::Quat( const Quat & quat ) +{ + mX = quat.mX; + mY = quat.mY; + mZ = quat.mZ; + mW = quat.mW; +} + +inline Quat::Quat( float _x, float _y, float _z, float _w ) +{ + mX = _x; + mY = _y; + mZ = _z; + mW = _w; +} + +inline Quat::Quat( const Vector3 & xyz, float _w ) +{ + this->setXYZ( xyz ); + this->setW( _w ); +} + +inline Quat::Quat( const Vector4 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + mW = vec.getW(); +} + +inline Quat::Quat( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; + mW = scalar; +} + +inline const Quat Quat::identity( ) +{ + return Quat( 0.0f, 0.0f, 0.0f, 1.0f ); +} + +inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ) +{ + return ( quat0 + ( ( quat1 - quat0 ) * t ) ); +} + +inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ) +{ + Quat start; + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitQuat0, unitQuat1 ); + if ( cosAngle < 0.0f ) { + cosAngle = -cosAngle; + start = ( -unitQuat0 ); + } else { + start = unitQuat0; + } + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) ); +} + +inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ) +{ + Quat tmp0, tmp1; + tmp0 = slerp( t, unitQuat0, unitQuat3 ); + tmp1 = slerp( t, unitQuat1, unitQuat2 ); + return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 ); +} + +inline void loadXYZW( Quat & quat, const float * fptr ) +{ + quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] ); +} + +inline void storeXYZW( const Quat & quat, float * fptr ) +{ + fptr[0] = quat.getX(); + fptr[1] = quat.getY(); + fptr[2] = quat.getZ(); + fptr[3] = quat.getW(); +} + +inline Quat & Quat::operator =( const Quat & quat ) +{ + mX = quat.mX; + mY = quat.mY; + mZ = quat.mZ; + mW = quat.mW; + return *this; +} + +inline Quat & Quat::setXYZ( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + return *this; +} + +inline const Vector3 Quat::getXYZ( ) const +{ + return Vector3( mX, mY, mZ ); +} + +inline Quat & Quat::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Quat::getX( ) const +{ + return mX; +} + +inline Quat & Quat::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Quat::getY( ) const +{ + return mY; +} + +inline Quat & Quat::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Quat::getZ( ) const +{ + return mZ; +} + +inline Quat & Quat::setW( float _w ) +{ + mW = _w; + return *this; +} + +inline float Quat::getW( ) const +{ + return mW; +} + +inline Quat & Quat::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Quat::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Quat::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Quat::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Quat Quat::operator +( const Quat & quat ) const +{ + return Quat( + ( mX + quat.mX ), + ( mY + quat.mY ), + ( mZ + quat.mZ ), + ( mW + quat.mW ) + ); +} + +inline const Quat Quat::operator -( const Quat & quat ) const +{ + return Quat( + ( mX - quat.mX ), + ( mY - quat.mY ), + ( mZ - quat.mZ ), + ( mW - quat.mW ) + ); +} + +inline const Quat Quat::operator *( float scalar ) const +{ + return Quat( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ), + ( mW * scalar ) + ); +} + +inline Quat & Quat::operator +=( const Quat & quat ) +{ + *this = *this + quat; + return *this; +} + +inline Quat & Quat::operator -=( const Quat & quat ) +{ + *this = *this - quat; + return *this; +} + +inline Quat & Quat::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Quat Quat::operator /( float scalar ) const +{ + return Quat( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ), + ( mW / scalar ) + ); +} + +inline Quat & Quat::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Quat Quat::operator -( ) const +{ + return Quat( + -mX, + -mY, + -mZ, + -mW + ); +} + +inline const Quat operator *( float scalar, const Quat & quat ) +{ + return quat * scalar; +} + +inline float dot( const Quat & quat0, const Quat & quat1 ) +{ + float result; + result = ( quat0.getX() * quat1.getX() ); + result = ( result + ( quat0.getY() * quat1.getY() ) ); + result = ( result + ( quat0.getZ() * quat1.getZ() ) ); + result = ( result + ( quat0.getW() * quat1.getW() ) ); + return result; +} + +inline float norm( const Quat & quat ) +{ + float result; + result = ( quat.getX() * quat.getX() ); + result = ( result + ( quat.getY() * quat.getY() ) ); + result = ( result + ( quat.getZ() * quat.getZ() ) ); + result = ( result + ( quat.getW() * quat.getW() ) ); + return result; +} + +inline float length( const Quat & quat ) +{ + return ::sqrtf( norm( quat ) ); +} + +inline const Quat normalize( const Quat & quat ) +{ + float lenSqr, lenInv; + lenSqr = norm( quat ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Quat( + ( quat.getX() * lenInv ), + ( quat.getY() * lenInv ), + ( quat.getZ() * lenInv ), + ( quat.getW() * lenInv ) + ); +} + +inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ) +{ + float cosHalfAngleX2, recipCosHalfAngleX2; + cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) ); + recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 ); + return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) ); +} + +inline const Quat Quat::rotation( float radians, const Vector3 & unitVec ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( ( unitVec * s ), c ); +} + +inline const Quat Quat::rotationX( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( s, 0.0f, 0.0f, c ); +} + +inline const Quat Quat::rotationY( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, s, 0.0f, c ); +} + +inline const Quat Quat::rotationZ( float radians ) +{ + float s, c, angle; + angle = ( radians * 0.5f ); + s = sinf( angle ); + c = cosf( angle ); + return Quat( 0.0f, 0.0f, s, c ); +} + +inline const Quat Quat::operator *( const Quat & quat ) const +{ + return Quat( + ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ), + ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ), + ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ), + ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) ) + ); +} + +inline Quat & Quat::operator *=( const Quat & quat ) +{ + *this = *this * quat; + return *this; +} + +inline const Vector3 rotate( const Quat & quat, const Vector3 & vec ) +{ + float tmpX, tmpY, tmpZ, tmpW; + tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) ); + tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) ); + tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) ); + tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) ); + return Vector3( + ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ), + ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ), + ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) ) + ); +} + +inline const Quat conj( const Quat & quat ) +{ + return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() ); +} + +inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ) +{ + return Quat( + ( select1 )? quat1.getX() : quat0.getX(), + ( select1 )? quat1.getY() : quat0.getY(), + ( select1 )? quat1.getZ() : quat0.getZ(), + ( select1 )? quat1.getW() : quat0.getW() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Quat & quat ) +{ + printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +inline void print( const Quat & quat, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/scalar/vec_aos.h b/test/Bullet2/vectormath/scalar/vec_aos.h new file mode 100644 index 000000000..46d4d6b3e --- /dev/null +++ b/test/Bullet2/vectormath/scalar/vec_aos.h @@ -0,0 +1,1426 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_VEC_AOS_CPP_H +#define _VECTORMATH_VEC_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Constants + +#define _VECTORMATH_SLERP_TOL 0.999f + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Vector3::Vector3( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; +} + +inline Vector3::Vector3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Vector3::Vector3( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); +} + +inline Vector3::Vector3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Vector3 Vector3::xAxis( ) +{ + return Vector3( 1.0f, 0.0f, 0.0f ); +} + +inline const Vector3 Vector3::yAxis( ) +{ + return Vector3( 0.0f, 1.0f, 0.0f ); +} + +inline const Vector3 Vector3::zAxis( ) +{ + return Vector3( 0.0f, 0.0f, 1.0f ); +} + +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZ( Vector3 & vec, const float * fptr ) +{ + vec = Vector3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Vector3 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector3 & Vector3::operator =( const Vector3 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + return *this; +} + +inline Vector3 & Vector3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector3::getX( ) const +{ + return mX; +} + +inline Vector3 & Vector3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector3::getY( ) const +{ + return mY; +} + +inline Vector3 & Vector3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector3::getZ( ) const +{ + return mZ; +} + +inline Vector3 & Vector3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Vector3::operator +( const Vector3 & vec ) const +{ + return Vector3( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ) + ); +} + +inline const Vector3 Vector3::operator -( const Vector3 & vec ) const +{ + return Vector3( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ) + ); +} + +inline const Point3 Vector3::operator +( const Point3 & pnt ) const +{ + return Point3( + ( mX + pnt.getX() ), + ( mY + pnt.getY() ), + ( mZ + pnt.getZ() ) + ); +} + +inline const Vector3 Vector3::operator *( float scalar ) const +{ + return Vector3( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ) + ); +} + +inline Vector3 & Vector3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector3 & Vector3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector3 & Vector3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector3 Vector3::operator /( float scalar ) const +{ + return Vector3( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ) + ); +} + +inline Vector3 & Vector3::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector3 Vector3::operator -( ) const +{ + return Vector3( + -mX, + -mY, + -mZ + ); +} + +inline const Vector3 operator *( float scalar, const Vector3 & vec ) +{ + return vec * scalar; +} + +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ) + ); +} + +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ) + ); +} + +inline const Vector3 recipPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ) + ); +} + +inline const Vector3 sqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ) + ); +} + +inline const Vector3 rsqrtPerElem( const Vector3 & vec ) +{ + return Vector3( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ) + ); +} + +inline const Vector3 absPerElem( const Vector3 & vec ) +{ + return Vector3( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ) + ); +} + +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ) + ); +} + +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float maxElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + return result; +} + +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ() + ); +} + +inline float minElem( const Vector3 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + return result; +} + +inline float sum( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + return result; +} + +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + return result; +} + +inline float lengthSqr( const Vector3 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + return result; +} + +inline float length( const Vector3 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector3 normalize( const Vector3 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector3( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ) + ); +} + +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ) +{ + return Vector3( + ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ), + ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ), + ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) ) + ); +} + +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ) +{ + return Vector3( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector3 & vec ) +{ + printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() ); +} + +inline void print( const Vector3 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() ); +} + +#endif + +inline Vector4::Vector4( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; +} + +inline Vector4::Vector4( float _x, float _y, float _z, float _w ) +{ + mX = _x; + mY = _y; + mZ = _z; + mW = _w; +} + +inline Vector4::Vector4( const Vector3 & xyz, float _w ) +{ + this->setXYZ( xyz ); + this->setW( _w ); +} + +inline Vector4::Vector4( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + mW = 0.0f; +} + +inline Vector4::Vector4( const Point3 & pnt ) +{ + mX = pnt.getX(); + mY = pnt.getY(); + mZ = pnt.getZ(); + mW = 1.0f; +} + +inline Vector4::Vector4( const Quat & quat ) +{ + mX = quat.getX(); + mY = quat.getY(); + mZ = quat.getZ(); + mW = quat.getW(); +} + +inline Vector4::Vector4( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; + mW = scalar; +} + +inline const Vector4 Vector4::xAxis( ) +{ + return Vector4( 1.0f, 0.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::yAxis( ) +{ + return Vector4( 0.0f, 1.0f, 0.0f, 0.0f ); +} + +inline const Vector4 Vector4::zAxis( ) +{ + return Vector4( 0.0f, 0.0f, 1.0f, 0.0f ); +} + +inline const Vector4 Vector4::wAxis( ) +{ + return Vector4( 0.0f, 0.0f, 0.0f, 1.0f ); +} + +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ) +{ + float recipSinAngle, scale0, scale1, cosAngle, angle; + cosAngle = dot( unitVec0, unitVec1 ); + if ( cosAngle < _VECTORMATH_SLERP_TOL ) { + angle = acosf( cosAngle ); + recipSinAngle = ( 1.0f / sinf( angle ) ); + scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle ); + scale1 = ( sinf( ( t * angle ) ) * recipSinAngle ); + } else { + scale0 = ( 1.0f - t ); + scale1 = t; + } + return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) ); +} + +inline void loadXYZW( Vector4 & vec, const float * fptr ) +{ + vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] ); +} + +inline void storeXYZW( const Vector4 & vec, float * fptr ) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); + fptr[3] = vec.getW(); +} + +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 4; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Vector4 & Vector4::operator =( const Vector4 & vec ) +{ + mX = vec.mX; + mY = vec.mY; + mZ = vec.mZ; + mW = vec.mW; + return *this; +} + +inline Vector4 & Vector4::setXYZ( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); + return *this; +} + +inline const Vector3 Vector4::getXYZ( ) const +{ + return Vector3( mX, mY, mZ ); +} + +inline Vector4 & Vector4::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Vector4::getX( ) const +{ + return mX; +} + +inline Vector4 & Vector4::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Vector4::getY( ) const +{ + return mY; +} + +inline Vector4 & Vector4::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Vector4::getZ( ) const +{ + return mZ; +} + +inline Vector4 & Vector4::setW( float _w ) +{ + mW = _w; + return *this; +} + +inline float Vector4::getW( ) const +{ + return mW; +} + +inline Vector4 & Vector4::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Vector4::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Vector4::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Vector4::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector4 Vector4::operator +( const Vector4 & vec ) const +{ + return Vector4( + ( mX + vec.mX ), + ( mY + vec.mY ), + ( mZ + vec.mZ ), + ( mW + vec.mW ) + ); +} + +inline const Vector4 Vector4::operator -( const Vector4 & vec ) const +{ + return Vector4( + ( mX - vec.mX ), + ( mY - vec.mY ), + ( mZ - vec.mZ ), + ( mW - vec.mW ) + ); +} + +inline const Vector4 Vector4::operator *( float scalar ) const +{ + return Vector4( + ( mX * scalar ), + ( mY * scalar ), + ( mZ * scalar ), + ( mW * scalar ) + ); +} + +inline Vector4 & Vector4::operator +=( const Vector4 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector4 & Vector4::operator -=( const Vector4 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector4 & Vector4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector4 Vector4::operator /( float scalar ) const +{ + return Vector4( + ( mX / scalar ), + ( mY / scalar ), + ( mZ / scalar ), + ( mW / scalar ) + ); +} + +inline Vector4 & Vector4::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector4 Vector4::operator -( ) const +{ + return Vector4( + -mX, + -mY, + -mZ, + -mW + ); +} + +inline const Vector4 operator *( float scalar, const Vector4 & vec ) +{ + return vec * scalar; +} + +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() * vec1.getX() ), + ( vec0.getY() * vec1.getY() ), + ( vec0.getZ() * vec1.getZ() ), + ( vec0.getW() * vec1.getW() ) + ); +} + +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec0.getX() / vec1.getX() ), + ( vec0.getY() / vec1.getY() ), + ( vec0.getZ() / vec1.getZ() ), + ( vec0.getW() / vec1.getW() ) + ); +} + +inline const Vector4 recipPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / vec.getX() ), + ( 1.0f / vec.getY() ), + ( 1.0f / vec.getZ() ), + ( 1.0f / vec.getW() ) + ); +} + +inline const Vector4 sqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + sqrtf( vec.getX() ), + sqrtf( vec.getY() ), + sqrtf( vec.getZ() ), + sqrtf( vec.getW() ) + ); +} + +inline const Vector4 rsqrtPerElem( const Vector4 & vec ) +{ + return Vector4( + ( 1.0f / sqrtf( vec.getX() ) ), + ( 1.0f / sqrtf( vec.getY() ) ), + ( 1.0f / sqrtf( vec.getZ() ) ), + ( 1.0f / sqrtf( vec.getW() ) ) + ); +} + +inline const Vector4 absPerElem( const Vector4 & vec ) +{ + return Vector4( + fabsf( vec.getX() ), + fabsf( vec.getY() ), + fabsf( vec.getZ() ), + fabsf( vec.getW() ) + ); +} + +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ), + ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ), + ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ), + ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() ) + ); +} + +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float maxElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() > vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() > result)? vec.getZ() : result; + result = (vec.getW() > result)? vec.getW() : result; + return result; +} + +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ) +{ + return Vector4( + (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(), + (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(), + (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(), + (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW() + ); +} + +inline float minElem( const Vector4 & vec ) +{ + float result; + result = (vec.getX() < vec.getY())? vec.getX() : vec.getY(); + result = (vec.getZ() < result)? vec.getZ() : result; + result = (vec.getW() < result)? vec.getW() : result; + return result; +} + +inline float sum( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() + vec.getY() ); + result = ( result + vec.getZ() ); + result = ( result + vec.getW() ); + return result; +} + +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ) +{ + float result; + result = ( vec0.getX() * vec1.getX() ); + result = ( result + ( vec0.getY() * vec1.getY() ) ); + result = ( result + ( vec0.getZ() * vec1.getZ() ) ); + result = ( result + ( vec0.getW() * vec1.getW() ) ); + return result; +} + +inline float lengthSqr( const Vector4 & vec ) +{ + float result; + result = ( vec.getX() * vec.getX() ); + result = ( result + ( vec.getY() * vec.getY() ) ); + result = ( result + ( vec.getZ() * vec.getZ() ) ); + result = ( result + ( vec.getW() * vec.getW() ) ); + return result; +} + +inline float length( const Vector4 & vec ) +{ + return ::sqrtf( lengthSqr( vec ) ); +} + +inline const Vector4 normalize( const Vector4 & vec ) +{ + float lenSqr, lenInv; + lenSqr = lengthSqr( vec ); + lenInv = ( 1.0f / sqrtf( lenSqr ) ); + return Vector4( + ( vec.getX() * lenInv ), + ( vec.getY() * lenInv ), + ( vec.getZ() * lenInv ), + ( vec.getW() * lenInv ) + ); +} + +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ) +{ + return Vector4( + ( select1 )? vec1.getX() : vec0.getX(), + ( select1 )? vec1.getY() : vec0.getY(), + ( select1 )? vec1.getZ() : vec0.getZ(), + ( select1 )? vec1.getW() : vec0.getW() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector4 & vec ) +{ + printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +inline void print( const Vector4 & vec, const char * name ) +{ + printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); +} + +#endif + +inline Point3::Point3( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; +} + +inline Point3::Point3( float _x, float _y, float _z ) +{ + mX = _x; + mY = _y; + mZ = _z; +} + +inline Point3::Point3( const Vector3 & vec ) +{ + mX = vec.getX(); + mY = vec.getY(); + mZ = vec.getZ(); +} + +inline Point3::Point3( float scalar ) +{ + mX = scalar; + mY = scalar; + mZ = scalar; +} + +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ) +{ + return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); +} + +inline void loadXYZ( Point3 & pnt, const float * fptr ) +{ + pnt = Point3( fptr[0], fptr[1], fptr[2] ); +} + +inline void storeXYZ( const Point3 & pnt, float * fptr ) +{ + fptr[0] = pnt.getX(); + fptr[1] = pnt.getY(); + fptr[2] = pnt.getZ(); +} + +inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + unsigned short fp16 = hfptr[i]; + unsigned int sign = fp16 >> 15; + unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1); + unsigned int mantissa = fp16 & ((1 << 10) - 1); + + if (exponent == 0) { + // zero + mantissa = 0; + + } else if (exponent == 31) { + // infinity or nan -> infinity + exponent = 255; + mantissa = 0; + + } else { + exponent += 127 - 15; + mantissa <<= 13; + } + + Data32 d; + d.u32 = (sign << 31) | (exponent << 23) | mantissa; + vec[i] = d.f32; + } +} + +inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr ) +{ + union Data32 { + unsigned int u32; + float f32; + }; + + for (int i = 0; i < 3; i++) { + Data32 d; + d.f32 = vec[i]; + + unsigned int sign = d.u32 >> 31; + unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1); + unsigned int mantissa = d.u32 & ((1 << 23) - 1);; + + if (exponent == 0) { + // zero or denorm -> zero + mantissa = 0; + + } else if (exponent == 255 && mantissa != 0) { + // nan -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent >= 127 - 15 + 31) { + // overflow or infinity -> infinity + exponent = 31; + mantissa = 0; + + } else if (exponent <= 127 - 15) { + // underflow -> zero + exponent = 0; + mantissa = 0; + + } else { + exponent -= 127 - 15; + mantissa >>= 13; + } + + hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa); + } +} + +inline Point3 & Point3::operator =( const Point3 & pnt ) +{ + mX = pnt.mX; + mY = pnt.mY; + mZ = pnt.mZ; + return *this; +} + +inline Point3 & Point3::setX( float _x ) +{ + mX = _x; + return *this; +} + +inline float Point3::getX( ) const +{ + return mX; +} + +inline Point3 & Point3::setY( float _y ) +{ + mY = _y; + return *this; +} + +inline float Point3::getY( ) const +{ + return mY; +} + +inline Point3 & Point3::setZ( float _z ) +{ + mZ = _z; + return *this; +} + +inline float Point3::getZ( ) const +{ + return mZ; +} + +inline Point3 & Point3::setElem( int idx, float value ) +{ + *(&mX + idx) = value; + return *this; +} + +inline float Point3::getElem( int idx ) const +{ + return *(&mX + idx); +} + +inline float & Point3::operator []( int idx ) +{ + return *(&mX + idx); +} + +inline float Point3::operator []( int idx ) const +{ + return *(&mX + idx); +} + +inline const Vector3 Point3::operator -( const Point3 & pnt ) const +{ + return Vector3( + ( mX - pnt.mX ), + ( mY - pnt.mY ), + ( mZ - pnt.mZ ) + ); +} + +inline const Point3 Point3::operator +( const Vector3 & vec ) const +{ + return Point3( + ( mX + vec.getX() ), + ( mY + vec.getY() ), + ( mZ + vec.getZ() ) + ); +} + +inline const Point3 Point3::operator -( const Vector3 & vec ) const +{ + return Point3( + ( mX - vec.getX() ), + ( mY - vec.getY() ), + ( mZ - vec.getZ() ) + ); +} + +inline Point3 & Point3::operator +=( const Vector3 & vec ) +{ + *this = *this + vec; + return *this; +} + +inline Point3 & Point3::operator -=( const Vector3 & vec ) +{ + *this = *this - vec; + return *this; +} + +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() * pnt1.getX() ), + ( pnt0.getY() * pnt1.getY() ), + ( pnt0.getZ() * pnt1.getZ() ) + ); +} + +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt0.getX() / pnt1.getX() ), + ( pnt0.getY() / pnt1.getY() ), + ( pnt0.getZ() / pnt1.getZ() ) + ); +} + +inline const Point3 recipPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / pnt.getX() ), + ( 1.0f / pnt.getY() ), + ( 1.0f / pnt.getZ() ) + ); +} + +inline const Point3 sqrtPerElem( const Point3 & pnt ) +{ + return Point3( + sqrtf( pnt.getX() ), + sqrtf( pnt.getY() ), + sqrtf( pnt.getZ() ) + ); +} + +inline const Point3 rsqrtPerElem( const Point3 & pnt ) +{ + return Point3( + ( 1.0f / sqrtf( pnt.getX() ) ), + ( 1.0f / sqrtf( pnt.getY() ) ), + ( 1.0f / sqrtf( pnt.getZ() ) ) + ); +} + +inline const Point3 absPerElem( const Point3 & pnt ) +{ + return Point3( + fabsf( pnt.getX() ), + fabsf( pnt.getY() ), + fabsf( pnt.getZ() ) + ); +} + +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ), + ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ), + ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() ) + ); +} + +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float maxElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() > result)? pnt.getZ() : result; + return result; +} + +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return Point3( + (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(), + (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(), + (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ() + ); +} + +inline float minElem( const Point3 & pnt ) +{ + float result; + result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY(); + result = (pnt.getZ() < result)? pnt.getZ() : result; + return result; +} + +inline float sum( const Point3 & pnt ) +{ + float result; + result = ( pnt.getX() + pnt.getY() ); + result = ( result + pnt.getZ() ); + return result; +} + +inline const Point3 scale( const Point3 & pnt, float scaleVal ) +{ + return mulPerElem( pnt, Point3( scaleVal ) ); +} + +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) +{ + return mulPerElem( pnt, Point3( scaleVec ) ); +} + +inline float projection( const Point3 & pnt, const Vector3 & unitVec ) +{ + float result; + result = ( pnt.getX() * unitVec.getX() ); + result = ( result + ( pnt.getY() * unitVec.getY() ) ); + result = ( result + ( pnt.getZ() * unitVec.getZ() ) ); + return result; +} + +inline float distSqrFromOrigin( const Point3 & pnt ) +{ + return lengthSqr( Vector3( pnt ) ); +} + +inline float distFromOrigin( const Point3 & pnt ) +{ + return length( Vector3( pnt ) ); +} + +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return lengthSqr( ( pnt1 - pnt0 ) ); +} + +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ) +{ + return length( ( pnt1 - pnt0 ) ); +} + +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ) +{ + return Point3( + ( select1 )? pnt1.getX() : pnt0.getX(), + ( select1 )? pnt1.getY() : pnt0.getY(), + ( select1 )? pnt1.getZ() : pnt0.getZ() + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Point3 & pnt ) +{ + printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +inline void print( const Point3 & pnt, const char * name ) +{ + printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/scalar/vectormath_aos.h b/test/Bullet2/vectormath/scalar/vectormath_aos.h new file mode 100644 index 000000000..d00456dfe --- /dev/null +++ b/test/Bullet2/vectormath/scalar/vectormath_aos.h @@ -0,0 +1,1872 @@ +/* + Copyright (C) 2009 Sony Computer Entertainment Inc. + All rights reserved. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +*/ + +#ifndef _VECTORMATH_AOS_CPP_H +#define _VECTORMATH_AOS_CPP_H + +#include + +#ifdef _VECTORMATH_DEBUG +#include +#endif + +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; +class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Vector3( ) { }; + + // Copy a 3-D vector + // + inline Vector3( const Vector3 & vec ); + + // Construct a 3-D vector from x, y, and z elements + // + inline Vector3( float x, float y, float z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit inline Vector3( const Point3 & pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit inline Vector3( float scalar ); + + // Assign one 3-D vector to another + // + inline Vector3 & operator =( const Vector3 & vec ); + + // Set the x element of a 3-D vector + // + inline Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + inline Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + inline Vector3 & setZ( float z ); + + // Get the x element of a 3-D vector + // + inline float getX( ) const; + + // Get the y element of a 3-D vector + // + inline float getY( ) const; + + // Get the z element of a 3-D vector + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + inline Vector3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 3-D vectors + // + inline const Vector3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + inline const Vector3 operator -( const Vector3 & vec ) const; + + // Add a 3-D vector to a 3-D point + // + inline const Point3 operator +( const Point3 & pnt ) const; + + // Multiply a 3-D vector by a scalar + // + inline const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + inline const Vector3 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Vector3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Vector3 & operator -=( const Vector3 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector3 & operator /=( float scalar ); + + // Negate all elements of a 3-D vector + // + inline const Vector3 operator -( ) const; + + // Construct x axis + // + static inline const Vector3 xAxis( ); + + // Construct y axis + // + static inline const Vector3 yAxis( ); + + // Construct z axis + // + static inline const Vector3 zAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 3-D vector by a scalar +// +inline const Vector3 operator *( float scalar, const Vector3 & vec ); + +// Multiply two 3-D vectors per element +// +inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector3 recipPerElem( const Vector3 & vec ); + +// Compute the square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector3 sqrtPerElem( const Vector3 & vec ); + +// Compute the reciprocal square root of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector3 rsqrtPerElem( const Vector3 & vec ); + +// Compute the absolute value of a 3-D vector per element +// +inline const Vector3 absPerElem( const Vector3 & vec ); + +// Copy sign from one 3-D vector to another, per element +// +inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum of two 3-D vectors per element +// +inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Minimum of two 3-D vectors per element +// +inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ); + +// Maximum element of a 3-D vector +// +inline float maxElem( const Vector3 & vec ); + +// Minimum element of a 3-D vector +// +inline float minElem( const Vector3 & vec ); + +// Compute the sum of all elements of a 3-D vector +// +inline float sum( const Vector3 & vec ); + +// Compute the dot product of two 3-D vectors +// +inline float dot( const Vector3 & vec0, const Vector3 & vec1 ); + +// Compute the square of the length of a 3-D vector +// +inline float lengthSqr( const Vector3 & vec ); + +// Compute the length of a 3-D vector +// +inline float length( const Vector3 & vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector3 normalize( const Vector3 & vec ); + +// Compute cross product of two 3-D vectors +// +inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ); + +// Outer product of two 3-D vectors +// +inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// +inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +inline const Matrix3 crossMatrix( const Vector3 & vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ); + +// Conditionally select between two 3-D vectors +// +inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Vector3 & vec, const float * fptr ); + +// Store x, y, and z elements of a 3-D vector in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Vector3 & vec, float * fptr ); + +// Load three-half-floats as a 3-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ); + +// Store a 3-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 & vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + float mX; + float mY; + float mZ; + float mW; + +public: + // Default constructor; does no initialization + // + inline Vector4( ) { }; + + // Copy a 4-D vector + // + inline Vector4( const Vector4 & vec ); + + // Construct a 4-D vector from x, y, z, and w elements + // + inline Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + inline Vector4( const Vector3 & xyz, float w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit inline Vector4( const Vector3 & vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit inline Vector4( const Point3 & pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit inline Vector4( const Quat & quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit inline Vector4( float scalar ); + + // Assign one 4-D vector to another + // + inline Vector4 & operator =( const Vector4 & vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w element. + // + inline Vector4 & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a 4-D vector + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + inline Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + inline Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + inline Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + inline Vector4 & setW( float w ); + + // Get the x element of a 4-D vector + // + inline float getX( ) const; + + // Get the y element of a 4-D vector + // + inline float getY( ) const; + + // Get the z element of a 4-D vector + // + inline float getZ( ) const; + + // Get the w element of a 4-D vector + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + inline Vector4 & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two 4-D vectors + // + inline const Vector4 operator +( const Vector4 & vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + inline const Vector4 operator -( const Vector4 & vec ) const; + + // Multiply a 4-D vector by a scalar + // + inline const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + inline const Vector4 operator /( float scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + inline Vector4 & operator +=( const Vector4 & vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + inline Vector4 & operator -=( const Vector4 & vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector4 & operator /=( float scalar ); + + // Negate all elements of a 4-D vector + // + inline const Vector4 operator -( ) const; + + // Construct x axis + // + static inline const Vector4 xAxis( ); + + // Construct y axis + // + static inline const Vector4 yAxis( ); + + // Construct z axis + // + static inline const Vector4 zAxis( ); + + // Construct w axis + // + static inline const Vector4 wAxis( ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a 4-D vector by a scalar +// +inline const Vector4 operator *( float scalar, const Vector4 & vec ); + +// Multiply two 4-D vectors per element +// +inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector4 recipPerElem( const Vector4 & vec ); + +// Compute the square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Vector4 sqrtPerElem( const Vector4 & vec ); + +// Compute the reciprocal square root of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Vector4 rsqrtPerElem( const Vector4 & vec ); + +// Compute the absolute value of a 4-D vector per element +// +inline const Vector4 absPerElem( const Vector4 & vec ); + +// Copy sign from one 4-D vector to another, per element +// +inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum of two 4-D vectors per element +// +inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Minimum of two 4-D vectors per element +// +inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 ); + +// Maximum element of a 4-D vector +// +inline float maxElem( const Vector4 & vec ); + +// Minimum element of a 4-D vector +// +inline float minElem( const Vector4 & vec ); + +// Compute the sum of all elements of a 4-D vector +// +inline float sum( const Vector4 & vec ); + +// Compute the dot product of two 4-D vectors +// +inline float dot( const Vector4 & vec0, const Vector4 & vec1 ); + +// Compute the square of the length of a 4-D vector +// +inline float lengthSqr( const Vector4 & vec ); + +// Compute the length of a 4-D vector +// +inline float length( const Vector4 & vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector4 normalize( const Vector4 & vec ); + +// Outer product of two 4-D vectors +// +inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ); + +// Conditionally select between two 4-D vectors +// +inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Vector4 & vec, const float * fptr ); + +// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Vector4 & vec, float * fptr ); + +// Load four-half-floats as a 4-D vector +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ); + +// Store a 4-D vector as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 & vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + float mX; + float mY; + float mZ; +#ifndef __GNUC__ + float d; +#endif + +public: + // Default constructor; does no initialization + // + inline Point3( ) { }; + + // Copy a 3-D point + // + inline Point3( const Point3 & pnt ); + + // Construct a 3-D point from x, y, and z elements + // + inline Point3( float x, float y, float z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit inline Point3( const Vector3 & vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit inline Point3( float scalar ); + + // Assign one 3-D point to another + // + inline Point3 & operator =( const Point3 & pnt ); + + // Set the x element of a 3-D point + // + inline Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + inline Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + inline Point3 & setZ( float z ); + + // Get the x element of a 3-D point + // + inline float getX( ) const; + + // Get the y element of a 3-D point + // + inline float getY( ) const; + + // Get the z element of a 3-D point + // + inline float getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + inline Point3 & setElem( int idx, float value ); + + // Get an x, y, or z element of a 3-D point by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + inline const Vector3 operator -( const Point3 & pnt ) const; + + // Add a 3-D point to a 3-D vector + // + inline const Point3 operator +( const Vector3 & vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + inline const Point3 operator -( const Vector3 & vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Point3 & operator +=( const Vector3 & vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Point3 & operator -=( const Vector3 & vec ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply two 3-D points per element +// +inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Point3 recipPerElem( const Point3 & pnt ); + +// Compute the square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function sqrtf4. +// +inline const Point3 sqrtPerElem( const Point3 & pnt ); + +// Compute the reciprocal square root of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function rsqrtf4. +// +inline const Point3 rsqrtPerElem( const Point3 & pnt ); + +// Compute the absolute value of a 3-D point per element +// +inline const Point3 absPerElem( const Point3 & pnt ); + +// Copy sign from one 3-D point to another, per element +// +inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum of two 3-D points per element +// +inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Minimum of two 3-D points per element +// +inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 ); + +// Maximum element of a 3-D point +// +inline float maxElem( const Point3 & pnt ); + +// Minimum element of a 3-D point +// +inline float minElem( const Point3 & pnt ); + +// Compute the sum of all elements of a 3-D point +// +inline float sum( const Point3 & pnt ); + +// Apply uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, float scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +inline float projection( const Point3 & pnt, const Vector3 & unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +inline float distSqrFromOrigin( const Point3 & pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +inline float distFromOrigin( const Point3 & pnt ); + +// Compute the square of the distance between two 3-D points +// +inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ); + +// Compute the distance between two 3-D points +// +inline float dist( const Point3 & pnt0, const Point3 & pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ); + +// Conditionally select between two 3-D points +// +inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 ); + +// Load x, y, and z elements from the first three words of a float array. +// +// +inline void loadXYZ( Point3 & pnt, const float * fptr ); + +// Store x, y, and z elements of a 3-D point in the first three words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZ( const Point3 & pnt, float * fptr ); + +// Load three-half-floats as a 3-D point +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. +// +inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr ); + +// Store a 3-D point as half-floats. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// NOTE: +// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed. +// +inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 & pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// +class Quat +{ + float mX; + float mY; + float mZ; + float mW; + +public: + // Default constructor; does no initialization + // + inline Quat( ) { }; + + // Copy a quaternion + // + inline Quat( const Quat & quat ); + + // Construct a quaternion from x, y, z, and w elements + // + inline Quat( float x, float y, float z, float w ); + + // Construct a quaternion from a 3-D vector and a scalar + // + inline Quat( const Vector3 & xyz, float w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit inline Quat( const Vector4 & vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit inline Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit inline Quat( float scalar ); + + // Assign one quaternion to another + // + inline Quat & operator =( const Quat & quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element. + // + inline Quat & setXYZ( const Vector3 & vec ); + + // Get the x, y, and z elements of a quaternion + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + inline Quat & setX( float x ); + + // Set the y element of a quaternion + // + inline Quat & setY( float y ); + + // Set the z element of a quaternion + // + inline Quat & setZ( float z ); + + // Set the w element of a quaternion + // + inline Quat & setW( float w ); + + // Get the x element of a quaternion + // + inline float getX( ) const; + + // Get the y element of a quaternion + // + inline float getY( ) const; + + // Get the z element of a quaternion + // + inline float getZ( ) const; + + // Get the w element of a quaternion + // + inline float getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + inline Quat & setElem( int idx, float value ); + + // Get an x, y, z, or w element of a quaternion by index + // + inline float getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline float & operator []( int idx ); + + // Subscripting operator to get an element + // + inline float operator []( int idx ) const; + + // Add two quaternions + // + inline const Quat operator +( const Quat & quat ) const; + + // Subtract a quaternion from another quaternion + // + inline const Quat operator -( const Quat & quat ) const; + + // Multiply two quaternions + // + inline const Quat operator *( const Quat & quat ) const; + + // Multiply a quaternion by a scalar + // + inline const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + inline const Quat operator /( float scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + inline Quat & operator +=( const Quat & quat ); + + // Perform compound assignment and subtraction by a quaternion + // + inline Quat & operator -=( const Quat & quat ); + + // Perform compound assignment and multiplication by a quaternion + // + inline Quat & operator *=( const Quat & quat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Quat & operator /=( float scalar ); + + // Negate all elements of a quaternion + // + inline const Quat operator -( ) const; + + // Construct an identity quaternion + // + static inline const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static inline const Quat rotation( float radians, const Vector3 & unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static inline const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static inline const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static inline const Quat rotationZ( float radians ); + +} +#ifdef __GNUC__ +__attribute__ ((aligned(16))) +#endif +; + +// Multiply a quaternion by a scalar +// +inline const Quat operator *( float scalar, const Quat & quat ); + +// Compute the conjugate of a quaternion +// +inline const Quat conj( const Quat & quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec ); + +// Compute the dot product of two quaternions +// +inline float dot( const Quat & quat0, const Quat & quat1 ); + +// Compute the norm of a quaternion +// +inline float norm( const Quat & quat ); + +// Compute the length of a quaternion +// +inline float length( const Quat & quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +inline const Quat normalize( const Quat & quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ); + +// Spherical quadrangle interpolation +// +inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ); + +// Conditionally select between two quaternions +// +inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ); + +// Load x, y, z, and w elements from the first four words of a float array. +// +// +inline void loadXYZW( Quat & quat, const float * fptr ); + +// Store x, y, z, and w elements of a quaternion in the first four words of a float array. +// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed +// +inline void storeXYZW( const Quat & quat, float * fptr ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat & quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + inline Matrix3( ) { }; + + // Copy a 3x3 matrix + // + inline Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit inline Matrix3( const Quat & unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit inline Matrix3( float scalar ); + + // Assign one 3x3 matrix to another + // + inline Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + inline Matrix3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x3 matrix + // + inline Matrix3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x3 matrix + // + inline Matrix3 & setCol2( const Vector3 & col2 ); + + // Get column 0 of a 3x3 matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + inline const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setRow( int row, const Vector3 & vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + inline Matrix3 & setElem( int col, int row, float val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + inline const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + inline const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + inline const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + inline const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply two 3x3 matrices + // + inline const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + inline Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + inline Matrix3 & operator -=( const Matrix3 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + inline Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static inline const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static inline const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static inline const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static inline const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix3 rotation( const Quat & unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static inline const Matrix3 scale( const Vector3 & scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +inline const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +inline const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +inline float determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + inline Matrix4( ) { }; + + // Copy a 4x4 matrix + // + inline Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit inline Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit inline Matrix4( float scalar ); + + // Assign one 4x4 matrix to another + // + inline Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 4x4 matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + inline Matrix4 & setCol0( const Vector4 & col0 ); + + // Set column 1 of a 4x4 matrix + // + inline Matrix4 & setCol1( const Vector4 & col1 ); + + // Set column 2 of a 4x4 matrix + // + inline Matrix4 & setCol2( const Vector4 & col2 ); + + // Set column 3 of a 4x4 matrix + // + inline Matrix4 & setCol3( const Vector4 & col3 ); + + // Get column 0 of a 4x4 matrix + // + inline const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + inline const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + inline const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + inline const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setCol( int col, const Vector4 & vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector4 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + inline Matrix4 & setElem( int col, int row, float val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + inline const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + inline const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + inline const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + inline const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + inline const Vector4 operator *( const Vector4 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector + // + inline const Vector4 operator *( const Vector3 & vec ) const; + + // Multiply a 4x4 matrix by a 3-D point + // + inline const Vector4 operator *( const Point3 & pnt ) const; + + // Multiply two 4x4 matrices + // + inline const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + inline const Matrix4 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and addition with a 4x4 matrix + // + inline Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + inline Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + inline Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static inline const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static inline const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static inline const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static inline const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes + // + static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix4 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix4 rotation( const Quat & unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static inline const Matrix4 scale( const Vector3 & scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static inline const Matrix4 translation( const Vector3 & translateVec ); + + // Construct viewing matrix based on eye position, position looked at, and up direction + // + static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec ); + + // Construct a perspective projection matrix + // + static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on frustum + // + static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar +// +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +inline const Matrix4 absPerElem( const Matrix4 & mat ); + +// Transpose of a 4x4 matrix +// +inline const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +inline const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Determinant of a 4x4 matrix +// +inline float determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + inline Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + inline Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector + // + inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector + // + inline Transform3( const Quat & unitQuat, const Vector3 & translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit inline Transform3( float scalar ); + + // Assign one 3x4 transformation matrix to another + // + inline Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix + // + inline Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // + inline Transform3 & setTranslation( const Vector3 & translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + inline Transform3 & setCol0( const Vector3 & col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + inline Transform3 & setCol1( const Vector3 & col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + inline Transform3 & setCol2( const Vector3 & col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + inline Transform3 & setCol3( const Vector3 & col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + inline const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + inline const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setCol( int col, const Vector3 & vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setRow( int row, const Vector4 & vec ); + + // Get the column of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + inline Transform3 & setElem( int col, int row, float val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + inline float getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 & vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + inline const Point3 operator *( const Point3 & pnt ) const; + + // Multiply two 3x4 transformation matrices + // + inline const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static inline const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static inline const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static inline const Transform3 rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static inline const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes + // + static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static inline const Transform3 rotation( float radians, const Vector3 & unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Transform3 rotation( const Quat & unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static inline const Transform3 scale( const Vector3 & scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static inline const Transform3 translation( const Vector3 & translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +inline const Transform3 absPerElem( const Transform3 & tfrm ); + +// Inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. +// +inline const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +inline const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif diff --git a/test/Bullet2/vectormath/sse/boolInVec.h b/test/Bullet2/vectormath/sse/boolInVec.h new file mode 100644 index 000000000..d21d25cbb --- /dev/null +++ b/test/Bullet2/vectormath/sse/boolInVec.h @@ -0,0 +1,247 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _BOOLINVEC_H +#define _BOOLINVEC_H + +#include + +namespace Vectormath { + +class floatInVec; + +//-------------------------------------------------------------------------------------------------- +// boolInVec class +// + +class boolInVec +{ + private: + __m128 mData; + + inline boolInVec(__m128 vec); + public: + inline boolInVec() {} + + // matches standard type conversions + // + inline boolInVec(const floatInVec &vec); + + // explicit cast from bool + // + explicit inline boolInVec(bool scalar); + +#ifdef _VECTORMATH_NO_SCALAR_CAST + // explicit cast to bool + // + inline bool getAsBool() const; +#else + // implicit cast to bool + // + inline operator bool() const; +#endif + + // get vector data + // bool value is splatted across all word slots of vector as 0 (false) or -1 (true) + // + inline __m128 get128() const; + + // operators + // + inline const boolInVec operator ! () const; + inline boolInVec& operator = (const boolInVec &vec); + inline boolInVec& operator &= (const boolInVec &vec); + inline boolInVec& operator ^= (const boolInVec &vec); + inline boolInVec& operator |= (const boolInVec &vec); + + // friend functions + // + friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1); +}; + +//-------------------------------------------------------------------------------------------------- +// boolInVec functions +// + +// operators +// +inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1); + +// select between vec0 and vec1 using boolInVec. +// false selects vec0, true selects vec1 +// +inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1); + +} // namespace Vectormath + +//-------------------------------------------------------------------------------------------------- +// boolInVec implementation +// + +#include "floatInVec.h" + +namespace Vectormath { + +inline +boolInVec::boolInVec(__m128 vec) +{ + mData = vec; +} + +inline +boolInVec::boolInVec(const floatInVec &vec) +{ + *this = (vec != floatInVec(0.0f)); +} + +inline +boolInVec::boolInVec(bool scalar) +{ + unsigned int mask = -(int)scalar; + mData = _mm_set1_ps(*(float *)&mask); // TODO: Union +} + +#ifdef _VECTORMATH_NO_SCALAR_CAST +inline +bool +boolInVec::getAsBool() const +#else +inline +boolInVec::operator bool() const +#endif +{ + return *(bool *)&mData; +} + +inline +__m128 +boolInVec::get128() const +{ + return mData; +} + +inline +const boolInVec +boolInVec::operator ! () const +{ + return boolInVec(_mm_andnot_ps(mData, _mm_cmpneq_ps(_mm_setzero_ps(),_mm_setzero_ps()))); +} + +inline +boolInVec& +boolInVec::operator = (const boolInVec &vec) +{ + mData = vec.mData; + return *this; +} + +inline +boolInVec& +boolInVec::operator &= (const boolInVec &vec) +{ + *this = *this & vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator ^= (const boolInVec &vec) +{ + *this = *this ^ vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator |= (const boolInVec &vec) +{ + *this = *this | vec; + return *this; +} + +inline +const boolInVec +operator == (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator != (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator & (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_and_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator | (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_or_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator ^ (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_xor_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1) +{ + return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128())); +} + +} // namespace Vectormath + +#endif // boolInVec_h diff --git a/test/Bullet2/vectormath/sse/floatInVec.h b/test/Bullet2/vectormath/sse/floatInVec.h new file mode 100644 index 000000000..e8ac5959e --- /dev/null +++ b/test/Bullet2/vectormath/sse/floatInVec.h @@ -0,0 +1,340 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _FLOATINVEC_H +#define _FLOATINVEC_H + +#include +#include + +namespace Vectormath { + +class boolInVec; + +//-------------------------------------------------------------------------------------------------- +// floatInVec class +// + +class floatInVec +{ + private: + __m128 mData; + + public: + inline floatInVec(__m128 vec); + + inline floatInVec() {} + + // matches standard type conversions + // + inline floatInVec(const boolInVec &vec); + + // construct from a slot of __m128 + // + inline floatInVec(__m128 vec, int slot); + + // explicit cast from float + // + explicit inline floatInVec(float scalar); + +#ifdef _VECTORMATH_NO_SCALAR_CAST + // explicit cast to float + // + inline float getAsFloat() const; +#else + // implicit cast to float + // + inline operator float() const; +#endif + + // get vector data + // float value is splatted across all word slots of vector + // + inline __m128 get128() const; + + // operators + // + inline const floatInVec operator ++ (int); + inline const floatInVec operator -- (int); + inline floatInVec& operator ++ (); + inline floatInVec& operator -- (); + inline const floatInVec operator - () const; + inline floatInVec& operator = (const floatInVec &vec); + inline floatInVec& operator *= (const floatInVec &vec); + inline floatInVec& operator /= (const floatInVec &vec); + inline floatInVec& operator += (const floatInVec &vec); + inline floatInVec& operator -= (const floatInVec &vec); + + // friend functions + // + friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, boolInVec select_vec1); +}; + +//-------------------------------------------------------------------------------------------------- +// floatInVec functions +// + +// operators +// +inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1); + +// select between vec0 and vec1 using boolInVec. +// false selects vec0, true selects vec1 +// +inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1); + +} // namespace Vectormath + +//-------------------------------------------------------------------------------------------------- +// floatInVec implementation +// + +#include "boolInVec.h" + +namespace Vectormath { + +inline +floatInVec::floatInVec(__m128 vec) +{ + mData = vec; +} + +inline +floatInVec::floatInVec(const boolInVec &vec) +{ + mData = vec_sel(_mm_setzero_ps(), _mm_set1_ps(1.0f), vec.get128()); +} + +inline +floatInVec::floatInVec(__m128 vec, int slot) +{ + SSEFloat v; + v.m128 = vec; + mData = _mm_set1_ps(v.f[slot]); +} + +inline +floatInVec::floatInVec(float scalar) +{ + mData = _mm_set1_ps(scalar); +} + +#ifdef _VECTORMATH_NO_SCALAR_CAST +inline +float +floatInVec::getAsFloat() const +#else +inline +floatInVec::operator float() const +#endif +{ + return *((float *)&mData); +} + +inline +__m128 +floatInVec::get128() const +{ + return mData; +} + +inline +const floatInVec +floatInVec::operator ++ (int) +{ + __m128 olddata = mData; + operator ++(); + return floatInVec(olddata); +} + +inline +const floatInVec +floatInVec::operator -- (int) +{ + __m128 olddata = mData; + operator --(); + return floatInVec(olddata); +} + +inline +floatInVec& +floatInVec::operator ++ () +{ + *this += floatInVec(_mm_set1_ps(1.0f)); + return *this; +} + +inline +floatInVec& +floatInVec::operator -- () +{ + *this -= floatInVec(_mm_set1_ps(1.0f)); + return *this; +} + +inline +const floatInVec +floatInVec::operator - () const +{ + return floatInVec(_mm_sub_ps(_mm_setzero_ps(), mData)); +} + +inline +floatInVec& +floatInVec::operator = (const floatInVec &vec) +{ + mData = vec.mData; + return *this; +} + +inline +floatInVec& +floatInVec::operator *= (const floatInVec &vec) +{ + *this = *this * vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator /= (const floatInVec &vec) +{ + *this = *this / vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator += (const floatInVec &vec) +{ + *this = *this + vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator -= (const floatInVec &vec) +{ + *this = *this - vec; + return *this; +} + +inline +const floatInVec +operator * (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_mul_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +operator / (const floatInVec &num, const floatInVec &den) +{ + return floatInVec(_mm_div_ps(num.get128(), den.get128())); +} + +inline +const floatInVec +operator + (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_add_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +operator - (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_sub_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator < (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpgt_ps(vec1.get128(), vec0.get128())); +} + +inline +const boolInVec +operator <= (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpge_ps(vec1.get128(), vec0.get128())); +} + +inline +const boolInVec +operator > (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpgt_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator >= (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpge_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator == (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator != (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1) +{ + return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128())); +} + +} // namespace Vectormath + +#endif // floatInVec_h diff --git a/test/Bullet2/vectormath/sse/mat_aos.h b/test/Bullet2/vectormath/sse/mat_aos.h new file mode 100644 index 000000000..a2c66cc5f --- /dev/null +++ b/test/Bullet2/vectormath/sse/mat_aos.h @@ -0,0 +1,2190 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_MAT_AOS_CPP_H +#define _VECTORMATH_MAT_AOS_CPP_H + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// Constants +// for shuffles, words are labeled [x,y,z,w] [a,b,c,d] + +#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }) +#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D }) +#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z }) +#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D }) +#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y }) +#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A }) +#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B }) +#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PI_OVER_2 1.570796327f + +//----------------------------------------------------------------------------- +// Definitions + +VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; +} + +VECTORMATH_FORCE_INLINE Matrix3::Matrix3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const floatInVec &scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Quat &unitQuat ) +{ + __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2; + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0}; + __m128 select_x = _mm_load_ps((float *)sx); + __m128 select_z = _mm_load_ps((float *)sz); + + xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() ); + wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) ); + yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) ); + zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) ); + yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) ); + zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) ); + + tmp0 = _mm_mul_ps( yzxw_2, wwww ); // tmp0 = 2yw, 2zw, 2xw, 2w2 + tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) ); // tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2 + tmp2 = _mm_mul_ps( yzxw, xyzw_2 ); // tmp2 = 2xy, 2yz, 2xz, 2w2 + tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 ); // tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2 + tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) ); // tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2 + tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) ); // tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2 + + tmp3 = vec_sel( tmp0, tmp1, select_x ); + tmp4 = vec_sel( tmp1, tmp2, select_x ); + tmp5 = vec_sel( tmp2, tmp0, select_x ); + mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) ); + mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) ); + mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) ); +} + +VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const +{ + return mCol0; +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const +{ + return mCol1; +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const +{ + return mCol2; +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const +{ + return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Matrix3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, res0, res1, res2; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + res0 = vec_mergeh( tmp0, mat.getCol1().get128() ); + //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + res1 = vec_sel(res1, mat.getCol1().get128(), select_y); + //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y); + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2; + tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() ); + tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() ); + tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() ); + dot = _vmathVfDot3( tmp2, mat.getCol2().get128() ); + dot = vec_splat( dot, 0 ); + invdet = recipf4( dot ); + tmp3 = vec_mergeh( tmp0, tmp2 ); + tmp4 = vec_mergel( tmp0, tmp2 ); + inv0 = vec_mergeh( tmp3, tmp1 ); + //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tmp1, select_y); + //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y); + inv0 = vec_mul( inv0, invdet ); + inv1 = vec_mul( inv1, invdet ); + inv2 = vec_mul( inv2, invdet ); + return Matrix3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ) + ); +} + +VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat ) +{ + return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +{ + *this = *this + mat; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +{ + *this = *this - mat; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const +{ + return Matrix3( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat ) +{ + return Matrix3( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const +{ + return Matrix3( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat ) +{ + return floatInVec(scalar) * mat; +} + +VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ) +{ + return mat * scalar; +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const +{ + __m128 res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( vec.get128(), 0 ); + yyyy = vec_splat( vec.get128(), 1 ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_mul( mCol0.get128(), xxxx ); + res = vec_madd( mCol1.get128(), yyyy, res ); + res = vec_madd( mCol2.get128(), zzzz, res ); + return Vector3( res ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const +{ + return Matrix3( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +{ + *this = *this * mat; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +{ + return Matrix3( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::identity( ) +{ + return Matrix3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Matrix3( + Vector3::xAxis( ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Matrix3( + Vector3( res0 ), + Vector3::yAxis( ), + Vector3( res2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3::zAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Matrix3( + Vector3( vec_mul( Z0, Y0 ) ), + Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; + axis = unitVec.get128(); + sincosf4( radians.get128(), &s, &c ); + xxxx = vec_splat( axis, 0 ); + yyyy = vec_splat( axis, 1 ); + zzzz = vec_splat( axis, 2 ); + oneMinusC = vec_sub( _mm_set1_ps(1.0f), c ); + axisS = vec_mul( axis, s ); + negAxisS = negatef4( axisS ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX ); + tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); + tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); + //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX ); + tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); + //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX ); + tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); + tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); + tmp0 = vec_sel( tmp0, c, select_x ); + tmp1 = vec_sel( tmp1, c, select_y ); + tmp2 = vec_sel( tmp2, c, select_z ); + return Matrix3( + Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ), + Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ), + Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat ) +{ + return Matrix3( unitQuat ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Matrix3( + Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) +{ + return Matrix3( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ) +{ + return Matrix3( + mulPerElem( mat.getCol0(), scaleVec ), + mulPerElem( mat.getCol1(), scaleVec ), + mulPerElem( mat.getCol2(), scaleVec ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); +} + +VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( float scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const floatInVec &scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Transform3 & mat ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( mat.getCol3(), 1.0f ); +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) +{ + Matrix3 mat; + mat = Matrix3( unitQuat ); + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 ) +{ + mCol3 = _col3; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) +{ + Vector4 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const +{ + return mCol0; +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const +{ + return mCol1; +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const +{ + return mCol2; +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const +{ + return mCol3; +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Matrix4::operator []( int col ) +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() ); + tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() ); + res0 = vec_mergeh( tmp0, tmp1 ); + res1 = vec_mergel( tmp0, tmp1 ); + res2 = vec_mergeh( tmp2, tmp3 ); + res3 = vec_mergel( tmp2, tmp3 ); + return Matrix4( + Vector4( res0 ), + Vector4( res1 ), + Vector4( res2 ), + Vector4( res3 ) + ); +} + +// TODO: Tidy +static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000}; +static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000}; +static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f}; + +VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat ) +{ + __m128 Va,Vb,Vc; + __m128 r1,r2,r3,tt,tt2; + __m128 sum,Det,RDet; + __m128 trns0,trns1,trns2,trns3; + + __m128 _L1 = mat.getCol0().get128(); + __m128 _L2 = mat.getCol1().get128(); + __m128 _L3 = mat.getCol2().get128(); + __m128 _L4 = mat.getCol3().get128(); + // Calculating the minterms for the first line. + + // _mm_ror_ps is just a macro using _mm_shuffle_ps(). + tt = _L4; tt2 = _mm_ror_ps(_L3,1); + Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3'dot V4 + Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3'dot V4" + Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^ + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4' + + tt = _L2; + Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1); + Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2)); + Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3)); + + // Calculating the determinant. + Det = _mm_mul_ps(sum,_L1); + Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det)); + + const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN); + const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP); + + __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN); + + // Calculating the minterms of the second line (using previous results). + tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP); + + // Testing the determinant. + Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1)); + + // Calculating the minterms of the third line. + tt = _mm_ror_ps(_L1,1); + Va = _mm_mul_ps(tt,Vb); // V1' dot V2" + Vb = _mm_mul_ps(tt,Vc); // V1' dot V2^ + Vc = _mm_mul_ps(tt,_L2); // V1' dot V2 + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V1" dot V2^ - V1^ dot V2" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V1^ dot V2' - V1' dot V2^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V1' dot V2" - V1" dot V2' + + tt = _mm_ror_ps(_L4,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN); + + // Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs). + RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f? + RDet = _mm_shuffle_ps(RDet,RDet,0x00); + + // Devide the first 12 minterms with the determinant. + mtL1 = _mm_mul_ps(mtL1, RDet); + mtL2 = _mm_mul_ps(mtL2, RDet); + mtL3 = _mm_mul_ps(mtL3, RDet); + + // Calculate the minterms of the forth line and devide by the determinant. + tt = _mm_ror_ps(_L3,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP); + mtL4 = _mm_mul_ps(mtL4, RDet); + + // Now we just have to transpose the minterms matrix. + trns0 = _mm_unpacklo_ps(mtL1,mtL2); + trns1 = _mm_unpacklo_ps(mtL3,mtL4); + trns2 = _mm_unpackhi_ps(mtL1,mtL2); + trns3 = _mm_unpackhi_ps(mtL3,mtL4); + _L1 = _mm_movelh_ps(trns0,trns1); + _L2 = _mm_movehl_ps(trns1,trns0); + _L3 = _mm_movelh_ps(trns2,trns3); + _L4 = _mm_movehl_ps(trns3,trns2); + + return Matrix4( + Vector4( _L1 ), + Vector4( _L2 ), + Vector4( _L3 ), + Vector4( _L4 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( inverse( affineMat ) ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( orthoInverse( affineMat ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat ) +{ + __m128 Va,Vb,Vc; + __m128 r1,r2,r3,tt,tt2; + __m128 sum,Det; + + __m128 _L1 = mat.getCol0().get128(); + __m128 _L2 = mat.getCol1().get128(); + __m128 _L3 = mat.getCol2().get128(); + __m128 _L4 = mat.getCol3().get128(); + // Calculating the minterms for the first line. + + // _mm_ror_ps is just a macro using _mm_shuffle_ps(). + tt = _L4; tt2 = _mm_ror_ps(_L3,1); + Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3' dot V4 + Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3' dot V4" + Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^ + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4' + + tt = _L2; + Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1); + Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2)); + Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3)); + + // Calculating the determinant. + Det = _mm_mul_ps(sum,_L1); + Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det)); + + // Calculating the minterms of the second line (using previous results). + tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + + // Testing the determinant. + Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1)); + return floatInVec(Det, 0); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ), + ( mCol3 + mat.mCol3 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ), + ( mCol3 - mat.mCol3 ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +{ + *this = *this + mat; + return *this; +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +{ + *this = *this - mat; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const +{ + return Matrix4( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ), + ( -mCol3 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat ) +{ + return Matrix4( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ), + absPerElem( mat.getCol3() ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const +{ + return Matrix4( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ), + ( mCol3 * scalar ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat ) +{ + return floatInVec(scalar) * mat; +} + +VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ) +{ + return mat * scalar; +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3))))) + ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2)))) + ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128())) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +{ + return Matrix4( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ), + ( *this * mat.mCol3 ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +{ + *this = *this * mat; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +{ + return Matrix4( + ( *this * tfrm.getCol0() ), + ( *this * tfrm.getCol1() ), + ( *this * tfrm.getCol2() ), + ( *this * Point3( tfrm.getCol3() ) ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +{ + return Matrix4( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ), + mulPerElem( mat0.getCol3(), mat1.getCol3() ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::identity( ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +{ + mCol0.setXYZ( mat3.getCol0() ); + mCol1.setXYZ( mat3.getCol1() ); + mCol2.setXYZ( mat3.getCol2() ); + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const +{ + return Matrix3( + mCol0.getXYZ( ), + mCol1.getXYZ( ), + mCol2.getXYZ( ) + ); +} + +VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec ) +{ + mCol3.setXYZ( translateVec ); + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const +{ + return mCol3.getXYZ( ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Matrix4( + Vector4::xAxis( ), + Vector4( res1 ), + Vector4( res2 ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Matrix4( + Vector4( res0 ), + Vector4::yAxis( ), + Vector4( res2 ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Matrix4( + Vector4( res0 ), + Vector4( res1 ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Matrix4( + Vector4( vec_mul( Z0, Y0 ) ), + Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; + axis = unitVec.get128(); + sincosf4( radians.get128(), &s, &c ); + xxxx = vec_splat( axis, 0 ); + yyyy = vec_splat( axis, 1 ); + zzzz = vec_splat( axis, 2 ); + oneMinusC = vec_sub( _mm_set1_ps(1.0f), c ); + axisS = vec_mul( axis, s ); + negAxisS = negatef4( axisS ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX ); + tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); + tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); + //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX ); + tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); + //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX ); + tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); + tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); + tmp0 = vec_sel( tmp0, c, select_x ); + tmp1 = vec_sel( tmp1, c, select_y ); + tmp2 = vec_sel( tmp2, c, select_z ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) ); + tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) ); + tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) ); + tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) ); + return Matrix4( + Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ), + Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ), + Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat ) +{ + return Matrix4( Transform3::rotation( unitQuat ) ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Matrix4( + Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ), + Vector4::wAxis( ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ) +{ + return Matrix4( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ), + mat.getCol3() + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ) +{ + Vector4 scale4; + scale4 = Vector4( scaleVec, 1.0f ); + return Matrix4( + mulPerElem( mat.getCol0(), scale4 ), + mulPerElem( mat.getCol1(), scale4 ), + mulPerElem( mat.getCol2(), scale4 ), + mulPerElem( mat.getCol3(), scale4 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4( translateVec, 1.0f ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ) +{ + Matrix4 m4EyeFrame; + Vector3 v3X, v3Y, v3Z; + v3Y = normalize( upVec ); + v3Z = normalize( ( eyePos - lookAtPos ) ); + v3X = normalize( cross( v3Y, v3Z ) ); + v3Y = cross( v3Z, v3X ); + m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) ); + return orthoInverse( m4EyeFrame ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +{ + float f, rangeInv; + __m128 zero, col0, col1, col2, col3; + union { __m128 v; float s[4]; } tmp; + f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f ); + rangeInv = 1.0f / ( zNear - zFar ); + zero = _mm_setzero_ps(); + tmp.v = zero; + tmp.s[0] = f / aspect; + col0 = tmp.v; + tmp.v = zero; + tmp.s[1] = f; + col1 = tmp.v; + tmp.v = zero; + tmp.s[2] = ( zNear + zFar ) * rangeInv; + tmp.s[3] = -1.0f; + col2 = tmp.v; + tmp.v = zero; + tmp.s[2] = zNear * zFar * rangeInv * 2.0f; + col3 = tmp.v; + return Matrix4( + Vector4( col0 ), + Vector4( col1 ), + Vector4( col2 ), + Vector4( col3 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + /* function implementation based on code from STIDC SDK: */ + /* -------------------------------------------------------------- */ + /* PLEASE DO NOT MODIFY THIS SECTION */ + /* This prolog section is automatically generated. */ + /* */ + /* (C)Copyright */ + /* Sony Computer Entertainment, Inc., */ + /* Toshiba Corporation, */ + /* International Business Machines Corporation, */ + /* 2001,2002. */ + /* S/T/I Confidential Information */ + /* -------------------------------------------------------------- */ + __m128 lbf, rtn; + __m128 diff, sum, inv_diff; + __m128 diagonal, column, near2; + __m128 zero = _mm_setzero_ps(); + union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union? + l.s[0] = left; + f.s[0] = zFar; + r.s[0] = right; + n.s[0] = zNear; + b.s[0] = bottom; + t.s[0] = top; + lbf = vec_mergeh( l.v, f.v ); + rtn = vec_mergeh( r.v, n.v ); + lbf = vec_mergeh( lbf, b.v ); + rtn = vec_mergeh( rtn, t.v ); + diff = vec_sub( rtn, lbf ); + sum = vec_add( rtn, lbf ); + inv_diff = recipf4( diff ); + near2 = vec_splat( n.v, 0 ); + near2 = vec_add( near2, near2 ); + diagonal = vec_mul( near2, inv_diff ); + column = vec_mul( sum, inv_diff ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + return Matrix4( + Vector4( vec_sel( zero, diagonal, select_x ) ), + Vector4( vec_sel( zero, diagonal, select_y ) ), + Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ), + Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + /* function implementation based on code from STIDC SDK: */ + /* -------------------------------------------------------------- */ + /* PLEASE DO NOT MODIFY THIS SECTION */ + /* This prolog section is automatically generated. */ + /* */ + /* (C)Copyright */ + /* Sony Computer Entertainment, Inc., */ + /* Toshiba Corporation, */ + /* International Business Machines Corporation, */ + /* 2001,2002. */ + /* S/T/I Confidential Information */ + /* -------------------------------------------------------------- */ + __m128 lbf, rtn; + __m128 diff, sum, inv_diff, neg_inv_diff; + __m128 diagonal, column; + __m128 zero = _mm_setzero_ps(); + union { __m128 v; float s[4]; } l, f, r, n, b, t; + l.s[0] = left; + f.s[0] = zFar; + r.s[0] = right; + n.s[0] = zNear; + b.s[0] = bottom; + t.s[0] = top; + lbf = vec_mergeh( l.v, f.v ); + rtn = vec_mergeh( r.v, n.v ); + lbf = vec_mergeh( lbf, b.v ); + rtn = vec_mergeh( rtn, t.v ); + diff = vec_sub( rtn, lbf ); + sum = vec_add( rtn, lbf ); + inv_diff = recipf4( diff ); + neg_inv_diff = negatef4( inv_diff ); + diagonal = vec_add( inv_diff, inv_diff ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero + return Matrix4( + Vector4( vec_sel( zero, diagonal, select_x ) ), + Vector4( vec_sel( zero, diagonal, select_y ) ), + Vector4( vec_sel( zero, diagonal, select_z ) ), + Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); + print( mat.getRow( 3 ) ); +} + +VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; +} + +VECTORMATH_FORCE_INLINE Transform3::Transform3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +VECTORMATH_FORCE_INLINE Transform3::Transform3( const floatInVec &scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ) +{ + this->setUpper3x3( tfrm ); + this->setTranslation( translateVec ); +} + +VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec ) +{ + this->setUpper3x3( Matrix3( unitQuat ) ); + this->setTranslation( translateVec ); +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 ) +{ + mCol3 = _col3; + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, const floatInVec &val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const +{ + return mCol0; +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const +{ + return mCol1; +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const +{ + return mCol2; +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const +{ + return mCol3; +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Transform3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; + return *this; +} + +VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm ) +{ + __m128 inv0, inv1, inv2, inv3; + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet; + __m128 xxxx, yyyy, zzzz; + tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); + tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); + tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); + inv3 = negatef4( tfrm.getCol3().get128() ); + dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); + dot = vec_splat( dot, 0 ); + invdet = recipf4( dot ); + tmp3 = vec_mergeh( tmp0, tmp2 ); + tmp4 = vec_mergel( tmp0, tmp2 ); + inv0 = vec_mergeh( tmp3, tmp1 ); + xxxx = vec_splat( inv3, 0 ); + //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tmp1, select_y); + //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y); + yyyy = vec_splat( inv3, 1 ); + zzzz = vec_splat( inv3, 2 ); + inv3 = vec_mul( inv0, xxxx ); + inv3 = vec_madd( inv1, yyyy, inv3 ); + inv3 = vec_madd( inv2, zzzz, inv3 ); + inv0 = vec_mul( inv0, invdet ); + inv1 = vec_mul( inv1, invdet ); + inv2 = vec_mul( inv2, invdet ); + inv3 = vec_mul( inv3, invdet ); + return Transform3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ), + Vector3( inv3 ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm ) +{ + __m128 inv0, inv1, inv2, inv3; + __m128 tmp0, tmp1; + __m128 xxxx, yyyy, zzzz; + tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); + tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); + inv3 = negatef4( tfrm.getCol3().get128() ); + inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() ); + xxxx = vec_splat( inv3, 0 ); + //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y); + //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y); + yyyy = vec_splat( inv3, 1 ); + zzzz = vec_splat( inv3, 2 ); + inv3 = vec_mul( inv0, xxxx ); + inv3 = vec_madd( inv1, yyyy, inv3 ); + inv3 = vec_madd( inv2, zzzz, inv3 ); + return Transform3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ), + Vector3( inv3 ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm ) +{ + return Transform3( + absPerElem( tfrm.getCol0() ), + absPerElem( tfrm.getCol1() ), + absPerElem( tfrm.getCol2() ), + absPerElem( tfrm.getCol3() ) + ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const +{ + __m128 res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( vec.get128(), 0 ); + yyyy = vec_splat( vec.get128(), 1 ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_mul( mCol0.get128(), xxxx ); + res = vec_madd( mCol1.get128(), yyyy, res ); + res = vec_madd( mCol2.get128(), zzzz, res ); + return Vector3( res ); +} + +VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const +{ + __m128 tmp0, tmp1, res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( pnt.get128(), 0 ); + yyyy = vec_splat( pnt.get128(), 1 ); + zzzz = vec_splat( pnt.get128(), 2 ); + tmp0 = vec_mul( mCol0.get128(), xxxx ); + tmp1 = vec_mul( mCol1.get128(), yyyy ); + tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 ); + tmp1 = vec_add( mCol3.get128(), tmp1 ); + res = vec_add( tmp0, tmp1 ); + return Point3( res ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +{ + return Transform3( + ( *this * tfrm.mCol0 ), + ( *this * tfrm.mCol1 ), + ( *this * tfrm.mCol2 ), + Vector3( ( *this * Point3( tfrm.mCol3 ) ) ) + ); +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +{ + return Transform3( + mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), + mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ), + mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ), + mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::identity( ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +{ + mCol0 = tfrm.getCol0(); + mCol1 = tfrm.getCol1(); + mCol2 = tfrm.getCol2(); + return *this; +} + +VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const +{ + return Matrix3( mCol0, mCol1, mCol2 ); +} + +VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec ) +{ + mCol3 = translateVec; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Transform3::getTranslation( ) const +{ + return mCol3; +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Transform3( + Vector3::xAxis( ), + Vector3( res1 ), + Vector3( res2 ), + Vector3( _mm_setzero_ps() ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Transform3( + Vector3( res0 ), + Vector3::yAxis( ), + Vector3( res2 ), + Vector3( 0.0f ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __m128 zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Transform3( + Vector3( res0 ), + Vector3( res1 ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Transform3( + Vector3( vec_mul( Z0, Y0 ) ), + Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ), + Vector3( 0.0f ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const Quat &unitQuat ) +{ + return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Transform3( + Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ), + Vector3( 0.0f ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ) +{ + return Transform3( + ( tfrm.getCol0() * scaleVec.getX( ) ), + ( tfrm.getCol1() * scaleVec.getY( ) ), + ( tfrm.getCol2() * scaleVec.getZ( ) ), + tfrm.getCol3() + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ) +{ + return Transform3( + mulPerElem( tfrm.getCol0(), scaleVec ), + mulPerElem( tfrm.getCol1(), scaleVec ), + mulPerElem( tfrm.getCol2(), scaleVec ), + mulPerElem( tfrm.getCol3(), scaleVec ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + translateVec + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm ) +{ + print( tfrm.getRow( 0 ) ); + print( tfrm.getRow( 1 ) ); + print( tfrm.getRow( 2 ) ); +} + +VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name ) +{ + printf("%s:\n", name); + print( tfrm ); +} + +#endif + +VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm ) +{ + __m128 res; + __m128 col0, col1, col2; + __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff; + __m128 zy_xz_yx, yz_zx_xy, sum, diff; + __m128 radicand, invSqrt, scale; + __m128 res0, res1, res2, res3; + __m128 xx, yy, zz; + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + + col0 = tfrm.getCol0().get128(); + col1 = tfrm.getCol1().get128(); + col2 = tfrm.getCol2().get128(); + + /* four cases: */ + /* trace > 0 */ + /* else */ + /* xx largest diagonal element */ + /* yy largest diagonal element */ + /* zz largest diagonal element */ + + /* compute quaternion for each case */ + + xx_yy = vec_sel( col0, col1, select_y ); + //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX ); + //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY ); + //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC ); + xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) ); + xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck + yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) ); + zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) ); + + diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); + diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); + radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) ); + // invSqrt = rsqrtf4( radicand ); + invSqrt = newtonrapson_rsqrt4( radicand ); + + + + zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03 + //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX ); + zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) ); // zy_xz_yx = 12 12 01 00 + zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y ); // zy_xz_yx = 12 20 01 00 + yz_zx_xy = vec_sel( col0, col1, select_x ); // yz_zx_xy = 10 01 02 03 + //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX ); + yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) ); // yz_zx_xy = 10 02 10 10 + yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x ); // yz_zx_xy = 21 02 10 10 + + sum = vec_add( zy_xz_yx, yz_zx_xy ); + diff = vec_sub( zy_xz_yx, yz_zx_xy ); + + scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) ); + + //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA ); + res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) ); + res0 = vec_sel( res0, vec_splat(diff, 0), select_w ); // TODO: Ck + //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB ); + res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) ); + res1 = vec_sel( res1, vec_splat(diff, 1), select_w ); // TODO: Ck + //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC ); + res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) ); + res2 = vec_sel( res2, vec_splat(diff, 2), select_w ); // TODO: Ck + res3 = diff; + res0 = vec_sel( res0, radicand, select_x ); + res1 = vec_sel( res1, radicand, select_y ); + res2 = vec_sel( res2, radicand, select_z ); + res3 = vec_sel( res3, radicand, select_w ); + res0 = vec_mul( res0, vec_splat( scale, 0 ) ); + res1 = vec_mul( res1, vec_splat( scale, 1 ) ); + res2 = vec_mul( res2, vec_splat( scale, 2 ) ); + res3 = vec_mul( res3, vec_splat( scale, 3 ) ); + + /* determine case and select answer */ + + xx = vec_splat( col0, 0 ); + yy = vec_splat( col1, 1 ); + zz = vec_splat( col2, 2 ); + res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); + res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); + res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) ); + mVec128 = res; +} + +VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 ) +{ + return Matrix3( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 ) +{ + return Matrix4( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ), + ( tfrm0 * tfrm1.getW( ) ) + ); +} + +VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res; + __m128 xxxx, yyyy, zzzz; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + xxxx = vec_splat( vec.get128(), 0 ); + mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() ); + //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y); + //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y); + yyyy = vec_splat( vec.get128(), 1 ); + res = vec_mul( mcol0, xxxx ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_madd( mcol1, yyyy, res ); + res = vec_madd( mcol2, zzzz, res ); + return Vector3( res ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec ) +{ + __m128 neg, res0, res1, res2; + neg = negatef4( vec.get128() ); + VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX ); + res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) ); + res0 = vec_sel(res0, vec_splat(neg, 1), select_z); + //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX ); + res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x); + //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX ); + res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) ); + res2 = vec_sel(res2, vec_splat(neg, 0), select_y); + VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff}; + VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff}; + VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff}; + res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) ); + res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) ); + res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects? + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ) +{ + return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); +} + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/sse/quat_aos.h b/test/Bullet2/vectormath/sse/quat_aos.h new file mode 100644 index 000000000..7eac59fe5 --- /dev/null +++ b/test/Bullet2/vectormath/sse/quat_aos.h @@ -0,0 +1,579 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_QUAT_AOS_CPP_H +#define _VECTORMATH_QUAT_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +VECTORMATH_FORCE_INLINE void Quat::set128(vec_float4 vec) +{ + mVec128 = vec; +} + +VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) +{ + mVec128 = _mm_unpacklo_ps( + _mm_unpacklo_ps( _x.get128(), _z.get128() ), + _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, float _w ) +{ + mVec128 = xyz.get128(); + _vmathVfSetElement(mVec128, _w, 3); +} + + + +VECTORMATH_FORCE_INLINE Quat::Quat(const Quat& quat) +{ + mVec128 = quat.get128(); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( float _x, float _y, float _z, float _w ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, _w); +} + + + + + +VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, const floatInVec &_w ) +{ + mVec128 = xyz.get128(); + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( const Vector4 &vec ) +{ + mVec128 = vec.get128(); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +VECTORMATH_FORCE_INLINE Quat::Quat( __m128 vf4 ) +{ + mVec128 = vf4; +} + +VECTORMATH_FORCE_INLINE const Quat Quat::identity( ) +{ + return Quat( _VECTORMATH_UNIT_0001 ); +} + +VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ) +{ + return lerp( floatInVec(t), quat0, quat1 ); +} + +VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ) +{ + return ( quat0 + ( ( quat1 - quat0 ) * t ) ); +} + +VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ) +{ + return slerp( floatInVec(t), unitQuat0, unitQuat1 ); +} + +VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ) +{ + Quat start; + vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + __m128 selectMask; + cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() ); + selectMask = (__m128)vec_cmpgt( _mm_setzero_ps(), cosAngle ); + cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask ); + start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); + selectMask = (__m128)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt ); + angles = vec_mergeh( _mm_set1_ps(1.0f), tttt ); + angles = vec_mergeh( angles, oneMinusT ); + angles = vec_madd( angles, angle, _mm_setzero_ps() ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) ); +} + +VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +{ + return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 ); +} + +VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +{ + return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) ); +} + +VECTORMATH_FORCE_INLINE __m128 Quat::get128( ) const +{ + return mVec128; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator =( const Quat &quat ) +{ + mVec128 = quat.mVec128; + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setXYZ( const Vector3 &vec ) +{ + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + mVec128 = vec_sel( vec.get128(), mVec128, sw ); + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Quat::getXYZ( ) const +{ + return Vector3( mVec128 ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setW( float _w ) +{ + _vmathVfSetElement(mVec128, _w, 3); + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setW( const floatInVec &_w ) +{ + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::getW( ) const +{ + return floatInVec( mVec128, 3 ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE VecIdx Quat::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const floatInVec Quat::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator +( const Quat &quat ) const +{ + return Quat( _mm_add_ps( mVec128, quat.mVec128 ) ); +} + + +VECTORMATH_FORCE_INLINE const Quat Quat::operator -( const Quat &quat ) const +{ + return Quat( _mm_sub_ps( mVec128, quat.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const floatInVec &scalar ) const +{ + return Quat( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator +=( const Quat &quat ) +{ + *this = *this + quat; + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator -=( const Quat &quat ) +{ + *this = *this - quat; + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator /( const floatInVec &scalar ) const +{ + return Quat( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator -( ) const +{ + return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat ) +{ + return floatInVec(scalar) * quat; +} + +VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat ) +{ + return quat * scalar; +} + +VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 ) +{ + return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat ) +{ + return floatInVec( _vmathVfDot4( quat.get128(), quat.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 ); +} + +VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat ) +{ + vec_float4 dot =_vmathVfDot4( quat.get128(), quat.get128()); + return Quat( _mm_mul_ps( quat.get128(), newtonrapson_rsqrt4( dot ) ) ); +} + + +VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + Vector3 crossVec; + __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res; + cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); + cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) ); + recipCosHalfAngleX2 = _mm_rsqrt_ps( cosAngleX2Plus2 ); + cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 ); + crossVec = cross( unitVec0, unitVec1 ); + res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 ); + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw ); + return Quat( res ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( vec_mul( unitVec.get128(), s ), c, sw ); + return Quat( res ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0xffffffff, 0, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, xsw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + VM_ATTRIBUTE_ALIGN16 unsigned int ysw[4] = {0, 0xffffffff, 0, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, ysw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0, 0, 0xffffffff, 0}; + VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, zsw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const Quat &quat ) const +{ + __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3; + __m128 product, l_wxyz, r_wxyz, xy, qw; + ldata = mVec128; + rdata = quat.mVec128; + tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) ); + qv = vec_mul( vec_splat( ldata, 3 ), rdata ); + qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv ); + qv = vec_madd( tmp0, tmp1, qv ); + qv = vec_nmsub( tmp2, tmp3, qv ); + product = vec_mul( ldata, rdata ); + l_wxyz = vec_sld( ldata, ldata, 12 ); + r_wxyz = vec_sld( rdata, rdata, 12 ); + qw = vec_nmsub( l_wxyz, r_wxyz, product ); + xy = vec_madd( l_wxyz, r_wxyz, product ); + qw = vec_sub( qw, vec_sld( xy, xy, 8 ) ); + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + return Quat( vec_sel( qv, qw, sw ) ); +} + +VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const Quat &quat ) +{ + *this = *this * quat; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &quat, const Vector3 &vec ) +{ __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res; + qdata = quat.get128(); + vdata = vec.get128(); + tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) ); + wwww = vec_splat( qdata, 3 ); + qv = vec_mul( wwww, vdata ); + qv = vec_madd( tmp0, tmp1, qv ); + qv = vec_nmsub( tmp2, tmp3, qv ); + product = vec_mul( qdata, vdata ); + qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product ); + qw = vec_add( vec_sld( product, product, 8 ), qw ); + tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) ); + res = vec_mul( vec_splat( qw, 0 ), qdata ); + res = vec_madd( wwww, qv, res ); + res = vec_madd( tmp0, tmp1, res ); + res = vec_nmsub( tmp2, tmp3, res ); + return Vector3( res ); +} + +VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat ) +{ + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0}; + return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) ); +} + +VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ) +{ + return select( quat0, quat1, boolInVec(select1) ); +} + +//VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ) +//{ +// return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) ); +//} + +VECTORMATH_FORCE_INLINE void loadXYZW(Quat& quat, const float* fptr) +{ +#ifdef USE_SSE3_LDDQU + quat = Quat( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 ); +#else + SSEFloat fl; + fl.f[0] = fptr[0]; + fl.f[1] = fptr[1]; + fl.f[2] = fptr[2]; + fl.f[3] = fptr[3]; + quat = Quat( fl.m128); +#endif + + +} + +VECTORMATH_FORCE_INLINE void storeXYZW(const Quat& quat, float* fptr) +{ + fptr[0] = quat.getX(); + fptr[1] = quat.getY(); + fptr[2] = quat.getZ(); + fptr[3] = quat.getW(); +// _mm_storeu_ps((float*)quat.get128(),fptr); +} + + + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Quat &quat ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = quat.get128(); + printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = quat.get128(); + printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/sse/vec_aos.h b/test/Bullet2/vectormath/sse/vec_aos.h new file mode 100644 index 000000000..35aeeaf16 --- /dev/null +++ b/test/Bullet2/vectormath/sse/vec_aos.h @@ -0,0 +1,1455 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VECTORMATH_VEC_AOS_CPP_H +#define _VECTORMATH_VEC_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Constants +// for permutes words are labeled [x,y,z,w] [a,b,c,d] + +#define _VECTORMATH_PERM_X 0x00010203 +#define _VECTORMATH_PERM_Y 0x04050607 +#define _VECTORMATH_PERM_Z 0x08090a0b +#define _VECTORMATH_PERM_W 0x0c0d0e0f +#define _VECTORMATH_PERM_A 0x10111213 +#define _VECTORMATH_PERM_B 0x14151617 +#define _VECTORMATH_PERM_C 0x18191a1b +#define _VECTORMATH_PERM_D 0x1c1d1e1f +#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A } +#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B } +#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C } +#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W } +#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 } +#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 } +#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 } +#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff } +#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f } +#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f } +#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f } +#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f } +#define _VECTORMATH_SLERP_TOL 0.999f +//_VECTORMATH_SLERP_TOLF + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#define _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx)) +static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 ) +{ + __m128 result = _mm_mul_ps( vec0, vec1); + return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) ); +} + +static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 ) +{ + __m128 result = _mm_mul_ps(vec0, vec1); + return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)), + _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)), + _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3))))); +} + +static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross( __m128 vec0, __m128 vec1 ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, result; + tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) ); + result = vec_mul( tmp0, tmp1 ); + result = vec_nmsub( tmp2, tmp3, result ); + return result; +} +/* +static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v) +{ +#if 0 + vec_int4 bexp; + vec_uint4 mant, sign, hfloat; + vec_uint4 notZero, isInf; + const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u); + const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu); + const vec_uint4 mergeSign = (vec_uint4)(0x00008000u); + + sign = vec_sr((vec_uint4)v, (vec_uint4)16); + mant = vec_sr((vec_uint4)v, (vec_uint4)13); + bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff); + + notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112); + isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142); + + bexp = _mm_add_ps(bexp, (vec_int4)-112); + bexp = vec_sl(bexp, (vec_uint4)10); + + hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant); + hfloat = vec_sel((vec_uint4)(0), hfloat, notZero); + hfloat = vec_sel(hfloat, hfloatInf, isInf); + hfloat = vec_sel(hfloat, sign, mergeSign); + + return hfloat; +#else + assert(0); + return _mm_setzero_ps(); +#endif +} + +static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v) +{ +#if 0 + vec_uint4 hfloat_u, hfloat_v; + const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; + hfloat_u = _vmathVfToHalfFloatsUnpacked(u); + hfloat_v = _vmathVfToHalfFloatsUnpacked(v); + return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack); +#else + assert(0); + return _mm_setzero_si128(); +#endif +} +*/ + +static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot) +{ + SSEFloat s; + s.m128 = src; + SSEFloat d; + d.m128 = dst; + d.f[slot] = s.f[slot]; + return d.m128; +} + +#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar + +static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar) +{ + return _mm_set1_ps(scalar); +} + +#endif + +namespace Vectormath { +namespace Aos { + + +#ifdef _VECTORMATH_NO_SCALAR_CAST +VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const +{ + return floatInVec(ref, i); +} + +VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const +#else +VECTORMATH_FORCE_INLINE VecIdx::operator float() const +#endif +{ + return ((float *)&ref)[i]; +} + +VECTORMATH_FORCE_INLINE float VecIdx::operator =( float scalar ) +{ + _vmathVfSetElement(ref, scalar, i); + return scalar; +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const floatInVec &scalar ) +{ + ref = _vmathVfInsert(ref, scalar.get128(), i); + return scalar; +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const VecIdx& scalar ) +{ + return *this = floatInVec(scalar.ref, scalar.i); +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) * scalar; +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( float scalar ) +{ + return *this /= floatInVec(scalar); +} + +inline floatInVec VecIdx::operator /=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) / scalar; +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( float scalar ) +{ + return *this += floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) + scalar; +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( float scalar ) +{ + return *this -= floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) - scalar; +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3& vec) +{ + set128(vec.get128()); +} + +VECTORMATH_FORCE_INLINE void Vector3::set128(vec_float4 vec) +{ + mVec128 = vec; +} + + +VECTORMATH_FORCE_INLINE Vector3::Vector3( float _x, float _y, float _z ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +{ + __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() ); + mVec128 = _mm_unpacklo_ps( xz, _y.get128() ); +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3( const Point3 &pnt ) +{ + mVec128 = pnt.get128(); +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +VECTORMATH_FORCE_INLINE Vector3::Vector3( __m128 vf4 ) +{ + mVec128 = vf4; +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::xAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_1000 ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::yAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_0100 ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::zAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_0010 ); +} + +VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ) +{ + return lerp( floatInVec(t), vec0, vec1 ); +} + +VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + return slerp( floatInVec(t), unitVec0, unitVec1 ); +} + +VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); + __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt ); + angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t + angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t + angles = _mm_mul_ps( angles, angle ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); +} + +VECTORMATH_FORCE_INLINE __m128 Vector3::get128( ) const +{ + return mVec128; +} + +VECTORMATH_FORCE_INLINE void loadXYZ(Point3& vec, const float* fptr) +{ +#ifdef USE_SSE3_LDDQU + vec = Point3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 ); +#else + SSEFloat fl; + fl.f[0] = fptr[0]; + fl.f[1] = fptr[1]; + fl.f[2] = fptr[2]; + fl.f[3] = fptr[3]; + vec = Point3( fl.m128); +#endif //USE_SSE3_LDDQU + +} + + + +VECTORMATH_FORCE_INLINE void loadXYZ(Vector3& vec, const float* fptr) +{ +#ifdef USE_SSE3_LDDQU + vec = Vector3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 ); +#else + SSEFloat fl; + fl.f[0] = fptr[0]; + fl.f[1] = fptr[1]; + fl.f[2] = fptr[2]; + fl.f[3] = fptr[3]; + vec = Vector3( fl.m128); +#endif //USE_SSE3_LDDQU + +} + +VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad ) +{ + __m128 dstVec = *quad; + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize + dstVec = vec_sel(vec.get128(), dstVec, sw); + *quad = dstVec; +} + +VECTORMATH_FORCE_INLINE void storeXYZ(const Point3& vec, float* fptr) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + +VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3& vec, float* fptr) +{ + fptr[0] = vec.getX(); + fptr[1] = vec.getY(); + fptr[2] = vec.getZ(); +} + + +VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ) +{ + const float *quads = (float *)threeQuads; + vec0 = Vector3( _mm_load_ps(quads) ); + vec1 = Vector3( _mm_loadu_ps(quads + 3) ); + vec2 = Vector3( _mm_loadu_ps(quads + 6) ); + vec3 = Vector3( _mm_loadu_ps(quads + 9) ); +} + +VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ) +{ + __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); + __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); + VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff}; + VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0}; + threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw ); + threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) ); + threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); +} +/* +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ) +{ + assert(0); +#if 0 + __m128 xyz0[3]; + __m128 xyz1[3]; + storeXYZArray( vec0, vec1, vec2, vec3, xyz0 ); + storeXYZArray( vec4, vec5, vec6, vec7, xyz1 ); + threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); + threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); + threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]); +#endif +} +*/ +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator =( const Vector3 &vec ) +{ + mVec128 = vec.mVec128; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector3::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector3::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector3::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector3::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE VecIdx Vector3::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator +( const Vector3 &vec ) const +{ + return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( const Vector3 &vec ) const +{ + return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 Vector3::operator +( const Point3 &pnt ) const +{ + return Point3( _mm_add_ps( mVec128, pnt.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( const floatInVec &scalar ) const +{ + return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator +=( const Vector3 &vec ) +{ + *this = *this + vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator -=( const Vector3 &vec ) +{ + *this = *this - vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( const floatInVec &scalar ) const +{ + return Vector3( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( ) const +{ + //return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); + + VM_ATTRIBUTE_ALIGN16 static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; + __m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf; + return Vector3(_mm_xor_ps(get128(),NEG_MASK)); +} + +VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec ) +{ + return floatInVec(scalar) * vec; +} + +VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ) +{ + return vec * scalar; +} + +VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec ) +{ + return Vector3( _mm_rcp_ps( vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec ) +{ + return Vector3( fabsf4( vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Vector3( _mm_or_ps( + _mm_and_ps ( vmask, vec0.get128() ), // Value + _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs +} + +VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec ) +{ + return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec ) +{ + return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec ) +{ + return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec ) +{ + return floatInVec( _vmathVfDot3( vec.get128(), vec.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 ); +} + + +VECTORMATH_FORCE_INLINE const Vector3 normalizeApprox( const Vector3 &vec ) +{ + return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec ) +{ + return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ) +{ + return select( vec0, vec1, boolInVec(select1) ); +} + + +VECTORMATH_FORCE_INLINE const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1) +{ + return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128())); +} + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Vector3 &vec ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +#endif + +VECTORMATH_FORCE_INLINE Vector4::Vector4( float _x, float _y, float _z, float _w ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, _w); + } + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) +{ + mVec128 = _mm_unpacklo_ps( + _mm_unpacklo_ps( _x.get128(), _z.get128() ), + _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, float _w ) +{ + mVec128 = xyz.get128(); + _vmathVfSetElement(mVec128, _w, 3); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w ) +{ + mVec128 = xyz.get128(); + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &vec ) +{ + mVec128 = vec.get128(); + mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const Point3 &pnt ) +{ + mVec128 = pnt.get128(); + mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const Quat &quat ) +{ + mVec128 = quat.get128(); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +VECTORMATH_FORCE_INLINE Vector4::Vector4( __m128 vf4 ) +{ + mVec128 = vf4; +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::xAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_1000 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::yAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0100 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::zAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0010 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::wAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0001 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ) +{ + return lerp( floatInVec(t), vec0, vec1 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +{ + return slerp( floatInVec(t), unitVec0, unitVec1 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +{ + __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() ); + __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt ); + angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t + angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t + angles = _mm_mul_ps( angles, angle ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); +} + +VECTORMATH_FORCE_INLINE __m128 Vector4::get128( ) const +{ + return mVec128; +} +/* +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ) +{ + twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128()); + twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128()); +} +*/ +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator =( const Vector4 &vec ) +{ + mVec128 = vec.mVec128; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setXYZ( const Vector3 &vec ) +{ + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + mVec128 = vec_sel( vec.get128(), mVec128, sw ); + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ( ) const +{ + return Vector3( mVec128 ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( float _w ) +{ + _vmathVfSetElement(mVec128, _w, 3); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( const floatInVec &_w ) +{ + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::getW( ) const +{ + return floatInVec( mVec128, 3 ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE VecIdx Vector4::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator +( const Vector4 &vec ) const +{ + return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( const Vector4 &vec ) const +{ + return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( const floatInVec &scalar ) const +{ + return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator +=( const Vector4 &vec ) +{ + *this = *this + vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator -=( const Vector4 &vec ) +{ + *this = *this - vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( const floatInVec &scalar ) const +{ + return Vector4( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( ) const +{ + return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec ) +{ + return floatInVec(scalar) * vec; +} + +VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ) +{ + return vec * scalar; +} + +VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec ) +{ + return Vector4( _mm_rcp_ps( vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec ) +{ + return Vector4( fabsf4( vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Vector4( _mm_or_ps( + _mm_and_ps ( vmask, vec0.get128() ), // Value + _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs +} + +VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec ) +{ + return floatInVec( _mm_max_ps( + _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec ) +{ + return floatInVec( _mm_min_ps( + _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec ) +{ + return floatInVec( _mm_add_ps( + _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec ) +{ + return floatInVec( _vmathVfDot4( vec.get128(), vec.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 ); +} + +VECTORMATH_FORCE_INLINE const Vector4 normalizeApprox( const Vector4 &vec ) +{ + return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec ) +{ + return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); +} + +VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ) +{ + return select( vec0, vec1, boolInVec(select1) ); +} + + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Vector4 &vec ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +#endif + +VECTORMATH_FORCE_INLINE Point3::Point3( float _x, float _y, float _z ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); +} + +VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +{ + mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() ); +} + +VECTORMATH_FORCE_INLINE Point3::Point3( const Vector3 &vec ) +{ + mVec128 = vec.get128(); +} + +VECTORMATH_FORCE_INLINE Point3::Point3( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +VECTORMATH_FORCE_INLINE Point3::Point3( __m128 vf4 ) +{ + mVec128 = vf4; +} + +VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ) +{ + return lerp( floatInVec(t), pnt0, pnt1 ); +} + +VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ) +{ + return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); +} + +VECTORMATH_FORCE_INLINE __m128 Point3::get128( ) const +{ + return mVec128; +} + +VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad ) +{ + __m128 dstVec = *quad; + VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize + dstVec = vec_sel(pnt.get128(), dstVec, sw); + *quad = dstVec; +} + +VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ) +{ + const float *quads = (float *)threeQuads; + pnt0 = Point3( _mm_load_ps(quads) ); + pnt1 = Point3( _mm_loadu_ps(quads + 3) ); + pnt2 = Point3( _mm_loadu_ps(quads + 6) ); + pnt3 = Point3( _mm_loadu_ps(quads + 9) ); +} + +VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ) +{ + __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); + __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); + VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff}; + VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0}; + threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw ); + threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) ); + threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); +} +/* +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ) +{ +#if 0 + __m128 xyz0[3]; + __m128 xyz1[3]; + storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 ); + storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 ); + threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); + threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); + threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]); +#else + assert(0); +#endif +} +*/ +VECTORMATH_FORCE_INLINE Point3 & Point3::operator =( const Point3 &pnt ) +{ + mVec128 = pnt.mVec128; + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Point3::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Point3::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Point3::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE VecIdx Point3::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const floatInVec Point3::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +VECTORMATH_FORCE_INLINE const Vector3 Point3::operator -( const Point3 &pnt ) const +{ + return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 Point3::operator +( const Vector3 &vec ) const +{ + return Point3( _mm_add_ps( mVec128, vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 Point3::operator -( const Vector3 &vec ) const +{ + return Point3( _mm_sub_ps( mVec128, vec.get128() ) ); +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::operator +=( const Vector3 &vec ) +{ + *this = *this + vec; + return *this; +} + +VECTORMATH_FORCE_INLINE Point3 & Point3::operator -=( const Vector3 &vec ) +{ + *this = *this - vec; + return *this; +} + +VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt ) +{ + return Point3( _mm_rcp_ps( pnt.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt ) +{ + return Point3( fabsf4( pnt.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Point3( _mm_or_ps( + _mm_and_ps ( vmask, pnt0.get128() ), // Value + _mm_andnot_ps( vmask, pnt1.get128() ) ) ); // Signs +} + +VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt ) +{ + return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt ) +{ + return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt ) +{ + return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal ) +{ + return scale( pnt, floatInVec( scaleVal ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ) +{ + return mulPerElem( pnt, Point3( scaleVal ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ) +{ + return mulPerElem( pnt, Point3( scaleVec ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ) +{ + return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); +} + +VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt ) +{ + return lengthSqr( Vector3( pnt ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt ) +{ + return length( Vector3( pnt ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return lengthSqr( ( pnt1 - pnt0 ) ); +} + +VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return length( ( pnt1 - pnt0 ) ); +} + +VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ) +{ + return select( pnt0, pnt1, boolInVec(select1) ); +} + +VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ) +{ + return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) ); +} + + + +#ifdef _VECTORMATH_DEBUG + +VECTORMATH_FORCE_INLINE void print( const Point3 &pnt ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = pnt.get128(); + printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = pnt.get128(); + printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/sse/vecidx_aos.h b/test/Bullet2/vectormath/sse/vecidx_aos.h new file mode 100644 index 000000000..8ba4b1d75 --- /dev/null +++ b/test/Bullet2/vectormath/sse/vecidx_aos.h @@ -0,0 +1,80 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VECTORMATH_VECIDX_AOS_H +#define _VECTORMATH_VECIDX_AOS_H + + +#include "floatInVec.h" + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// VecIdx +// Used in setting elements of Vector3, Vector4, Point3, or Quat with the +// subscripting operator. +// + +VM_ATTRIBUTE_ALIGNED_CLASS16 (class) VecIdx +{ +private: + __m128 &ref; + int i; +public: + inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; } + + // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined + // in which case, implicitly casts to floatInVec, and one must call + // getAsFloat to convert to float. + // +#ifdef _VECTORMATH_NO_SCALAR_CAST + inline operator floatInVec() const; + inline float getAsFloat() const; +#else + inline operator float() const; +#endif + + inline float operator =( float scalar ); + inline floatInVec operator =( const floatInVec &scalar ); + inline floatInVec operator =( const VecIdx& scalar ); + inline floatInVec operator *=( float scalar ); + inline floatInVec operator *=( const floatInVec &scalar ); + inline floatInVec operator /=( float scalar ); + inline floatInVec operator /=( const floatInVec &scalar ); + inline floatInVec operator +=( float scalar ); + inline floatInVec operator +=( const floatInVec &scalar ); + inline floatInVec operator -=( float scalar ); + inline floatInVec operator -=( const floatInVec &scalar ); +}; + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/test/Bullet2/vectormath/sse/vectormath_aos.h b/test/Bullet2/vectormath/sse/vectormath_aos.h new file mode 100644 index 000000000..be5ae8c6e --- /dev/null +++ b/test/Bullet2/vectormath/sse/vectormath_aos.h @@ -0,0 +1,2547 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_AOS_CPP_SSE_H +#define _VECTORMATH_AOS_CPP_SSE_H + +#include +#include +#include +#include + +#define Vector3Ref Vector3& +#define QuatRef Quat& +#define Matrix3Ref Matrix3& + +#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) + #define USE_SSE3_LDDQU + + #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) __declspec(align(16)) a + #define VM_ATTRIBUTE_ALIGN16 __declspec(align(16)) + #define VECTORMATH_FORCE_INLINE __forceinline +#else + #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) a __attribute__ ((aligned (16))) + #define VM_ATTRIBUTE_ALIGN16 __attribute__ ((aligned (16))) + #define VECTORMATH_FORCE_INLINE inline __attribute__ ((always_inline)) + #ifdef __SSE3__ + #define USE_SSE3_LDDQU + #endif //__SSE3__ +#endif//_WIN32 + + +#ifdef USE_SSE3_LDDQU +#include //_mm_lddqu_si128 +#endif //USE_SSE3_LDDQU + + +// TODO: Tidy +typedef __m128 vec_float4; +typedef __m128 vec_uint4; +typedef __m128 vec_int4; +typedef __m128i vec_uchar16; +typedef __m128i vec_ushort8; + +#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e)) + +#define _mm_ror_ps(vec,i) \ + (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(i+3)%4,(unsigned char)(i+2)%4,(unsigned char)(i+1)%4,(unsigned char)(i+0)%4))) : (vec)) +#define _mm_rol_ps(vec,i) \ + (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-i)%4,(unsigned char)(6-i)%4,(unsigned char)(5-i)%4,(unsigned char)(4-i)%4))) : (vec)) + +#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4)) + +#define _mm_abs_ps(vec) _mm_andnot_ps(_MASKSIGN_,vec) +#define _mm_neg_ps(vec) _mm_xor_ps(_MASKSIGN_,vec) + +#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) ) + +union SSEFloat +{ + __m128i vi; + __m128 m128; + __m128 vf; + unsigned int ui[4]; + unsigned short s[8]; + float f[4]; + SSEFloat(__m128 v) : m128(v) {} + SSEFloat(__m128i v) : vi(v) {} + SSEFloat() {}//uninitialized +}; + +static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, __m128 mask) +{ + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +} +static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask) +{ + return vec_sel(a, b, _mm_load_ps((float *)_mask)); +} +static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask) +{ + return vec_sel(a, b, _mm_set1_ps(*(float *)&_mask)); +} + +static VECTORMATH_FORCE_INLINE __m128 toM128(unsigned int x) +{ + return _mm_set1_ps( *(float *)&x ); +} + +static VECTORMATH_FORCE_INLINE __m128 fabsf4(__m128 x) +{ + return _mm_and_ps( x, toM128( 0x7fffffff ) ); +} +/* +union SSE64 +{ + __m128 m128; + struct + { + __m64 m01; + __m64 m23; + } m64; +}; + +static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m64.m01 = _mm_cvttps_pi32(x); + sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2)); + _mm_empty(); + return sse64.m128; +} + +static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m128 = x; + __m128 result =_mm_movelh_ps( + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01), + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23)); + _mm_empty(); + return result; +} +*/ +static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + __m128i result = _mm_cvtps_epi32(x); + return (__m128 &)result; +} + +static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + return _mm_cvtepi32_ps((__m128i &)x); +} + +#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) ) +#define vec_sub(a,b) _mm_sub_ps( a, b ) +#define vec_add(a,b) _mm_add_ps( a, b ) +#define vec_mul(a,b) _mm_mul_ps( a, b ) +#define vec_xor(a,b) _mm_xor_ps( a, b ) +#define vec_and(a,b) _mm_and_ps( a, b ) +#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b ) +#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b ) + +#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b ) +#define vec_mergel(a,b) _mm_unpackhi_ps( a, b ) + +#define vec_andc(a,b) _mm_andnot_ps( b, a ) + +#define sqrtf4(x) _mm_sqrt_ps( x ) +#define rsqrtf4(x) _mm_rsqrt_ps( x ) +#define recipf4(x) _mm_rcp_ps( x ) +#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) + +static VECTORMATH_FORCE_INLINE __m128 newtonrapson_rsqrt4( const __m128 v ) +{ +#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) +#define _three _mm_setr_ps(3.f,3.f,3.f,3.f) +const __m128 approx = _mm_rsqrt_ps( v ); +const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx); +return _mm_mul_ps(_mm_mul_ps(_half4, approx), _mm_sub_ps(_three, muls) ); +} + +static VECTORMATH_FORCE_INLINE __m128 acosf4(__m128 x) +{ + __m128 xabs = fabsf4(x); + __m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() ); + __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi amd lo). + */ + __m128 xabs2 = _mm_mul_ps(xabs, xabs); + __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); + __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f), + xabs, _mm_set1_ps(0.0066700901f)), + xabs, _mm_set1_ps(-0.0170881256f)), + xabs, _mm_set1_ps( 0.0308918810f)); + __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f), + xabs, _mm_set1_ps(0.0889789874f)), + xabs, _mm_set1_ps(-0.2145988016f)), + xabs, _mm_set1_ps( 1.5707963050f)); + + __m128 result = vec_madd(hi, xabs4, lo); + + // Adjust the result if x is negactive. + return vec_sel( + vec_mul(t1, result), // Positive + vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative + select); +} + +static VECTORMATH_FORCE_INLINE __m128 sinf4(vec_float4 x) +{ + +// +// Common constants used to evaluate sinf4/cosf4/tanf4 +// +#define _SINCOS_CC0 -0.0013602249f +#define _SINCOS_CC1 0.0416566950f +#define _SINCOS_CC2 -0.4999990225f +#define _SINCOS_SC0 -0.0001950727f +#define _SINCOS_SC1 0.0083320758f +#define _SINCOS_SC2 -0.1666665247f + +#define _SINCOS_KC1 1.57079625129f +#define _SINCOS_KC2 7.54978995489e-8f + + vec_float4 xl,xl2,xl3,res; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + vec_int4 q = vec_cts(xl,0); + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = _mm_and_ps(q,toM128(0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + + vec_float4 cx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + res = vec_sel(cx,sx,vec_cmpeq(vec_and(offset, + toM128(0x1)), + _mm_setzero_ps())); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + return vec_sel( + vec_xor(toM128(0x80000000U), res), // Negative + res, // Positive + vec_cmpeq(vec_and(offset,toM128(0x2)),_mm_setzero_ps())); +} + +static VECTORMATH_FORCE_INLINE void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c) +{ + vec_float4 xl,xl2,xl3; + vec_int4 offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0); + vec_int4 q = vec_cts(xl,0); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + // + offsetSin = vec_and(q,toM128((int)0x3)); + __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin); + offsetCos = (__m128 &)temp; + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 cx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,toM128(0x1)),_mm_setzero_ps()); + vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,toM128(0x1)),_mm_setzero_ps()); + *s = vec_sel(cx,sx,sinMask); + *c = vec_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + sinMask = vec_cmpeq(vec_and(offsetSin,toM128(0x2)),_mm_setzero_ps()); + cosMask = vec_cmpeq(vec_and(offsetCos,toM128(0x2)),_mm_setzero_ps()); + + *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask); + *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask); +} + +#include "vecidx_aos.h" +#include "floatInVec.h" +#include "boolInVec.h" + +#ifdef _VECTORMATH_DEBUG +#include +#endif +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; +class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + __m128 mVec128; + + VECTORMATH_FORCE_INLINE void set128(vec_float4 vec); + + VECTORMATH_FORCE_INLINE vec_float4& get128Ref(); + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Vector3( ) { }; + + // Default copy constructor + // + VECTORMATH_FORCE_INLINE Vector3(const Vector3& vec); + + // Construct a 3-D vector from x, y, and z elements + // + VECTORMATH_FORCE_INLINE Vector3( float x, float y, float z ); + + // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit VECTORMATH_FORCE_INLINE Vector3( const Point3 &pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Vector3( float scalar ); + + // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Vector3( const floatInVec &scalar ); + + // Set vector float data in a 3-D vector + // + explicit VECTORMATH_FORCE_INLINE Vector3( __m128 vf4 ); + + // Get vector float data from a 3-D vector + // + VECTORMATH_FORCE_INLINE __m128 get128( ) const; + + // Assign one 3-D vector to another + // + VECTORMATH_FORCE_INLINE Vector3 & operator =( const Vector3 &vec ); + + // Set the x element of a 3-D vector + // + VECTORMATH_FORCE_INLINE Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + VECTORMATH_FORCE_INLINE Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + VECTORMATH_FORCE_INLINE Vector3 & setZ( float z ); + + // Set the x element of a 3-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getX( ) const; + + // Get the y element of a 3-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getY( ) const; + + // Get the z element of a 3-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D vector by index + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + VECTORMATH_FORCE_INLINE VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const; + + // Add two 3-D vectors + // + VECTORMATH_FORCE_INLINE const Vector3 operator +( const Vector3 &vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + VECTORMATH_FORCE_INLINE const Vector3 operator -( const Vector3 &vec ) const; + + // Add a 3-D vector to a 3-D point + // + VECTORMATH_FORCE_INLINE const Point3 operator +( const Point3 &pnt ) const; + + // Multiply a 3-D vector by a scalar + // + VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + VECTORMATH_FORCE_INLINE const Vector3 operator /( float scalar ) const; + + // Multiply a 3-D vector by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar ) const; + + // Divide a 3-D vector by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Vector3 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + VECTORMATH_FORCE_INLINE Vector3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + VECTORMATH_FORCE_INLINE Vector3 & operator -=( const Vector3 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + VECTORMATH_FORCE_INLINE Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + VECTORMATH_FORCE_INLINE Vector3 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector3 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 3-D vector + // + VECTORMATH_FORCE_INLINE const Vector3 operator -( ) const; + + // Construct x axis + // + static VECTORMATH_FORCE_INLINE const Vector3 xAxis( ); + + // Construct y axis + // + static VECTORMATH_FORCE_INLINE const Vector3 yAxis( ); + + // Construct z axis + // + static VECTORMATH_FORCE_INLINE const Vector3 zAxis( ); + +}; + +// Multiply a 3-D vector by a scalar +// +VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec ); + +// Multiply a 3-D vector by a scalar (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ); + +// Multiply two 3-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec ); + +// Compute the absolute value of a 3-D vector per element +// +VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec ); + +// Copy sign from one 3-D vector to another, per element +// +VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum of two 3-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Minimum of two 3-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum element of a 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec ); + +// Minimum element of a 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec ); + +// Compute the sum of all elements of a 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec ); + +// Compute the dot product of two 3-D vectors +// +VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the square of the length of a 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec ); + +// Compute the length of a 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec ); + +// Compute cross product of two 3-D vectors +// +VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ); + +// Outer product of two 3-D vectors +// +VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// NOTE: +// Slower than column post-multiply. +// +VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Conditionally select between two 3-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ); + +// Conditionally select between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word +// +VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad ); + +// Load four three-float 3-D vectors, stored in three quadwords +// +VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ); + +// Store four 3-D vectors in three quadwords +// +VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ); + +// Store eight 3-D vectors as half-floats +// +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Vector3 &vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Vector4( ) { }; + + // Construct a 4-D vector from x, y, z, and w elements + // + VECTORMATH_FORCE_INLINE Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, const floatInVec &w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit VECTORMATH_FORCE_INLINE Vector4( const Vector3 &vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit VECTORMATH_FORCE_INLINE Vector4( const Point3 &pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit VECTORMATH_FORCE_INLINE Vector4( const Quat &quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Vector4( float scalar ); + + // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Vector4( const floatInVec &scalar ); + + // Set vector float data in a 4-D vector + // + explicit VECTORMATH_FORCE_INLINE Vector4( __m128 vf4 ); + + // Get vector float data from a 4-D vector + // + VECTORMATH_FORCE_INLINE __m128 get128( ) const; + + // Assign one 4-D vector to another + // + VECTORMATH_FORCE_INLINE Vector4 & operator =( const Vector4 &vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w element. + // + VECTORMATH_FORCE_INLINE Vector4 & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a 4-D vector + // + VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & setW( float w ); + + // Set the x element of a 4-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & setX( const floatInVec &x ); + + // Set the y element of a 4-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & setY( const floatInVec &y ); + + // Set the z element of a 4-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & setZ( const floatInVec &z ); + + // Set the w element of a 4-D vector (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & setW( const floatInVec &w ); + + // Get the x element of a 4-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getX( ) const; + + // Get the y element of a 4-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getY( ) const; + + // Get the z element of a 4-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const; + + // Get the w element of a 4-D vector + // + VECTORMATH_FORCE_INLINE const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + VECTORMATH_FORCE_INLINE VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const; + + // Add two 4-D vectors + // + VECTORMATH_FORCE_INLINE const Vector4 operator +( const Vector4 &vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + VECTORMATH_FORCE_INLINE const Vector4 operator -( const Vector4 &vec ) const; + + // Multiply a 4-D vector by a scalar + // + VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + VECTORMATH_FORCE_INLINE const Vector4 operator /( float scalar ) const; + + // Multiply a 4-D vector by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar ) const; + + // Divide a 4-D vector by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Vector4 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & operator +=( const Vector4 &vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + VECTORMATH_FORCE_INLINE Vector4 & operator -=( const Vector4 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + VECTORMATH_FORCE_INLINE Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + VECTORMATH_FORCE_INLINE Vector4 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Vector4 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 4-D vector + // + VECTORMATH_FORCE_INLINE const Vector4 operator -( ) const; + + // Construct x axis + // + static VECTORMATH_FORCE_INLINE const Vector4 xAxis( ); + + // Construct y axis + // + static VECTORMATH_FORCE_INLINE const Vector4 yAxis( ); + + // Construct z axis + // + static VECTORMATH_FORCE_INLINE const Vector4 zAxis( ); + + // Construct w axis + // + static VECTORMATH_FORCE_INLINE const Vector4 wAxis( ); + +}; + +// Multiply a 4-D vector by a scalar +// +VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec ); + +// Multiply a 4-D vector by a scalar (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ); + +// Multiply two 4-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec ); + +// Compute the absolute value of a 4-D vector per element +// +VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec ); + +// Copy sign from one 4-D vector to another, per element +// +VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum of two 4-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Minimum of two 4-D vectors per element +// +VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum element of a 4-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec ); + +// Minimum element of a 4-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec ); + +// Compute the sum of all elements of a 4-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec ); + +// Compute the dot product of two 4-D vectors +// +VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the square of the length of a 4-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec ); + +// Compute the length of a 4-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec ); + +// Outer product of two 4-D vectors +// +VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Conditionally select between two 4-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ); + +// Conditionally select between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ); + +// Store four 4-D vectors as half-floats +// +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Vector4 &vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Point3( ) { }; + + // Construct a 3-D point from x, y, and z elements + // + VECTORMATH_FORCE_INLINE Point3( float x, float y, float z ); + + // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit VECTORMATH_FORCE_INLINE Point3( const Vector3 &vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Point3( float scalar ); + + // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Point3( const floatInVec &scalar ); + + // Set vector float data in a 3-D point + // + explicit VECTORMATH_FORCE_INLINE Point3( __m128 vf4 ); + + // Get vector float data from a 3-D point + // + VECTORMATH_FORCE_INLINE __m128 get128( ) const; + + // Assign one 3-D point to another + // + VECTORMATH_FORCE_INLINE Point3 & operator =( const Point3 &pnt ); + + // Set the x element of a 3-D point + // + VECTORMATH_FORCE_INLINE Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + VECTORMATH_FORCE_INLINE Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + VECTORMATH_FORCE_INLINE Point3 & setZ( float z ); + + // Set the x element of a 3-D point (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Point3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D point (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Point3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D point (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Point3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D point + // + VECTORMATH_FORCE_INLINE const floatInVec getX( ) const; + + // Get the y element of a 3-D point + // + VECTORMATH_FORCE_INLINE const floatInVec getY( ) const; + + // Get the z element of a 3-D point + // + VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D point by index + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + VECTORMATH_FORCE_INLINE VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + VECTORMATH_FORCE_INLINE const Vector3 operator -( const Point3 &pnt ) const; + + // Add a 3-D point to a 3-D vector + // + VECTORMATH_FORCE_INLINE const Point3 operator +( const Vector3 &vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + VECTORMATH_FORCE_INLINE const Point3 operator -( const Vector3 &vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + VECTORMATH_FORCE_INLINE Point3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + VECTORMATH_FORCE_INLINE Point3 & operator -=( const Vector3 &vec ); + +}; + +// Multiply two 3-D points per element +// +VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt ); + +// Compute the absolute value of a 3-D point per element +// +VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt ); + +// Copy sign from one 3-D point to another, per element +// +VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum of two 3-D points per element +// +VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Minimum of two 3-D points per element +// +VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum element of a 3-D point +// +VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt ); + +// Minimum element of a 3-D point +// +VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt ); + +// Compute the sum of all elements of a 3-D point +// +VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt ); + +// Apply uniform scale to a 3-D point +// +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal ); + +// Apply uniform scale to a 3-D point (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt ); + +// Compute the square of the distance between two 3-D points +// +VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the distance between two 3-D points +// +VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Conditionally select between two 3-D points +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ); + +// Conditionally select between two 3-D points (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word +// +VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad ); + +// Load four three-float 3-D points, stored in three quadwords +// +VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ); + +// Store four 3-D points in three quadwords +// +VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ); + +// Store eight 3-D points as half-floats +// +VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Point3 &pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// +class Quat +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Quat( ) { }; + + VECTORMATH_FORCE_INLINE Quat(const Quat& quat); + + // Construct a quaternion from x, y, z, and w elements + // + VECTORMATH_FORCE_INLINE Quat( float x, float y, float z, float w ); + + // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a quaternion from a 3-D vector and a scalar + // + VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, float w ); + + // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, const floatInVec &w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit VECTORMATH_FORCE_INLINE Quat( const Vector4 &vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit VECTORMATH_FORCE_INLINE Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Quat( float scalar ); + + // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Quat( const floatInVec &scalar ); + + // Set vector float data in a quaternion + // + explicit VECTORMATH_FORCE_INLINE Quat( __m128 vf4 ); + + // Get vector float data from a quaternion + // + VECTORMATH_FORCE_INLINE __m128 get128( ) const; + + // Set a quaterion from vector float data + // + VECTORMATH_FORCE_INLINE void set128(vec_float4 vec); + + // Assign one quaternion to another + // + VECTORMATH_FORCE_INLINE Quat & operator =( const Quat &quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element. + // + VECTORMATH_FORCE_INLINE Quat & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a quaternion + // + VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + VECTORMATH_FORCE_INLINE Quat & setX( float x ); + + // Set the y element of a quaternion + // + VECTORMATH_FORCE_INLINE Quat & setY( float y ); + + // Set the z element of a quaternion + // + VECTORMATH_FORCE_INLINE Quat & setZ( float z ); + + // Set the w element of a quaternion + // + VECTORMATH_FORCE_INLINE Quat & setW( float w ); + + // Set the x element of a quaternion (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & setX( const floatInVec &x ); + + // Set the y element of a quaternion (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & setY( const floatInVec &y ); + + // Set the z element of a quaternion (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & setZ( const floatInVec &z ); + + // Set the w element of a quaternion (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & setW( const floatInVec &w ); + + // Get the x element of a quaternion + // + VECTORMATH_FORCE_INLINE const floatInVec getX( ) const; + + // Get the y element of a quaternion + // + VECTORMATH_FORCE_INLINE const floatInVec getY( ) const; + + // Get the z element of a quaternion + // + VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const; + + // Get the w element of a quaternion + // + VECTORMATH_FORCE_INLINE const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + VECTORMATH_FORCE_INLINE Quat & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a quaternion by index + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + VECTORMATH_FORCE_INLINE VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const; + + // Add two quaternions + // + VECTORMATH_FORCE_INLINE const Quat operator +( const Quat &quat ) const; + + // Subtract a quaternion from another quaternion + // + VECTORMATH_FORCE_INLINE const Quat operator -( const Quat &quat ) const; + + // Multiply two quaternions + // + VECTORMATH_FORCE_INLINE const Quat operator *( const Quat &quat ) const; + + // Multiply a quaternion by a scalar + // + VECTORMATH_FORCE_INLINE const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + VECTORMATH_FORCE_INLINE const Quat operator /( float scalar ) const; + + // Multiply a quaternion by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar ) const; + + // Divide a quaternion by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Quat operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + VECTORMATH_FORCE_INLINE Quat & operator +=( const Quat &quat ); + + // Perform compound assignment and subtraction by a quaternion + // + VECTORMATH_FORCE_INLINE Quat & operator -=( const Quat &quat ); + + // Perform compound assignment and multiplication by a quaternion + // + VECTORMATH_FORCE_INLINE Quat & operator *=( const Quat &quat ); + + // Perform compound assignment and multiplication by a scalar + // + VECTORMATH_FORCE_INLINE Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + VECTORMATH_FORCE_INLINE Quat & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Quat & operator /=( const floatInVec &scalar ); + + // Negate all elements of a quaternion + // + VECTORMATH_FORCE_INLINE const Quat operator -( ) const; + + // Construct an identity quaternion + // + static VECTORMATH_FORCE_INLINE const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static VECTORMATH_FORCE_INLINE const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static VECTORMATH_FORCE_INLINE const Quat rotation( float radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Quat rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static VECTORMATH_FORCE_INLINE const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static VECTORMATH_FORCE_INLINE const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static VECTORMATH_FORCE_INLINE const Quat rotationZ( float radians ); + + // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Quat rotationX( const floatInVec &radians ); + + // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Quat rotationY( const floatInVec &radians ); + + // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Quat rotationZ( const floatInVec &radians ); + +}; + +// Multiply a quaternion by a scalar +// +VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat ); + +// Multiply a quaternion by a scalar (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat ); + +// Compute the conjugate of a quaternion +// +VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec ); + +// Compute the dot product of two quaternions +// +VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 ); + +// Compute the norm of a quaternion +// +VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat ); + +// Compute the length of a quaternion +// +VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ); + +// Linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical quadrangle interpolation +// +VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Spherical quadrangle interpolation (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Conditionally select between two quaternions +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ); + +// Conditionally select between two quaternions (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Quat &quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Matrix3( ) { }; + + // Copy a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + VECTORMATH_FORCE_INLINE Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit VECTORMATH_FORCE_INLINE Matrix3( const Quat &unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Matrix3( float scalar ); + + // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Matrix3( const floatInVec &scalar ); + + // Assign one 3x3 matrix to another + // + VECTORMATH_FORCE_INLINE Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & setCol2( const Vector3 &col2 ); + + // Get column 0 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Matrix3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Matrix3 & setRow( int row, const Vector3 &vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + VECTORMATH_FORCE_INLINE Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, float val ); + + // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + VECTORMATH_FORCE_INLINE const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + VECTORMATH_FORCE_INLINE const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + VECTORMATH_FORCE_INLINE const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply two 3x3 matrices + // + VECTORMATH_FORCE_INLINE const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & operator -=( const Matrix3 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + VECTORMATH_FORCE_INLINE Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static VECTORMATH_FORCE_INLINE const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const Quat &unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static VECTORMATH_FORCE_INLINE const Matrix3 scale( const Vector3 &scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +// Conditionally select between two 3x3 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Matrix4( ) { }; + + // Copy a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + VECTORMATH_FORCE_INLINE Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit VECTORMATH_FORCE_INLINE Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + VECTORMATH_FORCE_INLINE Matrix4( const Matrix3 & mat, const Vector3 &translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + VECTORMATH_FORCE_INLINE Matrix4( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Matrix4( float scalar ); + + // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Matrix4( const floatInVec &scalar ); + + // Assign one 4x4 matrix to another + // + VECTORMATH_FORCE_INLINE Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + VECTORMATH_FORCE_INLINE Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + VECTORMATH_FORCE_INLINE Matrix4 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & setCol0( const Vector4 &col0 ); + + // Set column 1 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & setCol1( const Vector4 &col1 ); + + // Set column 2 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & setCol2( const Vector4 &col2 ); + + // Set column 3 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & setCol3( const Vector4 &col3 ); + + // Get column 0 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Matrix4 & setCol( int col, const Vector4 &vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Matrix4 & setRow( int row, const Vector4 &vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + VECTORMATH_FORCE_INLINE Vector4 & operator []( int col ); + + // Subscripting operator to get a column + // + VECTORMATH_FORCE_INLINE const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, float val ); + + // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + VECTORMATH_FORCE_INLINE const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + VECTORMATH_FORCE_INLINE const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector4 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector + // + VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector3 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D point + // + VECTORMATH_FORCE_INLINE const Vector4 operator *( const Point3 &pnt ) const; + + // Multiply two 4x4 matrices + // + VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and addition with a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + VECTORMATH_FORCE_INLINE Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static VECTORMATH_FORCE_INLINE const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const Quat &unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static VECTORMATH_FORCE_INLINE const Matrix4 scale( const Vector3 &scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static VECTORMATH_FORCE_INLINE const Matrix4 translation( const Vector3 &translateVec ); + + // Construct viewing matrix based on eye, position looked at, and up direction + // + static VECTORMATH_FORCE_INLINE const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ); + + // Construct a perspective projection matrix + // + static VECTORMATH_FORCE_INLINE const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on frustum + // + static VECTORMATH_FORCE_INLINE const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static VECTORMATH_FORCE_INLINE const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar +// +VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) +// +VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat ); + +// Transpose of a 4x4 matrix +// +VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Determinant of a 4x4 matrix +// +VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +// Conditionally select between two 4x4 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + VECTORMATH_FORCE_INLINE Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + VECTORMATH_FORCE_INLINE Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector + // + VECTORMATH_FORCE_INLINE Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector + // + VECTORMATH_FORCE_INLINE Transform3( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit VECTORMATH_FORCE_INLINE Transform3( float scalar ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type) + // + explicit VECTORMATH_FORCE_INLINE Transform3( const floatInVec &scalar ); + + // Assign one 3x4 transformation matrix to another + // + VECTORMATH_FORCE_INLINE Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix + // + VECTORMATH_FORCE_INLINE Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const; + + // Set translation component + // + VECTORMATH_FORCE_INLINE Transform3 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3 & setCol2( const Vector3 &col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3 & setCol3( const Vector3 &col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Transform3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE Transform3 & setRow( int row, const Vector4 &vec ); + + // Get the column of a 3x4 transformation matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index + // + VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + VECTORMATH_FORCE_INLINE Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, float val ); + + // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type) + // + VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + VECTORMATH_FORCE_INLINE const Point3 operator *( const Point3 &pnt ) const; + + // Multiply two 3x4 transformation matrices + // + VECTORMATH_FORCE_INLINE const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + VECTORMATH_FORCE_INLINE Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static VECTORMATH_FORCE_INLINE const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationX( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationY( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes + // + static VECTORMATH_FORCE_INLINE const Transform3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static VECTORMATH_FORCE_INLINE const Transform3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static VECTORMATH_FORCE_INLINE const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static VECTORMATH_FORCE_INLINE const Transform3 rotation( const Quat &unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static VECTORMATH_FORCE_INLINE const Transform3 scale( const Vector3 &scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static VECTORMATH_FORCE_INLINE const Transform3 translation( const Vector3 &translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm ); + +// Inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. +// +VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif diff --git a/test/Bullet2/vectormath/vmInclude.h b/test/Bullet2/vectormath/vmInclude.h new file mode 100644 index 000000000..656514e42 --- /dev/null +++ b/test/Bullet2/vectormath/vmInclude.h @@ -0,0 +1,31 @@ + +#ifndef __VM_INCLUDE_H +#define __VM_INCLUDE_H + +#include "LinearMath/btScalar.h" + +#if defined (USE_SYSTEM_VECTORMATH) || defined (__CELLOS_LV2__) + #include +#else //(USE_SYSTEM_VECTORMATH) + #if defined (BT_USE_SSE) + #include "sse/vectormath_aos.h" + #else //all other platforms + #if defined (BT_USE_NEON) + #include "neon/vectormath_aos.h" + #else + #include "scalar/vectormath_aos.h" + #endif + #endif //(BT_USE_SSE) && defined (_WIN32) +#endif //(USE_SYSTEM_VECTORMATH) + + + +typedef Vectormath::Aos::Vector3 vmVector3; +typedef Vectormath::Aos::Quat vmQuat; +typedef Vectormath::Aos::Matrix3 vmMatrix3; +typedef Vectormath::Aos::Transform3 vmTransform3; +typedef Vectormath::Aos::Point3 vmPoint3; + +#endif //__VM_INCLUDE_H + + diff --git a/test/collision/premake4.lua b/test/collision/premake4.lua index f0a56f600..41269a6ab 100644 --- a/test/collision/premake4.lua +++ b/test/collision/premake4.lua @@ -1,5 +1,5 @@ - project "test_bullet_collision" + project "Test_BulletCollision" kind "ConsoleApp"