diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h new file mode 100644 index 000000000..d18cb15ce --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h @@ -0,0 +1,247 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _BOOLINVEC_H +#define _BOOLINVEC_H + +#include + +namespace Vectormath { + +class floatInVec; + +//-------------------------------------------------------------------------------------------------- +// boolInVec class +// + +class boolInVec +{ + private: + __m128 mData; + + inline boolInVec(__m128 vec); + public: + inline boolInVec() {} + + // matches standard type conversions + // + inline boolInVec(const floatInVec &vec); + + // explicit cast from bool + // + explicit inline boolInVec(bool scalar); + +#ifdef _VECTORMATH_NO_SCALAR_CAST + // explicit cast to bool + // + inline bool getAsBool() const; +#else + // implicit cast to bool + // + inline operator bool() const; +#endif + + // get vector data + // bool value is splatted across all word slots of vector as 0 (false) or -1 (true) + // + inline __m128 get128() const; + + // operators + // + inline const boolInVec operator ! () const; + inline boolInVec& operator = (const boolInVec &vec); + inline boolInVec& operator &= (const boolInVec &vec); + inline boolInVec& operator ^= (const boolInVec &vec); + inline boolInVec& operator |= (const boolInVec &vec); + + // friend functions + // + friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1); + friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1); + friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1); +}; + +//-------------------------------------------------------------------------------------------------- +// boolInVec functions +// + +// operators +// +inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1); +inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1); + +// select between vec0 and vec1 using boolInVec. +// false selects vec0, true selects vec1 +// +inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1); + +} // namespace Vectormath + +//-------------------------------------------------------------------------------------------------- +// boolInVec implementation +// + +#include "floatInVec.h" + +namespace Vectormath { + +inline +boolInVec::boolInVec(__m128 vec) +{ + mData = vec; +} + +inline +boolInVec::boolInVec(const floatInVec &vec) +{ + *this = (vec != floatInVec(0.0f)); +} + +inline +boolInVec::boolInVec(bool scalar) +{ + unsigned int mask = -(int)scalar; + mData = _mm_set1_ps(*(float *)&mask); // TODO: Union +} + +#ifdef _VECTORMATH_NO_SCALAR_CAST +inline +bool +boolInVec::getAsBool() const +#else +inline +boolInVec::operator bool() const +#endif +{ + return *(bool *)&mData; +} + +inline +__m128 +boolInVec::get128() const +{ + return mData; +} + +inline +const boolInVec +boolInVec::operator ! () const +{ + return boolInVec(_mm_andnot_ps(mData, _mm_cmpneq_ps(_mm_setzero_ps(),_mm_setzero_ps()))); +} + +inline +boolInVec& +boolInVec::operator = (const boolInVec &vec) +{ + mData = vec.mData; + return *this; +} + +inline +boolInVec& +boolInVec::operator &= (const boolInVec &vec) +{ + *this = *this & vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator ^= (const boolInVec &vec) +{ + *this = *this ^ vec; + return *this; +} + +inline +boolInVec& +boolInVec::operator |= (const boolInVec &vec) +{ + *this = *this | vec; + return *this; +} + +inline +const boolInVec +operator == (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator != (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator & (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_and_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator | (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_or_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator ^ (const boolInVec &vec0, const boolInVec &vec1) +{ + return boolInVec(_mm_xor_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1) +{ + return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128())); +} + +} // namespace Vectormath + +#endif // boolInVec_h diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h new file mode 100644 index 000000000..6443865b1 --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h @@ -0,0 +1,340 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _FLOATINVEC_H +#define _FLOATINVEC_H + +#include +#include + +namespace Vectormath { + +class boolInVec; + +//-------------------------------------------------------------------------------------------------- +// floatInVec class +// + +class floatInVec +{ + private: + __m128 mData; + + public: + inline floatInVec(__m128 vec); + + inline floatInVec() {} + + // matches standard type conversions + // + inline floatInVec(const boolInVec &vec); + + // construct from a slot of __m128 + // + inline floatInVec(__m128 vec, int slot); + + // explicit cast from float + // + explicit inline floatInVec(float scalar); + +#ifdef _VECTORMATH_NO_SCALAR_CAST + // explicit cast to float + // + inline float getAsFloat() const; +#else + // implicit cast to float + // + inline operator float() const; +#endif + + // get vector data + // float value is splatted across all word slots of vector + // + inline __m128 get128() const; + + // operators + // + inline const floatInVec operator ++ (int); + inline const floatInVec operator -- (int); + inline floatInVec& operator ++ (); + inline floatInVec& operator -- (); + inline const floatInVec operator - () const; + inline floatInVec& operator = (const floatInVec &vec); + inline floatInVec& operator *= (const floatInVec &vec); + inline floatInVec& operator /= (const floatInVec &vec); + inline floatInVec& operator += (const floatInVec &vec); + inline floatInVec& operator -= (const floatInVec &vec); + + // friend functions + // + friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1); + friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, boolInVec select_vec1); +}; + +//-------------------------------------------------------------------------------------------------- +// floatInVec functions +// + +// operators +// +inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1); +inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1); +inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1); + +// select between vec0 and vec1 using boolInVec. +// false selects vec0, true selects vec1 +// +inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1); + +} // namespace Vectormath + +//-------------------------------------------------------------------------------------------------- +// floatInVec implementation +// + +#include "boolInVec.h" + +namespace Vectormath { + +inline +floatInVec::floatInVec(__m128 vec) +{ + mData = vec; +} + +inline +floatInVec::floatInVec(const boolInVec &vec) +{ + mData = vec_sel(_mm_setzero_ps(), _mm_set1_ps(1.0f), vec.get128()); +} + +inline +floatInVec::floatInVec(__m128 vec, int slot) +{ + SSEFloat v; + v.m128 = vec; + mData = _mm_set1_ps(v.f[slot]); +} + +inline +floatInVec::floatInVec(float scalar) +{ + mData = _mm_set1_ps(scalar); +} + +#ifdef _VECTORMATH_NO_SCALAR_CAST +inline +float +floatInVec::getAsFloat() const +#else +inline +floatInVec::operator float() const +#endif +{ + return *((float *)&mData); +} + +inline +__m128 +floatInVec::get128() const +{ + return mData; +} + +inline +const floatInVec +floatInVec::operator ++ (int) +{ + __m128 olddata = mData; + operator ++(); + return floatInVec(olddata); +} + +inline +const floatInVec +floatInVec::operator -- (int) +{ + __m128 olddata = mData; + operator --(); + return floatInVec(olddata); +} + +inline +floatInVec& +floatInVec::operator ++ () +{ + *this += floatInVec(_mm_set1_ps(1.0f)); + return *this; +} + +inline +floatInVec& +floatInVec::operator -- () +{ + *this -= floatInVec(_mm_set1_ps(1.0f)); + return *this; +} + +inline +const floatInVec +floatInVec::operator - () const +{ + return floatInVec(_mm_sub_ps(_mm_setzero_ps(), mData)); +} + +inline +floatInVec& +floatInVec::operator = (const floatInVec &vec) +{ + mData = vec.mData; + return *this; +} + +inline +floatInVec& +floatInVec::operator *= (const floatInVec &vec) +{ + *this = *this * vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator /= (const floatInVec &vec) +{ + *this = *this / vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator += (const floatInVec &vec) +{ + *this = *this + vec; + return *this; +} + +inline +floatInVec& +floatInVec::operator -= (const floatInVec &vec) +{ + *this = *this - vec; + return *this; +} + +inline +const floatInVec +operator * (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_mul_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +operator / (const floatInVec &num, const floatInVec &den) +{ + return floatInVec(_mm_div_ps(num.get128(), den.get128())); +} + +inline +const floatInVec +operator + (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_add_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +operator - (const floatInVec &vec0, const floatInVec &vec1) +{ + return floatInVec(_mm_sub_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator < (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpgt_ps(vec1.get128(), vec0.get128())); +} + +inline +const boolInVec +operator <= (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpge_ps(vec1.get128(), vec0.get128())); +} + +inline +const boolInVec +operator > (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpgt_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator >= (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpge_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator == (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128())); +} + +inline +const boolInVec +operator != (const floatInVec &vec0, const floatInVec &vec1) +{ + return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128())); +} + +inline +const floatInVec +select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1) +{ + return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128())); +} + +} // namespace Vectormath + +#endif // floatInVec_h diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h new file mode 100644 index 000000000..6bdc5b3fa --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h @@ -0,0 +1,2187 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_MAT_AOS_CPP_H +#define _VECTORMATH_MAT_AOS_CPP_H + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// Constants +// for shuffles, words are labeled [x,y,z,w] [a,b,c,d] + +#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }) +#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D }) +#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z }) +#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D }) +#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y }) +#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A }) +#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B }) +#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C }) +#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X }) +#define _VECTORMATH_PI_OVER_2 1.570796327f + +//----------------------------------------------------------------------------- +// Definitions + +inline Matrix3::Matrix3( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; +} + +inline Matrix3::Matrix3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +inline Matrix3::Matrix3( const floatInVec &scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); +} + +inline Matrix3::Matrix3( const Quat &unitQuat ) +{ + __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2; + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + __declspec(align(16)) unsigned int sx[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int sz[4] = {0, 0, 0xffffffff, 0}; + __m128 select_x = _mm_load_ps((float *)sx); + __m128 select_z = _mm_load_ps((float *)sz); + + xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() ); + wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) ); + yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) ); + zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) ); + yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) ); + zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) ); + + tmp0 = _mm_mul_ps( yzxw_2, wwww ); // tmp0 = 2yw, 2zw, 2xw, 2w2 + tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) ); // tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2 + tmp2 = _mm_mul_ps( yzxw, xyzw_2 ); // tmp2 = 2xy, 2yz, 2xz, 2w2 + tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 ); // tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2 + tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) ); // tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2 + tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) ); // tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2 + + tmp3 = vec_sel( tmp0, tmp1, select_x ); + tmp4 = vec_sel( tmp1, tmp2, select_x ); + tmp5 = vec_sel( tmp2, tmp0, select_x ); + mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) ); + mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) ); + mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) ); +} + +inline Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; +} + +inline Matrix3 & Matrix3::setCol0( const Vector3 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix3 & Matrix3::setCol1( const Vector3 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix3 & Matrix3::setCol2( const Vector3 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix3 & Matrix3::setCol( int col, const Vector3 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + return *this; +} + +inline Matrix3 & Matrix3::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +inline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline const floatInVec Matrix3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Matrix3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Matrix3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Matrix3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Matrix3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::getRow( int row ) const +{ + return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) ); +} + +inline Vector3 & Matrix3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Matrix3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + return *this; +} + +inline const Matrix3 transpose( const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, res0, res1, res2; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + res0 = vec_mergeh( tmp0, mat.getCol1().get128() ); + //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + res1 = vec_sel(res1, mat.getCol1().get128(), select_y); + //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y); + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +inline const Matrix3 inverse( const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2; + tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() ); + tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() ); + tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() ); + dot = _vmathVfDot3( tmp2, mat.getCol2().get128() ); + dot = vec_splat( dot, 0 ); + invdet = recipf4( dot ); + tmp3 = vec_mergeh( tmp0, tmp2 ); + tmp4 = vec_mergel( tmp0, tmp2 ); + inv0 = vec_mergeh( tmp3, tmp1 ); + //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX ); + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tmp1, select_y); + //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y); + inv0 = vec_mul( inv0, invdet ); + inv1 = vec_mul( inv1, invdet ); + inv2 = vec_mul( inv2, invdet ); + return Matrix3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ) + ); +} + +inline const floatInVec determinant( const Matrix3 & mat ) +{ + return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); +} + +inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ) + ); +} + +inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const +{ + return Matrix3( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix3 Matrix3::operator -( ) const +{ + return Matrix3( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ) + ); +} + +inline const Matrix3 absPerElem( const Matrix3 & mat ) +{ + return Matrix3( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +inline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const +{ + return Matrix3( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ) + ); +} + +inline Matrix3 & Matrix3::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +inline Matrix3 & Matrix3::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ) +{ + return floatInVec(scalar) * mat; +} + +inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ) +{ + return mat * scalar; +} + +inline const Vector3 Matrix3::operator *( const Vector3 &vec ) const +{ + __m128 res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( vec.get128(), 0 ); + yyyy = vec_splat( vec.get128(), 1 ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_mul( mCol0.get128(), xxxx ); + res = vec_madd( mCol1.get128(), yyyy, res ); + res = vec_madd( mCol2.get128(), zzzz, res ); + return Vector3( res ); +} + +inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const +{ + return Matrix3( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ) + ); +} + +inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) +{ + return Matrix3( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ) + ); +} + +inline const Matrix3 Matrix3::identity( ) +{ + return Matrix3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +inline const Matrix3 Matrix3::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Matrix3( + Vector3::xAxis( ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +inline const Matrix3 Matrix3::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +inline const Matrix3 Matrix3::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Matrix3( + Vector3( res0 ), + Vector3::yAxis( ), + Vector3( res2 ) + ); +} + +inline const Matrix3 Matrix3::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +inline const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + __m128 zero; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3::zAxis( ) + ); +} + +inline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + __declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Matrix3( + Vector3( vec_mul( Z0, Y0 ) ), + Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +inline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; + axis = unitVec.get128(); + sincosf4( radians.get128(), &s, &c ); + xxxx = vec_splat( axis, 0 ); + yyyy = vec_splat( axis, 1 ); + zzzz = vec_splat( axis, 2 ); + oneMinusC = vec_sub( _mm_set1_ps(1.0f), c ); + axisS = vec_mul( axis, s ); + negAxisS = negatef4( axisS ); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX ); + tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); + tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); + //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX ); + tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); + //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX ); + tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); + tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); + tmp0 = vec_sel( tmp0, c, select_x ); + tmp1 = vec_sel( tmp1, c, select_y ); + tmp2 = vec_sel( tmp2, c, select_z ); + return Matrix3( + Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ), + Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ), + Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ) + ); +} + +inline const Matrix3 Matrix3::rotation( const Quat &unitQuat ) +{ + return Matrix3( unitQuat ); +} + +inline const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Matrix3( + Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ) + ); +} + +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) +{ + return Matrix3( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ) + ); +} + +inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ) +{ + return Matrix3( + mulPerElem( mat.getCol0(), scaleVec ), + mulPerElem( mat.getCol1(), scaleVec ), + mulPerElem( mat.getCol2(), scaleVec ) + ); +} + +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ) +{ + return Matrix3( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix3 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); +} + +inline void print( const Matrix3 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Matrix4::Matrix4( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; +} + +inline Matrix4::Matrix4( float scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +inline Matrix4::Matrix4( const floatInVec &scalar ) +{ + mCol0 = Vector4( scalar ); + mCol1 = Vector4( scalar ); + mCol2 = Vector4( scalar ); + mCol3 = Vector4( scalar ); +} + +inline Matrix4::Matrix4( const Transform3 & mat ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( mat.getCol3(), 1.0f ); +} + +inline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) +{ + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) +{ + Matrix3 mat; + mat = Matrix3( unitQuat ); + mCol0 = Vector4( mat.getCol0(), 0.0f ); + mCol1 = Vector4( mat.getCol1(), 0.0f ); + mCol2 = Vector4( mat.getCol2(), 0.0f ); + mCol3 = Vector4( translateVec, 1.0f ); +} + +inline Matrix4 & Matrix4::setCol0( const Vector4 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Matrix4 & Matrix4::setCol1( const Vector4 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Matrix4 & Matrix4::setCol2( const Vector4 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Matrix4 & Matrix4::setCol3( const Vector4 &_col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Matrix4 & Matrix4::setCol( int col, const Vector4 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Matrix4 & Matrix4::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +inline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) +{ + Vector4 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline const floatInVec Matrix4::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector4 Matrix4::getCol0( ) const +{ + return mCol0; +} + +inline const Vector4 Matrix4::getCol1( ) const +{ + return mCol1; +} + +inline const Vector4 Matrix4::getCol2( ) const +{ + return mCol2; +} + +inline const Vector4 Matrix4::getCol3( ) const +{ + return mCol3; +} + +inline const Vector4 Matrix4::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector4 & Matrix4::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector4 Matrix4::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) +{ + mCol0 = mat.mCol0; + mCol1 = mat.mCol1; + mCol2 = mat.mCol2; + mCol3 = mat.mCol3; + return *this; +} + +inline const Matrix4 transpose( const Matrix4 & mat ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() ); + tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() ); + res0 = vec_mergeh( tmp0, tmp1 ); + res1 = vec_mergel( tmp0, tmp1 ); + res2 = vec_mergeh( tmp2, tmp3 ); + res3 = vec_mergel( tmp2, tmp3 ); + return Matrix4( + Vector4( res0 ), + Vector4( res1 ), + Vector4( res2 ), + Vector4( res3 ) + ); +} + +// TODO: Tidy +static __declspec(align(16)) const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000}; +static __declspec(align(16)) const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000}; +static __declspec(align(16)) const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f}; + +inline const Matrix4 inverse( const Matrix4 & mat ) +{ + __m128 Va,Vb,Vc; + __m128 r1,r2,r3,tt,tt2; + __m128 sum,Det,RDet; + __m128 trns0,trns1,trns2,trns3; + + __m128 _L1 = mat.getCol0().get128(); + __m128 _L2 = mat.getCol1().get128(); + __m128 _L3 = mat.getCol2().get128(); + __m128 _L4 = mat.getCol3().get128(); + // Calculating the minterms for the first line. + + // _mm_ror_ps is just a macro using _mm_shuffle_ps(). + tt = _L4; tt2 = _mm_ror_ps(_L3,1); + Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3'·V4 + Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3'·V4" + Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3'·V4^ + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3"·V4^ - V3^·V4" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^·V4' - V3'·V4^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3'·V4" - V3"·V4' + + tt = _L2; + Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1); + Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2)); + Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3)); + + // Calculating the determinant. + Det = _mm_mul_ps(sum,_L1); + Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det)); + + const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN); + const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP); + + __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN); + + // Calculating the minterms of the second line (using previous results). + tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP); + + // Testing the determinant. + Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1)); + + // Calculating the minterms of the third line. + tt = _mm_ror_ps(_L1,1); + Va = _mm_mul_ps(tt,Vb); // V1'·V2" + Vb = _mm_mul_ps(tt,Vc); // V1'·V2^ + Vc = _mm_mul_ps(tt,_L2); // V1'·V2 + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V1"·V2^ - V1^·V2" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V1^·V2' - V1'·V2^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V1'·V2" - V1"·V2' + + tt = _mm_ror_ps(_L4,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN); + + // Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs). + RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f? + RDet = _mm_shuffle_ps(RDet,RDet,0x00); + + // Devide the first 12 minterms with the determinant. + mtL1 = _mm_mul_ps(mtL1, RDet); + mtL2 = _mm_mul_ps(mtL2, RDet); + mtL3 = _mm_mul_ps(mtL3, RDet); + + // Calculate the minterms of the forth line and devide by the determinant. + tt = _mm_ror_ps(_L3,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP); + mtL4 = _mm_mul_ps(mtL4, RDet); + + // Now we just have to transpose the minterms matrix. + trns0 = _mm_unpacklo_ps(mtL1,mtL2); + trns1 = _mm_unpacklo_ps(mtL3,mtL4); + trns2 = _mm_unpackhi_ps(mtL1,mtL2); + trns3 = _mm_unpackhi_ps(mtL3,mtL4); + _L1 = _mm_movelh_ps(trns0,trns1); + _L2 = _mm_movehl_ps(trns1,trns0); + _L3 = _mm_movelh_ps(trns2,trns3); + _L4 = _mm_movehl_ps(trns3,trns2); + + return Matrix4( + Vector4( _L1 ), + Vector4( _L2 ), + Vector4( _L3 ), + Vector4( _L4 ) + ); +} + +inline const Matrix4 affineInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( inverse( affineMat ) ); +} + +inline const Matrix4 orthoInverse( const Matrix4 & mat ) +{ + Transform3 affineMat; + affineMat.setCol0( mat.getCol0().getXYZ( ) ); + affineMat.setCol1( mat.getCol1().getXYZ( ) ); + affineMat.setCol2( mat.getCol2().getXYZ( ) ); + affineMat.setCol3( mat.getCol3().getXYZ( ) ); + return Matrix4( orthoInverse( affineMat ) ); +} + +inline const floatInVec determinant( const Matrix4 & mat ) +{ + __m128 Va,Vb,Vc; + __m128 r1,r2,r3,tt,tt2; + __m128 sum,Det; + + __m128 _L1 = mat.getCol0().get128(); + __m128 _L2 = mat.getCol1().get128(); + __m128 _L3 = mat.getCol2().get128(); + __m128 _L4 = mat.getCol3().get128(); + // Calculating the minterms for the first line. + + // _mm_ror_ps is just a macro using _mm_shuffle_ps(). + tt = _L4; tt2 = _mm_ror_ps(_L3,1); + Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3'·V4 + Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3'·V4" + Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3'·V4^ + + r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3"·V4^ - V3^·V4" + r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^·V4' - V3'·V4^ + r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3'·V4" - V3"·V4' + + tt = _L2; + Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1); + Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2)); + Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3)); + + // Calculating the determinant. + Det = _mm_mul_ps(sum,_L1); + Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det)); + + // Calculating the minterms of the second line (using previous results). + tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2)); + tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3)); + + // Testing the determinant. + Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1)); + return floatInVec(Det, 0); +} + +inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 + mat.mCol0 ), + ( mCol1 + mat.mCol1 ), + ( mCol2 + mat.mCol2 ), + ( mCol3 + mat.mCol3 ) + ); +} + +inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const +{ + return Matrix4( + ( mCol0 - mat.mCol0 ), + ( mCol1 - mat.mCol1 ), + ( mCol2 - mat.mCol2 ), + ( mCol3 - mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat ) +{ + *this = *this + mat; + return *this; +} + +inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat ) +{ + *this = *this - mat; + return *this; +} + +inline const Matrix4 Matrix4::operator -( ) const +{ + return Matrix4( + ( -mCol0 ), + ( -mCol1 ), + ( -mCol2 ), + ( -mCol3 ) + ); +} + +inline const Matrix4 absPerElem( const Matrix4 & mat ) +{ + return Matrix4( + absPerElem( mat.getCol0() ), + absPerElem( mat.getCol1() ), + absPerElem( mat.getCol2() ), + absPerElem( mat.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +inline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const +{ + return Matrix4( + ( mCol0 * scalar ), + ( mCol1 * scalar ), + ( mCol2 * scalar ), + ( mCol3 * scalar ) + ); +} + +inline Matrix4 & Matrix4::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +inline Matrix4 & Matrix4::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ) +{ + return floatInVec(scalar) * mat; +} + +inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ) +{ + return mat * scalar; +} + +inline const Vector4 Matrix4::operator *( const Vector4 &vec ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3))))) + ); +} + +inline const Vector4 Matrix4::operator *( const Vector3 &vec ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2)))) + ); +} + +inline const Vector4 Matrix4::operator *( const Point3 &pnt ) const +{ + return Vector4( + _mm_add_ps( + _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))), + _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128())) + ); +} + +inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const +{ + return Matrix4( + ( *this * mat.mCol0 ), + ( *this * mat.mCol1 ), + ( *this * mat.mCol2 ), + ( *this * mat.mCol3 ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat ) +{ + *this = *this * mat; + return *this; +} + +inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const +{ + return Matrix4( + ( *this * tfrm.getCol0() ), + ( *this * tfrm.getCol1() ), + ( *this * tfrm.getCol2() ), + ( *this * Point3( tfrm.getCol3() ) ) + ); +} + +inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ) +{ + return Matrix4( + mulPerElem( mat0.getCol0(), mat1.getCol0() ), + mulPerElem( mat0.getCol1(), mat1.getCol1() ), + mulPerElem( mat0.getCol2(), mat1.getCol2() ), + mulPerElem( mat0.getCol3(), mat1.getCol3() ) + ); +} + +inline const Matrix4 Matrix4::identity( ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 ) +{ + mCol0.setXYZ( mat3.getCol0() ); + mCol1.setXYZ( mat3.getCol1() ); + mCol2.setXYZ( mat3.getCol2() ); + return *this; +} + +inline const Matrix3 Matrix4::getUpper3x3( ) const +{ + return Matrix3( + mCol0.getXYZ( ), + mCol1.getXYZ( ), + mCol2.getXYZ( ) + ); +} + +inline Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec ) +{ + mCol3.setXYZ( translateVec ); + return *this; +} + +inline const Vector3 Matrix4::getTranslation( ) const +{ + return mCol3.getXYZ( ); +} + +inline const Matrix4 Matrix4::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +inline const Matrix4 Matrix4::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Matrix4( + Vector4::xAxis( ), + Vector4( res1 ), + Vector4( res2 ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +inline const Matrix4 Matrix4::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Matrix4( + Vector4( res0 ), + Vector4::yAxis( ), + Vector4( res2 ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +inline const Matrix4 Matrix4::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + __m128 zero; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Matrix4( + Vector4( res0 ), + Vector4( res1 ), + Vector4::zAxis( ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + __declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Matrix4( + Vector4( vec_mul( Z0, Y0 ) ), + Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +inline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2; + axis = unitVec.get128(); + sincosf4( radians.get128(), &s, &c ); + xxxx = vec_splat( axis, 0 ); + yyyy = vec_splat( axis, 1 ); + zzzz = vec_splat( axis, 2 ); + oneMinusC = vec_sub( _mm_set1_ps(1.0f), c ); + axisS = vec_mul( axis, s ); + negAxisS = negatef4( axisS ); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX ); + tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); + tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); + //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX ); + tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); + //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX ); + tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); + tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); + tmp0 = vec_sel( tmp0, c, select_x ); + tmp1 = vec_sel( tmp1, c, select_y ); + tmp2 = vec_sel( tmp2, c, select_z ); + __declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) ); + tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) ); + tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) ); + tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) ); + return Matrix4( + Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ), + Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ), + Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 Matrix4::rotation( const Quat &unitQuat ) +{ + return Matrix4( Transform3::rotation( unitQuat ) ); +} + +inline const Matrix4 Matrix4::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Matrix4( + Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ), + Vector4::wAxis( ) + ); +} + +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ) +{ + return Matrix4( + ( mat.getCol0() * scaleVec.getX( ) ), + ( mat.getCol1() * scaleVec.getY( ) ), + ( mat.getCol2() * scaleVec.getZ( ) ), + mat.getCol3() + ); +} + +inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ) +{ + Vector4 scale4; + scale4 = Vector4( scaleVec, 1.0f ); + return Matrix4( + mulPerElem( mat.getCol0(), scale4 ), + mulPerElem( mat.getCol1(), scale4 ), + mulPerElem( mat.getCol2(), scale4 ), + mulPerElem( mat.getCol3(), scale4 ) + ); +} + +inline const Matrix4 Matrix4::translation( const Vector3 &translateVec ) +{ + return Matrix4( + Vector4::xAxis( ), + Vector4::yAxis( ), + Vector4::zAxis( ), + Vector4( translateVec, 1.0f ) + ); +} + +inline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ) +{ + Matrix4 m4EyeFrame; + Vector3 v3X, v3Y, v3Z; + v3Y = normalize( upVec ); + v3Z = normalize( ( eyePos - lookAtPos ) ); + v3X = normalize( cross( v3Y, v3Z ) ); + v3Y = cross( v3Z, v3X ); + m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) ); + return orthoInverse( m4EyeFrame ); +} + +inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar ) +{ + float f, rangeInv; + __m128 zero, col0, col1, col2, col3; + union { __m128 v; float s[4]; } tmp; + f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f ); + rangeInv = 1.0f / ( zNear - zFar ); + zero = _mm_setzero_ps(); + tmp.v = zero; + tmp.s[0] = f / aspect; + col0 = tmp.v; + tmp.v = zero; + tmp.s[1] = f; + col1 = tmp.v; + tmp.v = zero; + tmp.s[2] = ( zNear + zFar ) * rangeInv; + tmp.s[3] = -1.0f; + col2 = tmp.v; + tmp.v = zero; + tmp.s[2] = zNear * zFar * rangeInv * 2.0f; + col3 = tmp.v; + return Matrix4( + Vector4( col0 ), + Vector4( col1 ), + Vector4( col2 ), + Vector4( col3 ) + ); +} + +inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + /* function implementation based on code from STIDC SDK: */ + /* -------------------------------------------------------------- */ + /* PLEASE DO NOT MODIFY THIS SECTION */ + /* This prolog section is automatically generated. */ + /* */ + /* (C)Copyright */ + /* Sony Computer Entertainment, Inc., */ + /* Toshiba Corporation, */ + /* International Business Machines Corporation, */ + /* 2001,2002. */ + /* S/T/I Confidential Information */ + /* -------------------------------------------------------------- */ + __m128 lbf, rtn; + __m128 diff, sum, inv_diff; + __m128 diagonal, column, near2; + __m128 zero = _mm_setzero_ps(); + union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union? + l.s[0] = left; + f.s[0] = zFar; + r.s[0] = right; + n.s[0] = zNear; + b.s[0] = bottom; + t.s[0] = top; + lbf = vec_mergeh( l.v, f.v ); + rtn = vec_mergeh( r.v, n.v ); + lbf = vec_mergeh( lbf, b.v ); + rtn = vec_mergeh( rtn, t.v ); + diff = vec_sub( rtn, lbf ); + sum = vec_add( rtn, lbf ); + inv_diff = recipf4( diff ); + near2 = vec_splat( n.v, 0 ); + near2 = vec_add( near2, near2 ); + diagonal = vec_mul( near2, inv_diff ); + column = vec_mul( sum, inv_diff ); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + __declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + return Matrix4( + Vector4( vec_sel( zero, diagonal, select_x ) ), + Vector4( vec_sel( zero, diagonal, select_y ) ), + Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ), + Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) ) + ); +} + +inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) +{ + /* function implementation based on code from STIDC SDK: */ + /* -------------------------------------------------------------- */ + /* PLEASE DO NOT MODIFY THIS SECTION */ + /* This prolog section is automatically generated. */ + /* */ + /* (C)Copyright */ + /* Sony Computer Entertainment, Inc., */ + /* Toshiba Corporation, */ + /* International Business Machines Corporation, */ + /* 2001,2002. */ + /* S/T/I Confidential Information */ + /* -------------------------------------------------------------- */ + __m128 lbf, rtn; + __m128 diff, sum, inv_diff, neg_inv_diff; + __m128 diagonal, column; + __m128 zero = _mm_setzero_ps(); + union { __m128 v; float s[4]; } l, f, r, n, b, t; + l.s[0] = left; + f.s[0] = zFar; + r.s[0] = right; + n.s[0] = zNear; + b.s[0] = bottom; + t.s[0] = top; + lbf = vec_mergeh( l.v, f.v ); + rtn = vec_mergeh( r.v, n.v ); + lbf = vec_mergeh( lbf, b.v ); + rtn = vec_mergeh( rtn, t.v ); + diff = vec_sub( rtn, lbf ); + sum = vec_add( rtn, lbf ); + inv_diff = recipf4( diff ); + neg_inv_diff = negatef4( inv_diff ); + diagonal = vec_add( inv_diff, inv_diff ); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + __declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero + return Matrix4( + Vector4( vec_sel( zero, diagonal, select_x ) ), + Vector4( vec_sel( zero, diagonal, select_y ) ), + Vector4( vec_sel( zero, diagonal, select_z ) ), + Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) ) + ); +} + +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ) +{ + return Matrix4( + select( mat0.getCol0(), mat1.getCol0(), select1 ), + select( mat0.getCol1(), mat1.getCol1(), select1 ), + select( mat0.getCol2(), mat1.getCol2(), select1 ), + select( mat0.getCol3(), mat1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Matrix4 & mat ) +{ + print( mat.getRow( 0 ) ); + print( mat.getRow( 1 ) ); + print( mat.getRow( 2 ) ); + print( mat.getRow( 3 ) ); +} + +inline void print( const Matrix4 & mat, const char * name ) +{ + printf("%s:\n", name); + print( mat ); +} + +#endif + +inline Transform3::Transform3( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; +} + +inline Transform3::Transform3( float scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +inline Transform3::Transform3( const floatInVec &scalar ) +{ + mCol0 = Vector3( scalar ); + mCol1 = Vector3( scalar ); + mCol2 = Vector3( scalar ); + mCol3 = Vector3( scalar ); +} + +inline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 ) +{ + mCol0 = _col0; + mCol1 = _col1; + mCol2 = _col2; + mCol3 = _col3; +} + +inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ) +{ + this->setUpper3x3( tfrm ); + this->setTranslation( translateVec ); +} + +inline Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec ) +{ + this->setUpper3x3( Matrix3( unitQuat ) ); + this->setTranslation( translateVec ); +} + +inline Transform3 & Transform3::setCol0( const Vector3 &_col0 ) +{ + mCol0 = _col0; + return *this; +} + +inline Transform3 & Transform3::setCol1( const Vector3 &_col1 ) +{ + mCol1 = _col1; + return *this; +} + +inline Transform3 & Transform3::setCol2( const Vector3 &_col2 ) +{ + mCol2 = _col2; + return *this; +} + +inline Transform3 & Transform3::setCol3( const Vector3 &_col3 ) +{ + mCol3 = _col3; + return *this; +} + +inline Transform3 & Transform3::setCol( int col, const Vector3 &vec ) +{ + *(&mCol0 + col) = vec; + return *this; +} + +inline Transform3 & Transform3::setRow( int row, const Vector4 &vec ) +{ + mCol0.setElem( row, vec.getElem( 0 ) ); + mCol1.setElem( row, vec.getElem( 1 ) ); + mCol2.setElem( row, vec.getElem( 2 ) ); + mCol3.setElem( row, vec.getElem( 3 ) ); + return *this; +} + +inline Transform3 & Transform3::setElem( int col, int row, float val ) +{ + (*this)[col].setElem(row, val); + return *this; +} + +inline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val ) +{ + Vector3 tmpV3_0; + tmpV3_0 = this->getCol( col ); + tmpV3_0.setElem( row, val ); + this->setCol( col, tmpV3_0 ); + return *this; +} + +inline const floatInVec Transform3::getElem( int col, int row ) const +{ + return this->getCol( col ).getElem( row ); +} + +inline const Vector3 Transform3::getCol0( ) const +{ + return mCol0; +} + +inline const Vector3 Transform3::getCol1( ) const +{ + return mCol1; +} + +inline const Vector3 Transform3::getCol2( ) const +{ + return mCol2; +} + +inline const Vector3 Transform3::getCol3( ) const +{ + return mCol3; +} + +inline const Vector3 Transform3::getCol( int col ) const +{ + return *(&mCol0 + col); +} + +inline const Vector4 Transform3::getRow( int row ) const +{ + return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); +} + +inline Vector3 & Transform3::operator []( int col ) +{ + return *(&mCol0 + col); +} + +inline const Vector3 Transform3::operator []( int col ) const +{ + return *(&mCol0 + col); +} + +inline Transform3 & Transform3::operator =( const Transform3 & tfrm ) +{ + mCol0 = tfrm.mCol0; + mCol1 = tfrm.mCol1; + mCol2 = tfrm.mCol2; + mCol3 = tfrm.mCol3; + return *this; +} + +inline const Transform3 inverse( const Transform3 & tfrm ) +{ + __m128 inv0, inv1, inv2, inv3; + __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet; + __m128 xxxx, yyyy, zzzz; + tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); + tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); + tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); + inv3 = negatef4( tfrm.getCol3().get128() ); + dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); + dot = vec_splat( dot, 0 ); + invdet = recipf4( dot ); + tmp3 = vec_mergeh( tmp0, tmp2 ); + tmp4 = vec_mergel( tmp0, tmp2 ); + inv0 = vec_mergeh( tmp3, tmp1 ); + xxxx = vec_splat( inv3, 0 ); + //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX ); + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tmp1, select_y); + //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y); + yyyy = vec_splat( inv3, 1 ); + zzzz = vec_splat( inv3, 2 ); + inv3 = vec_mul( inv0, xxxx ); + inv3 = vec_madd( inv1, yyyy, inv3 ); + inv3 = vec_madd( inv2, zzzz, inv3 ); + inv0 = vec_mul( inv0, invdet ); + inv1 = vec_mul( inv1, invdet ); + inv2 = vec_mul( inv2, invdet ); + inv3 = vec_mul( inv3, invdet ); + return Transform3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ), + Vector3( inv3 ) + ); +} + +inline const Transform3 orthoInverse( const Transform3 & tfrm ) +{ + __m128 inv0, inv1, inv2, inv3; + __m128 tmp0, tmp1; + __m128 xxxx, yyyy, zzzz; + tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); + tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); + inv3 = negatef4( tfrm.getCol3().get128() ); + inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() ); + xxxx = vec_splat( inv3, 0 ); + //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y); + //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y); + yyyy = vec_splat( inv3, 1 ); + zzzz = vec_splat( inv3, 2 ); + inv3 = vec_mul( inv0, xxxx ); + inv3 = vec_madd( inv1, yyyy, inv3 ); + inv3 = vec_madd( inv2, zzzz, inv3 ); + return Transform3( + Vector3( inv0 ), + Vector3( inv1 ), + Vector3( inv2 ), + Vector3( inv3 ) + ); +} + +inline const Transform3 absPerElem( const Transform3 & tfrm ) +{ + return Transform3( + absPerElem( tfrm.getCol0() ), + absPerElem( tfrm.getCol1() ), + absPerElem( tfrm.getCol2() ), + absPerElem( tfrm.getCol3() ) + ); +} + +inline const Vector3 Transform3::operator *( const Vector3 &vec ) const +{ + __m128 res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( vec.get128(), 0 ); + yyyy = vec_splat( vec.get128(), 1 ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_mul( mCol0.get128(), xxxx ); + res = vec_madd( mCol1.get128(), yyyy, res ); + res = vec_madd( mCol2.get128(), zzzz, res ); + return Vector3( res ); +} + +inline const Point3 Transform3::operator *( const Point3 &pnt ) const +{ + __m128 tmp0, tmp1, res; + __m128 xxxx, yyyy, zzzz; + xxxx = vec_splat( pnt.get128(), 0 ); + yyyy = vec_splat( pnt.get128(), 1 ); + zzzz = vec_splat( pnt.get128(), 2 ); + tmp0 = vec_mul( mCol0.get128(), xxxx ); + tmp1 = vec_mul( mCol1.get128(), yyyy ); + tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 ); + tmp1 = vec_add( mCol3.get128(), tmp1 ); + res = vec_add( tmp0, tmp1 ); + return Point3( res ); +} + +inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const +{ + return Transform3( + ( *this * tfrm.mCol0 ), + ( *this * tfrm.mCol1 ), + ( *this * tfrm.mCol2 ), + Vector3( ( *this * Point3( tfrm.mCol3 ) ) ) + ); +} + +inline Transform3 & Transform3::operator *=( const Transform3 & tfrm ) +{ + *this = *this * tfrm; + return *this; +} + +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ) +{ + return Transform3( + mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ), + mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ), + mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ), + mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() ) + ); +} + +inline const Transform3 Transform3::identity( ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm ) +{ + mCol0 = tfrm.getCol0(); + mCol1 = tfrm.getCol1(); + mCol2 = tfrm.getCol2(); + return *this; +} + +inline const Matrix3 Transform3::getUpper3x3( ) const +{ + return Matrix3( mCol0, mCol1, mCol2 ); +} + +inline Transform3 & Transform3::setTranslation( const Vector3 &translateVec ) +{ + mCol3 = translateVec; + return *this; +} + +inline const Vector3 Transform3::getTranslation( ) const +{ + return mCol3; +} + +inline const Transform3 Transform3::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +inline const Transform3 Transform3::rotationX( const floatInVec &radians ) +{ + __m128 s, c, res1, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res1 = vec_sel( zero, c, select_y ); + res1 = vec_sel( res1, s, select_z ); + res2 = vec_sel( zero, negatef4(s), select_y ); + res2 = vec_sel( res2, c, select_z ); + return Transform3( + Vector3::xAxis( ), + Vector3( res1 ), + Vector3( res2 ), + Vector3( _mm_setzero_ps() ) + ); +} + +inline const Transform3 Transform3::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +inline const Transform3 Transform3::rotationY( const floatInVec &radians ) +{ + __m128 s, c, res0, res2; + __m128 zero; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, negatef4(s), select_z ); + res2 = vec_sel( zero, s, select_x ); + res2 = vec_sel( res2, c, select_z ); + return Transform3( + Vector3( res0 ), + Vector3::yAxis( ), + Vector3( res2 ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +inline const Transform3 Transform3::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, res0, res1; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __m128 zero = _mm_setzero_ps(); + sincosf4( radians.get128(), &s, &c ); + res0 = vec_sel( zero, c, select_x ); + res0 = vec_sel( res0, s, select_y ); + res1 = vec_sel( zero, negatef4(s), select_x ); + res1 = vec_sel( res1, c, select_y ); + return Transform3( + Vector3( res0 ), + Vector3( res1 ), + Vector3::zAxis( ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ ) +{ + __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp; + angles = Vector4( radiansXYZ, 0.0f ).get128(); + sincosf4( angles, &s, &c ); + negS = negatef4( s ); + Z0 = vec_mergel( c, s ); + Z1 = vec_mergel( negS, c ); + __declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; + Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); + Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); + Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); + X0 = vec_splat( s, 0 ); + X1 = vec_splat( c, 0 ); + tmp = vec_mul( Z0, Y1 ); + return Transform3( + Vector3( vec_mul( Z0, Y0 ) ), + Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ), + Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +inline const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::rotation( const Quat &unitQuat ) +{ + return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); +} + +inline const Transform3 Transform3::scale( const Vector3 &scaleVec ) +{ + __m128 zero = _mm_setzero_ps(); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + return Transform3( + Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ), + Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ), + Vector3( 0.0f ) + ); +} + +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ) +{ + return Transform3( + ( tfrm.getCol0() * scaleVec.getX( ) ), + ( tfrm.getCol1() * scaleVec.getY( ) ), + ( tfrm.getCol2() * scaleVec.getZ( ) ), + tfrm.getCol3() + ); +} + +inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ) +{ + return Transform3( + mulPerElem( tfrm.getCol0(), scaleVec ), + mulPerElem( tfrm.getCol1(), scaleVec ), + mulPerElem( tfrm.getCol2(), scaleVec ), + mulPerElem( tfrm.getCol3(), scaleVec ) + ); +} + +inline const Transform3 Transform3::translation( const Vector3 &translateVec ) +{ + return Transform3( + Vector3::xAxis( ), + Vector3::yAxis( ), + Vector3::zAxis( ), + translateVec + ); +} + +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ) +{ + return Transform3( + select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ), + select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ), + select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ), + select( tfrm0.getCol3(), tfrm1.getCol3(), select1 ) + ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Transform3 & tfrm ) +{ + print( tfrm.getRow( 0 ) ); + print( tfrm.getRow( 1 ) ); + print( tfrm.getRow( 2 ) ); +} + +inline void print( const Transform3 & tfrm, const char * name ) +{ + printf("%s:\n", name); + print( tfrm ); +} + +#endif + +inline Quat::Quat( const Matrix3 & tfrm ) +{ + __m128 res; + __m128 col0, col1, col2; + __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff; + __m128 zy_xz_yx, yz_zx_xy, sum, diff; + __m128 radicand, invSqrt, scale; + __m128 res0, res1, res2, res3; + __m128 xx, yy, zz; + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + __declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff}; + + col0 = tfrm.getCol0().get128(); + col1 = tfrm.getCol1().get128(); + col2 = tfrm.getCol2().get128(); + + /* four cases: */ + /* trace > 0 */ + /* else */ + /* xx largest diagonal element */ + /* yy largest diagonal element */ + /* zz largest diagonal element */ + + /* compute quaternion for each case */ + + xx_yy = vec_sel( col0, col1, select_y ); + //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX ); + //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY ); + //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC ); + xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) ); + xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck + yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) ); + zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) ); + + diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); + diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); + radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) ); + invSqrt = rsqrtf4( radicand ); + + zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03 + //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX ); + zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) ); // zy_xz_yx = 12 12 01 00 + zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y ); // zy_xz_yx = 12 20 01 00 + yz_zx_xy = vec_sel( col0, col1, select_x ); // yz_zx_xy = 10 01 02 03 + //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX ); + yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) ); // yz_zx_xy = 10 02 10 10 + yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x ); // yz_zx_xy = 21 02 10 10 + + sum = vec_add( zy_xz_yx, yz_zx_xy ); + diff = vec_sub( zy_xz_yx, yz_zx_xy ); + + scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) ); + + //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA ); + res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) ); + res0 = vec_sel( res0, vec_splat(diff, 0), select_w ); // TODO: Ck + //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB ); + res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) ); + res1 = vec_sel( res1, vec_splat(diff, 1), select_w ); // TODO: Ck + //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC ); + res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) ); + res2 = vec_sel( res2, vec_splat(diff, 2), select_w ); // TODO: Ck + res3 = diff; + res0 = vec_sel( res0, radicand, select_x ); + res1 = vec_sel( res1, radicand, select_y ); + res2 = vec_sel( res2, radicand, select_z ); + res3 = vec_sel( res3, radicand, select_w ); + res0 = vec_mul( res0, vec_splat( scale, 0 ) ); + res1 = vec_mul( res1, vec_splat( scale, 1 ) ); + res2 = vec_mul( res2, vec_splat( scale, 2 ) ); + res3 = vec_mul( res3, vec_splat( scale, 3 ) ); + + /* determine case and select answer */ + + xx = vec_splat( col0, 0 ); + yy = vec_splat( col1, 1 ); + zz = vec_splat( col2, 2 ); + res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); + res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); + res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) ); + mVec128 = res; +} + +inline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 ) +{ + return Matrix3( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ) + ); +} + +inline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 ) +{ + return Matrix4( + ( tfrm0 * tfrm1.getX( ) ), + ( tfrm0 * tfrm1.getY( ) ), + ( tfrm0 * tfrm1.getZ( ) ), + ( tfrm0 * tfrm1.getW( ) ) + ); +} + +inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ) +{ + __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res; + __m128 xxxx, yyyy, zzzz; + tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); + tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); + xxxx = vec_splat( vec.get128(), 0 ); + mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() ); + //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX ); + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); + mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y); + //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX ); + mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); + mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y); + yyyy = vec_splat( vec.get128(), 1 ); + res = vec_mul( mcol0, xxxx ); + zzzz = vec_splat( vec.get128(), 2 ); + res = vec_madd( mcol1, yyyy, res ); + res = vec_madd( mcol2, zzzz, res ); + return Vector3( res ); +} + +inline const Matrix3 crossMatrix( const Vector3 &vec ) +{ + __m128 neg, res0, res1, res2; + neg = negatef4( vec.get128() ); + __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; + //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX ); + res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) ); + res0 = vec_sel(res0, vec_splat(neg, 1), select_z); + //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX ); + res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x); + //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX ); + res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) ); + res2 = vec_sel(res2, vec_splat(neg, 0), select_y); + __declspec(align(16)) unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff}; + __declspec(align(16)) unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff}; + __declspec(align(16)) unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff}; + res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) ); + res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) ); + res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects? + return Matrix3( + Vector3( res0 ), + Vector3( res1 ), + Vector3( res2 ) + ); +} + +inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ) +{ + return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); +} + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h new file mode 100644 index 000000000..c91f5db41 --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h @@ -0,0 +1,533 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef _VECTORMATH_QUAT_AOS_CPP_H +#define _VECTORMATH_QUAT_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +#endif + +namespace Vectormath { +namespace Aos { + +inline Quat::Quat( float _x, float _y, float _z, float _w ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, _w); +} + +inline Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) +{ + mVec128 = _mm_unpacklo_ps( + _mm_unpacklo_ps( _x.get128(), _z.get128() ), + _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); +} + +inline Quat::Quat( const Vector3 &xyz, float _w ) +{ + mVec128 = xyz.get128(); + _vmathVfSetElement(mVec128, _w, 3); +} + +inline Quat::Quat( const Vector3 &xyz, const floatInVec &_w ) +{ + mVec128 = xyz.get128(); + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); +} + +inline Quat::Quat( const Vector4 &vec ) +{ + mVec128 = vec.get128(); +} + +inline Quat::Quat( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +inline Quat::Quat( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +inline Quat::Quat( __m128 vf4 ) +{ + mVec128 = vf4; +} + +inline const Quat Quat::identity( ) +{ + return Quat( _VECTORMATH_UNIT_0001 ); +} + +inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ) +{ + return lerp( floatInVec(t), quat0, quat1 ); +} + +inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ) +{ + return ( quat0 + ( ( quat1 - quat0 ) * t ) ); +} + +inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ) +{ + return slerp( floatInVec(t), unitQuat0, unitQuat1 ); +} + +inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ) +{ + Quat start; + vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + vec_uint4 selectMask; + cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() ); + selectMask = (vec_uint4)vec_cmpgt( _mm_setzero_ps(), cosAngle ); + cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask ); + start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); + selectMask = (vec_uint4)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt ); + angles = vec_mergeh( _mm_set1_ps(1.0f), tttt ); + angles = vec_mergeh( angles, oneMinusT ); + angles = vec_madd( angles, angle, _mm_setzero_ps() ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) ); +} + +inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +{ + return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 ); +} + +inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ) +{ + return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) ); +} + +inline __m128 Quat::get128( ) const +{ + return mVec128; +} + +inline Quat & Quat::operator =( const Quat &quat ) +{ + mVec128 = quat.mVec128; + return *this; +} + +inline Quat & Quat::setXYZ( const Vector3 &vec ) +{ + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + mVec128 = vec_sel( vec.get128(), mVec128, sw ); + return *this; +} + +inline const Vector3 Quat::getXYZ( ) const +{ + return Vector3( mVec128 ); +} + +inline Quat & Quat::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +inline Quat & Quat::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +inline const floatInVec Quat::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +inline Quat & Quat::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +inline Quat & Quat::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +inline const floatInVec Quat::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +inline Quat & Quat::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +inline Quat & Quat::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +inline const floatInVec Quat::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +inline Quat & Quat::setW( float _w ) +{ + _vmathVfSetElement(mVec128, _w, 3); + return *this; +} + +inline Quat & Quat::setW( const floatInVec &_w ) +{ + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); + return *this; +} + +inline const floatInVec Quat::getW( ) const +{ + return floatInVec( mVec128, 3 ); +} + +inline Quat & Quat::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +inline Quat & Quat::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +inline const floatInVec Quat::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline VecIdx Quat::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +inline const floatInVec Quat::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline const Quat Quat::operator +( const Quat &quat ) const +{ + return Quat( _mm_add_ps( mVec128, quat.mVec128 ) ); +} + +inline const Quat Quat::operator -( const Quat &quat ) const +{ + return Quat( _mm_sub_ps( mVec128, quat.mVec128 ) ); +} + +inline const Quat Quat::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +inline const Quat Quat::operator *( const floatInVec &scalar ) const +{ + return Quat( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +inline Quat & Quat::operator +=( const Quat &quat ) +{ + *this = *this + quat; + return *this; +} + +inline Quat & Quat::operator -=( const Quat &quat ) +{ + *this = *this - quat; + return *this; +} + +inline Quat & Quat::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline Quat & Quat::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Quat Quat::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +inline const Quat Quat::operator /( const floatInVec &scalar ) const +{ + return Quat( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +inline Quat & Quat::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline Quat & Quat::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Quat Quat::operator -( ) const +{ + return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); +} + +inline const Quat operator *( float scalar, const Quat &quat ) +{ + return floatInVec(scalar) * quat; +} + +inline const Quat operator *( const floatInVec &scalar, const Quat &quat ) +{ + return quat * scalar; +} + +inline const floatInVec dot( const Quat &quat0, const Quat &quat1 ) +{ + return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); +} + +inline const floatInVec norm( const Quat &quat ) +{ + return floatInVec( _vmathVfDot4( quat.get128(), quat.get128() ), 0 ); +} + +inline const floatInVec length( const Quat &quat ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 ); +} + +inline const Quat normalize( const Quat &quat ) +{ + return Quat( _mm_mul_ps( quat.get128(), _mm_rsqrt_ps( _vmathVfDot4( quat.get128(), quat.get128() ) ) ) ); +} + +inline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + Vector3 crossVec; + __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res; + cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); + cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) ); + recipCosHalfAngleX2 = _mm_rsqrt_ps( cosAngleX2Plus2 ); + cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 ); + crossVec = cross( unitVec0, unitVec1 ); + res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 ); + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw ); + return Quat( res ); +} + +inline const Quat Quat::rotation( float radians, const Vector3 &unitVec ) +{ + return rotation( floatInVec(radians), unitVec ); +} + +inline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( vec_mul( unitVec.get128(), s ), c, sw ); + return Quat( res ); +} + +inline const Quat Quat::rotationX( float radians ) +{ + return rotationX( floatInVec(radians) ); +} + +inline const Quat Quat::rotationX( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + __declspec(align(16)) unsigned int xsw[4] = {0xffffffff, 0, 0, 0}; + __declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, xsw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +inline const Quat Quat::rotationY( float radians ) +{ + return rotationY( floatInVec(radians) ); +} + +inline const Quat Quat::rotationY( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + __declspec(align(16)) unsigned int ysw[4] = {0, 0xffffffff, 0, 0}; + __declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, ysw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +inline const Quat Quat::rotationZ( float radians ) +{ + return rotationZ( floatInVec(radians) ); +} + +inline const Quat Quat::rotationZ( const floatInVec &radians ) +{ + __m128 s, c, angle, res; + angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); + sincosf4( angle, &s, &c ); + __declspec(align(16)) unsigned int zsw[4] = {0, 0, 0xffffffff, 0}; + __declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; + res = vec_sel( _mm_setzero_ps(), s, zsw ); + res = vec_sel( res, c, wsw ); + return Quat( res ); +} + +inline const Quat Quat::operator *( const Quat &quat ) const +{ + __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3; + __m128 product, l_wxyz, r_wxyz, xy, qw; + ldata = mVec128; + rdata = quat.mVec128; + tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) ); + qv = vec_mul( vec_splat( ldata, 3 ), rdata ); + qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv ); + qv = vec_madd( tmp0, tmp1, qv ); + qv = vec_nmsub( tmp2, tmp3, qv ); + product = vec_mul( ldata, rdata ); + l_wxyz = vec_sld( ldata, ldata, 12 ); + r_wxyz = vec_sld( rdata, rdata, 12 ); + qw = vec_nmsub( l_wxyz, r_wxyz, product ); + xy = vec_madd( l_wxyz, r_wxyz, product ); + qw = vec_sub( qw, vec_sld( xy, xy, 8 ) ); + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + return Quat( vec_sel( qv, qw, sw ) ); +} + +inline Quat & Quat::operator *=( const Quat &quat ) +{ + *this = *this * quat; + return *this; +} + +inline const Vector3 rotate( const Quat &quat, const Vector3 &vec ) +{ __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res; + qdata = quat.get128(); + vdata = vec.get128(); + tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) ); + wwww = vec_splat( qdata, 3 ); + qv = vec_mul( wwww, vdata ); + qv = vec_madd( tmp0, tmp1, qv ); + qv = vec_nmsub( tmp2, tmp3, qv ); + product = vec_mul( qdata, vdata ); + qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product ); + qw = vec_add( vec_sld( product, product, 8 ), qw ); + tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) ); + res = vec_mul( vec_splat( qw, 0 ), qdata ); + res = vec_madd( wwww, qv, res ); + res = vec_madd( tmp0, tmp1, res ); + res = vec_nmsub( tmp2, tmp3, res ); + return Vector3( res ); +} + +inline const Quat conj( const Quat &quat ) +{ + __declspec(align(16)) unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0}; + return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) ); +} + +inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ) +{ + return select( quat0, quat1, boolInVec(select1) ); +} + +inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ) +{ + return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Quat &quat ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = quat.get128(); + printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +inline void print( const Quat &quat, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = quat.get128(); + printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/readme_e.txt b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/readme_e.txt new file mode 100644 index 000000000..6f30b2507 --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/readme_e.txt @@ -0,0 +1 @@ +This folder contains an SSE implementation of the c++ aos component of the vectormath library as used by PSSG. \ No newline at end of file diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h new file mode 100644 index 000000000..3557af08d --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h @@ -0,0 +1,1380 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VECTORMATH_VEC_AOS_CPP_H +#define _VECTORMATH_VEC_AOS_CPP_H + +//----------------------------------------------------------------------------- +// Constants +// for permutes words are labeled [x,y,z,w] [a,b,c,d] + +#define _VECTORMATH_PERM_X 0x00010203 +#define _VECTORMATH_PERM_Y 0x04050607 +#define _VECTORMATH_PERM_Z 0x08090a0b +#define _VECTORMATH_PERM_W 0x0c0d0e0f +#define _VECTORMATH_PERM_A 0x10111213 +#define _VECTORMATH_PERM_B 0x14151617 +#define _VECTORMATH_PERM_C 0x18191a1b +#define _VECTORMATH_PERM_D 0x1c1d1e1f +#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A } +#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B } +#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C } +#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W } +#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W } +#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 } +#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 } +#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 } +#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff } +#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f } +#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f } +#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f } +#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f } +#define _VECTORMATH_SLERP_TOL 0.999f + +//----------------------------------------------------------------------------- +// Definitions + +#ifndef _VECTORMATH_INTERNAL_FUNCTIONS +#define _VECTORMATH_INTERNAL_FUNCTIONS + +static inline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 ) +{ + __m128 result = _mm_mul_ps( vec0, vec1); + return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) ); +} + +static inline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 ) +{ + __m128 result = _mm_mul_ps(vec0, vec1); + return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)), + _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)), + _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3))))); +} + +static inline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 ) +{ + __m128 tmp0, tmp1, tmp2, tmp3, result; + tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) ); + tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) ); + tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) ); + tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) ); + result = vec_mul( tmp0, tmp1 ); + result = vec_nmsub( tmp2, tmp3, result ); + return result; +} +/* +static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v) +{ +#if 0 + vec_int4 bexp; + vec_uint4 mant, sign, hfloat; + vec_uint4 notZero, isInf; + const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u); + const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu); + const vec_uint4 mergeSign = (vec_uint4)(0x00008000u); + + sign = vec_sr((vec_uint4)v, (vec_uint4)16); + mant = vec_sr((vec_uint4)v, (vec_uint4)13); + bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff); + + notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112); + isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142); + + bexp = _mm_add_ps(bexp, (vec_int4)-112); + bexp = vec_sl(bexp, (vec_uint4)10); + + hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant); + hfloat = vec_sel((vec_uint4)(0), hfloat, notZero); + hfloat = vec_sel(hfloat, hfloatInf, isInf); + hfloat = vec_sel(hfloat, sign, mergeSign); + + return hfloat; +#else + assert(0); + return _mm_setzero_ps(); +#endif +} + +static inline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v) +{ +#if 0 + vec_uint4 hfloat_u, hfloat_v; + const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; + hfloat_u = _vmathVfToHalfFloatsUnpacked(u); + hfloat_v = _vmathVfToHalfFloatsUnpacked(v); + return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack); +#else + assert(0); + return _mm_setzero_si128(); +#endif +} +*/ + +static inline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot) +{ + SSEFloat s; + s.m128 = src; + SSEFloat d; + d.m128 = dst; + d.f[slot] = s.f[slot]; + return d.m128; +} + +#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar + +static inline __m128 _vmathVfSplatScalar(float scalar) +{ + return _mm_set1_ps(scalar); +} + +#endif + +namespace Vectormath { +namespace Aos { + +#ifdef _VECTORMATH_NO_SCALAR_CAST +inline VecIdx::operator floatInVec() const +{ + return floatInVec(ref, i); +} + +inline float VecIdx::getAsFloat() const +#else +inline VecIdx::operator float() const +#endif +{ + return ((float *)&ref)[i]; +} + +inline float VecIdx::operator =( float scalar ) +{ + _vmathVfSetElement(ref, scalar, i); + return scalar; +} + +inline floatInVec VecIdx::operator =( const floatInVec &scalar ) +{ + ref = _vmathVfInsert(ref, scalar.get128(), i); + return scalar; +} + +inline floatInVec VecIdx::operator =( const VecIdx& scalar ) +{ + return *this = floatInVec(scalar.ref, scalar.i); +} + +inline floatInVec VecIdx::operator *=( float scalar ) +{ + return *this *= floatInVec(scalar); +} + +inline floatInVec VecIdx::operator *=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) * scalar; +} + +inline floatInVec VecIdx::operator /=( float scalar ) +{ + return *this /= floatInVec(scalar); +} + +inline floatInVec VecIdx::operator /=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) / scalar; +} + +inline floatInVec VecIdx::operator +=( float scalar ) +{ + return *this += floatInVec(scalar); +} + +inline floatInVec VecIdx::operator +=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) + scalar; +} + +inline floatInVec VecIdx::operator -=( float scalar ) +{ + return *this -= floatInVec(scalar); +} + +inline floatInVec VecIdx::operator -=( const floatInVec &scalar ) +{ + return *this = floatInVec(ref, i) - scalar; +} + +inline Vector3::Vector3( float _x, float _y, float _z ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); +} + +inline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +{ + __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() ); + mVec128 = _mm_unpacklo_ps( xz, _y.get128() ); +} + +inline Vector3::Vector3( const Point3 &pnt ) +{ + mVec128 = pnt.get128(); +} + +inline Vector3::Vector3( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +inline Vector3::Vector3( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +inline Vector3::Vector3( __m128 vf4 ) +{ + mVec128 = vf4; +} + +inline const Vector3 Vector3::xAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_1000 ); +} + +inline const Vector3 Vector3::yAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_0100 ); +} + +inline const Vector3 Vector3::zAxis( ) +{ + return Vector3( _VECTORMATH_UNIT_0010 ); +} + +inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ) +{ + return lerp( floatInVec(t), vec0, vec1 ); +} + +inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + return slerp( floatInVec(t), unitVec0, unitVec1 ); +} + +inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ) +{ + __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); + __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt ); + angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t + angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t + angles = _mm_mul_ps( angles, angle ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); +} + +inline __m128 Vector3::get128( ) const +{ + return mVec128; +} + +inline void storeXYZ( const Vector3 &vec, __m128 * quad ) +{ + __m128 dstVec = *quad; + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize + dstVec = vec_sel(vec.get128(), dstVec, sw); + *quad = dstVec; +} + +inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ) +{ + const float *quads = (float *)threeQuads; + vec0 = Vector3( _mm_load_ps(quads) ); + vec1 = Vector3( _mm_loadu_ps(quads + 3) ); + vec2 = Vector3( _mm_loadu_ps(quads + 6) ); + vec3 = Vector3( _mm_loadu_ps(quads + 9) ); +} + +inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ) +{ + __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); + __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); + __declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff}; + __declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0}; + threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw ); + threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) ); + threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); +} +/* +inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ) +{ + assert(0); +#if 0 + __m128 xyz0[3]; + __m128 xyz1[3]; + storeXYZArray( vec0, vec1, vec2, vec3, xyz0 ); + storeXYZArray( vec4, vec5, vec6, vec7, xyz1 ); + threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); + threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); + threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]); +#endif +} +*/ +inline Vector3 & Vector3::operator =( const Vector3 &vec ) +{ + mVec128 = vec.mVec128; + return *this; +} + +inline Vector3 & Vector3::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +inline Vector3 & Vector3::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +inline const floatInVec Vector3::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +inline Vector3 & Vector3::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +inline Vector3 & Vector3::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +inline const floatInVec Vector3::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +inline Vector3 & Vector3::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +inline Vector3 & Vector3::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +inline const floatInVec Vector3::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +inline Vector3 & Vector3::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +inline Vector3 & Vector3::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +inline const floatInVec Vector3::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline VecIdx Vector3::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +inline const floatInVec Vector3::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline const Vector3 Vector3::operator +( const Vector3 &vec ) const +{ + return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) ); +} + +inline const Vector3 Vector3::operator -( const Vector3 &vec ) const +{ + return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) ); +} + +inline const Point3 Vector3::operator +( const Point3 &pnt ) const +{ + return Point3( _mm_add_ps( mVec128, pnt.get128() ) ); +} + +inline const Vector3 Vector3::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +inline const Vector3 Vector3::operator *( const floatInVec &scalar ) const +{ + return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +inline Vector3 & Vector3::operator +=( const Vector3 &vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector3 & Vector3::operator -=( const Vector3 &vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector3 & Vector3::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline Vector3 & Vector3::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector3 Vector3::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +inline const Vector3 Vector3::operator /( const floatInVec &scalar ) const +{ + return Vector3( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +inline Vector3 & Vector3::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline Vector3 & Vector3::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector3 Vector3::operator -( ) const +{ + return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); +} + +inline const Vector3 operator *( float scalar, const Vector3 &vec ) +{ + return floatInVec(scalar) * vec; +} + +inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ) +{ + return vec * scalar; +} + +inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const Vector3 recipPerElem( const Vector3 &vec ) +{ + return Vector3( _mm_rcp_ps( vec.get128() ) ); +} + +inline const Vector3 absPerElem( const Vector3 &vec ) +{ + return Vector3( fabsf4( vec.get128() ) ); +} + +inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Vector3( _mm_or_ps( + _mm_and_ps ( vmask, vec0.get128() ), // Value + _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs +} + +inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const floatInVec maxElem( const Vector3 &vec ) +{ + return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const floatInVec minElem( const Vector3 &vec ) +{ + return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +inline const floatInVec sum( const Vector3 &vec ) +{ + return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) ); +} + +inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 ); +} + +inline const floatInVec lengthSqr( const Vector3 &vec ) +{ + return floatInVec( _vmathVfDot3( vec.get128(), vec.get128() ), 0 ); +} + +inline const floatInVec length( const Vector3 &vec ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 ); +} + +inline const Vector3 normalize( const Vector3 &vec ) +{ + return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) ); +} + +inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ) +{ + return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) ); +} + +inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ) +{ + return select( vec0, vec1, boolInVec(select1) ); +} + +inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ) +{ + return Vector3(vec_sel( vec0.get128(), vec1.get128(), select1.get128() )); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector3 &vec ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +inline void print( const Vector3 &vec, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +#endif + +inline Vector4::Vector4( float _x, float _y, float _z, float _w ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, _w); + } + +inline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w ) +{ + mVec128 = _mm_unpacklo_ps( + _mm_unpacklo_ps( _x.get128(), _z.get128() ), + _mm_unpacklo_ps( _y.get128(), _w.get128() ) ); +} + +inline Vector4::Vector4( const Vector3 &xyz, float _w ) +{ + mVec128 = xyz.get128(); + _vmathVfSetElement(mVec128, _w, 3); +} + +inline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w ) +{ + mVec128 = xyz.get128(); + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); +} + +inline Vector4::Vector4( const Vector3 &vec ) +{ + mVec128 = vec.get128(); + mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3); +} + +inline Vector4::Vector4( const Point3 &pnt ) +{ + mVec128 = pnt.get128(); + mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3); +} + +inline Vector4::Vector4( const Quat &quat ) +{ + mVec128 = quat.get128(); +} + +inline Vector4::Vector4( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +inline Vector4::Vector4( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +inline Vector4::Vector4( __m128 vf4 ) +{ + mVec128 = vf4; +} + +inline const Vector4 Vector4::xAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_1000 ); +} + +inline const Vector4 Vector4::yAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0100 ); +} + +inline const Vector4 Vector4::zAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0010 ); +} + +inline const Vector4 Vector4::wAxis( ) +{ + return Vector4( _VECTORMATH_UNIT_0001 ); +} + +inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ) +{ + return lerp( floatInVec(t), vec0, vec1 ); +} + +inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ) +{ + return ( vec0 + ( ( vec1 - vec0 ) * t ) ); +} + +inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +{ + return slerp( floatInVec(t), unitVec0, unitVec1 ); +} + +inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ) +{ + __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines; + cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() ); + __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); + angle = acosf4( cosAngle ); + tttt = t.get128(); + oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt ); + angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t + angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t + angles = _mm_mul_ps( angles, angle ); + sines = sinf4( angles ); + scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); + scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); + scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); + return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) ); +} + +inline __m128 Vector4::get128( ) const +{ + return mVec128; +} +/* +inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ) +{ + twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128()); + twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128()); +} +*/ +inline Vector4 & Vector4::operator =( const Vector4 &vec ) +{ + mVec128 = vec.mVec128; + return *this; +} + +inline Vector4 & Vector4::setXYZ( const Vector3 &vec ) +{ + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; + mVec128 = vec_sel( vec.get128(), mVec128, sw ); + return *this; +} + +inline const Vector3 Vector4::getXYZ( ) const +{ + return Vector3( mVec128 ); +} + +inline Vector4 & Vector4::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +inline Vector4 & Vector4::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +inline const floatInVec Vector4::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +inline Vector4 & Vector4::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +inline Vector4 & Vector4::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +inline const floatInVec Vector4::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +inline Vector4 & Vector4::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +inline Vector4 & Vector4::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +inline const floatInVec Vector4::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +inline Vector4 & Vector4::setW( float _w ) +{ + _vmathVfSetElement(mVec128, _w, 3); + return *this; +} + +inline Vector4 & Vector4::setW( const floatInVec &_w ) +{ + mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); + return *this; +} + +inline const floatInVec Vector4::getW( ) const +{ + return floatInVec( mVec128, 3 ); +} + +inline Vector4 & Vector4::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +inline Vector4 & Vector4::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +inline const floatInVec Vector4::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline VecIdx Vector4::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +inline const floatInVec Vector4::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline const Vector4 Vector4::operator +( const Vector4 &vec ) const +{ + return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) ); +} + +inline const Vector4 Vector4::operator -( const Vector4 &vec ) const +{ + return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) ); +} + +inline const Vector4 Vector4::operator *( float scalar ) const +{ + return *this * floatInVec(scalar); +} + +inline const Vector4 Vector4::operator *( const floatInVec &scalar ) const +{ + return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) ); +} + +inline Vector4 & Vector4::operator +=( const Vector4 &vec ) +{ + *this = *this + vec; + return *this; +} + +inline Vector4 & Vector4::operator -=( const Vector4 &vec ) +{ + *this = *this - vec; + return *this; +} + +inline Vector4 & Vector4::operator *=( float scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline Vector4 & Vector4::operator *=( const floatInVec &scalar ) +{ + *this = *this * scalar; + return *this; +} + +inline const Vector4 Vector4::operator /( float scalar ) const +{ + return *this / floatInVec(scalar); +} + +inline const Vector4 Vector4::operator /( const floatInVec &scalar ) const +{ + return Vector4( _mm_div_ps( mVec128, scalar.get128() ) ); +} + +inline Vector4 & Vector4::operator /=( float scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline Vector4 & Vector4::operator /=( const floatInVec &scalar ) +{ + *this = *this / scalar; + return *this; +} + +inline const Vector4 Vector4::operator -( ) const +{ + return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) ); +} + +inline const Vector4 operator *( float scalar, const Vector4 &vec ) +{ + return floatInVec(scalar) * vec; +} + +inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ) +{ + return vec * scalar; +} + +inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const Vector4 recipPerElem( const Vector4 &vec ) +{ + return Vector4( _mm_rcp_ps( vec.get128() ) ); +} + +inline const Vector4 absPerElem( const Vector4 &vec ) +{ + return Vector4( fabsf4( vec.get128() ) ); +} + +inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Vector4( _mm_or_ps( + _mm_and_ps ( vmask, vec0.get128() ), // Value + _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs +} + +inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const floatInVec maxElem( const Vector4 &vec ) +{ + return floatInVec( _mm_max_ps( + _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) ); +} + +inline const floatInVec minElem( const Vector4 &vec ) +{ + return floatInVec( _mm_min_ps( + _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +inline const floatInVec sum( const Vector4 &vec ) +{ + return floatInVec( _mm_add_ps( + _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), + _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) ); +} + +inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ) +{ + return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 ); +} + +inline const floatInVec lengthSqr( const Vector4 &vec ) +{ + return floatInVec( _vmathVfDot4( vec.get128(), vec.get128() ), 0 ); +} + +inline const floatInVec length( const Vector4 &vec ) +{ + return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 ); +} + +inline const Vector4 normalize( const Vector4 &vec ) +{ + return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); +} + +inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ) +{ + return select( vec0, vec1, boolInVec(select1) ); +} + +inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ) +{ + return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Vector4 &vec ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +inline void print( const Vector4 &vec, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = vec.get128(); + printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); +} + +#endif + +inline Point3::Point3( float _x, float _y, float _z ) +{ + mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); +} + +inline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z ) +{ + mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() ); +} + +inline Point3::Point3( const Vector3 &vec ) +{ + mVec128 = vec.get128(); +} + +inline Point3::Point3( float scalar ) +{ + mVec128 = floatInVec(scalar).get128(); +} + +inline Point3::Point3( const floatInVec &scalar ) +{ + mVec128 = scalar.get128(); +} + +inline Point3::Point3( __m128 vf4 ) +{ + mVec128 = vf4; +} + +inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ) +{ + return lerp( floatInVec(t), pnt0, pnt1 ); +} + +inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ) +{ + return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); +} + +inline __m128 Point3::get128( ) const +{ + return mVec128; +} + +inline void storeXYZ( const Point3 &pnt, __m128 * quad ) +{ + __m128 dstVec = *quad; + __declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize + dstVec = vec_sel(pnt.get128(), dstVec, sw); + *quad = dstVec; +} + +inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ) +{ + const float *quads = (float *)threeQuads; + pnt0 = Point3( _mm_load_ps(quads) ); + pnt1 = Point3( _mm_loadu_ps(quads + 3) ); + pnt2 = Point3( _mm_loadu_ps(quads + 6) ); + pnt3 = Point3( _mm_loadu_ps(quads + 9) ); +} + +inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ) +{ + __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); + __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); + __declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff}; + __declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0}; + threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw ); + threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) ); + threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); +} +/* +inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ) +{ +#if 0 + __m128 xyz0[3]; + __m128 xyz1[3]; + storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 ); + storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 ); + threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); + threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); + threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]); +#else + assert(0); +#endif +} +*/ +inline Point3 & Point3::operator =( const Point3 &pnt ) +{ + mVec128 = pnt.mVec128; + return *this; +} + +inline Point3 & Point3::setX( float _x ) +{ + _vmathVfSetElement(mVec128, _x, 0); + return *this; +} + +inline Point3 & Point3::setX( const floatInVec &_x ) +{ + mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); + return *this; +} + +inline const floatInVec Point3::getX( ) const +{ + return floatInVec( mVec128, 0 ); +} + +inline Point3 & Point3::setY( float _y ) +{ + _vmathVfSetElement(mVec128, _y, 1); + return *this; +} + +inline Point3 & Point3::setY( const floatInVec &_y ) +{ + mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); + return *this; +} + +inline const floatInVec Point3::getY( ) const +{ + return floatInVec( mVec128, 1 ); +} + +inline Point3 & Point3::setZ( float _z ) +{ + _vmathVfSetElement(mVec128, _z, 2); + return *this; +} + +inline Point3 & Point3::setZ( const floatInVec &_z ) +{ + mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); + return *this; +} + +inline const floatInVec Point3::getZ( ) const +{ + return floatInVec( mVec128, 2 ); +} + +inline Point3 & Point3::setElem( int idx, float value ) +{ + _vmathVfSetElement(mVec128, value, idx); + return *this; +} + +inline Point3 & Point3::setElem( int idx, const floatInVec &value ) +{ + mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); + return *this; +} + +inline const floatInVec Point3::getElem( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline VecIdx Point3::operator []( int idx ) +{ + return VecIdx( mVec128, idx ); +} + +inline const floatInVec Point3::operator []( int idx ) const +{ + return floatInVec( mVec128, idx ); +} + +inline const Vector3 Point3::operator -( const Point3 &pnt ) const +{ + return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) ); +} + +inline const Point3 Point3::operator +( const Vector3 &vec ) const +{ + return Point3( _mm_add_ps( mVec128, vec.get128() ) ); +} + +inline const Point3 Point3::operator -( const Vector3 &vec ) const +{ + return Point3( _mm_sub_ps( mVec128, vec.get128() ) ); +} + +inline Point3 & Point3::operator +=( const Vector3 &vec ) +{ + *this = *this + vec; + return *this; +} + +inline Point3 & Point3::operator -=( const Vector3 &vec ) +{ + *this = *this - vec; + return *this; +} + +inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) ); +} + +inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) ); +} + +inline const Point3 recipPerElem( const Point3 &pnt ) +{ + return Point3( _mm_rcp_ps( pnt.get128() ) ); +} + +inline const Point3 absPerElem( const Point3 &pnt ) +{ + return Point3( fabsf4( pnt.get128() ) ); +} + +inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + __m128 vmask = toM128(0x7fffffff); + return Point3( _mm_or_ps( + _mm_and_ps ( vmask, pnt0.get128() ), // Value + _mm_andnot_ps( vmask, pnt1.get128() ) ) ); // Signs +} + +inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) ); +} + +inline const floatInVec maxElem( const Point3 &pnt ) +{ + return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) ); +} + +inline const floatInVec minElem( const Point3 &pnt ) +{ + return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +inline const floatInVec sum( const Point3 &pnt ) +{ + return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); +} + +inline const Point3 scale( const Point3 &pnt, float scaleVal ) +{ + return scale( pnt, floatInVec( scaleVal ) ); +} + +inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ) +{ + return mulPerElem( pnt, Point3( scaleVal ) ); +} + +inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ) +{ + return mulPerElem( pnt, Point3( scaleVec ) ); +} + +inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ) +{ + return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); +} + +inline const floatInVec distSqrFromOrigin( const Point3 &pnt ) +{ + return lengthSqr( Vector3( pnt ) ); +} + +inline const floatInVec distFromOrigin( const Point3 &pnt ) +{ + return length( Vector3( pnt ) ); +} + +inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return lengthSqr( ( pnt1 - pnt0 ) ); +} + +inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ) +{ + return length( ( pnt1 - pnt0 ) ); +} + +inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ) +{ + return select( pnt0, pnt1, boolInVec(select1) ); +} + +inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ) +{ + return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) ); +} + +#ifdef _VECTORMATH_DEBUG + +inline void print( const Point3 &pnt ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = pnt.get128(); + printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +inline void print( const Point3 &pnt, const char * name ) +{ + union { __m128 v; float s[4]; } tmp; + tmp.v = pnt.get128(); + printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] ); +} + +#endif + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h new file mode 100644 index 000000000..d4ce0b82c --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h @@ -0,0 +1,79 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VECTORMATH_VECIDX_AOS_H +#define _VECTORMATH_VECIDX_AOS_H + +#include "floatInVec.h" + +namespace Vectormath { +namespace Aos { + +//----------------------------------------------------------------------------- +// VecIdx +// Used in setting elements of Vector3, Vector4, Point3, or Quat with the +// subscripting operator. +// + +class VecIdx +{ +private: + __m128 &ref __attribute__ ((aligned(16))); + int i __attribute__ ((aligned(16))); +public: + inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; } + + // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined + // in which case, implicitly casts to floatInVec, and one must call + // getAsFloat to convert to float. + // +#ifdef _VECTORMATH_NO_SCALAR_CAST + inline operator floatInVec() const; + inline float getAsFloat() const; +#else + inline operator float() const; +#endif + + inline float operator =( float scalar ); + inline floatInVec operator =( const floatInVec &scalar ); + inline floatInVec operator =( const VecIdx& scalar ); + inline floatInVec operator *=( float scalar ); + inline floatInVec operator *=( const floatInVec &scalar ); + inline floatInVec operator /=( float scalar ); + inline floatInVec operator /=( const floatInVec &scalar ); + inline floatInVec operator +=( float scalar ); + inline floatInVec operator +=( const floatInVec &scalar ); + inline floatInVec operator -=( float scalar ); + inline floatInVec operator -=( const floatInVec &scalar ); +}; + +} // namespace Aos +} // namespace Vectormath + +#endif diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h new file mode 100644 index 000000000..d927f4bc8 --- /dev/null +++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h @@ -0,0 +1,2491 @@ +/* + Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. + All rights reserved. + + Redistribution and use in source and binary forms, + with or without modification, are permitted provided that the + following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Sony Computer Entertainment Inc nor the names + of its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VECTORMATH_AOS_CPP_H +#define _VECTORMATH_AOS_CPP_H + +#include +#include +#include +#include + +// TODO: Tidy +typedef __m128 vec_float4; +typedef __m128 vec_uint4; +typedef __m128 vec_int4; +typedef __m128i vec_uchar16; +typedef __m128i vec_ushort8; + +#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e)) + +#define _mm_ror_ps(vec,i) \ + (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(i+3)%4,(unsigned char)(i+2)%4,(unsigned char)(i+1)%4,(unsigned char)(i+0)%4))) : (vec)) +#define _mm_rol_ps(vec,i) \ + (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-i)%4,(unsigned char)(6-i)%4,(unsigned char)(5-i)%4,(unsigned char)(4-i)%4))) : (vec)) + +#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4)) + +#define _mm_abs_ps(vec) _mm_andnot_ps(_MASKSIGN_,vec) +#define _mm_neg_ps(vec) _mm_xor_ps(_MASKSIGN_,vec) + +#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) ) + +union SSEFloat +{ + __m128 m128; + float f[4]; +}; + +static inline __m128 vec_sel(__m128 a, __m128 b, __m128 mask) +{ + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +} +static inline __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask) +{ + return vec_sel(a, b, _mm_load_ps((float *)_mask)); +} +static inline __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask) +{ + return vec_sel(a, b, _mm_set1_ps(*(float *)&_mask)); +} + +static inline __m128 toM128(unsigned int x) +{ + return _mm_set1_ps( *(float *)&x ); +} + +static inline __m128 fabsf4(__m128 x) +{ + return _mm_and_ps( x, toM128( 0x7fffffff ) ); +} +/* +union SSE64 +{ + __m128 m128; + struct + { + __m64 m01; + __m64 m23; + } m64; +}; + +static inline __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m64.m01 = _mm_cvttps_pi32(x); + sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2)); + _mm_empty(); + return sse64.m128; +} + +static inline __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + SSE64 sse64; + sse64.m128 = x; + __m128 result =_mm_movelh_ps( + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01), + _mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23)); + _mm_empty(); + return result; +} +*/ +static inline __m128 vec_cts(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + __m128i result = _mm_cvtps_epi32(x); + return (__m128 &)result; +} + +static inline __m128 vec_ctf(__m128 x, int a) +{ + assert(a == 0); // Only 2^0 supported + (void)a; + return _mm_cvtepi32_ps((__m128i &)x); +} + +#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) ) +#define vec_sub(a,b) _mm_sub_ps( a, b ) +#define vec_add(a,b) _mm_add_ps( a, b ) +#define vec_mul(a,b) _mm_mul_ps( a, b ) +#define vec_xor(a,b) _mm_xor_ps( a, b ) +#define vec_and(a,b) _mm_and_ps( a, b ) +#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b ) +#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b ) + +#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b ) +#define vec_mergel(a,b) _mm_unpackhi_ps( a, b ) + +#define vec_andc(a,b) _mm_andnot_ps( b, a ) + +#define sqrtf4(x) _mm_sqrt_ps( x ) +#define rsqrtf4(x) _mm_rsqrt_ps( x ) +#define recipf4(x) _mm_rcp_ps( x ) +#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) + +static inline __m128 acosf4(__m128 x) +{ + __m128 xabs = fabsf4(x); + __m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() ); + __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi amd lo). + */ + __m128 xabs2 = _mm_mul_ps(xabs, xabs); + __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); + __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f), + xabs, _mm_set1_ps(0.0066700901f)), + xabs, _mm_set1_ps(-0.0170881256f)), + xabs, _mm_set1_ps( 0.0308918810f)); + __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f), + xabs, _mm_set1_ps(0.0889789874f)), + xabs, _mm_set1_ps(-0.2145988016f)), + xabs, _mm_set1_ps( 1.5707963050f)); + + __m128 result = vec_madd(hi, xabs4, lo); + + // Adjust the result if x is negactive. + return vec_sel( + vec_mul(t1, result), // Positive + vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative + select); +} + +static inline __m128 sinf4(vec_float4 x) +{ + +// +// Common constants used to evaluate sinf4/cosf4/tanf4 +// +#define _SINCOS_CC0 -0.0013602249f +#define _SINCOS_CC1 0.0416566950f +#define _SINCOS_CC2 -0.4999990225f +#define _SINCOS_SC0 -0.0001950727f +#define _SINCOS_SC1 0.0083320758f +#define _SINCOS_SC2 -0.1666665247f + +#define _SINCOS_KC1 1.57079625129f +#define _SINCOS_KC2 7.54978995489e-8f + + vec_float4 xl,xl2,xl3,res; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + vec_int4 q = vec_cts(xl,0); + + // Compute an offset based on the quadrant that the angle falls in + // + vec_int4 offset = _mm_and_ps(q,toM128(0x3)); + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + + vec_float4 cx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + res = vec_sel(cx,sx,vec_cmpeq(vec_and(offset, + toM128(0x1)), + _mm_setzero_ps())); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + return vec_sel( + vec_xor(toM128(0x80000000U), res), // Negative + res, // Positive + vec_cmpeq(vec_and(offset,toM128(0x2)),_mm_setzero_ps())); +} + +static inline void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c) +{ + vec_float4 xl,xl2,xl3; + vec_int4 offsetSin, offsetCos; + + // Range reduction using : xl = angle * TwoOverPi; + // + xl = vec_mul(x, _mm_set1_ps(0.63661977236f)); + + // Find the quadrant the angle falls in + // using: q = (int) (ceil(abs(xl))*sign(xl)) + // + //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0); + vec_int4 q = vec_cts(xl,0); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + // + offsetSin = vec_and(q,toM128((int)0x3)); + __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin); + offsetCos = (__m128 &)temp; + + // Remainder in range [-pi/4..pi/4] + // + vec_float4 qf = vec_ctf(q,0); + xl = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x)); + + // Compute x^2 and x^3 + // + xl2 = vec_mul(xl,xl); + xl3 = vec_mul(xl2,xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + // + vec_float4 cx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f)); + vec_float4 sx = + vec_madd( + vec_madd( + vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + // + vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,toM128(0x1)),_mm_setzero_ps()); + vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,toM128(0x1)),_mm_setzero_ps()); + *s = vec_sel(cx,sx,sinMask); + *c = vec_sel(cx,sx,cosMask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + // + sinMask = vec_cmpeq(vec_and(offsetSin,toM128(0x2)),_mm_setzero_ps()); + cosMask = vec_cmpeq(vec_and(offsetCos,toM128(0x2)),_mm_setzero_ps()); + + *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask); + *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask); +} + +#include "vecidx_aos.h" +#include "floatInVec.h" +#include "boolInVec.h" + +#ifdef _VECTORMATH_DEBUG +#include +#endif +namespace Vectormath { + +namespace Aos { + +//----------------------------------------------------------------------------- +// Forward Declarations +// + +class Vector3; +class Vector4; +class Point3; +class Quat; +class Matrix3; +class Matrix4; +class Transform3; + +// A 3-D vector in array-of-structures format +// +class Vector3 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + inline Vector3( ) { }; + + // Construct a 3-D vector from x, y, and z elements + // + inline Vector3( float x, float y, float z ); + + // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type) + // + inline Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D point into a 3-D vector + // + explicit inline Vector3( const Point3 &pnt ); + + // Set all elements of a 3-D vector to the same scalar value + // + explicit inline Vector3( float scalar ); + + // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit inline Vector3( const floatInVec &scalar ); + + // Set vector float data in a 3-D vector + // + explicit inline Vector3( __m128 vf4 ); + + // Get vector float data from a 3-D vector + // + inline __m128 get128( ) const; + + // Assign one 3-D vector to another + // + inline Vector3 & operator =( const Vector3 &vec ); + + // Set the x element of a 3-D vector + // + inline Vector3 & setX( float x ); + + // Set the y element of a 3-D vector + // + inline Vector3 & setY( float y ); + + // Set the z element of a 3-D vector + // + inline Vector3 & setZ( float z ); + + // Set the x element of a 3-D vector (scalar data contained in vector data type) + // + inline Vector3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D vector (scalar data contained in vector data type) + // + inline Vector3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D vector (scalar data contained in vector data type) + // + inline Vector3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D vector + // + inline const floatInVec getX( ) const; + + // Get the y element of a 3-D vector + // + inline const floatInVec getY( ) const; + + // Get the z element of a 3-D vector + // + inline const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D vector by index + // + inline Vector3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type) + // + inline Vector3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D vector by index + // + inline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + inline const floatInVec operator []( int idx ) const; + + // Add two 3-D vectors + // + inline const Vector3 operator +( const Vector3 &vec ) const; + + // Subtract a 3-D vector from another 3-D vector + // + inline const Vector3 operator -( const Vector3 &vec ) const; + + // Add a 3-D vector to a 3-D point + // + inline const Point3 operator +( const Point3 &pnt ) const; + + // Multiply a 3-D vector by a scalar + // + inline const Vector3 operator *( float scalar ) const; + + // Divide a 3-D vector by a scalar + // + inline const Vector3 operator /( float scalar ) const; + + // Multiply a 3-D vector by a scalar (scalar data contained in vector data type) + // + inline const Vector3 operator *( const floatInVec &scalar ) const; + + // Divide a 3-D vector by a scalar (scalar data contained in vector data type) + // + inline const Vector3 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Vector3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Vector3 & operator -=( const Vector3 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector3 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector3 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + inline Vector3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + inline Vector3 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 3-D vector + // + inline const Vector3 operator -( ) const; + + // Construct x axis + // + static inline const Vector3 xAxis( ); + + // Construct y axis + // + static inline const Vector3 yAxis( ); + + // Construct z axis + // + static inline const Vector3 zAxis( ); + +}; + +// Multiply a 3-D vector by a scalar +// +inline const Vector3 operator *( float scalar, const Vector3 &vec ); + +// Multiply a 3-D vector by a scalar (scalar data contained in vector data type) +// +inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec ); + +// Multiply two 3-D vectors per element +// +inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Divide two 3-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the reciprocal of a 3-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector3 recipPerElem( const Vector3 &vec ); + +// Compute the absolute value of a 3-D vector per element +// +inline const Vector3 absPerElem( const Vector3 &vec ); + +// Copy sign from one 3-D vector to another, per element +// +inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum of two 3-D vectors per element +// +inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Minimum of two 3-D vectors per element +// +inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 ); + +// Maximum element of a 3-D vector +// +inline const floatInVec maxElem( const Vector3 &vec ); + +// Minimum element of a 3-D vector +// +inline const floatInVec minElem( const Vector3 &vec ); + +// Compute the sum of all elements of a 3-D vector +// +inline const floatInVec sum( const Vector3 &vec ); + +// Compute the dot product of two 3-D vectors +// +inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 ); + +// Compute the square of the length of a 3-D vector +// +inline const floatInVec lengthSqr( const Vector3 &vec ); + +// Compute the length of a 3-D vector +// +inline const floatInVec length( const Vector3 &vec ); + +// Normalize a 3-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector3 normalize( const Vector3 &vec ); + +// Compute cross product of two 3-D vectors +// +inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 ); + +// Outer product of two 3-D vectors +// +inline const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 ); + +// Pre-multiply a row vector by a 3x3 matrix +// NOTE: +// Slower than column post-multiply. +// +inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat ); + +// Cross-product matrix of a 3-D vector +// +inline const Matrix3 crossMatrix( const Vector3 &vec ); + +// Create cross-product matrix and multiply +// NOTE: +// Faster than separately creating a cross-product matrix and multiplying. +// +inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat ); + +// Linear interpolation between two 3-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 ); + +// Spherical linear interpolation between two 3-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 ); + +// Conditionally select between two 3-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 ); + +// Conditionally select between two 3-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word +// +inline void storeXYZ( const Vector3 &vec, __m128 * quad ); + +// Load four three-float 3-D vectors, stored in three quadwords +// +inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads ); + +// Store four 3-D vectors in three quadwords +// +inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads ); + +// Store eight 3-D vectors as half-floats +// +inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 &vec ); + +// Print a 3-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector3 &vec, const char * name ); + +#endif + +// A 4-D vector in array-of-structures format +// +class Vector4 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + inline Vector4( ) { }; + + // Construct a 4-D vector from x, y, z, and w elements + // + inline Vector4( float x, float y, float z, float w ); + + // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type) + // + inline Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a 4-D vector from a 3-D vector and a scalar + // + inline Vector4( const Vector3 &xyz, float w ); + + // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type) + // + inline Vector4( const Vector3 &xyz, const floatInVec &w ); + + // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 + // + explicit inline Vector4( const Vector3 &vec ); + + // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 + // + explicit inline Vector4( const Point3 &pnt ); + + // Copy elements from a quaternion into a 4-D vector + // + explicit inline Vector4( const Quat &quat ); + + // Set all elements of a 4-D vector to the same scalar value + // + explicit inline Vector4( float scalar ); + + // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type) + // + explicit inline Vector4( const floatInVec &scalar ); + + // Set vector float data in a 4-D vector + // + explicit inline Vector4( __m128 vf4 ); + + // Get vector float data from a 4-D vector + // + inline __m128 get128( ) const; + + // Assign one 4-D vector to another + // + inline Vector4 & operator =( const Vector4 &vec ); + + // Set the x, y, and z elements of a 4-D vector + // NOTE: + // This function does not change the w element. + // + inline Vector4 & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a 4-D vector + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a 4-D vector + // + inline Vector4 & setX( float x ); + + // Set the y element of a 4-D vector + // + inline Vector4 & setY( float y ); + + // Set the z element of a 4-D vector + // + inline Vector4 & setZ( float z ); + + // Set the w element of a 4-D vector + // + inline Vector4 & setW( float w ); + + // Set the x element of a 4-D vector (scalar data contained in vector data type) + // + inline Vector4 & setX( const floatInVec &x ); + + // Set the y element of a 4-D vector (scalar data contained in vector data type) + // + inline Vector4 & setY( const floatInVec &y ); + + // Set the z element of a 4-D vector (scalar data contained in vector data type) + // + inline Vector4 & setZ( const floatInVec &z ); + + // Set the w element of a 4-D vector (scalar data contained in vector data type) + // + inline Vector4 & setW( const floatInVec &w ); + + // Get the x element of a 4-D vector + // + inline const floatInVec getX( ) const; + + // Get the y element of a 4-D vector + // + inline const floatInVec getY( ) const; + + // Get the z element of a 4-D vector + // + inline const floatInVec getZ( ) const; + + // Get the w element of a 4-D vector + // + inline const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a 4-D vector by index + // + inline Vector4 & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type) + // + inline Vector4 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a 4-D vector by index + // + inline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + inline const floatInVec operator []( int idx ) const; + + // Add two 4-D vectors + // + inline const Vector4 operator +( const Vector4 &vec ) const; + + // Subtract a 4-D vector from another 4-D vector + // + inline const Vector4 operator -( const Vector4 &vec ) const; + + // Multiply a 4-D vector by a scalar + // + inline const Vector4 operator *( float scalar ) const; + + // Divide a 4-D vector by a scalar + // + inline const Vector4 operator /( float scalar ) const; + + // Multiply a 4-D vector by a scalar (scalar data contained in vector data type) + // + inline const Vector4 operator *( const floatInVec &scalar ) const; + + // Divide a 4-D vector by a scalar (scalar data contained in vector data type) + // + inline const Vector4 operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a 4-D vector + // + inline Vector4 & operator +=( const Vector4 &vec ); + + // Perform compound assignment and subtraction by a 4-D vector + // + inline Vector4 & operator -=( const Vector4 &vec ); + + // Perform compound assignment and multiplication by a scalar + // + inline Vector4 & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Vector4 & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + inline Vector4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + inline Vector4 & operator /=( const floatInVec &scalar ); + + // Negate all elements of a 4-D vector + // + inline const Vector4 operator -( ) const; + + // Construct x axis + // + static inline const Vector4 xAxis( ); + + // Construct y axis + // + static inline const Vector4 yAxis( ); + + // Construct z axis + // + static inline const Vector4 zAxis( ); + + // Construct w axis + // + static inline const Vector4 wAxis( ); + +}; + +// Multiply a 4-D vector by a scalar +// +inline const Vector4 operator *( float scalar, const Vector4 &vec ); + +// Multiply a 4-D vector by a scalar (scalar data contained in vector data type) +// +inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec ); + +// Multiply two 4-D vectors per element +// +inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Divide two 4-D vectors per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the reciprocal of a 4-D vector per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Vector4 recipPerElem( const Vector4 &vec ); + +// Compute the absolute value of a 4-D vector per element +// +inline const Vector4 absPerElem( const Vector4 &vec ); + +// Copy sign from one 4-D vector to another, per element +// +inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum of two 4-D vectors per element +// +inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Minimum of two 4-D vectors per element +// +inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 ); + +// Maximum element of a 4-D vector +// +inline const floatInVec maxElem( const Vector4 &vec ); + +// Minimum element of a 4-D vector +// +inline const floatInVec minElem( const Vector4 &vec ); + +// Compute the sum of all elements of a 4-D vector +// +inline const floatInVec sum( const Vector4 &vec ); + +// Compute the dot product of two 4-D vectors +// +inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 ); + +// Compute the square of the length of a 4-D vector +// +inline const floatInVec lengthSqr( const Vector4 &vec ); + +// Compute the length of a 4-D vector +// +inline const floatInVec length( const Vector4 &vec ); + +// Normalize a 4-D vector +// NOTE: +// The result is unpredictable when all elements of vec are at or near zero. +// +inline const Vector4 normalize( const Vector4 &vec ); + +// Outer product of two 4-D vectors +// +inline const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 ); + +// Spherical linear interpolation between two 4-D vectors +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// The result is unpredictable if the vectors point in opposite directions. +// Does not clamp t between 0 and 1. +// +inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 ); + +// Conditionally select between two 4-D vectors +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 ); + +// Conditionally select between two 4-D vectors (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 ); + +// Store four 4-D vectors as half-floats +// +inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4-D vector +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 &vec ); + +// Print a 4-D vector and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Vector4 &vec, const char * name ); + +#endif + +// A 3-D point in array-of-structures format +// +class Point3 +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + inline Point3( ) { }; + + // Construct a 3-D point from x, y, and z elements + // + inline Point3( float x, float y, float z ); + + // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type) + // + inline Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z ); + + // Copy elements from a 3-D vector into a 3-D point + // + explicit inline Point3( const Vector3 &vec ); + + // Set all elements of a 3-D point to the same scalar value + // + explicit inline Point3( float scalar ); + + // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type) + // + explicit inline Point3( const floatInVec &scalar ); + + // Set vector float data in a 3-D point + // + explicit inline Point3( __m128 vf4 ); + + // Get vector float data from a 3-D point + // + inline __m128 get128( ) const; + + // Assign one 3-D point to another + // + inline Point3 & operator =( const Point3 &pnt ); + + // Set the x element of a 3-D point + // + inline Point3 & setX( float x ); + + // Set the y element of a 3-D point + // + inline Point3 & setY( float y ); + + // Set the z element of a 3-D point + // + inline Point3 & setZ( float z ); + + // Set the x element of a 3-D point (scalar data contained in vector data type) + // + inline Point3 & setX( const floatInVec &x ); + + // Set the y element of a 3-D point (scalar data contained in vector data type) + // + inline Point3 & setY( const floatInVec &y ); + + // Set the z element of a 3-D point (scalar data contained in vector data type) + // + inline Point3 & setZ( const floatInVec &z ); + + // Get the x element of a 3-D point + // + inline const floatInVec getX( ) const; + + // Get the y element of a 3-D point + // + inline const floatInVec getY( ) const; + + // Get the z element of a 3-D point + // + inline const floatInVec getZ( ) const; + + // Set an x, y, or z element of a 3-D point by index + // + inline Point3 & setElem( int idx, float value ); + + // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type) + // + inline Point3 & setElem( int idx, const floatInVec &value ); + + // Get an x, y, or z element of a 3-D point by index + // + inline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + inline const floatInVec operator []( int idx ) const; + + // Subtract a 3-D point from another 3-D point + // + inline const Vector3 operator -( const Point3 &pnt ) const; + + // Add a 3-D point to a 3-D vector + // + inline const Point3 operator +( const Vector3 &vec ) const; + + // Subtract a 3-D vector from a 3-D point + // + inline const Point3 operator -( const Vector3 &vec ) const; + + // Perform compound assignment and addition with a 3-D vector + // + inline Point3 & operator +=( const Vector3 &vec ); + + // Perform compound assignment and subtraction by a 3-D vector + // + inline Point3 & operator -=( const Vector3 &vec ); + +}; + +// Multiply two 3-D points per element +// +inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Divide two 3-D points per element +// NOTE: +// Floating-point behavior matches standard library function divf4. +// +inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the reciprocal of a 3-D point per element +// NOTE: +// Floating-point behavior matches standard library function recipf4. +// +inline const Point3 recipPerElem( const Point3 &pnt ); + +// Compute the absolute value of a 3-D point per element +// +inline const Point3 absPerElem( const Point3 &pnt ); + +// Copy sign from one 3-D point to another, per element +// +inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum of two 3-D points per element +// +inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Minimum of two 3-D points per element +// +inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 ); + +// Maximum element of a 3-D point +// +inline const floatInVec maxElem( const Point3 &pnt ); + +// Minimum element of a 3-D point +// +inline const floatInVec minElem( const Point3 &pnt ); + +// Compute the sum of all elements of a 3-D point +// +inline const floatInVec sum( const Point3 &pnt ); + +// Apply uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 &pnt, float scaleVal ); + +// Apply uniform scale to a 3-D point (scalar data contained in vector data type) +// +inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal ); + +// Apply non-uniform scale to a 3-D point +// +inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec ); + +// Scalar projection of a 3-D point on a unit-length 3-D vector +// +inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec ); + +// Compute the square of the distance of a 3-D point from the coordinate-system origin +// +inline const floatInVec distSqrFromOrigin( const Point3 &pnt ); + +// Compute the distance of a 3-D point from the coordinate-system origin +// +inline const floatInVec distFromOrigin( const Point3 &pnt ); + +// Compute the square of the distance between two 3-D points +// +inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 ); + +// Compute the distance between two 3-D points +// +inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Linear interpolation between two 3-D points (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 ); + +// Conditionally select between two 3-D points +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 ); + +// Conditionally select between two 3-D points (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 ); + +// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word +// +inline void storeXYZ( const Point3 &pnt, __m128 * quad ); + +// Load four three-float 3-D points, stored in three quadwords +// +inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads ); + +// Store four 3-D points in three quadwords +// +inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads ); + +// Store eight 3-D points as half-floats +// +inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3-D point +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 &pnt ); + +// Print a 3-D point and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Point3 &pnt, const char * name ); + +#endif + +// A quaternion in array-of-structures format +// +class Quat +{ + __m128 mVec128; + +public: + // Default constructor; does no initialization + // + inline Quat( ) { }; + + // Construct a quaternion from x, y, z, and w elements + // + inline Quat( float x, float y, float z, float w ); + + // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type) + // + inline Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w ); + + // Construct a quaternion from a 3-D vector and a scalar + // + inline Quat( const Vector3 &xyz, float w ); + + // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type) + // + inline Quat( const Vector3 &xyz, const floatInVec &w ); + + // Copy elements from a 4-D vector into a quaternion + // + explicit inline Quat( const Vector4 &vec ); + + // Convert a rotation matrix to a unit-length quaternion + // + explicit inline Quat( const Matrix3 & rotMat ); + + // Set all elements of a quaternion to the same scalar value + // + explicit inline Quat( float scalar ); + + // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type) + // + explicit inline Quat( const floatInVec &scalar ); + + // Set vector float data in a quaternion + // + explicit inline Quat( __m128 vf4 ); + + // Get vector float data from a quaternion + // + inline __m128 get128( ) const; + + // Assign one quaternion to another + // + inline Quat & operator =( const Quat &quat ); + + // Set the x, y, and z elements of a quaternion + // NOTE: + // This function does not change the w element. + // + inline Quat & setXYZ( const Vector3 &vec ); + + // Get the x, y, and z elements of a quaternion + // + inline const Vector3 getXYZ( ) const; + + // Set the x element of a quaternion + // + inline Quat & setX( float x ); + + // Set the y element of a quaternion + // + inline Quat & setY( float y ); + + // Set the z element of a quaternion + // + inline Quat & setZ( float z ); + + // Set the w element of a quaternion + // + inline Quat & setW( float w ); + + // Set the x element of a quaternion (scalar data contained in vector data type) + // + inline Quat & setX( const floatInVec &x ); + + // Set the y element of a quaternion (scalar data contained in vector data type) + // + inline Quat & setY( const floatInVec &y ); + + // Set the z element of a quaternion (scalar data contained in vector data type) + // + inline Quat & setZ( const floatInVec &z ); + + // Set the w element of a quaternion (scalar data contained in vector data type) + // + inline Quat & setW( const floatInVec &w ); + + // Get the x element of a quaternion + // + inline const floatInVec getX( ) const; + + // Get the y element of a quaternion + // + inline const floatInVec getY( ) const; + + // Get the z element of a quaternion + // + inline const floatInVec getZ( ) const; + + // Get the w element of a quaternion + // + inline const floatInVec getW( ) const; + + // Set an x, y, z, or w element of a quaternion by index + // + inline Quat & setElem( int idx, float value ); + + // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type) + // + inline Quat & setElem( int idx, const floatInVec &value ); + + // Get an x, y, z, or w element of a quaternion by index + // + inline const floatInVec getElem( int idx ) const; + + // Subscripting operator to set or get an element + // + inline VecIdx operator []( int idx ); + + // Subscripting operator to get an element + // + inline const floatInVec operator []( int idx ) const; + + // Add two quaternions + // + inline const Quat operator +( const Quat &quat ) const; + + // Subtract a quaternion from another quaternion + // + inline const Quat operator -( const Quat &quat ) const; + + // Multiply two quaternions + // + inline const Quat operator *( const Quat &quat ) const; + + // Multiply a quaternion by a scalar + // + inline const Quat operator *( float scalar ) const; + + // Divide a quaternion by a scalar + // + inline const Quat operator /( float scalar ) const; + + // Multiply a quaternion by a scalar (scalar data contained in vector data type) + // + inline const Quat operator *( const floatInVec &scalar ) const; + + // Divide a quaternion by a scalar (scalar data contained in vector data type) + // + inline const Quat operator /( const floatInVec &scalar ) const; + + // Perform compound assignment and addition with a quaternion + // + inline Quat & operator +=( const Quat &quat ); + + // Perform compound assignment and subtraction by a quaternion + // + inline Quat & operator -=( const Quat &quat ); + + // Perform compound assignment and multiplication by a quaternion + // + inline Quat & operator *=( const Quat &quat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Quat & operator *=( float scalar ); + + // Perform compound assignment and division by a scalar + // + inline Quat & operator /=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + inline Quat & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and division by a scalar (scalar data contained in vector data type) + // + inline Quat & operator /=( const floatInVec &scalar ); + + // Negate all elements of a quaternion + // + inline const Quat operator -( ) const; + + // Construct an identity quaternion + // + static inline const Quat identity( ); + + // Construct a quaternion to rotate between two unit-length 3-D vectors + // NOTE: + // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions. + // + static inline const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ); + + // Construct a quaternion to rotate around a unit-length 3-D vector + // + static inline const Quat rotation( float radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static inline const Quat rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a quaternion to rotate around the x axis + // + static inline const Quat rotationX( float radians ); + + // Construct a quaternion to rotate around the y axis + // + static inline const Quat rotationY( float radians ); + + // Construct a quaternion to rotate around the z axis + // + static inline const Quat rotationZ( float radians ); + + // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type) + // + static inline const Quat rotationX( const floatInVec &radians ); + + // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type) + // + static inline const Quat rotationY( const floatInVec &radians ); + + // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type) + // + static inline const Quat rotationZ( const floatInVec &radians ); + +}; + +// Multiply a quaternion by a scalar +// +inline const Quat operator *( float scalar, const Quat &quat ); + +// Multiply a quaternion by a scalar (scalar data contained in vector data type) +// +inline const Quat operator *( const floatInVec &scalar, const Quat &quat ); + +// Compute the conjugate of a quaternion +// +inline const Quat conj( const Quat &quat ); + +// Use a unit-length quaternion to rotate a 3-D vector +// +inline const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec ); + +// Compute the dot product of two quaternions +// +inline const floatInVec dot( const Quat &quat0, const Quat &quat1 ); + +// Compute the norm of a quaternion +// +inline const floatInVec norm( const Quat &quat ); + +// Compute the length of a quaternion +// +inline const floatInVec length( const Quat &quat ); + +// Normalize a quaternion +// NOTE: +// The result is unpredictable when all elements of quat are at or near zero. +// +inline const Quat normalize( const Quat &quat ); + +// Linear interpolation between two quaternions +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 ); + +// Linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Does not clamp t between 0 and 1. +// +inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 ); + +// Spherical linear interpolation between two quaternions +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical linear interpolation between two quaternions (scalar data contained in vector data type) +// NOTE: +// Interpolates along the shortest path between orientations. +// Does not clamp t between 0 and 1. +// +inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 ); + +// Spherical quadrangle interpolation +// +inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Spherical quadrangle interpolation (scalar data contained in vector data type) +// +inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 ); + +// Conditionally select between two quaternions +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ); + +// Conditionally select between two quaternions (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a quaternion +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat &quat ); + +// Print a quaternion and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Quat &quat, const char * name ); + +#endif + +// A 3x3 matrix in array-of-structures format +// +class Matrix3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + +public: + // Default constructor; does no initialization + // + inline Matrix3( ) { }; + + // Copy a 3x3 matrix + // + inline Matrix3( const Matrix3 & mat ); + + // Construct a 3x3 matrix containing the specified columns + // + inline Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 ); + + // Construct a 3x3 rotation matrix from a unit-length quaternion + // + explicit inline Matrix3( const Quat &unitQuat ); + + // Set all elements of a 3x3 matrix to the same scalar value + // + explicit inline Matrix3( float scalar ); + + // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type) + // + explicit inline Matrix3( const floatInVec &scalar ); + + // Assign one 3x3 matrix to another + // + inline Matrix3 & operator =( const Matrix3 & mat ); + + // Set column 0 of a 3x3 matrix + // + inline Matrix3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x3 matrix + // + inline Matrix3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x3 matrix + // + inline Matrix3 & setCol2( const Vector3 &col2 ); + + // Get column 0 of a 3x3 matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x3 matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x3 matrix + // + inline const Vector3 getCol2( ) const; + + // Set the column of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x3 matrix referred to by the specified index + // + inline Matrix3 & setRow( int row, const Vector3 &vec ); + + // Get the column of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x3 matrix referred to by the specified index + // + inline const Vector3 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x3 matrix referred to by column and row indices + // + inline Matrix3 & setElem( int col, int row, float val ); + + // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type) + // + inline Matrix3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x3 matrix referred to by column and row indices + // + inline const floatInVec getElem( int col, int row ) const; + + // Add two 3x3 matrices + // + inline const Matrix3 operator +( const Matrix3 & mat ) const; + + // Subtract a 3x3 matrix from another 3x3 matrix + // + inline const Matrix3 operator -( const Matrix3 & mat ) const; + + // Negate all elements of a 3x3 matrix + // + inline const Matrix3 operator -( ) const; + + // Multiply a 3x3 matrix by a scalar + // + inline const Matrix3 operator *( float scalar ) const; + + // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) + // + inline const Matrix3 operator *( const floatInVec &scalar ) const; + + // Multiply a 3x3 matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply two 3x3 matrices + // + inline const Matrix3 operator *( const Matrix3 & mat ) const; + + // Perform compound assignment and addition with a 3x3 matrix + // + inline Matrix3 & operator +=( const Matrix3 & mat ); + + // Perform compound assignment and subtraction by a 3x3 matrix + // + inline Matrix3 & operator -=( const Matrix3 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix3 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + inline Matrix3 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 3x3 matrix + // + inline Matrix3 & operator *=( const Matrix3 & mat ); + + // Construct an identity 3x3 matrix + // + static inline const Matrix3 identity( ); + + // Construct a 3x3 matrix to rotate around the x axis + // + static inline const Matrix3 rotationX( float radians ); + + // Construct a 3x3 matrix to rotate around the y axis + // + static inline const Matrix3 rotationY( float radians ); + + // Construct a 3x3 matrix to rotate around the z axis + // + static inline const Matrix3 rotationZ( float radians ); + + // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static inline const Matrix3 rotationX( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static inline const Matrix3 rotationY( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static inline const Matrix3 rotationZ( const floatInVec &radians ); + + // Construct a 3x3 matrix to rotate around the x, y, and z axes + // + static inline const Matrix3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static inline const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix3 rotation( const Quat &unitQuat ); + + // Construct a 3x3 matrix to perform scaling + // + static inline const Matrix3 scale( const Vector3 &scaleVec ); + +}; +// Multiply a 3x3 matrix by a scalar +// +inline const Matrix3 operator *( float scalar, const Matrix3 & mat ); + +// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type) +// +inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ); + +// Append (post-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x3 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ); + +// Multiply two 3x3 matrices per element +// +inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ); + +// Compute the absolute value of a 3x3 matrix per element +// +inline const Matrix3 absPerElem( const Matrix3 & mat ); + +// Transpose of a 3x3 matrix +// +inline const Matrix3 transpose( const Matrix3 & mat ); + +// Compute the inverse of a 3x3 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix3 inverse( const Matrix3 & mat ); + +// Determinant of a 3x3 matrix +// +inline const floatInVec determinant( const Matrix3 & mat ); + +// Conditionally select between two 3x3 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ); + +// Conditionally select between two 3x3 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x3 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat ); + +// Print a 3x3 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix3 & mat, const char * name ); + +#endif + +// A 4x4 matrix in array-of-structures format +// +class Matrix4 +{ + Vector4 mCol0; + Vector4 mCol1; + Vector4 mCol2; + Vector4 mCol3; + +public: + // Default constructor; does no initialization + // + inline Matrix4( ) { }; + + // Copy a 4x4 matrix + // + inline Matrix4( const Matrix4 & mat ); + + // Construct a 4x4 matrix containing the specified columns + // + inline Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 ); + + // Construct a 4x4 matrix from a 3x4 transformation matrix + // + explicit inline Matrix4( const Transform3 & mat ); + + // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector + // + inline Matrix4( const Matrix3 & mat, const Vector3 &translateVec ); + + // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector + // + inline Matrix4( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 4x4 matrix to the same scalar value + // + explicit inline Matrix4( float scalar ); + + // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type) + // + explicit inline Matrix4( const floatInVec &scalar ); + + // Assign one 4x4 matrix to another + // + inline Matrix4 & operator =( const Matrix4 & mat ); + + // Set the upper-left 3x3 submatrix + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 4x4 matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // NOTE: + // This function does not change the bottom row elements. + // + inline Matrix4 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 4x4 matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 4x4 matrix + // + inline Matrix4 & setCol0( const Vector4 &col0 ); + + // Set column 1 of a 4x4 matrix + // + inline Matrix4 & setCol1( const Vector4 &col1 ); + + // Set column 2 of a 4x4 matrix + // + inline Matrix4 & setCol2( const Vector4 &col2 ); + + // Set column 3 of a 4x4 matrix + // + inline Matrix4 & setCol3( const Vector4 &col3 ); + + // Get column 0 of a 4x4 matrix + // + inline const Vector4 getCol0( ) const; + + // Get column 1 of a 4x4 matrix + // + inline const Vector4 getCol1( ) const; + + // Get column 2 of a 4x4 matrix + // + inline const Vector4 getCol2( ) const; + + // Get column 3 of a 4x4 matrix + // + inline const Vector4 getCol3( ) const; + + // Set the column of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setCol( int col, const Vector4 &vec ); + + // Set the row of a 4x4 matrix referred to by the specified index + // + inline Matrix4 & setRow( int row, const Vector4 &vec ); + + // Get the column of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getCol( int col ) const; + + // Get the row of a 4x4 matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector4 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector4 operator []( int col ) const; + + // Set the element of a 4x4 matrix referred to by column and row indices + // + inline Matrix4 & setElem( int col, int row, float val ); + + // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type) + // + inline Matrix4 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 4x4 matrix referred to by column and row indices + // + inline const floatInVec getElem( int col, int row ) const; + + // Add two 4x4 matrices + // + inline const Matrix4 operator +( const Matrix4 & mat ) const; + + // Subtract a 4x4 matrix from another 4x4 matrix + // + inline const Matrix4 operator -( const Matrix4 & mat ) const; + + // Negate all elements of a 4x4 matrix + // + inline const Matrix4 operator -( ) const; + + // Multiply a 4x4 matrix by a scalar + // + inline const Matrix4 operator *( float scalar ) const; + + // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) + // + inline const Matrix4 operator *( const floatInVec &scalar ) const; + + // Multiply a 4x4 matrix by a 4-D vector + // + inline const Vector4 operator *( const Vector4 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D vector + // + inline const Vector4 operator *( const Vector3 &vec ) const; + + // Multiply a 4x4 matrix by a 3-D point + // + inline const Vector4 operator *( const Point3 &pnt ) const; + + // Multiply two 4x4 matrices + // + inline const Matrix4 operator *( const Matrix4 & mat ) const; + + // Multiply a 4x4 matrix by a 3x4 transformation matrix + // + inline const Matrix4 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and addition with a 4x4 matrix + // + inline Matrix4 & operator +=( const Matrix4 & mat ); + + // Perform compound assignment and subtraction by a 4x4 matrix + // + inline Matrix4 & operator -=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a scalar + // + inline Matrix4 & operator *=( float scalar ); + + // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type) + // + inline Matrix4 & operator *=( const floatInVec &scalar ); + + // Perform compound assignment and multiplication by a 4x4 matrix + // + inline Matrix4 & operator *=( const Matrix4 & mat ); + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Matrix4 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 4x4 matrix + // + static inline const Matrix4 identity( ); + + // Construct a 4x4 matrix to rotate around the x axis + // + static inline const Matrix4 rotationX( float radians ); + + // Construct a 4x4 matrix to rotate around the y axis + // + static inline const Matrix4 rotationY( float radians ); + + // Construct a 4x4 matrix to rotate around the z axis + // + static inline const Matrix4 rotationZ( float radians ); + + // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type) + // + static inline const Matrix4 rotationX( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type) + // + static inline const Matrix4 rotationY( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type) + // + static inline const Matrix4 rotationZ( const floatInVec &radians ); + + // Construct a 4x4 matrix to rotate around the x, y, and z axes + // + static inline const Matrix4 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector + // + static inline const Matrix4 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static inline const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Matrix4 rotation( const Quat &unitQuat ); + + // Construct a 4x4 matrix to perform scaling + // + static inline const Matrix4 scale( const Vector3 &scaleVec ); + + // Construct a 4x4 matrix to perform translation + // + static inline const Matrix4 translation( const Vector3 &translateVec ); + + // Construct viewing matrix based on eye, position looked at, and up direction + // + static inline const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec ); + + // Construct a perspective projection matrix + // + static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar ); + + // Construct a perspective projection matrix based on frustum + // + static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar ); + + // Construct an orthographic projection matrix + // + static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar ); + +}; +// Multiply a 4x4 matrix by a scalar +// +inline const Matrix4 operator *( float scalar, const Matrix4 & mat ); + +// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type) +// +inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat ); + +// Append (post-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 4x4 matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat ); + +// Multiply two 4x4 matrices per element +// +inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 ); + +// Compute the absolute value of a 4x4 matrix per element +// +inline const Matrix4 absPerElem( const Matrix4 & mat ); + +// Transpose of a 4x4 matrix +// +inline const Matrix4 transpose( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix +// NOTE: +// Result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 inverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0. +// +inline const Matrix4 affineInverse( const Matrix4 & mat ); + +// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. +// +inline const Matrix4 orthoInverse( const Matrix4 & mat ); + +// Determinant of a 4x4 matrix +// +inline const floatInVec determinant( const Matrix4 & mat ); + +// Conditionally select between two 4x4 matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 ); + +// Conditionally select between two 4x4 matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 4x4 matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat ); + +// Print a 4x4 matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Matrix4 & mat, const char * name ); + +#endif + +// A 3x4 transformation matrix in array-of-structures format +// +class Transform3 +{ + Vector3 mCol0; + Vector3 mCol1; + Vector3 mCol2; + Vector3 mCol3; + +public: + // Default constructor; does no initialization + // + inline Transform3( ) { }; + + // Copy a 3x4 transformation matrix + // + inline Transform3( const Transform3 & tfrm ); + + // Construct a 3x4 transformation matrix containing the specified columns + // + inline Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 ); + + // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector + // + inline Transform3( const Matrix3 & tfrm, const Vector3 &translateVec ); + + // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector + // + inline Transform3( const Quat &unitQuat, const Vector3 &translateVec ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value + // + explicit inline Transform3( float scalar ); + + // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type) + // + explicit inline Transform3( const floatInVec &scalar ); + + // Assign one 3x4 transformation matrix to another + // + inline Transform3 & operator =( const Transform3 & tfrm ); + + // Set the upper-left 3x3 submatrix + // + inline Transform3 & setUpper3x3( const Matrix3 & mat3 ); + + // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix + // + inline const Matrix3 getUpper3x3( ) const; + + // Set translation component + // + inline Transform3 & setTranslation( const Vector3 &translateVec ); + + // Get the translation component of a 3x4 transformation matrix + // + inline const Vector3 getTranslation( ) const; + + // Set column 0 of a 3x4 transformation matrix + // + inline Transform3 & setCol0( const Vector3 &col0 ); + + // Set column 1 of a 3x4 transformation matrix + // + inline Transform3 & setCol1( const Vector3 &col1 ); + + // Set column 2 of a 3x4 transformation matrix + // + inline Transform3 & setCol2( const Vector3 &col2 ); + + // Set column 3 of a 3x4 transformation matrix + // + inline Transform3 & setCol3( const Vector3 &col3 ); + + // Get column 0 of a 3x4 transformation matrix + // + inline const Vector3 getCol0( ) const; + + // Get column 1 of a 3x4 transformation matrix + // + inline const Vector3 getCol1( ) const; + + // Get column 2 of a 3x4 transformation matrix + // + inline const Vector3 getCol2( ) const; + + // Get column 3 of a 3x4 transformation matrix + // + inline const Vector3 getCol3( ) const; + + // Set the column of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setCol( int col, const Vector3 &vec ); + + // Set the row of a 3x4 transformation matrix referred to by the specified index + // + inline Transform3 & setRow( int row, const Vector4 &vec ); + + // Get the column of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector3 getCol( int col ) const; + + // Get the row of a 3x4 transformation matrix referred to by the specified index + // + inline const Vector4 getRow( int row ) const; + + // Subscripting operator to set or get a column + // + inline Vector3 & operator []( int col ); + + // Subscripting operator to get a column + // + inline const Vector3 operator []( int col ) const; + + // Set the element of a 3x4 transformation matrix referred to by column and row indices + // + inline Transform3 & setElem( int col, int row, float val ); + + // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type) + // + inline Transform3 & setElem( int col, int row, const floatInVec &val ); + + // Get the element of a 3x4 transformation matrix referred to by column and row indices + // + inline const floatInVec getElem( int col, int row ) const; + + // Multiply a 3x4 transformation matrix by a 3-D vector + // + inline const Vector3 operator *( const Vector3 &vec ) const; + + // Multiply a 3x4 transformation matrix by a 3-D point + // + inline const Point3 operator *( const Point3 &pnt ) const; + + // Multiply two 3x4 transformation matrices + // + inline const Transform3 operator *( const Transform3 & tfrm ) const; + + // Perform compound assignment and multiplication by a 3x4 transformation matrix + // + inline Transform3 & operator *=( const Transform3 & tfrm ); + + // Construct an identity 3x4 transformation matrix + // + static inline const Transform3 identity( ); + + // Construct a 3x4 transformation matrix to rotate around the x axis + // + static inline const Transform3 rotationX( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis + // + static inline const Transform3 rotationY( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis + // + static inline const Transform3 rotationZ( float radians ); + + // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type) + // + static inline const Transform3 rotationX( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type) + // + static inline const Transform3 rotationY( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type) + // + static inline const Transform3 rotationZ( const floatInVec &radians ); + + // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes + // + static inline const Transform3 rotationZYX( const Vector3 &radiansXYZ ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector + // + static inline const Transform3 rotation( float radians, const Vector3 &unitVec ); + + // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type) + // + static inline const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec ); + + // Construct a rotation matrix from a unit-length quaternion + // + static inline const Transform3 rotation( const Quat &unitQuat ); + + // Construct a 3x4 transformation matrix to perform scaling + // + static inline const Transform3 scale( const Vector3 &scaleVec ); + + // Construct a 3x4 transformation matrix to perform translation + // + static inline const Transform3 translation( const Vector3 &translateVec ); + +}; +// Append (post-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec ); + +// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix +// NOTE: +// Faster than creating and multiplying a scale transformation matrix. +// +inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm ); + +// Multiply two 3x4 transformation matrices per element +// +inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 ); + +// Compute the absolute value of a 3x4 transformation matrix per element +// +inline const Transform3 absPerElem( const Transform3 & tfrm ); + +// Inverse of a 3x4 transformation matrix +// NOTE: +// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0. +// +inline const Transform3 inverse( const Transform3 & tfrm ); + +// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix +// NOTE: +// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions. +// +inline const Transform3 orthoInverse( const Transform3 & tfrm ); + +// Conditionally select between two 3x4 transformation matrices +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// However, the transfer of select1 to a VMX register may use more processing time than a branch. +// Use the boolInVec version for better performance. +// +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 ); + +// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type) +// NOTE: +// This function uses a conditional select instruction to avoid a branch. +// +inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 ); + +#ifdef _VECTORMATH_DEBUG + +// Print a 3x4 transformation matrix +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm ); + +// Print a 3x4 transformation matrix and an associated string identifier +// NOTE: +// Function is only defined when _VECTORMATH_DEBUG is defined. +// +inline void print( const Transform3 & tfrm, const char * name ); + +#endif + +} // namespace Aos +} // namespace Vectormath + +#include "vec_aos.h" +#include "quat_aos.h" +#include "mat_aos.h" + +#endif