bt->b3
This commit is contained in:
879
src/Bullet3Common/b3Quaternion.h
Normal file
879
src/Bullet3Common/b3Quaternion.h
Normal file
@@ -0,0 +1,879 @@
|
||||
/*
|
||||
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef BT_SIMD__QUATERNION_H_
|
||||
#define BT_SIMD__QUATERNION_H_
|
||||
|
||||
|
||||
#include "b3Vector3.h"
|
||||
#include "b3QuadWord.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef BT_USE_SSE
|
||||
|
||||
const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
|
||||
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
|
||||
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
|
||||
|
||||
#endif
|
||||
|
||||
/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
|
||||
class b3Quaternion : public b3QuadWord {
|
||||
public:
|
||||
/**@brief No initialization constructor */
|
||||
b3Quaternion() {}
|
||||
|
||||
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
|
||||
// Set Vector
|
||||
SIMD_FORCE_INLINE b3Quaternion(const btSimdFloat4 vec)
|
||||
{
|
||||
mVec128 = vec;
|
||||
}
|
||||
|
||||
// Copy constructor
|
||||
SIMD_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
|
||||
{
|
||||
mVec128 = rhs.mVec128;
|
||||
}
|
||||
|
||||
// Assignment Operator
|
||||
SIMD_FORCE_INLINE b3Quaternion&
|
||||
operator=(const b3Quaternion& v)
|
||||
{
|
||||
mVec128 = v.mVec128;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// template <typename b3Scalar>
|
||||
// explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
|
||||
/**@brief Constructor from scalars */
|
||||
b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
|
||||
: b3QuadWord(_x, _y, _z, _w)
|
||||
{
|
||||
btAssert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
|
||||
}
|
||||
/**@brief Axis angle Constructor
|
||||
* @param axis The axis which the rotation is around
|
||||
* @param angle The magnitude of the rotation around the angle (Radians) */
|
||||
b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
|
||||
{
|
||||
setRotation(_axis, _angle);
|
||||
}
|
||||
/**@brief Constructor from Euler angles
|
||||
* @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
|
||||
* @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
|
||||
* @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
|
||||
b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
|
||||
{
|
||||
#ifndef BT_EULER_DEFAULT_ZYX
|
||||
setEuler(yaw, pitch, roll);
|
||||
#else
|
||||
setEulerZYX(yaw, pitch, roll);
|
||||
#endif
|
||||
}
|
||||
/**@brief Set the rotation using axis angle notation
|
||||
* @param axis The axis around which to rotate
|
||||
* @param angle The magnitude of the rotation in Radians */
|
||||
void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
|
||||
{
|
||||
b3Scalar d = axis.length();
|
||||
btAssert(d != b3Scalar(0.0));
|
||||
b3Scalar s = btSin(_angle * b3Scalar(0.5)) / d;
|
||||
setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
|
||||
btCos(_angle * b3Scalar(0.5)));
|
||||
}
|
||||
/**@brief Set the quaternion using Euler angles
|
||||
* @param yaw Angle around Y
|
||||
* @param pitch Angle around X
|
||||
* @param roll Angle around Z */
|
||||
void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
|
||||
{
|
||||
b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
|
||||
b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
|
||||
b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
|
||||
b3Scalar cosYaw = btCos(halfYaw);
|
||||
b3Scalar sinYaw = btSin(halfYaw);
|
||||
b3Scalar cosPitch = btCos(halfPitch);
|
||||
b3Scalar sinPitch = btSin(halfPitch);
|
||||
b3Scalar cosRoll = btCos(halfRoll);
|
||||
b3Scalar sinRoll = btSin(halfRoll);
|
||||
setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
|
||||
cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
|
||||
sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
|
||||
cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
|
||||
}
|
||||
/**@brief Set the quaternion using euler angles
|
||||
* @param yaw Angle around Z
|
||||
* @param pitch Angle around Y
|
||||
* @param roll Angle around X */
|
||||
void setEulerZYX(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
|
||||
{
|
||||
b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
|
||||
b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
|
||||
b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
|
||||
b3Scalar cosYaw = btCos(halfYaw);
|
||||
b3Scalar sinYaw = btSin(halfYaw);
|
||||
b3Scalar cosPitch = btCos(halfPitch);
|
||||
b3Scalar sinPitch = btSin(halfPitch);
|
||||
b3Scalar cosRoll = btCos(halfRoll);
|
||||
b3Scalar sinRoll = btSin(halfRoll);
|
||||
setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
|
||||
cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
|
||||
cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
|
||||
cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
|
||||
}
|
||||
/**@brief Add two quaternions
|
||||
* @param q The quaternion to add to this one */
|
||||
SIMD_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
mVec128 = _mm_add_ps(mVec128, q.mVec128);
|
||||
#elif defined(BT_USE_NEON)
|
||||
mVec128 = vaddq_f32(mVec128, q.mVec128);
|
||||
#else
|
||||
m_floats[0] += q.getX();
|
||||
m_floats[1] += q.getY();
|
||||
m_floats[2] += q.getZ();
|
||||
m_floats[3] += q.m_floats[3];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**@brief Subtract out a quaternion
|
||||
* @param q The quaternion to subtract from this one */
|
||||
b3Quaternion& operator-=(const b3Quaternion& q)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
mVec128 = _mm_sub_ps(mVec128, q.mVec128);
|
||||
#elif defined(BT_USE_NEON)
|
||||
mVec128 = vsubq_f32(mVec128, q.mVec128);
|
||||
#else
|
||||
m_floats[0] -= q.getX();
|
||||
m_floats[1] -= q.getY();
|
||||
m_floats[2] -= q.getZ();
|
||||
m_floats[3] -= q.m_floats[3];
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**@brief Scale this quaternion
|
||||
* @param s The scalar to scale by */
|
||||
b3Quaternion& operator*=(const b3Scalar& s)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vs = _mm_load_ss(&s); // (S 0 0 0)
|
||||
vs = bt_pshufd_ps(vs, 0); // (S S S S)
|
||||
mVec128 = _mm_mul_ps(mVec128, vs);
|
||||
#elif defined(BT_USE_NEON)
|
||||
mVec128 = vmulq_n_f32(mVec128, s);
|
||||
#else
|
||||
m_floats[0] *= s;
|
||||
m_floats[1] *= s;
|
||||
m_floats[2] *= s;
|
||||
m_floats[3] *= s;
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**@brief Multiply this quaternion by q on the right
|
||||
* @param q The other quaternion
|
||||
* Equivilant to this = this * q */
|
||||
b3Quaternion& operator*=(const b3Quaternion& q)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vQ2 = q.get128();
|
||||
|
||||
__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
|
||||
__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
|
||||
|
||||
A1 = A1 * B1;
|
||||
|
||||
__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
|
||||
__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
|
||||
|
||||
A2 = A2 * B2;
|
||||
|
||||
B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
|
||||
B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
|
||||
|
||||
B1 = B1 * B2; // A3 *= B3
|
||||
|
||||
mVec128 = bt_splat_ps(mVec128, 3); // A0
|
||||
mVec128 = mVec128 * vQ2; // A0 * B0
|
||||
|
||||
A1 = A1 + A2; // AB12
|
||||
mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
|
||||
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
|
||||
mVec128 = mVec128+ A1; // AB03 + AB12
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
||||
float32x4_t vQ1 = mVec128;
|
||||
float32x4_t vQ2 = q.get128();
|
||||
float32x4_t A0, A1, B1, A2, B2, A3, B3;
|
||||
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
|
||||
|
||||
{
|
||||
float32x2x2_t tmp;
|
||||
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
|
||||
vQ1zx = tmp.val[0];
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
|
||||
vQ2zx = tmp.val[0];
|
||||
}
|
||||
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
|
||||
|
||||
vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
|
||||
|
||||
vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
|
||||
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
|
||||
|
||||
A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
|
||||
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
|
||||
|
||||
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
|
||||
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
|
||||
|
||||
A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
|
||||
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
|
||||
|
||||
A1 = vmulq_f32(A1, B1);
|
||||
A2 = vmulq_f32(A2, B2);
|
||||
A3 = vmulq_f32(A3, B3); // A3 *= B3
|
||||
A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
|
||||
|
||||
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
|
||||
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
|
||||
|
||||
// change the sign of the last element
|
||||
A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
|
||||
A0 = vaddq_f32(A0, A1); // AB03 + AB12
|
||||
|
||||
mVec128 = A0;
|
||||
#else
|
||||
setValue(
|
||||
m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
|
||||
m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
|
||||
m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
|
||||
m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
/**@brief Return the dot product between this quaternion and another
|
||||
* @param q The other quaternion */
|
||||
b3Scalar dot(const b3Quaternion& q) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vd;
|
||||
|
||||
vd = _mm_mul_ps(mVec128, q.mVec128);
|
||||
|
||||
__m128 t = _mm_movehl_ps(vd, vd);
|
||||
vd = _mm_add_ps(vd, t);
|
||||
t = _mm_shuffle_ps(vd, vd, 0x55);
|
||||
vd = _mm_add_ss(vd, t);
|
||||
|
||||
return _mm_cvtss_f32(vd);
|
||||
#elif defined(BT_USE_NEON)
|
||||
float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
|
||||
float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
|
||||
x = vpadd_f32(x, x);
|
||||
return vget_lane_f32(x, 0);
|
||||
#else
|
||||
return m_floats[0] * q.getX() +
|
||||
m_floats[1] * q.getY() +
|
||||
m_floats[2] * q.getZ() +
|
||||
m_floats[3] * q.m_floats[3];
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return the length squared of the quaternion */
|
||||
b3Scalar length2() const
|
||||
{
|
||||
return dot(*this);
|
||||
}
|
||||
|
||||
/**@brief Return the length of the quaternion */
|
||||
b3Scalar length() const
|
||||
{
|
||||
return btSqrt(length2());
|
||||
}
|
||||
|
||||
/**@brief Normalize the quaternion
|
||||
* Such that x^2 + y^2 + z^2 +w^2 = 1 */
|
||||
b3Quaternion& normalize()
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vd;
|
||||
|
||||
vd = _mm_mul_ps(mVec128, mVec128);
|
||||
|
||||
__m128 t = _mm_movehl_ps(vd, vd);
|
||||
vd = _mm_add_ps(vd, t);
|
||||
t = _mm_shuffle_ps(vd, vd, 0x55);
|
||||
vd = _mm_add_ss(vd, t);
|
||||
|
||||
vd = _mm_sqrt_ss(vd);
|
||||
vd = _mm_div_ss(vOnes, vd);
|
||||
vd = bt_pshufd_ps(vd, 0); // splat
|
||||
mVec128 = _mm_mul_ps(mVec128, vd);
|
||||
|
||||
return *this;
|
||||
#else
|
||||
return *this /= length();
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return a scaled version of this quaternion
|
||||
* @param s The scale factor */
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator*(const b3Scalar& s) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vs = _mm_load_ss(&s); // (S 0 0 0)
|
||||
vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
|
||||
|
||||
return b3Quaternion(_mm_mul_ps(mVec128, vs));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Quaternion(vmulq_n_f32(mVec128, s));
|
||||
#else
|
||||
return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return an inversely scaled versionof this quaternion
|
||||
* @param s The inverse scale factor */
|
||||
b3Quaternion operator/(const b3Scalar& s) const
|
||||
{
|
||||
btAssert(s != b3Scalar(0.0));
|
||||
return *this * (b3Scalar(1.0) / s);
|
||||
}
|
||||
|
||||
/**@brief Inversely scale this quaternion
|
||||
* @param s The scale factor */
|
||||
b3Quaternion& operator/=(const b3Scalar& s)
|
||||
{
|
||||
btAssert(s != b3Scalar(0.0));
|
||||
return *this *= b3Scalar(1.0) / s;
|
||||
}
|
||||
|
||||
/**@brief Return a normalized version of this quaternion */
|
||||
b3Quaternion normalized() const
|
||||
{
|
||||
return *this / length();
|
||||
}
|
||||
/**@brief Return the angle between this quaternion and the other
|
||||
* @param q The other quaternion */
|
||||
b3Scalar angle(const b3Quaternion& q) const
|
||||
{
|
||||
b3Scalar s = btSqrt(length2() * q.length2());
|
||||
btAssert(s != b3Scalar(0.0));
|
||||
return btAcos(dot(q) / s);
|
||||
}
|
||||
/**@brief Return the angle of rotation represented by this quaternion */
|
||||
b3Scalar getAngle() const
|
||||
{
|
||||
b3Scalar s = b3Scalar(2.) * btAcos(m_floats[3]);
|
||||
return s;
|
||||
}
|
||||
|
||||
/**@brief Return the axis of the rotation represented by this quaternion */
|
||||
b3Vector3 getAxis() const
|
||||
{
|
||||
b3Scalar s_squared = 1.f-m_floats[3]*m_floats[3];
|
||||
|
||||
if (s_squared < b3Scalar(10.) * SIMD_EPSILON) //Check for divide by zero
|
||||
return b3Vector3(1.0, 0.0, 0.0); // Arbitrary
|
||||
b3Scalar s = 1.f/btSqrt(s_squared);
|
||||
return b3Vector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
|
||||
}
|
||||
|
||||
/**@brief Return the inverse of this quaternion */
|
||||
b3Quaternion inverse() const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return b3Quaternion(_mm_xor_ps(mVec128, vQInv));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Quaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
|
||||
#else
|
||||
return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return the sum of this quaternion and the other
|
||||
* @param q2 The other quaternion */
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator+(const b3Quaternion& q2) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
|
||||
#else
|
||||
const b3Quaternion& q1 = *this;
|
||||
return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return the difference between this quaternion and the other
|
||||
* @param q2 The other quaternion */
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator-(const b3Quaternion& q2) const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
|
||||
#else
|
||||
const b3Quaternion& q1 = *this;
|
||||
return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Return the negative of this quaternion
|
||||
* This simply negates each element */
|
||||
SIMD_FORCE_INLINE b3Quaternion operator-() const
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return b3Quaternion(_mm_xor_ps(mVec128, btvMzeroMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Quaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
|
||||
#else
|
||||
const b3Quaternion& q2 = *this;
|
||||
return b3Quaternion( - q2.getX(), - q2.getY(), - q2.getZ(), - q2.m_floats[3]);
|
||||
#endif
|
||||
}
|
||||
/**@todo document this and it's use */
|
||||
SIMD_FORCE_INLINE b3Quaternion farthest( const b3Quaternion& qd) const
|
||||
{
|
||||
b3Quaternion diff,sum;
|
||||
diff = *this - qd;
|
||||
sum = *this + qd;
|
||||
if( diff.dot(diff) > sum.dot(sum) )
|
||||
return qd;
|
||||
return (-qd);
|
||||
}
|
||||
|
||||
/**@todo document this and it's use */
|
||||
SIMD_FORCE_INLINE b3Quaternion nearest( const b3Quaternion& qd) const
|
||||
{
|
||||
b3Quaternion diff,sum;
|
||||
diff = *this - qd;
|
||||
sum = *this + qd;
|
||||
if( diff.dot(diff) < sum.dot(sum) )
|
||||
return qd;
|
||||
return (-qd);
|
||||
}
|
||||
|
||||
|
||||
/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
|
||||
* @param q The other quaternion to interpolate with
|
||||
* @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
|
||||
* Slerp interpolates assuming constant velocity. */
|
||||
b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
|
||||
{
|
||||
b3Scalar magnitude = btSqrt(length2() * q.length2());
|
||||
btAssert(magnitude > b3Scalar(0));
|
||||
|
||||
b3Scalar product = dot(q) / magnitude;
|
||||
if (btFabs(product) < b3Scalar(1))
|
||||
{
|
||||
// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
|
||||
const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
|
||||
|
||||
const b3Scalar theta = btAcos(sign * product);
|
||||
const b3Scalar s1 = btSin(sign * t * theta);
|
||||
const b3Scalar d = b3Scalar(1.0) / btSin(theta);
|
||||
const b3Scalar s0 = btSin((b3Scalar(1.0) - t) * theta);
|
||||
|
||||
return b3Quaternion(
|
||||
(m_floats[0] * s0 + q.getX() * s1) * d,
|
||||
(m_floats[1] * s0 + q.getY() * s1) * d,
|
||||
(m_floats[2] * s0 + q.getZ() * s1) * d,
|
||||
(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
|
||||
}
|
||||
else
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
static const b3Quaternion& getIdentity()
|
||||
{
|
||||
static const b3Quaternion identityQuat(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.),b3Scalar(1.));
|
||||
return identityQuat;
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**@brief Return the product of two quaternions */
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator*(const b3Quaternion& q1, const b3Quaternion& q2)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vQ1 = q1.get128();
|
||||
__m128 vQ2 = q2.get128();
|
||||
__m128 A0, A1, B1, A2, B2;
|
||||
|
||||
A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
|
||||
B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
|
||||
|
||||
A1 = A1 * B1;
|
||||
|
||||
A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
|
||||
B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
|
||||
|
||||
A2 = A2 * B2;
|
||||
|
||||
B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
|
||||
B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
|
||||
|
||||
B1 = B1 * B2; // A3 *= B3
|
||||
|
||||
A0 = bt_splat_ps(vQ1, 3); // A0
|
||||
A0 = A0 * vQ2; // A0 * B0
|
||||
|
||||
A1 = A1 + A2; // AB12
|
||||
A0 = A0 - B1; // AB03 = AB0 - AB3
|
||||
|
||||
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
|
||||
A0 = A0 + A1; // AB03 + AB12
|
||||
|
||||
return b3Quaternion(A0);
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
||||
float32x4_t vQ1 = q1.get128();
|
||||
float32x4_t vQ2 = q2.get128();
|
||||
float32x4_t A0, A1, B1, A2, B2, A3, B3;
|
||||
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
|
||||
|
||||
{
|
||||
float32x2x2_t tmp;
|
||||
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
|
||||
vQ1zx = tmp.val[0];
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
|
||||
vQ2zx = tmp.val[0];
|
||||
}
|
||||
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
|
||||
|
||||
vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
|
||||
|
||||
vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
|
||||
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
|
||||
|
||||
A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
|
||||
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
|
||||
|
||||
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
|
||||
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
|
||||
|
||||
A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
|
||||
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
|
||||
|
||||
A1 = vmulq_f32(A1, B1);
|
||||
A2 = vmulq_f32(A2, B2);
|
||||
A3 = vmulq_f32(A3, B3); // A3 *= B3
|
||||
A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
|
||||
|
||||
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
|
||||
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
|
||||
|
||||
// change the sign of the last element
|
||||
A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
|
||||
A0 = vaddq_f32(A0, A1); // AB03 + AB12
|
||||
|
||||
return b3Quaternion(A0);
|
||||
|
||||
#else
|
||||
return b3Quaternion(
|
||||
q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
|
||||
q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
|
||||
q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
|
||||
q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator*(const b3Quaternion& q, const b3Vector3& w)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vQ1 = q.get128();
|
||||
__m128 vQ2 = w.get128();
|
||||
__m128 A1, B1, A2, B2, A3, B3;
|
||||
|
||||
A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
|
||||
B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
|
||||
|
||||
A1 = A1 * B1;
|
||||
|
||||
A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
|
||||
B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
|
||||
|
||||
A2 = A2 * B2;
|
||||
|
||||
A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
|
||||
B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
|
||||
|
||||
A3 = A3 * B3; // A3 *= B3
|
||||
|
||||
A1 = A1 + A2; // AB12
|
||||
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
|
||||
A1 = A1 - A3; // AB123 = AB12 - AB3
|
||||
|
||||
return b3Quaternion(A1);
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
||||
float32x4_t vQ1 = q.get128();
|
||||
float32x4_t vQ2 = w.get128();
|
||||
float32x4_t A1, B1, A2, B2, A3, B3;
|
||||
float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
|
||||
|
||||
vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
|
||||
{
|
||||
float32x2x2_t tmp;
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
|
||||
vQ2zx = tmp.val[0];
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
|
||||
vQ1zx = tmp.val[0];
|
||||
}
|
||||
|
||||
vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
|
||||
|
||||
vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
|
||||
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
|
||||
|
||||
A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
|
||||
B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
|
||||
|
||||
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
|
||||
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
|
||||
|
||||
A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
|
||||
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
|
||||
|
||||
A1 = vmulq_f32(A1, B1);
|
||||
A2 = vmulq_f32(A2, B2);
|
||||
A3 = vmulq_f32(A3, B3); // A3 *= B3
|
||||
|
||||
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
|
||||
|
||||
// change the sign of the last element
|
||||
A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
|
||||
|
||||
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
|
||||
|
||||
return b3Quaternion(A1);
|
||||
|
||||
#else
|
||||
return b3Quaternion(
|
||||
q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
|
||||
q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
|
||||
q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
|
||||
-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
operator*(const b3Vector3& w, const b3Quaternion& q)
|
||||
{
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
__m128 vQ1 = w.get128();
|
||||
__m128 vQ2 = q.get128();
|
||||
__m128 A1, B1, A2, B2, A3, B3;
|
||||
|
||||
A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
|
||||
B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
|
||||
|
||||
A1 = A1 * B1;
|
||||
|
||||
A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
|
||||
B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
|
||||
|
||||
A2 = A2 *B2;
|
||||
|
||||
A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
|
||||
B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
|
||||
|
||||
A3 = A3 * B3; // A3 *= B3
|
||||
|
||||
A1 = A1 + A2; // AB12
|
||||
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
|
||||
A1 = A1 - A3; // AB123 = AB12 - AB3
|
||||
|
||||
return b3Quaternion(A1);
|
||||
|
||||
#elif defined(BT_USE_NEON)
|
||||
|
||||
float32x4_t vQ1 = w.get128();
|
||||
float32x4_t vQ2 = q.get128();
|
||||
float32x4_t A1, B1, A2, B2, A3, B3;
|
||||
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
|
||||
|
||||
{
|
||||
float32x2x2_t tmp;
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
|
||||
vQ1zx = tmp.val[0];
|
||||
|
||||
tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
|
||||
vQ2zx = tmp.val[0];
|
||||
}
|
||||
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
|
||||
|
||||
vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
|
||||
|
||||
vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
|
||||
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
|
||||
|
||||
A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
|
||||
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
|
||||
|
||||
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
|
||||
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
|
||||
|
||||
A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
|
||||
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
|
||||
|
||||
A1 = vmulq_f32(A1, B1);
|
||||
A2 = vmulq_f32(A2, B2);
|
||||
A3 = vmulq_f32(A3, B3); // A3 *= B3
|
||||
|
||||
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
|
||||
|
||||
// change the sign of the last element
|
||||
A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
|
||||
|
||||
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
|
||||
|
||||
return b3Quaternion(A1);
|
||||
|
||||
#else
|
||||
return b3Quaternion(
|
||||
+w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
|
||||
+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
|
||||
+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
|
||||
-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
|
||||
#endif
|
||||
}
|
||||
|
||||
/**@brief Calculate the dot product between two quaternions */
|
||||
SIMD_FORCE_INLINE b3Scalar
|
||||
dot(const b3Quaternion& q1, const b3Quaternion& q2)
|
||||
{
|
||||
return q1.dot(q2);
|
||||
}
|
||||
|
||||
|
||||
/**@brief Return the length of a quaternion */
|
||||
SIMD_FORCE_INLINE b3Scalar
|
||||
length(const b3Quaternion& q)
|
||||
{
|
||||
return q.length();
|
||||
}
|
||||
|
||||
/**@brief Return the angle between two quaternions*/
|
||||
SIMD_FORCE_INLINE b3Scalar
|
||||
btAngle(const b3Quaternion& q1, const b3Quaternion& q2)
|
||||
{
|
||||
return q1.angle(q2);
|
||||
}
|
||||
|
||||
/**@brief Return the inverse of a quaternion*/
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
inverse(const b3Quaternion& q)
|
||||
{
|
||||
return q.inverse();
|
||||
}
|
||||
|
||||
/**@brief Return the result of spherical linear interpolation betwen two quaternions
|
||||
* @param q1 The first quaternion
|
||||
* @param q2 The second quaternion
|
||||
* @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
|
||||
* Slerp assumes constant velocity between positions. */
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
|
||||
{
|
||||
return q1.slerp(q2, t);
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE b3Vector3
|
||||
quatRotate(const b3Quaternion& rotation, const b3Vector3& v)
|
||||
{
|
||||
b3Quaternion q = rotation * v;
|
||||
q *= rotation.inverse();
|
||||
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
|
||||
return b3Vector3(_mm_and_ps(q.get128(), btvFFF0fMask));
|
||||
#elif defined(BT_USE_NEON)
|
||||
return b3Vector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
|
||||
#else
|
||||
return b3Vector3(q.getX(),q.getY(),q.getZ());
|
||||
#endif
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
shortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
|
||||
{
|
||||
b3Vector3 c = v0.cross(v1);
|
||||
b3Scalar d = v0.dot(v1);
|
||||
|
||||
if (d < -1.0 + SIMD_EPSILON)
|
||||
{
|
||||
b3Vector3 n,unused;
|
||||
btPlaneSpace1(v0,n,unused);
|
||||
return b3Quaternion(n.getX(),n.getY(),n.getZ(),0.0f); // just pick any vector that is orthogonal to v0
|
||||
}
|
||||
|
||||
b3Scalar s = btSqrt((1.0f + d) * 2.0f);
|
||||
b3Scalar rs = 1.0f / s;
|
||||
|
||||
return b3Quaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
|
||||
}
|
||||
|
||||
SIMD_FORCE_INLINE b3Quaternion
|
||||
shortestArcQuatNormalize2(b3Vector3& v0,b3Vector3& v1)
|
||||
{
|
||||
v0.normalize();
|
||||
v1.normalize();
|
||||
return shortestArcQuat(v0,v1);
|
||||
}
|
||||
|
||||
#endif //BT_SIMD__QUATERNION_H_
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user