Add PhysicsEffects to Extras. The build is only tested on Windows and Android.
The Android/NEON optimized version of Physics Effects is thanks to Graham Rhodes and Anthony Hamilton, See Issue 587
This commit is contained in:
238
Extras/PhysicsEffects/include/vecmath/neon/boolInVec.h
Normal file
238
Extras/PhysicsEffects/include/vecmath/neon/boolInVec.h
Normal file
@@ -0,0 +1,238 @@
|
||||
/*
|
||||
Copyright (C) 2006-2010 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _BOOLINVEC_SCALAR_H
|
||||
#define _BOOLINVEC_SCALAR_H
|
||||
|
||||
#include <math.h>
|
||||
namespace Vectormath {
|
||||
|
||||
class floatInVec;
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
// boolInVec class
|
||||
//
|
||||
|
||||
class boolInVec
|
||||
{
|
||||
private:
|
||||
unsigned int mData;
|
||||
|
||||
public:
|
||||
// Default constructor; does no initialization
|
||||
//
|
||||
inline boolInVec( ) { };
|
||||
|
||||
// Construct from a value converted from float
|
||||
//
|
||||
inline boolInVec(floatInVec vec);
|
||||
|
||||
// Explicit cast from bool
|
||||
//
|
||||
explicit inline boolInVec(bool scalar);
|
||||
|
||||
// Explicit cast to bool
|
||||
//
|
||||
inline bool getAsBool() const;
|
||||
|
||||
#ifndef _VECTORMATH_NO_SCALAR_CAST
|
||||
// Implicit cast to bool
|
||||
//
|
||||
inline operator bool() const;
|
||||
#endif
|
||||
|
||||
// Boolean negation operator
|
||||
//
|
||||
inline const boolInVec operator ! () const;
|
||||
|
||||
// Assignment operator
|
||||
//
|
||||
inline boolInVec& operator = (boolInVec vec);
|
||||
|
||||
// Boolean and assignment operator
|
||||
//
|
||||
inline boolInVec& operator &= (boolInVec vec);
|
||||
|
||||
// Boolean exclusive or assignment operator
|
||||
//
|
||||
inline boolInVec& operator ^= (boolInVec vec);
|
||||
|
||||
// Boolean or assignment operator
|
||||
//
|
||||
inline boolInVec& operator |= (boolInVec vec);
|
||||
|
||||
};
|
||||
|
||||
// Equal operator
|
||||
//
|
||||
inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
|
||||
|
||||
// Not equal operator
|
||||
//
|
||||
inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
|
||||
|
||||
// And operator
|
||||
//
|
||||
inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
|
||||
|
||||
// Exclusive or operator
|
||||
//
|
||||
inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
|
||||
|
||||
// Or operator
|
||||
//
|
||||
inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
|
||||
|
||||
// Conditionally select between two values
|
||||
//
|
||||
inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
|
||||
|
||||
|
||||
} // namespace Vectormath
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
// boolInVec implementation
|
||||
//
|
||||
|
||||
#include "floatInVec.h"
|
||||
|
||||
namespace Vectormath {
|
||||
|
||||
inline
|
||||
boolInVec::boolInVec(floatInVec vec)
|
||||
{
|
||||
*this = (vec != floatInVec(0.0f));
|
||||
}
|
||||
|
||||
inline
|
||||
boolInVec::boolInVec(bool scalar)
|
||||
{
|
||||
mData = -(int)scalar;
|
||||
}
|
||||
|
||||
inline
|
||||
bool
|
||||
boolInVec::getAsBool() const
|
||||
{
|
||||
return (mData > 0);
|
||||
}
|
||||
|
||||
#ifndef _VECTORMATH_NO_SCALAR_CAST
|
||||
inline
|
||||
boolInVec::operator bool() const
|
||||
{
|
||||
return getAsBool();
|
||||
}
|
||||
#endif
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
boolInVec::operator ! () const
|
||||
{
|
||||
return boolInVec(!mData);
|
||||
}
|
||||
|
||||
inline
|
||||
boolInVec&
|
||||
boolInVec::operator = (boolInVec vec)
|
||||
{
|
||||
mData = vec.mData;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
boolInVec&
|
||||
boolInVec::operator &= (boolInVec vec)
|
||||
{
|
||||
*this = *this & vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
boolInVec&
|
||||
boolInVec::operator ^= (boolInVec vec)
|
||||
{
|
||||
*this = *this ^ vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
boolInVec&
|
||||
boolInVec::operator |= (boolInVec vec)
|
||||
{
|
||||
*this = *this | vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator == (boolInVec vec0, boolInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsBool() == vec1.getAsBool());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator != (boolInVec vec0, boolInVec vec1)
|
||||
{
|
||||
return !(vec0 == vec1);
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator & (boolInVec vec0, boolInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsBool() & vec1.getAsBool());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator | (boolInVec vec0, boolInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsBool() | vec1.getAsBool());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator ^ (boolInVec vec0, boolInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsBool() ^ vec1.getAsBool());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
|
||||
{
|
||||
return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
|
||||
}
|
||||
|
||||
} // namespace Vectormath
|
||||
|
||||
#endif // boolInVec_h
|
||||
357
Extras/PhysicsEffects/include/vecmath/neon/floatInVec.h
Normal file
357
Extras/PhysicsEffects/include/vecmath/neon/floatInVec.h
Normal file
@@ -0,0 +1,357 @@
|
||||
/*
|
||||
Copyright (C) 2006-2010 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _FLOATINVEC__SCALAR_H
|
||||
#define _FLOATINVEC__SCALAR_H
|
||||
|
||||
#include <math.h>
|
||||
namespace Vectormath {
|
||||
|
||||
class boolInVec;
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
// floatInVec class
|
||||
//
|
||||
|
||||
// A class representing a scalar float value contained in a vector register
|
||||
// This class does not support fastmath
|
||||
class floatInVec
|
||||
{
|
||||
private:
|
||||
float mData;
|
||||
|
||||
public:
|
||||
// Default constructor; does no initialization
|
||||
//
|
||||
inline floatInVec( ) { };
|
||||
|
||||
// Construct from a value converted from bool
|
||||
//
|
||||
inline floatInVec(boolInVec vec);
|
||||
|
||||
// Explicit cast from float
|
||||
//
|
||||
explicit inline floatInVec(float scalar);
|
||||
|
||||
// Explicit cast to float
|
||||
//
|
||||
inline float getAsFloat() const;
|
||||
|
||||
#ifndef _VECTORMATH_NO_SCALAR_CAST
|
||||
// Implicit cast to float
|
||||
//
|
||||
inline operator float() const;
|
||||
#endif
|
||||
|
||||
// Post increment (add 1.0f)
|
||||
//
|
||||
inline const floatInVec operator ++ (int);
|
||||
|
||||
// Post decrement (subtract 1.0f)
|
||||
//
|
||||
inline const floatInVec operator -- (int);
|
||||
|
||||
// Pre increment (add 1.0f)
|
||||
//
|
||||
inline floatInVec& operator ++ ();
|
||||
|
||||
// Pre decrement (subtract 1.0f)
|
||||
//
|
||||
inline floatInVec& operator -- ();
|
||||
|
||||
// Negation operator
|
||||
//
|
||||
inline const floatInVec operator - () const;
|
||||
|
||||
// Assignment operator
|
||||
//
|
||||
inline floatInVec& operator = (floatInVec vec);
|
||||
|
||||
// Multiplication assignment operator
|
||||
//
|
||||
inline floatInVec& operator *= (floatInVec vec);
|
||||
|
||||
// Division assignment operator
|
||||
//
|
||||
inline floatInVec& operator /= (floatInVec vec);
|
||||
|
||||
// Addition assignment operator
|
||||
//
|
||||
inline floatInVec& operator += (floatInVec vec);
|
||||
|
||||
// Subtraction assignment operator
|
||||
//
|
||||
inline floatInVec& operator -= (floatInVec vec);
|
||||
|
||||
};
|
||||
|
||||
// Multiplication operator
|
||||
//
|
||||
inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Division operator
|
||||
//
|
||||
inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Addition operator
|
||||
//
|
||||
inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Subtraction operator
|
||||
//
|
||||
inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Less than operator
|
||||
//
|
||||
inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Less than or equal operator
|
||||
//
|
||||
inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Greater than operator
|
||||
//
|
||||
inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Greater than or equal operator
|
||||
//
|
||||
inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Equal operator
|
||||
//
|
||||
inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Not equal operator
|
||||
//
|
||||
inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
|
||||
|
||||
// Conditionally select between two values
|
||||
//
|
||||
inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
|
||||
|
||||
|
||||
} // namespace Vectormath
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
// floatInVec implementation
|
||||
//
|
||||
|
||||
#include "boolInVec.h"
|
||||
|
||||
namespace Vectormath {
|
||||
|
||||
inline
|
||||
floatInVec::floatInVec(boolInVec vec)
|
||||
{
|
||||
mData = float(vec.getAsBool());
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec::floatInVec(float scalar)
|
||||
{
|
||||
mData = scalar;
|
||||
}
|
||||
|
||||
inline
|
||||
float
|
||||
floatInVec::getAsFloat() const
|
||||
{
|
||||
return mData;
|
||||
}
|
||||
|
||||
#ifndef _VECTORMATH_NO_SCALAR_CAST
|
||||
inline
|
||||
floatInVec::operator float() const
|
||||
{
|
||||
return getAsFloat();
|
||||
}
|
||||
#endif
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
floatInVec::operator ++ (int)
|
||||
{
|
||||
float olddata = mData;
|
||||
operator ++();
|
||||
return floatInVec(olddata);
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
floatInVec::operator -- (int)
|
||||
{
|
||||
float olddata = mData;
|
||||
operator --();
|
||||
return floatInVec(olddata);
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator ++ ()
|
||||
{
|
||||
*this += floatInVec(1.0f);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator -- ()
|
||||
{
|
||||
*this -= floatInVec(1.0f);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
floatInVec::operator - () const
|
||||
{
|
||||
return floatInVec(-mData);
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator = (floatInVec vec)
|
||||
{
|
||||
mData = vec.mData;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator *= (floatInVec vec)
|
||||
{
|
||||
*this = *this * vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator /= (floatInVec vec)
|
||||
{
|
||||
*this = *this / vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator += (floatInVec vec)
|
||||
{
|
||||
*this = *this + vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
floatInVec&
|
||||
floatInVec::operator -= (floatInVec vec)
|
||||
{
|
||||
*this = *this - vec;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
operator * (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
operator / (floatInVec num, floatInVec den)
|
||||
{
|
||||
return floatInVec(num.getAsFloat() / den.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
operator + (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
operator - (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator < (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsFloat() < vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator <= (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return !(vec0 > vec1);
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator > (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsFloat() > vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator >= (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return !(vec0 < vec1);
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator == (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());
|
||||
}
|
||||
|
||||
inline
|
||||
const boolInVec
|
||||
operator != (floatInVec vec0, floatInVec vec1)
|
||||
{
|
||||
return !(vec0 == vec1);
|
||||
}
|
||||
|
||||
inline
|
||||
const floatInVec
|
||||
select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
|
||||
{
|
||||
return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
|
||||
}
|
||||
|
||||
} // namespace Vectormath
|
||||
|
||||
#endif // floatInVec_h
|
||||
1645
Extras/PhysicsEffects/include/vecmath/neon/mat_aos.h
Normal file
1645
Extras/PhysicsEffects/include/vecmath/neon/mat_aos.h
Normal file
File diff suppressed because it is too large
Load Diff
446
Extras/PhysicsEffects/include/vecmath/neon/quat_aos.h
Normal file
446
Extras/PhysicsEffects/include/vecmath/neon/quat_aos.h
Normal file
@@ -0,0 +1,446 @@
|
||||
/*
|
||||
Copyright (C) 2006-2010 Sony Computer Entertainment Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Sony Computer Entertainment Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _VECTORMATH_QUAT_AOS_CPP_H
|
||||
#define _VECTORMATH_QUAT_AOS_CPP_H
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Definitions
|
||||
|
||||
#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
|
||||
#define _VECTORMATH_INTERNAL_FUNCTIONS
|
||||
|
||||
#endif
|
||||
|
||||
namespace Vectormath {
|
||||
namespace Aos {
|
||||
|
||||
inline Quat::Quat( const Quat & quat )
|
||||
{
|
||||
mX = quat.mX;
|
||||
mY = quat.mY;
|
||||
mZ = quat.mZ;
|
||||
mW = quat.mW;
|
||||
}
|
||||
|
||||
inline Quat::Quat( float _x, float _y, float _z, float _w )
|
||||
{
|
||||
mX = _x;
|
||||
mY = _y;
|
||||
mZ = _z;
|
||||
mW = _w;
|
||||
}
|
||||
|
||||
inline Quat::Quat( const Vector3 & xyz, float _w )
|
||||
{
|
||||
this->setXYZ( xyz );
|
||||
this->setW( _w );
|
||||
}
|
||||
|
||||
inline Quat::Quat( const Vector4 & vec )
|
||||
{
|
||||
mX = vec.getX();
|
||||
mY = vec.getY();
|
||||
mZ = vec.getZ();
|
||||
mW = vec.getW();
|
||||
}
|
||||
|
||||
inline Quat::Quat( float scalar )
|
||||
{
|
||||
mX = scalar;
|
||||
mY = scalar;
|
||||
mZ = scalar;
|
||||
mW = scalar;
|
||||
}
|
||||
|
||||
inline const Quat Quat::identity( )
|
||||
{
|
||||
return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
|
||||
}
|
||||
|
||||
inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
|
||||
{
|
||||
return ( quat0 + ( ( quat1 - quat0 ) * t ) );
|
||||
}
|
||||
|
||||
inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
|
||||
{
|
||||
Quat start;
|
||||
float recipSinAngle, scale0, scale1, cosAngle, angle;
|
||||
cosAngle = dot( unitQuat0, unitQuat1 );
|
||||
if ( cosAngle < 0.0f ) {
|
||||
cosAngle = -cosAngle;
|
||||
start = ( -unitQuat0 );
|
||||
} else {
|
||||
start = unitQuat0;
|
||||
}
|
||||
if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
|
||||
angle = acosf( cosAngle );
|
||||
recipSinAngle = ( 1.0f / sinf( angle ) );
|
||||
scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
|
||||
scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
|
||||
} else {
|
||||
scale0 = ( 1.0f - t );
|
||||
scale1 = t;
|
||||
}
|
||||
return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
|
||||
}
|
||||
|
||||
inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
|
||||
{
|
||||
Quat tmp0, tmp1;
|
||||
tmp0 = slerp( t, unitQuat0, unitQuat3 );
|
||||
tmp1 = slerp( t, unitQuat1, unitQuat2 );
|
||||
return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
|
||||
}
|
||||
|
||||
inline void loadXYZW( Quat & quat, const float * fptr )
|
||||
{
|
||||
quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] );
|
||||
}
|
||||
|
||||
inline void storeXYZW( const Quat & quat, float * fptr )
|
||||
{
|
||||
fptr[0] = quat.getX();
|
||||
fptr[1] = quat.getY();
|
||||
fptr[2] = quat.getZ();
|
||||
fptr[3] = quat.getW();
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator =( const Quat & quat )
|
||||
{
|
||||
mX = quat.mX;
|
||||
mY = quat.mY;
|
||||
mZ = quat.mZ;
|
||||
mW = quat.mW;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Quat & Quat::setXYZ( const Vector3 & vec )
|
||||
{
|
||||
mX = vec.getX();
|
||||
mY = vec.getY();
|
||||
mZ = vec.getZ();
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline const Vector3 Quat::getXYZ( ) const
|
||||
{
|
||||
return Vector3( mX, mY, mZ );
|
||||
}
|
||||
|
||||
inline Quat & Quat::setX( float _x )
|
||||
{
|
||||
mX = _x;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline float Quat::getX( ) const
|
||||
{
|
||||
return mX;
|
||||
}
|
||||
|
||||
inline Quat & Quat::setY( float _y )
|
||||
{
|
||||
mY = _y;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline float Quat::getY( ) const
|
||||
{
|
||||
return mY;
|
||||
}
|
||||
|
||||
inline Quat & Quat::setZ( float _z )
|
||||
{
|
||||
mZ = _z;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline float Quat::getZ( ) const
|
||||
{
|
||||
return mZ;
|
||||
}
|
||||
|
||||
inline Quat & Quat::setW( float _w )
|
||||
{
|
||||
mW = _w;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline float Quat::getW( ) const
|
||||
{
|
||||
return mW;
|
||||
}
|
||||
|
||||
inline Quat & Quat::setElem( int idx, float value )
|
||||
{
|
||||
*(&mX + idx) = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline float Quat::getElem( int idx ) const
|
||||
{
|
||||
return *(&mX + idx);
|
||||
}
|
||||
|
||||
inline float & Quat::operator []( int idx )
|
||||
{
|
||||
return *(&mX + idx);
|
||||
}
|
||||
|
||||
inline float Quat::operator []( int idx ) const
|
||||
{
|
||||
return *(&mX + idx);
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator +( const Quat & quat ) const
|
||||
{
|
||||
return Quat(
|
||||
( mX + quat.mX ),
|
||||
( mY + quat.mY ),
|
||||
( mZ + quat.mZ ),
|
||||
( mW + quat.mW )
|
||||
);
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator -( const Quat & quat ) const
|
||||
{
|
||||
return Quat(
|
||||
( mX - quat.mX ),
|
||||
( mY - quat.mY ),
|
||||
( mZ - quat.mZ ),
|
||||
( mW - quat.mW )
|
||||
);
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator *( float scalar ) const
|
||||
{
|
||||
return Quat(
|
||||
( mX * scalar ),
|
||||
( mY * scalar ),
|
||||
( mZ * scalar ),
|
||||
( mW * scalar )
|
||||
);
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator +=( const Quat & quat )
|
||||
{
|
||||
*this = *this + quat;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator -=( const Quat & quat )
|
||||
{
|
||||
*this = *this - quat;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator *=( float scalar )
|
||||
{
|
||||
*this = *this * scalar;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator /( float scalar ) const
|
||||
{
|
||||
return Quat(
|
||||
( mX / scalar ),
|
||||
( mY / scalar ),
|
||||
( mZ / scalar ),
|
||||
( mW / scalar )
|
||||
);
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator /=( float scalar )
|
||||
{
|
||||
*this = *this / scalar;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator -( ) const
|
||||
{
|
||||
return Quat(
|
||||
-mX,
|
||||
-mY,
|
||||
-mZ,
|
||||
-mW
|
||||
);
|
||||
}
|
||||
|
||||
inline const Quat operator *( float scalar, const Quat & quat )
|
||||
{
|
||||
return quat * scalar;
|
||||
}
|
||||
|
||||
inline float dot( const Quat & quat0, const Quat & quat1 )
|
||||
{
|
||||
float result;
|
||||
result = ( quat0.getX() * quat1.getX() );
|
||||
result = ( result + ( quat0.getY() * quat1.getY() ) );
|
||||
result = ( result + ( quat0.getZ() * quat1.getZ() ) );
|
||||
result = ( result + ( quat0.getW() * quat1.getW() ) );
|
||||
return result;
|
||||
}
|
||||
|
||||
inline float norm( const Quat & quat )
|
||||
{
|
||||
float result;
|
||||
result = ( quat.getX() * quat.getX() );
|
||||
result = ( result + ( quat.getY() * quat.getY() ) );
|
||||
result = ( result + ( quat.getZ() * quat.getZ() ) );
|
||||
result = ( result + ( quat.getW() * quat.getW() ) );
|
||||
return result;
|
||||
}
|
||||
|
||||
inline float length( const Quat & quat )
|
||||
{
|
||||
return ::sqrtf( norm( quat ) );
|
||||
}
|
||||
|
||||
inline const Quat normalize( const Quat & quat )
|
||||
{
|
||||
float lenSqr, lenInv;
|
||||
lenSqr = norm( quat );
|
||||
lenInv = ( 1.0f / sqrtf( lenSqr ) );
|
||||
return Quat(
|
||||
( quat.getX() * lenInv ),
|
||||
( quat.getY() * lenInv ),
|
||||
( quat.getZ() * lenInv ),
|
||||
( quat.getW() * lenInv )
|
||||
);
|
||||
}
|
||||
|
||||
inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
|
||||
{
|
||||
float cosHalfAngleX2, recipCosHalfAngleX2;
|
||||
cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
|
||||
recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
|
||||
return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
|
||||
}
|
||||
|
||||
inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
|
||||
{
|
||||
float s, c, angle;
|
||||
angle = ( radians * 0.5f );
|
||||
s = sinf( angle );
|
||||
c = cosf( angle );
|
||||
return Quat( ( unitVec * s ), c );
|
||||
}
|
||||
|
||||
inline const Quat Quat::rotationX( float radians )
|
||||
{
|
||||
float s, c, angle;
|
||||
angle = ( radians * 0.5f );
|
||||
s = sinf( angle );
|
||||
c = cosf( angle );
|
||||
return Quat( s, 0.0f, 0.0f, c );
|
||||
}
|
||||
|
||||
inline const Quat Quat::rotationY( float radians )
|
||||
{
|
||||
float s, c, angle;
|
||||
angle = ( radians * 0.5f );
|
||||
s = sinf( angle );
|
||||
c = cosf( angle );
|
||||
return Quat( 0.0f, s, 0.0f, c );
|
||||
}
|
||||
|
||||
inline const Quat Quat::rotationZ( float radians )
|
||||
{
|
||||
float s, c, angle;
|
||||
angle = ( radians * 0.5f );
|
||||
s = sinf( angle );
|
||||
c = cosf( angle );
|
||||
return Quat( 0.0f, 0.0f, s, c );
|
||||
}
|
||||
|
||||
inline const Quat Quat::operator *( const Quat & quat ) const
|
||||
{
|
||||
return Quat(
|
||||
( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
|
||||
( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
|
||||
( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
|
||||
( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
|
||||
);
|
||||
}
|
||||
|
||||
inline Quat & Quat::operator *=( const Quat & quat )
|
||||
{
|
||||
*this = *this * quat;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
|
||||
{
|
||||
float tmpX, tmpY, tmpZ, tmpW;
|
||||
tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
|
||||
tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
|
||||
tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
|
||||
tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
|
||||
return Vector3(
|
||||
( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
|
||||
( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
|
||||
( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
|
||||
);
|
||||
}
|
||||
|
||||
inline const Quat conj( const Quat & quat )
|
||||
{
|
||||
return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
|
||||
}
|
||||
|
||||
inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
|
||||
{
|
||||
return Quat(
|
||||
( select1 )? quat1.getX() : quat0.getX(),
|
||||
( select1 )? quat1.getY() : quat0.getY(),
|
||||
( select1 )? quat1.getZ() : quat0.getZ(),
|
||||
( select1 )? quat1.getW() : quat0.getW()
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef _VECTORMATH_DEBUG
|
||||
|
||||
inline void print( const Quat & quat )
|
||||
{
|
||||
printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
|
||||
}
|
||||
|
||||
inline void print( const Quat & quat, const char * name )
|
||||
{
|
||||
printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace Aos
|
||||
} // namespace Vectormath
|
||||
|
||||
#endif
|
||||
1526
Extras/PhysicsEffects/include/vecmath/neon/vec_aos.h
Normal file
1526
Extras/PhysicsEffects/include/vecmath/neon/vec_aos.h
Normal file
File diff suppressed because it is too large
Load Diff
1893
Extras/PhysicsEffects/include/vecmath/neon/vectormath_aos.h
Normal file
1893
Extras/PhysicsEffects/include/vecmath/neon/vectormath_aos.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,310 @@
|
||||
@
|
||||
@ Applied Research Associates Inc. (c)2011
|
||||
@
|
||||
@ Redistribution and use in source and binary forms,
|
||||
@ with or without modification, are permitted provided that the
|
||||
@ following conditions are met:
|
||||
@ * Redistributions of source code must retain the above copyright
|
||||
@ notice, this list of conditions and the following disclaimer.
|
||||
@ * Redistributions in binary form must reproduce the above copyright
|
||||
@ notice, this list of conditions and the following disclaimer in the
|
||||
@ documentation and/or other materials provided with the distribution.
|
||||
@ * Neither the name of the Applied Research Associates Inc nor the names
|
||||
@ of its contributors may be used to endorse or promote products derived
|
||||
@ from this software without specific prior written permission.
|
||||
@
|
||||
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
@ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
@ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
@ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
@ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
@ POSSIBILITY OF SUCH DAMAGE.
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
@
|
||||
@ This file contains ARM/NEON assembly versions of some vectormath
|
||||
@ atomic functions. We have implemented here instead of inline assembly
|
||||
@ because we have found gcc 4.4.3 to be too inconsistent and inadequate
|
||||
@ to properly support either NEON intrinsics or inline assembly. (See
|
||||
@ the inline assembly version of the vector3 dot product, which is
|
||||
@ contained in vec_aos.h but compiled out, for an example of a simple
|
||||
@ inline assembly function that wreaks havok if used.)
|
||||
@
|
||||
@ Note that the certain NEON registers must be preserved across
|
||||
@ function calls according to the following document:
|
||||
@
|
||||
@ "Procedure Call Standard for the ARM? Architecture," ARM document
|
||||
@ number ARM IHI 0042D, current through ABI release 2.08,
|
||||
@ 16th October, 2009, section 5.1.2.1
|
||||
@
|
||||
@ The registers are: q4-q7 (and their double-word and single word
|
||||
@ counterparts)
|
||||
@
|
||||
@ These functions preserve all non-scratch general purpose registers
|
||||
@ as well as the ones listed, and so we do not need to have extra
|
||||
@ code to do the register preservation
|
||||
@
|
||||
.syntax unified
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
.thumb
|
||||
.text
|
||||
.align 2
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxVector3DotProductNEON
|
||||
@
|
||||
@ Vector3 dot product, result stored directly to memory
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store two 32 bit floats, though only the
|
||||
@ first value is of interest
|
||||
@----------------------------------------------------------------
|
||||
.global pfxVector3DotProductNEON
|
||||
.thumb_func
|
||||
pfxVector3DotProductNEON:
|
||||
.fnstart
|
||||
vld1.32 {d0,d1}, [r1] @ input <x2,y2,z2,?> = d0,d1\n\t"
|
||||
vld1.32 {d2,d3}, [r0] @ input <x1,y1,z1,?> = d2,d3\n\t"
|
||||
vmul.f32 d4, d0, d2 @ d4 = <x1*x2,y1*y2>\n\t"
|
||||
vpadd.f32 d4, d4, d4 @ d4 = <x1*x2 + y1*y2, x1*x2 + y1*y2>\n\t"
|
||||
vmla.f32 s8, s2, s6 @ s8 = <x1*x2 + y1*y2 + z1*z2\n\t"
|
||||
vst1.32 {d4}, [r2] @ save result to memory. supports double/quad word only. We only care about first word\n\t"
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxVector4DotProductNEON
|
||||
@
|
||||
@ Vector4 dot product, result stored directly to memory
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store two 32 bit floats, though only the
|
||||
@ first value is of interest
|
||||
@----------------------------------------------------------------
|
||||
.global pfxVector4DotProductNEON
|
||||
.thumb_func
|
||||
pfxVector4DotProductNEON:
|
||||
.fnstart
|
||||
vld1.32 {d16,d17}, [r0] @ input <x1,y1,z1,w1>
|
||||
vld1.32 {d18,d19}, [r1] @ input <x2,y2,z2,w2>
|
||||
vmul.f32 d14, d16, d18 @ d14=<x1*x2,y1*y2>
|
||||
@ non-fused multiple accumulate
|
||||
vmla.f32 d14, d17, d19 @ d14=d14+<z1*z2,w1*w2>=<x1*x2+z1*z2,y1*y2+w1*w2>
|
||||
@ fused multiple accumulate - listed here for reference but we use vmla above
|
||||
@ instead since the fused version is not recognized by GNU assembler (as part
|
||||
@ of the gcc 4.4.3 Android distribution)
|
||||
@ vfma.f32 {d14}, d17, d19 @ d14=d14+<z1*z2,w1*w2>=<x1*x2+z1*z2,y1*y2+w1*w2>
|
||||
vpadd.f32 d14, d14, d14 @ add the two halves of d14 together, same result in each lane
|
||||
vst1.32 {d14}, [r2]
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxVector3CrossProductNEON
|
||||
@
|
||||
@ Vector3 cross product, result stored directly to memory
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store four 32 bit floats, though only the
|
||||
@ first 3 values are of interest
|
||||
@----------------------------------------------------------------
|
||||
.global pfxVector3CrossProductNEON
|
||||
.thumb_func
|
||||
pfxVector3CrossProductNEON:
|
||||
.fnstart
|
||||
vld1.32 {d18,d19}, [r1] @ input <x2,y2,z2,w2> = d18,d19
|
||||
vld1.32 {d16,d17}, [r0] @ input <x1,y1,z1,w1> = d16,d17
|
||||
@ rearrange inputs into convenient order
|
||||
vtrn.32 d18,d19 @ q9 = <x2,z2,y2,w2> = d18,d19
|
||||
vrev64.32 d16,d16 @ q8 = <y1,x1,z1,w1> = d16,d17
|
||||
vrev64.32 d18,d18 @ q9 = <z2,x2,y2,w2> = d18,d19
|
||||
vtrn.32 d16,d17 @ q8 = <y1,z1,x1,w1> = d16,d17
|
||||
@ perform first half of cross product using rearranged inputs
|
||||
vmul.f32 q10, q8, q9 @ q10 = <y1*z2,z1*x2,x1*y2,w1*w2>
|
||||
@ rearrange inputs again
|
||||
vtrn.32 d18,d19 @ q9 = <z2,y2,x2,w2> = d18,d19
|
||||
vrev64.32 d16,d16 @ q8 = <z1,y1,x1,w1> = d16,d17
|
||||
vrev64.32 d18,d18 @ q9 = <y2,z2,x2,w2> = d18,d19
|
||||
vtrn.32 d16,d17 @ q8 = <z1,x1,y1,w1> = d16,d17
|
||||
@ perform last half of cross product using rearranged inputs
|
||||
vmls.f32 q10, q8, q9 @ q10 = <y1*z2-y2*z1,z1*x2-z2*x1,x1*y2-x2*y1,w1*w2-w2*w1>
|
||||
@ and store the result
|
||||
vst1.32 {q10}, [r2]
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxMatrix3Matrix3ProductNEON
|
||||
@
|
||||
@ Matrix3 * Matrix3 product, result stored directly to memory
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store 12 32-bit floats. The reason for
|
||||
@ 12 rather than 9 is that each column vector actually has
|
||||
@ 4 32-bit floats rather than just 3....since NEON works with
|
||||
@ double and quad vectors.
|
||||
@
|
||||
@ Note that, since the inputs are loaded into registers then
|
||||
@ never used again, r2 can point to one of the inputs, e.g.,
|
||||
@ result can be stored back out to one of the input memory
|
||||
@ locations.
|
||||
@----------------------------------------------------------------
|
||||
.global pfxMatrix3Matrix3ProductNEON
|
||||
.thumb_func
|
||||
pfxMatrix3Matrix3ProductNEON:
|
||||
.fnstart
|
||||
vld1.32 {d16-d19}, [r0]! @ load first eight elements of matrix 0
|
||||
vld1.32 {d20-d21}, [r0] @ load second eight elements of matrix 0
|
||||
vld1.32 {d0-d3}, [r1]! @ load first eight elements of matrix 1
|
||||
vld1.32 {d4-d5}, [r1] @ load second eight elements of matrix 1
|
||||
vmul.f32 q12, q8, d0[0] @ rslt col0 = (mat0 col0) * (mat1 col0 elt0)
|
||||
vmul.f32 q13, q8, d2[0] @ rslt col1 = (mat0 col0) * (mat1 col1 elt0)
|
||||
vmul.f32 q14, q8, d4[0] @ rslt col2 = (mat0 col0) * (mat1 col2 elt0)
|
||||
vmla.f32 q12, q9, d0[1] @ rslt col0 += (mat0 col1) * (mat1 col0 elt1)
|
||||
vmla.f32 q13, q9, d2[1] @ rslt col1 += (mat0 col1) * (mat1 col1 elt1)
|
||||
vmla.f32 q14, q9, d4[1] @ rslt col2 += (mat0 col1) * (mat1 col2 elt1)
|
||||
vmla.f32 q12, q10, d1[0] @ rslt col0 += (mat0 col2) * (mat1 col0 elt2)
|
||||
vmla.f32 q13, q10, d3[0] @ rslt col1 += (mat0 col2) * (mat1 col1 elt2)
|
||||
vmla.f32 q14, q10, d5[0] @ rslt col2 += (mat0 col2) * (mat1 col2 elt2)
|
||||
vst1.32 {d24-d27}, [r2]! @ store first eight elements of result (each column has an extra float as stored)
|
||||
vst1.32 {d28-d29}, [r2] @ store last four elements of result (each column has an extra float as stored)
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxMatrix4Matrix4ProductNEON
|
||||
@
|
||||
@ Matrix4 * Matrix4 product, result stored directly to memory
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store 16 32 bit floats.
|
||||
@
|
||||
@ Note that, since the inputs are loaded into registers then
|
||||
@ never used again, r2 can point to one of the inputs, e.g.,
|
||||
@ result can be stored back out to one of the input memory
|
||||
@ locations.
|
||||
@----------------------------------------------------------------
|
||||
.global pfxMatrix4Matrix4ProductNEON
|
||||
.thumb_func
|
||||
pfxMatrix4Matrix4ProductNEON:
|
||||
.fnstart
|
||||
vld1.32 {d16-d19}, [r0]! @ load first eight elements of matrix 0
|
||||
vld1.32 {d20-d23}, [r0] @ load second eight elements of matrix 0
|
||||
vld1.32 {d0-d3}, [r1]! @ load first eight elements of matrix 1
|
||||
vld1.32 {d4-d7}, [r1] @ load second eight elements of matrix 1
|
||||
vmul.f32 q12, q8, d0[0] @ rslt col0 = (mat0 col0) * (mat1 col0 elt0)
|
||||
vmul.f32 q13, q8, d2[0] @ rslt col1 = (mat0 col0) * (mat1 col1 elt0)
|
||||
vmul.f32 q14, q8, d4[0] @ rslt col2 = (mat0 col0) * (mat1 col2 elt0)
|
||||
vmul.f32 q15, q8, d6[0] @ rslt col3 = (mat0 col0) * (mat1 col3 elt0)
|
||||
vmla.f32 q12, q9, d0[1] @ rslt col0 += (mat0 col1) * (mat1 col0 elt1)
|
||||
vmla.f32 q13, q9, d2[1] @ rslt col1 += (mat0 col1) * (mat1 col1 elt1)
|
||||
vmla.f32 q14, q9, d4[1] @ rslt col2 += (mat0 col1) * (mat1 col2 elt1)
|
||||
vmla.f32 q15, q9, d6[1] @ rslt col3 += (mat0 col1) * (mat1 col3 elt1)
|
||||
vmla.f32 q12, q10, d1[0] @ rslt col0 += (mat0 col2) * (mat1 col0 elt2)
|
||||
vmla.f32 q13, q10, d3[0] @ rslt col1 += (mat0 col2) * (mat1 col1 elt2)
|
||||
vmla.f32 q14, q10, d5[0] @ rslt col2 += (mat0 col2) * (mat1 col2 elt2)
|
||||
vmla.f32 q15, q10, d7[0] @ rslt col3 += (mat0 col2) * (mat1 col2 elt2)
|
||||
vmla.f32 q12, q11, d1[1] @ rslt col0 += (mat0 col3) * (mat1 col0 elt3)
|
||||
vmla.f32 q13, q11, d3[1] @ rslt col1 += (mat0 col3) * (mat1 col1 elt3)
|
||||
vmla.f32 q14, q11, d5[1] @ rslt col2 += (mat0 col3) * (mat1 col2 elt3)
|
||||
vmla.f32 q15, q11, d7[1] @ rslt col3 += (mat0 col3) * (mat1 col3 elt3)
|
||||
vst1.32 {d24-d27}, [r2]! @ store first eight elements of result
|
||||
vst1.32 {d28-d31}, [r2] @ store second eight elements of result
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxTransform3OrthoInverseNEON
|
||||
@
|
||||
@ Computes the ortho inverse of a Transform 3.
|
||||
@
|
||||
@ Result stored in memory pointed to by r1. r1 must point to
|
||||
@ memory sufficient to store 16 32 bit floats.
|
||||
@
|
||||
@ Note that, since the inputs are loaded into registers then
|
||||
@ never used again, r1 can point to one of the inputs, e.g.,
|
||||
@ result can be stored back out to one of the input memory
|
||||
@ locations.
|
||||
@
|
||||
@ Note that this expects the inputs to have 4 floats per row,
|
||||
@ (to be consistent with NEON quad vector), and the last float
|
||||
@ in each row should be 0.0 for the math to work out.
|
||||
@----------------------------------------------------------------
|
||||
.global pfxTransform3OrthoInverseNEON
|
||||
.thumb_func
|
||||
pfxTransform3OrthoInverseNEON:
|
||||
.fnstart
|
||||
@ direct load the first column of the ortho inverse result
|
||||
vld1.f32 d0[0], [r0]!
|
||||
vld1.f32 d2[0], [r0]!
|
||||
vld1.f32 d4[0], [r0]!
|
||||
vld1.f32 d1[1], [r0]!
|
||||
|
||||
@ direct load the second column of the ortho inverse result
|
||||
vld1.f32 d0[1], [r0]!
|
||||
vld1.f32 d2[1], [r0]!
|
||||
vld1.f32 d4[1], [r0]!
|
||||
vld1.f32 d3[1], [r0]!
|
||||
|
||||
@ direct load the third column of the ortho inverse result
|
||||
vld1.f32 d1[0], [r0]!
|
||||
vld1.f32 d3[0], [r0]!
|
||||
|
||||
vst1.f32 {d0-d3}, [r1]! @ store first eight elements of result (1st two columns)
|
||||
|
||||
vld1.f32 d5[0], [r0]!
|
||||
vld1.f32 d5[1], [r0]!
|
||||
|
||||
vst1.f32 {d4-d5}, [r1]! @ store next four elements of result (3rd column)
|
||||
|
||||
@ move q0 into q8 so we can reuse q0 to load fourth column
|
||||
@ of input. We do this to avoid using q4-q7 (which have to
|
||||
@ be preserved during the function call)....needed to work
|
||||
@ around some limitation rules that prevent us from accessing
|
||||
@ single s registers associated with q8 and above.
|
||||
vmov.f32 q8, q0
|
||||
|
||||
@ direct load the last column of the input
|
||||
vld1.f32 {q0}, [r0]
|
||||
|
||||
@ prepare the last column of the output
|
||||
vmul.f32 q3, q8, d0[0] @ multiply result column 1 by x coord of input column 3
|
||||
vmla.f32 q3, q1, d0[1] @ multiply result column 2 by y coord of input column 3 and add
|
||||
vmla.f32 q3, q2, d1[0] @ multiply result column 3 by z coord of input column 3 and add
|
||||
vneg.f32 q3, q3 @ negate final column
|
||||
|
||||
vst1.f32 {q3}, [r1] @ store last four elements of result (4th column)
|
||||
|
||||
bx lr
|
||||
.fnend
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ pfxTransform3Vector3MultiplyNEON
|
||||
@
|
||||
@ Computes the product of a Transform3 and a Vector3, e.g., it
|
||||
@ applies the transform to the vector.
|
||||
@
|
||||
@ Result stored in memory pointed to by r2. r2 must point to
|
||||
@ memory sufficient to store 4 32-bit floats.
|
||||
@----------------------------------------------------------------
|
||||
.global pfxTransform3Vector3MultiplyNEON
|
||||
.thumb_func
|
||||
pfxTransform3Vector3MultiplyNEON:
|
||||
.fnstart
|
||||
|
||||
vld1.32 {d16-d19}, [r0]! @ load first eight elements of transform matrix
|
||||
vld1.32 {d20-d21}, [r0] @ load second eight elements of transform matrix
|
||||
vld1.32 {d0-d1}, [r1] @ load the four elements of vector3 (last one is just padding)
|
||||
vmul.f32 q12, q8, d0[0] @ rslt col0 = (mat0 col0) * (mat1 col0 elt0)
|
||||
vmla.f32 q12, q9, d0[1] @ rslt col0 += (mat0 col1) * (mat1 col0 elt1)
|
||||
vmla.f32 q12, q10, d1[0] @ rslt col0 += (mat0 col2) * (mat1 col0 elt2)
|
||||
vst1.32 {d24-d25}, [r2] @ store four elements of result (last one is padding)
|
||||
|
||||
bx lr
|
||||
.fnend
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
Applied Research Associates Inc. (c)2011
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Applied Research Associates Inc nor the names
|
||||
of its contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _VECTORMATH_NEON_ASSEMBLY_PROTOTYPES_H
|
||||
#define _VECTORMATH_NEON_ASSEMBLY_PROTOTYPES_H
|
||||
|
||||
// Prototypes for NEON assembly implementations
|
||||
extern "C"
|
||||
{
|
||||
// NEON assembly implementations of Vector3 functions
|
||||
void pfxVector3DotProductNEON(const Vectormath::Aos::Vector3 &vec0, const Vectormath::Aos::Vector3 &vec1, float *result);
|
||||
void pfxVector3CrossProductNEON(const Vectormath::Aos::Vector3 &vec0, const Vectormath::Aos::Vector3 &vec1,
|
||||
Vectormath::Aos::Vector3 &result);
|
||||
|
||||
// NEON assembly implementations of Matrix3 functions
|
||||
void pfxMatrix3Matrix3ProductNEON(const Vectormath::Aos::Matrix3 &mat0, const Vectormath::Aos::Matrix3 &mat1,
|
||||
Vectormath::Aos::Matrix3 &result);
|
||||
|
||||
// NEON assembly implementations of Transform3 functions
|
||||
void pfxTransform3OrthoInverseNEON(const Vectormath::Aos::Transform3 &trn, Vectormath::Aos::Transform3 &result);
|
||||
void pfxTransform3Vector3MultiplyNEON(const Vectormath::Aos::Transform3 &trn, const Vectormath::Aos::Vector3 &vec,
|
||||
Vectormath::Aos::Vector3 &result);
|
||||
|
||||
// NEON assembly implementations of Vector4 functions
|
||||
void pfxVector4DotProductNEON(const Vectormath::Aos::Vector4 &vec0, const Vectormath::Aos::Vector4 &vec1, float *result);
|
||||
|
||||
// NEON assembly implementations of Matrix4 functions
|
||||
void pfxMatrix4Matrix4ProductNEON(const Vectormath::Aos::Matrix4 &mat0, const Vectormath::Aos::Matrix4 &mat1,
|
||||
Vectormath::Aos::Matrix4 &result);
|
||||
}
|
||||
|
||||
#endif // _VECTORMATH_NEON_ASSEMBLY_PROTOTYPES_H
|
||||
Reference in New Issue
Block a user