Apple contribution for OSX SSE and iOS NEON optimizations unit tests, thanks to Jordan Hubbard, Ian Ollmann and Hristo Hristov.
For OSX: `cd build`, then `./premake_osx xcode4`. For iOS: `cd build`, then `./ios_build.sh` and `./ios_run.sh`. Also integrated branches/StackAllocation to make it easier to multi-thread collision detection in the near future: it avoids changing the btCollisionObject while performing collision detection. As this is a large patch, some things might be temporarily broken; I'll keep an eye out for issues.
This commit is contained in:
@@ -69,6 +69,15 @@ inline int btGetVersion()
|
||||
|
||||
#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
|
||||
#define BT_USE_SSE
|
||||
#ifdef BT_USE_SSE
|
||||
//BT_USE_SSE_IN_API is disabled under Windows by default, because
|
||||
//it makes it harder to integrate Bullet into your application under Windows
|
||||
//(structures embedding Bullet structs/classes need to be 16-byte aligned)
|
||||
//with relatively little performance gain
|
||||
//If you are not embedding Bullet data in your classes, or you make sure that those classes are aligned on 16-byte boundaries,
|
||||
//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
|
||||
//#define BT_USE_SSE_IN_API
|
||||
#endif //BT_USE_SSE
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@@ -143,11 +152,39 @@ inline int btGetVersion()
|
||||
#else
|
||||
//non-windows systems
|
||||
|
||||
#if (defined (__APPLE__) && defined (__i386__) && (!defined (BT_USE_DOUBLE_PRECISION)))
|
||||
#define BT_USE_SSE
|
||||
#include <emmintrin.h>
|
||||
#if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#define BT_USE_SSE
|
||||
//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
|
||||
//if apps run into issues, we will disable the next line
|
||||
#define BT_USE_SSE_IN_API
|
||||
#ifdef BT_USE_SSE
|
||||
// include appropriate SSE level
|
||||
#if defined (__SSE4_1__)
|
||||
#include <smmintrin.h>
|
||||
#elif defined (__SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined (__SSE3__)
|
||||
#include <pmmintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#endif //BT_USE_SSE
|
||||
#elif defined( __arm__ )
|
||||
#ifdef __clang__
|
||||
#define BT_USE_NEON 1
|
||||
#if defined BT_USE_NEON && defined (__clang__)
|
||||
#if! defined( ARM_NEON_GCC_COMPATIBILITY )
|
||||
// -DARM_NEON_GCC_COMPATIBILITY=1 changes neon vector types to raw vectors, syntactically similar to SSE and AltiVec
|
||||
// instead of vectors wrapped up in structs. This code base assumes GCC style raw vectors are used.
|
||||
#error The C preprocessor macro ARM_NEON_GCC_COMPATIBILITY must be defined. Pass -DARM_NEON_GCC_COMPATIBILITY=1 to the compiler.
|
||||
#endif//!ARM_NEON_GCC_COMPATIBILITY
|
||||
#include <arm_neon.h>
|
||||
#endif//BT_USE_NEON
|
||||
#endif //__clang__
|
||||
#endif//__arm__
|
||||
|
||||
#define SIMD_FORCE_INLINE inline
|
||||
#define SIMD_FORCE_INLINE inline __attribute__ ((always_inline))
|
||||
///@todo: check out alignment methods for other platforms/compilers
|
||||
#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
|
||||
#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
|
||||
@@ -210,6 +247,69 @@ typedef float btScalar;
|
||||
#define BT_LARGE_FLOAT 1e18f
|
||||
#endif
|
||||
|
||||
#ifdef BT_USE_SSE
|
||||
typedef __m128 btSimdFloat4;
|
||||
#endif//BT_USE_SSE
|
||||
|
||||
#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifndef BT_NAN
|
||||
static int btNanMask = 0x7F800001;
|
||||
#define BT_NAN (*(float*)&btNanMask)
|
||||
#endif
|
||||
|
||||
#ifndef BT_INFINITY
|
||||
static int btInfinityMask = 0x7F800000;
|
||||
#define BT_INFINITY (*(float*)&btInfinityMask)
|
||||
#endif
|
||||
|
||||
//Overloads binary + for two __m128 values by forwarding to _mm_add_ps(A, B),
//which adds the four packed single-precision floats lane by lane.
//NOTE(review): this overload sits in the _WIN32 branch only; presumably the
//non-Windows compilers already provide vector arithmetic operators -- confirm.
inline __m128 operator + (const __m128 A, const __m128 B)
|
||||
{
|
||||
return _mm_add_ps(A, B);
|
||||
}
|
||||
|
||||
//Overloads binary - for two __m128 values by forwarding to _mm_sub_ps(A, B),
//which subtracts B from A across the four packed single-precision floats.
//NOTE(review): this overload sits in the _WIN32 branch only; presumably the
//non-Windows compilers already provide vector arithmetic operators -- confirm.
inline __m128 operator - (const __m128 A, const __m128 B)
|
||||
{
|
||||
return _mm_sub_ps(A, B);
|
||||
}
|
||||
|
||||
//Overloads binary * for two __m128 values by forwarding to _mm_mul_ps(A, B),
//which multiplies the four packed single-precision floats lane by lane
//(element-wise product, not a dot or cross product).
//NOTE(review): this overload sits in the _WIN32 branch only; presumably the
//non-Windows compilers already provide vector arithmetic operators -- confirm.
inline __m128 operator * (const __m128 A, const __m128 B)
|
||||
{
|
||||
return _mm_mul_ps(A, B);
|
||||
}
|
||||
|
||||
#define btCastfTo128i(a) (_mm_castps_si128(a))
|
||||
#define btCastfTo128d(a) (_mm_castps_pd(a))
|
||||
#define btCastiTo128f(a) (_mm_castsi128_ps(a))
|
||||
#define btCastdTo128f(a) (_mm_castpd_ps(a))
|
||||
#define btCastdTo128i(a) (_mm_castpd_si128(a))
|
||||
#define btAssign128(r0,r1,r2,r3) _mm_setr_ps(r0,r1,r2,r3)
|
||||
|
||||
#else//_WIN32
|
||||
|
||||
#define btCastfTo128i(a) ((__m128i)(a))
|
||||
#define btCastfTo128d(a) ((__m128d)(a))
|
||||
#define btCastiTo128f(a) ((__m128) (a))
|
||||
#define btCastdTo128f(a) ((__m128) (a))
|
||||
#define btCastdTo128i(a) ((__m128i)(a))
|
||||
#define btAssign128(r0,r1,r2,r3) (__m128){r0,r1,r2,r3}
|
||||
#define BT_INFINITY INFINITY
|
||||
#define BT_NAN NAN
|
||||
#endif//_WIN32
|
||||
#endif //BT_USE_SSE_IN_API
|
||||
|
||||
#ifdef BT_USE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
typedef float32x4_t btSimdFloat4;
|
||||
#define BT_INFINITY INFINITY
|
||||
#define BT_NAN NAN
|
||||
#define btAssign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define BT_DECLARE_ALIGNED_ALLOCATOR() \
|
||||
|
||||
Reference in New Issue
Block a user