Apple contribution for OSX SSE and iOS NEON optimizations and unit tests, thanks to Jordan Hubbard, Ian Ollmann and Hristo Hristov.

For OSX:
cd build
./premake_osx xcode4
For iOS:
cd build
./ios_build.sh
./ios_run.sh

Also integrated the branches/StackAllocation branch to make it easier to multi-thread collision detection in the near future. It avoids changing the btCollisionObject while performing collision detection.

As this is a large patch, some things might be temporarily broken; I'll keep an eye out for issues.
This commit is contained in:
erwin.coumans
2012-06-07 00:56:30 +00:00
parent 777b92a2ad
commit 73b217fb07
323 changed files with 30730 additions and 13635 deletions

View File

@@ -69,6 +69,15 @@ inline int btGetVersion()
// Windows with _MSC_VER >= 1400 and single precision: define BT_USE_SSE and include the SSE2 intrinsics header <emmintrin.h>.
#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
#define BT_USE_SSE
#ifdef BT_USE_SSE
//BT_USE_SSE_IN_API is disabled under Windows by default, because
//it makes it harder to integrate Bullet into your application under Windows
//(structures embedding Bullet structs/classes need to be 16-byte aligned)
//with relatively little performance gain.
//If you are not embedding Bullet data in your classes, or you make sure that you align those classes on 16-byte boundaries,
//you can manually enable the next line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
//#define BT_USE_SSE_IN_API
#endif //BT_USE_SSE
#include <emmintrin.h>
#endif
@@ -143,11 +152,39 @@ inline int btGetVersion()
#else
//non-windows systems
#if (defined (__APPLE__) && defined (__i386__) && (!defined (BT_USE_DOUBLE_PRECISION)))
#define BT_USE_SSE
#include <emmintrin.h>
// Apple platforms, single precision only: choose the SIMD back end from the target architecture.
#if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
#if defined (__i386__) || defined (__x86_64__)
// x86 / x86_64: enable SSE, both internally and in the public API.
#define BT_USE_SSE
//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries.
//If apps run into issues, we will disable the next line.
#define BT_USE_SSE_IN_API
#ifdef BT_USE_SSE
// Include the intrinsics header matching the highest SSE level the compiler reports, falling back to <emmintrin.h>.
#if defined (__SSE4_1__)
#include <smmintrin.h>
#elif defined (__SSSE3__)
#include <tmmintrin.h>
#elif defined (__SSE3__)
#include <pmmintrin.h>
#else
#include <emmintrin.h>
#endif
#endif //BT_USE_SSE
#elif defined( __arm__ )
// ARM: BT_USE_NEON is defined only when compiling with clang.
#ifdef __clang__
#define BT_USE_NEON 1
#if defined BT_USE_NEON && defined (__clang__)
#if! defined( ARM_NEON_GCC_COMPATIBILITY )
// -DARM_NEON_GCC_COMPATIBILITY=1 changes NEON vector types to raw vectors, syntactically similar to SSE and AltiVec,
// instead of vectors wrapped up in structs. This code base assumes GCC-style raw vectors are used.
#error The C preprocessor macro ARM_NEON_GCC_COMPATIBILITY must be defined. Pass -DARM_NEON_GCC_COMPATIBILITY=1 to the compiler.
#endif//!ARM_NEON_GCC_COMPATIBILITY
#include <arm_neon.h>
#endif//BT_USE_NEON
#endif //__clang__
#endif//__arm__
// NOTE(review): SIMD_FORCE_INLINE is defined twice in a row with different bodies; the plain `inline`
// definition looks like a leftover of the pre-change version -- confirm that only one definition is intended.
#define SIMD_FORCE_INLINE inline
#define SIMD_FORCE_INLINE inline __attribute__ ((always_inline))
///@todo: check out alignment methods for other platforms/compilers
// Wrap a declaration `a` so that it carries a 16-byte (resp. 64-byte) alignment attribute.
#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
@@ -210,6 +247,69 @@ typedef float btScalar;
#define BT_LARGE_FLOAT 1e18f
#endif
// With SSE enabled, btSimdFloat4 is an alias for the __m128 vector type.
#ifdef BT_USE_SSE
typedef __m128 btSimdFloat4;
#endif//BT_USE_SSE
// Helpers available only when SSE is used in the public API: NaN/infinity constants, vector cast macros
// and a 4-float vector constructor. The _WIN32 branch uses intrinsics; the other branch uses vector casts
// and a compound literal.
#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
#ifdef _WIN32
// BT_NAN / BT_INFINITY reinterpret an integer bit pattern as a float.
// 0x7F800001 has all exponent bits set and a non-zero mantissa; 0x7F800000 has all exponent bits set and a zero mantissa.
// Assumes a 32-bit int and IEEE-754 single-precision float -- TODO confirm for all supported Windows targets.
// NOTE(review): reading an int through a float* is type punning via pointer cast; verify this is acceptable
// for every compiler that takes this branch.
#ifndef BT_NAN
static int btNanMask = 0x7F800001;
#define BT_NAN (*(float*)&btNanMask)
#endif
#ifndef BT_INFINITY
static int btInfinityMask = 0x7F800000;
#define BT_INFINITY (*(float*)&btInfinityMask)
#endif
// Arithmetic operators for __m128, each forwarding to the corresponding packed-float intrinsic.
inline __m128 operator + (const __m128 A, const __m128 B)
{
return _mm_add_ps(A, B);
}
inline __m128 operator - (const __m128 A, const __m128 B)
{
return _mm_sub_ps(A, B);
}
inline __m128 operator * (const __m128 A, const __m128 B)
{
return _mm_mul_ps(A, B);
}
// Casts between the float, double and integer 128-bit vector types via the _mm_cast* intrinsics.
#define btCastfTo128i(a) (_mm_castps_si128(a))
#define btCastfTo128d(a) (_mm_castps_pd(a))
#define btCastiTo128f(a) (_mm_castsi128_ps(a))
#define btCastdTo128f(a) (_mm_castpd_ps(a))
#define btCastdTo128i(a) (_mm_castpd_si128(a))
// Build a 4-float vector from r0..r3.
#define btAssign128(r0,r1,r2,r3) _mm_setr_ps(r0,r1,r2,r3)
#else//_WIN32
// Non-Windows: the same cast macros expressed as C-style casts between the vector types.
#define btCastfTo128i(a) ((__m128i)(a))
#define btCastfTo128d(a) ((__m128d)(a))
#define btCastiTo128f(a) ((__m128) (a))
#define btCastdTo128f(a) ((__m128) (a))
#define btCastdTo128i(a) ((__m128i)(a))
// Build a 4-float vector from r0..r3 with a compound literal.
#define btAssign128(r0,r1,r2,r3) (__m128){r0,r1,r2,r3}
// INFINITY and NAN are presumably supplied by <math.h> included elsewhere -- verify.
#define BT_INFINITY INFINITY
#define BT_NAN NAN
#endif//_WIN32
#endif //BT_USE_SSE_IN_API
// NEON counterpart of the SSE helpers: btSimdFloat4 is float32x4_t, and the NaN/infinity constants and
// 4-float constructor are defined for it.
#ifdef BT_USE_NEON
#include <arm_neon.h>
typedef float32x4_t btSimdFloat4;
// INFINITY and NAN are presumably supplied by <math.h> included elsewhere -- verify.
#define BT_INFINITY INFINITY
#define BT_NAN NAN
// Build a 4-float vector from r0..r3 with a compound literal.
#define btAssign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
#endif
#define BT_DECLARE_ALIGNED_ALLOCATOR() \