resolve some naming conflicts between Bullet 3.x and Bullet 2.x

add BasicGpuDemo that bridges Bullet 2.x and Bullet 3.x (still preliminary/crashing/instable)
This commit is contained in:
erwincoumans
2013-04-30 18:17:44 -07:00
parent bf7a779562
commit e999903c15
27 changed files with 1581 additions and 51 deletions

View File

@@ -21,14 +21,14 @@ subject to the following restrictions:
#include <stdio.h>
#ifdef B3_USE_SSE
//const __m128 B3_ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
#endif
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif
#ifdef B3_USE_DOUBLE_PRECISION
@@ -219,7 +219,7 @@ public:
V1 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,2,3))); // Y X Z W
V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0,0,1,3)); // -X -X Y W
V3 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(2,1,0,3))); // Z Y X W
V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element
V11 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,1,0,3))); // Y Y X W
V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
@@ -231,9 +231,9 @@ public:
V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2,3,1,3)); // -Z -W Y W
V11 = V11 * V21; //
V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element
V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3,3,1,3)); // W W -Y -W
V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element
Y = b3CastiTo128f(_mm_shuffle_epi32 (NQi, B3_SHUFFLE(3,2,0,3))); // -W -Z -X -W
Z = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,1,3))); // Y X Y W
@@ -251,9 +251,9 @@ public:
V2 = V2 * vs;
V3 = V3 * vs;
V1 = V1 + v1000;
V2 = V2 + v0100;
V3 = V3 + v0010;
V1 = V1 + b3v1000;
V2 = V2 + b3v0100;
V3 = V3 + b3v0010;
m_el[0] = V1;
m_el[1] = V2;
@@ -312,9 +312,9 @@ public:
void setIdentity()
{
#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
m_el[0] = v1000;
m_el[1] = v0100;
m_el[2] = v0010;
m_el[0] = b3v1000;
m_el[1] = b3v0100;
m_el[2] = b3v0010;
#else
setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
@@ -326,7 +326,7 @@ public:
{
#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
static const b3Matrix3x3
identityMatrix(v1000, v0100, v0010);
identityMatrix(b3v1000, b3v0100, b3v0010);
#else
static const b3Matrix3x3
identityMatrix(

View File

@@ -52,6 +52,7 @@ public:
#else //__CELLOS_LV2__ __SPU__
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
public:
union {
b3SimdFloat4 mVec128;
b3Scalar m_floats[4];

View File

@@ -27,14 +27,14 @@ subject to the following restrictions:
#ifdef B3_USE_SSE
const __m128 B3_ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
#endif
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
#endif
@@ -227,7 +227,7 @@ public:
A1 = A1 + A2; // AB12
mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
mVec128 = mVec128+ A1; // AB03 + AB12
#elif defined(B3_USE_NEON)
@@ -270,7 +270,7 @@ public:
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
mVec128 = A0;
@@ -338,7 +338,7 @@ public:
vd = _mm_add_ss(vd, t);
vd = _mm_sqrt_ss(vd);
vd = _mm_div_ss(vOnes, vd);
vd = _mm_div_ss(b3vOnes, vd);
vd = b3_pshufd_ps(vd, 0); // splat
mVec128 = _mm_mul_ps(mVec128, vd);
@@ -416,9 +416,9 @@ public:
b3Quaternion inverse() const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
return b3Quaternion(_mm_xor_ps(mVec128, vQInv));
return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
#elif defined(B3_USE_NEON)
return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
#else
return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
@@ -567,7 +567,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
A1 = A1 + A2; // AB12
A0 = A0 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A0 = A0 + A1; // AB03 + AB12
return b3Quaternion(A0);
@@ -612,7 +612,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
return b3Quaternion(A0);
@@ -650,7 +650,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
A3 = A3 * B3; // A3 *= B3
A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);
@@ -694,7 +694,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
@@ -733,7 +733,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
A3 = A3 * B3; // A3 *= B3
A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);
@@ -777,7 +777,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3

View File

@@ -272,9 +272,9 @@ static int b3NanMask = 0x7F800001;
#define B3_NAN (*(float*)&b3NanMask)
#endif
#ifndef B3_INFINITY
#ifndef B3_INFINITY_MASK
static int b3InfinityMask = 0x7F800000;
#define B3_INFINITY (*(float*)&b3InfinityMask)
#define B3_INFINITY_MASK (*(float*)&b3InfinityMask)
#endif
inline __m128 operator + (const __m128 A, const __m128 B)

View File

@@ -56,9 +56,9 @@ subject to the following restrictions:
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
#endif
@@ -216,7 +216,7 @@ public:
#if 0 //defined(B3_USE_SSE_IN_API)
// this code is not faster !
__m128 vs = _mm_load_ss(&s);
vs = _mm_div_ss(v1110, vs);
vs = _mm_div_ss(b3v1110, vs);
vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
mVec128 = _mm_mul_ps(mVec128, vs);
@@ -297,7 +297,7 @@ public:
#if 0
vd = _mm_sqrt_ss(vd);
vd = _mm_div_ss(v1110, vd);
vd = _mm_div_ss(b3v1110, vd);
vd = b3_splat_ps(vd, 0x80);
mVec128 = _mm_mul_ps(mVec128, vd);
#else
@@ -306,8 +306,8 @@ public:
y = _mm_rsqrt_ss(vd); // estimate
// one step NR
z = v1_5;
vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
z = b3v1_5;
vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
//x2 = vd;
vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
@@ -826,7 +826,7 @@ operator/(const b3Vector3& v, const b3Scalar& s)
#if 0 //defined(B3_USE_SSE_IN_API)
// this code is not faster !
__m128 vs = _mm_load_ss(&s);
vs = _mm_div_ss(v1110, vs);
vs = _mm_div_ss(b3v1110, vs);
vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
return b3Vector3(_mm_mul_ps(v.mVec128, vs));