resolve some naming conflicts between Bullet 3.x and Bullet 2.x

add BasicGpuDemo that bridges Bullet 2.x and Bullet 3.x (still preliminary/crashing/unstable)
This commit is contained in:
erwincoumans
2013-04-30 18:17:44 -07:00
parent bf7a779562
commit e999903c15
27 changed files with 1581 additions and 51 deletions

View File

@@ -21,14 +21,14 @@ subject to the following restrictions:
#include <stdio.h>
#ifdef B3_USE_SSE
//const __m128 B3_ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
#endif
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif
#ifdef B3_USE_DOUBLE_PRECISION
@@ -219,7 +219,7 @@ public:
V1 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,2,3))); // Y X Z W
V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0,0,1,3)); // -X -X Y W
V3 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(2,1,0,3))); // Z Y X W
V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element
V11 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,1,0,3))); // Y Y X W
V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
@@ -231,9 +231,9 @@ public:
V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2,3,1,3)); // -Z -W Y W
V11 = V11 * V21; //
V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element
V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3,3,1,3)); // W W -Y -W
V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element
Y = b3CastiTo128f(_mm_shuffle_epi32 (NQi, B3_SHUFFLE(3,2,0,3))); // -W -Z -X -W
Z = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,1,3))); // Y X Y W
@@ -251,9 +251,9 @@ public:
V2 = V2 * vs;
V3 = V3 * vs;
V1 = V1 + v1000;
V2 = V2 + v0100;
V3 = V3 + v0010;
V1 = V1 + b3v1000;
V2 = V2 + b3v0100;
V3 = V3 + b3v0010;
m_el[0] = V1;
m_el[1] = V2;
@@ -312,9 +312,9 @@ public:
void setIdentity()
{
#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
m_el[0] = v1000;
m_el[1] = v0100;
m_el[2] = v0010;
m_el[0] = b3v1000;
m_el[1] = b3v0100;
m_el[2] = b3v0010;
#else
setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
@@ -326,7 +326,7 @@ public:
{
#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
static const b3Matrix3x3
identityMatrix(v1000, v0100, v0010);
identityMatrix(b3v1000, b3v0100, b3v0010);
#else
static const b3Matrix3x3
identityMatrix(

View File

@@ -52,6 +52,7 @@ public:
#else //__CELLOS_LV2__ __SPU__
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
public:
union {
b3SimdFloat4 mVec128;
b3Scalar m_floats[4];

View File

@@ -27,14 +27,14 @@ subject to the following restrictions:
#ifdef B3_USE_SSE
const __m128 B3_ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
#endif
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
#endif
@@ -227,7 +227,7 @@ public:
A1 = A1 + A2; // AB12
mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
mVec128 = mVec128+ A1; // AB03 + AB12
#elif defined(B3_USE_NEON)
@@ -270,7 +270,7 @@ public:
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
mVec128 = A0;
@@ -338,7 +338,7 @@ public:
vd = _mm_add_ss(vd, t);
vd = _mm_sqrt_ss(vd);
vd = _mm_div_ss(vOnes, vd);
vd = _mm_div_ss(b3vOnes, vd);
vd = b3_pshufd_ps(vd, 0); // splat
mVec128 = _mm_mul_ps(mVec128, vd);
@@ -416,9 +416,9 @@ public:
b3Quaternion inverse() const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
return b3Quaternion(_mm_xor_ps(mVec128, vQInv));
return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
#elif defined(B3_USE_NEON)
return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
#else
return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
@@ -567,7 +567,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
A1 = A1 + A2; // AB12
A0 = A0 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A0 = A0 + A1; // AB03 + AB12
return b3Quaternion(A0);
@@ -612,7 +612,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
return b3Quaternion(A0);
@@ -650,7 +650,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
A3 = A3 * B3; // A3 *= B3
A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);
@@ -694,7 +694,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
@@ -733,7 +733,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
A3 = A3 * B3; // A3 *= B3
A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);
@@ -777,7 +777,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3

View File

@@ -272,9 +272,9 @@ static int b3NanMask = 0x7F800001;
#define B3_NAN (*(float*)&b3NanMask)
#endif
#ifndef B3_INFINITY
#ifndef B3_INFINITY_MASK
static int b3InfinityMask = 0x7F800000;
#define B3_INFINITY (*(float*)&b3InfinityMask)
#define B3_INFINITY_MASK (*(float*)&b3InfinityMask)
#endif
inline __m128 operator + (const __m128 A, const __m128 B)

View File

@@ -56,9 +56,9 @@ subject to the following restrictions:
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
#endif
@@ -216,7 +216,7 @@ public:
#if 0 //defined(B3_USE_SSE_IN_API)
// this code is not faster !
__m128 vs = _mm_load_ss(&s);
vs = _mm_div_ss(v1110, vs);
vs = _mm_div_ss(b3v1110, vs);
vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
mVec128 = _mm_mul_ps(mVec128, vs);
@@ -297,7 +297,7 @@ public:
#if 0
vd = _mm_sqrt_ss(vd);
vd = _mm_div_ss(v1110, vd);
vd = _mm_div_ss(b3v1110, vd);
vd = b3_splat_ps(vd, 0x80);
mVec128 = _mm_mul_ps(mVec128, vd);
#else
@@ -306,8 +306,8 @@ public:
y = _mm_rsqrt_ss(vd); // estimate
// one step NR
z = v1_5;
vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
z = b3v1_5;
vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
//x2 = vd;
vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
@@ -826,7 +826,7 @@ operator/(const b3Vector3& v, const b3Scalar& s)
#if 0 //defined(B3_USE_SSE_IN_API)
// this code is not faster !
__m128 vs = _mm_load_ss(&s);
vs = _mm_div_ss(v1110, vs);
vs = _mm_div_ss(b3v1110, vs);
vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
return b3Vector3(_mm_mul_ps(v.mVec128, vs));

View File

@@ -346,7 +346,7 @@ void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallb
}
int maxIterations = 0;
static int b3s_maxIterations = 0;
void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
@@ -387,8 +387,8 @@ void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const
curIndex += escapeIndex;
}
}
if (maxIterations < walkIterations)
maxIterations = walkIterations;
if (b3s_maxIterations < walkIterations)
b3s_maxIterations = walkIterations;
}
@@ -530,8 +530,8 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall
curIndex += escapeIndex;
}
}
if (maxIterations < walkIterations)
maxIterations = walkIterations;
if (b3s_maxIterations < walkIterations)
b3s_maxIterations = walkIterations;
}
@@ -658,8 +658,8 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*
curIndex += escapeIndex;
}
}
if (maxIterations < walkIterations)
maxIterations = walkIterations;
if (b3s_maxIterations < walkIterations)
b3s_maxIterations = walkIterations;
}
@@ -723,8 +723,8 @@ void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallb
curIndex += escapeIndex;
}
}
if (maxIterations < walkIterations)
maxIterations = walkIterations;
if (b3s_maxIterations < walkIterations)
b3s_maxIterations = walkIterations;
}

View File

@@ -899,3 +899,20 @@ void b3GpuNarrowPhase::writeAllBodiesToGpu()
}
// Reads the rigid-body buffer back from the GPU into the CPU-side mirror.
//
// Calls copyToHostPointer() on m_bodyBufferGPU, passing the address of
// element 0 of m_bodyBufferCPU as the destination and
// m_numAcceleratedRigidBodies as the second argument (presumably the number
// of elements to copy -- confirm against the GPU array class).
//
// NOTE(review): at(0) is evaluated unconditionally, so this appears to assume
// m_bodyBufferCPU already holds at least one element (and enough room for
// m_numAcceleratedRigidBodies entries) -- verify where the buffer is sized.
void b3GpuNarrowPhase::readbackAllBodiesToCpu()
{
m_data->m_bodyBufferGPU->copyToHostPointer(&m_data->m_bodyBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies);
}
// Copies the transform of one rigid body out of the CPU-side body buffer.
//
// position    - output; receives 4 floats: m_pos.x, m_pos.y, m_pos.z and a
//               constant 1 in the fourth slot. Must point to at least 4 floats.
// orientation - output; receives 4 floats: m_quat.x, m_quat.y, m_quat.z,
//               m_quat.w. Must point to at least 4 floats.
// bodyIndex   - index into m_bodyBufferCPU of the body to read.
//
// Only m_bodyBufferCPU is read here; nothing is fetched from the GPU by this
// function. Presumably the caller is expected to refresh the CPU buffer
// (e.g. via readbackAllBodiesToCpu) beforehand -- confirm against callers.
void b3GpuNarrowPhase::getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const
{
// Position: x, y, z from the stored body position.
position[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.x;
position[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.y;
position[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.z;
// The fourth position component is always written as 1, not read from m_pos.
position[3] = 1.f;//or 1
// Orientation: the stored quaternion, written in x, y, z, w order.
orientation[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.x;
orientation[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.y;
orientation[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.z;
orientation[3] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.w;
}

View File

@@ -37,7 +37,7 @@ bool dumpContactStats = false;
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , class b3DynamicBvhBroadphase* broadphaseDbvt)
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt)
{
m_data = new b3GpuRigidBodyPipelineInternalData;
m_data->m_context = ctx;

View File

@@ -13,7 +13,7 @@ protected:
public:
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, class b3DynamicBvhBroadphase* broadphaseDbvt);
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt);
virtual ~b3GpuRigidBodyPipeline();
void stepSimulation(float deltaTime);

View File

@@ -30,7 +30,7 @@ struct b3GpuRigidBodyPipelineInternalData
class b3GpuSapBroadphase* m_broadphaseSap;
class b3DynamicBvhBroadphase* m_broadphaseDbvt;
struct b3DynamicBvhBroadphase* m_broadphaseDbvt;
b3OpenCLArray<b3SapAabb>* m_allAabbsGPU;
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
b3OpenCLArray<b3BroadphasePair>* m_overlappingPairsGPU;