resolve some naming conflicts between Bullet 3.x and Bullet 2.x
add BasicGpuDemo that bridges Bullet 2.x and Bullet 3.x (still preliminary/crashing/unstable)
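The pattern throughout the diff below is mechanical: file-scope names that Bullet 3.x still shared with Bullet 2.x (SIMD helper constants such as vMPPP, vOnes and v1110, the maxIterations debug counter, the B3_INFINITY macro) are renamed, mostly by adding a b3/b3s prefix, so that both libraries can be compiled and linked into one application, which is exactly what the new BasicGpuDemo needs. A minimal sketch of the idea; the Bullet 2.x line is reproduced from memory for illustration only and is not part of this commit:

// Bullet 2.x (btMatrix3x3.h) keeps its own constant:
const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};

// Bullet 3.x after this commit uses a prefixed name, so a demo that pulls in
// headers from both libraries no longer sees two definitions of the same symbol:
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};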
@@ -21,14 +21,14 @@ subject to the following restrictions:
 #include <stdio.h>
 
 #ifdef B3_USE_SSE
-//const __m128 B3_ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
+//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
 #endif
 
 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
 #endif
 
 #ifdef B3_USE_DOUBLE_PRECISION
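The ±0.0f initializers above are sign-bit masks: xor-ing a float with -0.0f flips only its sign bit, so b3vMPPP negates lane 0 of an __m128 and leaves the other lanes untouched, which is what the "change the sign of the first element" comments in the following hunks rely on. A standalone sketch of the trick (assumes SSE; not part of the diff):

#include <xmmintrin.h>
#include <cstdio>

int main()
{
    const __m128 signMask = _mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f); // lane 0 carries the sign bit, like b3vMPPP
    __m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);                  // lanes 0..3 hold 1 2 3 4
    v = _mm_xor_ps(v, signMask);                                    // only lane 0 is negated
    float out[4];
    _mm_storeu_ps(out, v);
    std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);   // prints -1 2 3 4
    return 0;
}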
@@ -219,7 +219,7 @@ public:
 V1 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,2,3))); // Y X Z W
 V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0,0,1,3)); // -X -X Y W
 V3 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(2,1,0,3))); // Z Y X W
-V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
+V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element
 
 V11 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,1,0,3))); // Y Y X W
 V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
@@ -231,9 +231,9 @@ public:
 
 V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2,3,1,3)); // -Z -W Y W
 V11 = V11 * V21; //
-V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
+V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element
 V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3,3,1,3)); // W W -Y -W
-V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
+V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element
 Y = b3CastiTo128f(_mm_shuffle_epi32 (NQi, B3_SHUFFLE(3,2,0,3))); // -W -Z -X -W
 Z = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,1,3))); // Y X Y W
 
@@ -251,9 +251,9 @@ public:
 V2 = V2 * vs;
 V3 = V3 * vs;
 
-V1 = V1 + v1000;
-V2 = V2 + v0100;
-V3 = V3 + v0010;
+V1 = V1 + b3v1000;
+V2 = V2 + b3v0100;
+V3 = V3 + b3v0010;
 
 m_el[0] = V1;
 m_el[1] = V2;
@@ -312,9 +312,9 @@ public:
 void setIdentity()
 {
 #if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
-m_el[0] = v1000;
-m_el[1] = v0100;
-m_el[2] = v0010;
+m_el[0] = b3v1000;
+m_el[1] = b3v0100;
+m_el[2] = b3v0010;
 #else
 setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
 b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
@@ -326,7 +326,7 @@ public:
 {
 #if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
 static const b3Matrix3x3
-identityMatrix(v1000, v0100, v0010);
+identityMatrix(b3v1000, b3v0100, b3v0010);
 #else
 static const b3Matrix3x3
 identityMatrix(
@@ -52,6 +52,7 @@ public:
 #else //__CELLOS_LV2__ __SPU__
 
 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
 public:
 union {
 b3SimdFloat4 mVec128;
 b3Scalar m_floats[4];
@@ -27,14 +27,14 @@ subject to the following restrictions:
 
 #ifdef B3_USE_SSE
 
-const __m128 B3_ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
 
 #endif
 
 #if defined(B3_USE_SSE) || defined(B3_USE_NEON)
 
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
-const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
 
 #endif
 
@@ -227,7 +227,7 @@ public:
 
 A1 = A1 + A2; // AB12
 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
-A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
 mVec128 = mVec128+ A1; // AB03 + AB12
 
 #elif defined(B3_USE_NEON)
@@ -270,7 +270,7 @@ public:
 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
 
 // change the sign of the last element
-A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 A0 = vaddq_f32(A0, A1); // AB03 + AB12
 
 mVec128 = A0;
@@ -338,7 +338,7 @@ public:
 vd = _mm_add_ss(vd, t);
 
 vd = _mm_sqrt_ss(vd);
-vd = _mm_div_ss(vOnes, vd);
+vd = _mm_div_ss(b3vOnes, vd);
 vd = b3_pshufd_ps(vd, 0); // splat
 mVec128 = _mm_mul_ps(mVec128, vd);
 
@@ -416,9 +416,9 @@ public:
 b3Quaternion inverse() const
 {
 #if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-return b3Quaternion(_mm_xor_ps(mVec128, vQInv));
+return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
 #elif defined(B3_USE_NEON)
-return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
+return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
 #else
 return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
 #endif
@@ -567,7 +567,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
 A1 = A1 + A2; // AB12
 A0 = A0 - B1; // AB03 = AB0 - AB3
 
-A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
 A0 = A0 + A1; // AB03 + AB12
 
 return b3Quaternion(A0);
@@ -612,7 +612,7 @@ operator*(const b3Quaternion& q1, const b3Quaternion& q2)
 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
 
 // change the sign of the last element
-A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 A0 = vaddq_f32(A0, A1); // AB03 + AB12
 
 return b3Quaternion(A0);
@@ -650,7 +650,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
 A3 = A3 * B3; // A3 *= B3
 
 A1 = A1 + A2; // AB12
-A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
 A1 = A1 - A3; // AB123 = AB12 - AB3
 
 return b3Quaternion(A1);
@@ -694,7 +694,7 @@ operator*(const b3Quaternion& q, const b3Vector3& w)
 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
 
 // change the sign of the last element
-A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 
 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
 
@@ -733,7 +733,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
 A3 = A3 * B3; // A3 *= B3
 
 A1 = A1 + A2; // AB12
-A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
 A1 = A1 - A3; // AB123 = AB12 - AB3
 
 return b3Quaternion(A1);
@@ -777,7 +777,7 @@ operator*(const b3Vector3& w, const b3Quaternion& q)
 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
 
 // change the sign of the last element
-A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
 
 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
 
@@ -272,9 +272,9 @@ static int b3NanMask = 0x7F800001;
 #define B3_NAN (*(float*)&b3NanMask)
 #endif
 
-#ifndef B3_INFINITY
+#ifndef B3_INFINITY_MASK
 static int b3InfinityMask = 0x7F800000;
-#define B3_INFINITY (*(float*)&b3InfinityMask)
+#define B3_INFINITY_MASK (*(float*)&b3InfinityMask)
 #endif
 
 inline __m128 operator + (const __m128 A, const __m128 B)
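The hunk above only renames the guard and the macro; the value is unchanged, and 0x7F800000 is the IEEE-754 bit pattern of single-precision +infinity (all exponent bits set, mantissa zero). A standalone check of that fact (not part of the diff; memcpy is used instead of the *(float*)& cast to keep the type pun well defined):

#include <cmath>
#include <cstdio>
#include <cstring>

int main()
{
    const unsigned int infinityMask = 0x7F800000u;
    float f;
    std::memcpy(&f, &infinityMask, sizeof f);               // reinterpret the bits as a float
    std::printf("%f isinf=%d\n", f, std::isinf(f) ? 1 : 0); // prints "inf isinf=1"
    return 0;
}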
@@ -56,9 +56,9 @@ subject to the following restrictions:
 
 
 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
-const __m128 B3_ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
 
 #endif
 
@@ -216,7 +216,7 @@ public:
 #if 0 //defined(B3_USE_SSE_IN_API)
 // this code is not faster !
 __m128 vs = _mm_load_ss(&s);
-vs = _mm_div_ss(v1110, vs);
+vs = _mm_div_ss(b3v1110, vs);
 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
 
 mVec128 = _mm_mul_ps(mVec128, vs);
@@ -297,7 +297,7 @@ public:
 
 #if 0
 vd = _mm_sqrt_ss(vd);
-vd = _mm_div_ss(v1110, vd);
+vd = _mm_div_ss(b3v1110, vd);
 vd = b3_splat_ps(vd, 0x80);
 mVec128 = _mm_mul_ps(mVec128, vd);
 #else
@@ -306,8 +306,8 @@ public:
 y = _mm_rsqrt_ss(vd); // estimate
 
 // one step NR
-z = v1_5;
-vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
+z = b3v1_5;
+vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
 //x2 = vd;
 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
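The renamed b3vHalf and b3v1_5 constants feed the "one step NR" code above: the hardware _mm_rsqrt_ss estimate y0 is refined with one Newton-Raphson iteration, y1 = y0 * (1.5 - 0.5 * x * y0 * y0), which roughly doubles the number of correct bits of 1/sqrt(x). A standalone sketch of the same step (the helper name is assumed; not part of the diff):

#include <xmmintrin.h>

static inline float refinedRsqrt(float x) // hypothetical helper mirroring the code above
{
    __m128 vx = _mm_set_ss(x);
    __m128 y0 = _mm_rsqrt_ss(vx);                                    // ~12-bit estimate
    __m128 halfXy0y0 = _mm_mul_ss(_mm_mul_ss(_mm_set_ss(0.5f), vx),  // 0.5 * x
                                  _mm_mul_ss(y0, y0));               // ... * y0 * y0
    __m128 y1 = _mm_mul_ss(y0, _mm_sub_ss(_mm_set_ss(1.5f), halfXy0y0));
    return _mm_cvtss_f32(y1);                                        // y0 * (1.5 - 0.5*x*y0^2)
}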
@@ -826,7 +826,7 @@ operator/(const b3Vector3& v, const b3Scalar& s)
 #if 0 //defined(B3_USE_SSE_IN_API)
 // this code is not faster !
 __m128 vs = _mm_load_ss(&s);
-vs = _mm_div_ss(v1110, vs);
+vs = _mm_div_ss(b3v1110, vs);
 vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
 
 return b3Vector3(_mm_mul_ps(v.mVec128, vs));
@@ -346,7 +346,7 @@ void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallb
 }
 
 
-int maxIterations = 0;
+static int b3s_maxIterations = 0;
 
 
 void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const
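Besides the prefix, the counter also becomes static, which gives it internal linkage; Bullet 2.x's btQuantizedBvh.cpp defines (if memory serves) an identically named non-static maxIterations global, and two externally visible definitions of the same symbol would clash once both libraries are linked into BasicGpuDemo. In short:

int maxIterations = 0;            // old: external linkage, same symbol name as the Bullet 2.x global
static int b3s_maxIterations = 0; // new: prefixed and private to b3QuantizedBvh.cpp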
@@ -387,8 +387,8 @@ void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const
 curIndex += escapeIndex;
 }
 }
-if (maxIterations < walkIterations)
-maxIterations = walkIterations;
+if (b3s_maxIterations < walkIterations)
+b3s_maxIterations = walkIterations;
 
 }
 
@@ -530,8 +530,8 @@ void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall
 curIndex += escapeIndex;
 }
 }
-if (maxIterations < walkIterations)
-maxIterations = walkIterations;
+if (b3s_maxIterations < walkIterations)
+b3s_maxIterations = walkIterations;
 
 }
 
@@ -658,8 +658,8 @@ void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*
 curIndex += escapeIndex;
 }
 }
-if (maxIterations < walkIterations)
-maxIterations = walkIterations;
+if (b3s_maxIterations < walkIterations)
+b3s_maxIterations = walkIterations;
 
 }
 
@@ -723,8 +723,8 @@ void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallb
 curIndex += escapeIndex;
 }
 }
-if (maxIterations < walkIterations)
-maxIterations = walkIterations;
+if (b3s_maxIterations < walkIterations)
+b3s_maxIterations = walkIterations;
 
 }
 
@@ -899,3 +899,20 @@ void b3GpuNarrowPhase::writeAllBodiesToGpu()
 
 
 }
+
+void b3GpuNarrowPhase::readbackAllBodiesToCpu()
+{
+m_data->m_bodyBufferGPU->copyToHostPointer(&m_data->m_bodyBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies);
+}
+void b3GpuNarrowPhase::getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const
+{
+position[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.x;
+position[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.y;
+position[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.z;
+position[3] = 1.f;//or 1
+
+orientation[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.x;
+orientation[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.y;
+orientation[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.z;
+orientation[3] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.w;
+}
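These two new helpers are the readback half of the 2.x/3.x bridge: copy the GPU rigid-body buffer to the CPU once per frame, then query per-body position and orientation. A hypothetical usage sketch for a demo like BasicGpuDemo (the include paths, the function name and the way transforms are consumed are assumptions, not code from this commit):

#include "LinearMath/btTransform.h"        // Bullet 2.x math types
#include "b3GpuNarrowPhase.h"              // Bullet 3.x narrowphase (path assumed)

void syncGpuBodiesToBullet2(b3GpuNarrowPhase* narrowPhase, btTransform* out, int numBodies)
{
    narrowPhase->readbackAllBodiesToCpu(); // one GPU->CPU copy per frame
    for (int i = 0; i < numBodies; i++)
    {
        float pos[4];
        float orn[4];
        narrowPhase->getObjectTransformFromCpu(pos, orn, i);
        out[i].setOrigin(btVector3(pos[0], pos[1], pos[2]));
        out[i].setRotation(btQuaternion(orn[0], orn[1], orn[2], orn[3]));
    }
}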
@@ -37,7 +37,7 @@ bool dumpContactStats = false;
 
 
 
-b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , class b3DynamicBvhBroadphase* broadphaseDbvt)
+b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt)
 {
 m_data = new b3GpuRigidBodyPipelineInternalData;
 m_data->m_context = ctx;
@@ -13,7 +13,7 @@ protected:
 public:
 
 
-b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, class b3DynamicBvhBroadphase* broadphaseDbvt);
+b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt);
 virtual ~b3GpuRigidBodyPipeline();
 
 void stepSimulation(float deltaTime);
@@ -30,7 +30,7 @@ struct b3GpuRigidBodyPipelineInternalData
 
 class b3GpuSapBroadphase* m_broadphaseSap;
 
-class b3DynamicBvhBroadphase* m_broadphaseDbvt;
+struct b3DynamicBvhBroadphase* m_broadphaseDbvt;
 b3OpenCLArray<b3SapAabb>* m_allAabbsGPU;
 b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
 b3OpenCLArray<b3BroadphasePair>* m_overlappingPairsGPU;
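The class-to-struct edits in the last three hunks make every forward declaration of b3DynamicBvhBroadphase agree with its definition, which (judging by this fix) uses the struct keyword. Mismatched tags are legal C++, but some compilers warn about them (e.g. MSVC C4099), so keeping the elaborated type specifiers consistent avoids the noise. A minimal illustration (the function name is hypothetical; not part of the diff):

struct b3DynamicBvhBroadphase;                                         // matches the definition's keyword
void connectBroadphase(struct b3DynamicBvhBroadphase* broadphaseDbvt); // consistent tag, no C4099-style warning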