Some rayTest improvements in btDbvt::rayTestInternal: it avoids/reduces memory allocations for the traversal stack (by sharing a persistent m_stack)

and rayTestInternal re-uses precomputed invRayDirection/signs.
Also did some performance comparisons with a different ray-AABB test, from 
http://jgt.akpeters.com/papers/EisemannEtAl07/

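In short: it is faster, but it is not clear how to cull ray segments using ray slopes: when a ray starts inside the AABB, we get a negative t-value, but a negative t-value is also returned in cases where the ray origin is outside the AABB (false hits), so a negative t alone cannot be used for culling...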
This commit is contained in:
erwin.coumans
2008-10-16 20:00:47 +00:00
parent 60ce828419
commit 675c45f42d
4 changed files with 203 additions and 27 deletions

View File

@@ -21,10 +21,23 @@ subject to the following restrictions:
struct btDispatcherInfo; struct btDispatcherInfo;
class btDispatcher; class btDispatcher;
#include "btBroadphaseProxy.h" #include "btBroadphaseProxy.h"
#include "LinearMath/btAabbUtil2.h"//for fast ray-slope algorithm
class btOverlappingPairCache; class btOverlappingPairCache;
struct btBroadphaseRayCallback struct btBroadphaseRayCallback
{ {
///added some cached data to accelerate ray-AABB tests
///m_ray is used to accelerate ray-AABB tests, see btDbvt.h, when TEST_RAY_SLOPES is enabled in LinearMath/btAabbUtil2.h
btRaySlope m_ray;
///otherwise this data is used to accelerate ray-AABB tests
btVector3 m_rayDirectionInverse;
unsigned int m_signs[3];
btScalar m_lambda_max;
virtual ~btBroadphaseRayCallback() {} virtual ~btBroadphaseRayCallback() {}
virtual bool process(const btBroadphaseProxy* proxy) = 0; virtual bool process(const btBroadphaseProxy* proxy) = 0;
}; };

View File

@@ -263,6 +263,9 @@ struct btDbvt
int m_lkhd; int m_lkhd;
int m_leaves; int m_leaves;
unsigned m_opath; unsigned m_opath;
mutable btAlignedObjectArray<const btDbvtNode*> m_stack;
// Methods // Methods
btDbvt(); btDbvt();
~btDbvt(); ~btDbvt();
@@ -314,11 +317,25 @@ struct btDbvt
static void collideTV( const btDbvtNode* root, static void collideTV( const btDbvtNode* root,
const btDbvtVolume& volume, const btDbvtVolume& volume,
DBVT_IPOLICY); DBVT_IPOLICY);
///rayTest is a re-entrant ray test, and can be called in parallel as long as the btAlignedAlloc is thread-safe (uses locking etc)
///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time
DBVT_PREFIX DBVT_PREFIX
static void rayTest( const btDbvtNode* root, static void rayTest( const btDbvtNode* root,
const btVector3& rayFrom, const btVector3& rayFrom,
const btVector3& rayTo, const btVector3& rayTo,
DBVT_IPOLICY); DBVT_IPOLICY);
///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections
///rayTestInternal is used by btDbvtBroadphase to accelerate world ray casts
DBVT_PREFIX
void rayTestInternal( const btDbvtNode* root,
const btVector3& rayFrom,
const btVector3& rayTo,
const btVector3& rayDirectionInverse,
unsigned int signs[3],
btScalar lambda_max,
const btRaySlope& raySlope,
DBVT_IPOLICY) const;
DBVT_PREFIX DBVT_PREFIX
static void collideKDOP(const btDbvtNode* root, static void collideKDOP(const btDbvtNode* root,
const btVector3* normals, const btVector3* normals,
@@ -848,6 +865,110 @@ inline void btDbvt::collideTV( const btDbvtNode* root,
} }
} }
/// Iterative ray query over the tree rooted at `root`, using the member array m_stack as the traversal stack.
///
/// Every node popped from the stack has its volume tested against the ray. On a hit, an internal node
/// pushes both of its children, and a leaf node is handed to policy.Process(node). A null `root` is a no-op.
///
/// Which of the precomputed inputs is actually read depends on the build configuration:
///  - without TEST_RAY_SLOPES: rayFrom, rayDirectionInverse, signs and lambda_max are passed to btRayAabb2;
///  - with TEST_RAY_SLOPES: raySlope is passed to btRaySlopeAabb (rayFrom is only used for the "origin inside AABB" check);
///  - rayTo is only read by the optional COMPARE_BTRAY_AABB2 cross-check.
///
/// NOTE: although the method is const, it writes to the mutable member m_stack, so two overlapping calls on the
/// same btDbvt instance (from another thread, or nested from within policy.Process) would share that array.
DBVT_PREFIX
inline void btDbvt::rayTestInternal( const btDbvtNode* root,
const btVector3& rayFrom,
const btVector3& rayTo,
const btVector3& rayDirectionInverse,
unsigned int signs[3],
btScalar lambda_max,
const btRaySlope& raySlope,
DBVT_IPOLICY) const
{
DBVT_CHECKTYPE
if(root)
{
// resultNormal is only written by the COMPARE_BTRAY_AABB2 cross-check below
btVector3 resultNormal;
// depth = number of pending entries in m_stack; entry 0 is the root
int depth=1;
// highest depth at which two more children can still be pushed without growing m_stack
int treshold=DOUBLE_STACKSIZE-2;
// reset the shared stack to its initial logical size for this query
// (presumably the array keeps any larger capacity from earlier queries - confirm in btAlignedObjectArray)
m_stack.resize(DOUBLE_STACKSIZE);
m_stack[0]=root;
btVector3 bounds[2];
do
{
// pop the next node to test
const btDbvtNode* node=m_stack[--depth];
//m_stack.pop_back();
// bounds[0]/bounds[1] = AABB min/max, in the layout btRayAabb2 takes
bounds[0] = node->volume.Mins();
bounds[1] = node->volume.Maxs();
btScalar tmin=1.f,lambda_min=0.f;
unsigned int result1=false;
/// A comparison test for ray-AABB test:
/// "Fast Ray/Axis-Aligned Bounding Box Overlap Tests using Ray Slopes"
/// http://jgt.akpeters.com/papers/EisemannEtAl07/
/// The algorithm seems indeed a bit faster, but the code complexity doesn't make it attractive for future optimizations.
///
/// Enable/disable #define TEST_RAY_SLOPES in btBroadphaseInterface.h
/// NOTE(review): the comment on btBroadphaseRayCallback::m_ray places this define in LinearMath/btAabbUtil2.h instead - confirm which header is correct.
///
#ifndef TEST_RAY_SLOPES
// default path: slab-style test using the precomputed inverse direction and per-axis sign bits
result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
#else
// ray-slope path: t receives the hit parameter reported by btRaySlopeAabb
btScalar t=1.f;
btAaboxSlope aabbSlope;
aabbSlope.x0 = node->volume.Mins().getX();
aabbSlope.y0 = node->volume.Mins().getY();
aabbSlope.z0 = node->volume.Mins().getZ();
aabbSlope.x1 = node->volume.Maxs().getX();
aabbSlope.y1 = node->volume.Maxs().getY();
aabbSlope.z1 = node->volume.Maxs().getZ();
//ray starts or ends within AABB
// if (!result1)
// if (TestPointAgainstAabb2(node->volume.Mins(),node->volume.Maxs(),rayTo))
// result1=true;
// if (!result1)
result1 = btRaySlopeAabb(&raySlope,&aabbSlope,&t);
if (result1)
{
//if fromRay is inside the AABB, t can still be negative, so we need an additional check.
// 'continue' inside a do/while jumps to the while(depth) condition, i.e. this node is dropped
// and the next pending node (if any) is processed.
// presumably t==1 corresponds to rayTo (the slope is built from the unnormalized from->to vector) - TODO confirm
if (t>1.0)
continue;
if (t<0.)
{
// a negative t is only accepted as a hit when the ray origin lies inside this AABB
if (!TestPointAgainstAabb2(node->volume.Mins(),node->volume.Maxs(),rayFrom))
continue;
}
}
#endif //TEST_RAY_SLOPES
#ifdef COMPARE_BTRAY_AABB2//slower version using in/outcodes
// optional debug cross-check: both ray-AABB implementations must agree on hit/miss
btScalar param=1.f;
bool result2 = btRayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal);
btAssert(result1 == result2);
#endif //COMPARE_BTRAY_AABB2
if(result1)
{
if(node->isinternal())
{
// grow the stack (doubling) when there is no room left for two more entries
if(depth>treshold)
{
m_stack.resize(m_stack.size()*2);
treshold=m_stack.size()-2;
}
// childs[1] is pushed last, so it is the next node to be popped
m_stack[depth++]=node->childs[0];
m_stack[depth++]=node->childs[1];
}
else
{
// leaf whose volume is hit by the ray: report it to the caller-supplied policy
policy.Process(node);
}
}
} while(depth);
}
}
// //
DBVT_PREFIX DBVT_PREFIX
@@ -869,41 +990,53 @@ inline void btDbvt::rayTest( const btDbvtNode* root,
rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2]; rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
unsigned int signs[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; unsigned int signs[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};
btScalar lambda_max = rayDir.dot(rayTo-rayFrom);
btVector3 resultNormal; btVector3 resultNormal;
btAlignedObjectArray<const btDbvtNode*> stack; btAlignedObjectArray<const btDbvtNode*> stack;
stack.reserve(SIMPLE_STACKSIZE);
stack.push_back(root); int depth=1;
int treshold=DOUBLE_STACKSIZE-2;
stack.resize(DOUBLE_STACKSIZE);
stack[0]=root;
btVector3 bounds[2];
do { do {
const btDbvtNode* node=stack[stack.size()-1]; const btDbvtNode* node=stack[--depth];
stack.pop_back(); //m_stack.pop_back();
bounds[0] = node->volume.Mins();
bounds[1] = node->volume.Maxs();
btVector3 bounds[2] = {node->volume.Mins(),node->volume.Maxs()};
btScalar lambda_max = rayDir.dot(rayTo-rayFrom);
btScalar tmin=1.f,lambda_min=0.f; btScalar tmin=1.f,lambda_min=0.f;
bool result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max); unsigned int result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
#ifdef COMPARE_BTRAY_AABB2 #ifdef COMPARE_BTRAY_AABB2
btScalar param=1.f; btScalar param=1.f;
bool result2 = btRayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal); bool result2 = btRayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal);
btAssert(result1 == result2); btAssert(result1 == result2);
#endif //TEST_BTRAY_AABB2 #endif //TEST_BTRAY_AABB2
if(result1) if(result1)
{ {
if(node->isinternal()) if(node->isinternal())
{ {
stack.push_back(node->childs[0]); if(depth>treshold)
stack.push_back(node->childs[1]); {
stack.resize(stack.size()*2);
treshold=stack.size()-2;
}
stack[depth++]=node->childs[0];
stack[depth++]=node->childs[1];
} }
else else
{ {
policy.Process(node); policy.Process(node);
} }
} }
} while(stack.size()); } while(depth);
} }
} }

View File

@@ -229,14 +229,22 @@ void btDbvtBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo,
BroadphaseRayTester callback(rayCallback); BroadphaseRayTester callback(rayCallback);
m_sets[0].rayTest( m_sets[0].m_root, m_sets[0].rayTestInternal( m_sets[0].m_root,
rayFrom, rayFrom,
rayTo, rayTo,
rayCallback.m_rayDirectionInverse,
rayCallback.m_signs,
rayCallback.m_lambda_max,
rayCallback.m_ray,
callback); callback);
m_sets[1].rayTest( m_sets[1].m_root, m_sets[1].rayTestInternal( m_sets[1].m_root,
rayFrom, rayFrom,
rayTo, rayTo,
rayCallback.m_rayDirectionInverse,
rayCallback.m_signs,
rayCallback.m_lambda_max,
rayCallback.m_ray,
callback); callback);
} }

View File

@@ -414,6 +414,7 @@ void btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
{ {
if (collisionShape->isConvex()) if (collisionShape->isConvex())
{ {
btConvexCast::CastResult castResult; btConvexCast::CastResult castResult;
castResult.m_allowedPenetration = allowedPenetration; castResult.m_allowedPenetration = allowedPenetration;
castResult.m_fraction = btScalar(1.);//?? castResult.m_fraction = btScalar(1.);//??
@@ -608,6 +609,10 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
btVector3 m_rayFromWorld; btVector3 m_rayFromWorld;
btVector3 m_rayToWorld; btVector3 m_rayToWorld;
btTransform m_rayFromTrans;
btTransform m_rayToTrans;
btVector3 m_hitNormal;
const btCollisionWorld* m_world; const btCollisionWorld* m_world;
btCollisionWorld::RayResultCallback& m_resultCallback; btCollisionWorld::RayResultCallback& m_resultCallback;
@@ -617,9 +622,32 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
m_world(world), m_world(world),
m_resultCallback(resultCallback) m_resultCallback(resultCallback)
{ {
m_rayFromTrans.setIdentity();
m_rayFromTrans.setOrigin(m_rayFromWorld);
m_rayToTrans.setIdentity();
m_rayToTrans.setOrigin(m_rayToWorld);
btVector3 rayDir = (rayToWorld-rayFromWorld);
#ifdef TEST_RAY_SLOPES
btMakeRaySlope(rayFromWorld.getX(),rayFromWorld.getY(),rayFromWorld.getZ(),rayDir.getX(),rayDir.getY(),rayDir.getZ(),&m_ray);
#else
rayDir.normalize ();
///what about division by zero? --> just set m_rayDirectionInverse[i] to INF/1e30
m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
m_signs[0] = m_rayDirectionInverse[0] < 0.0;
m_signs[1] = m_rayDirectionInverse[1] < 0.0;
m_signs[2] = m_rayDirectionInverse[2] < 0.0;
#endif
m_lambda_max = rayDir.dot(m_rayToWorld-m_rayFromWorld);
} }
virtual bool process(const btBroadphaseProxy* proxy) virtual bool process(const btBroadphaseProxy* proxy)
{ {
///terminate further ray tests, once the closestHitFraction reached zero ///terminate further ray tests, once the closestHitFraction reached zero
@@ -639,20 +667,14 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax); collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
#else #else
//getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax); //getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax);
btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin; const btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin;
btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax; const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
#endif #endif
btScalar hitLambda = m_resultCallback.m_closestHitFraction; //btScalar hitLambda = m_resultCallback.m_closestHitFraction;
btVector3 hitNormal; //culling already done by broadphase
if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,hitNormal)) //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
{ {
btTransform rayFromTrans,rayToTrans; m_world->rayTestSingle(m_rayFromTrans,m_rayToTrans,
rayFromTrans.setIdentity();
rayFromTrans.setOrigin(m_rayFromWorld);
rayToTrans.setIdentity();
rayToTrans.setOrigin(m_rayToWorld);
m_world->rayTestSingle(rayFromTrans,rayToTrans,
collisionObject, collisionObject,
collisionObject->getCollisionShape(), collisionObject->getCollisionShape(),
collisionObject->getWorldTransform(), collisionObject->getWorldTransform(),