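Improved Dbvt speed slightly: added an SSE path for the AABB Intersect test (WIN32 builds), changed Expand/SignedExpand to take their vector by const reference, and marked isleaf/isinternal DBVT_INLINE. Also removed the proximity benchmark, renumbering the select benchmark to [17].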

This commit is contained in:
id0x1234
2008-09-11 11:23:55 +00:00
parent 8b3270f22f
commit 8c6788ad6a
2 changed files with 40 additions and 58 deletions

View File

@@ -679,8 +679,7 @@ Benchmarking dbvt...
[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s) [14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s) [15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s) [16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
[17] btDbvtVolume proximity: 2887 ms (1%) [17] btDbvtVolume select: 3419 ms (0%)
[18] btDbvtVolume select: 3419 ms (0%)
*/ */
struct btDbvtBenchmark struct btDbvtBenchmark
@@ -782,7 +781,7 @@ static const btScalar cfgVolumeCenterScale = 100;
static const btScalar cfgVolumeExentsBase = 1; static const btScalar cfgVolumeExentsBase = 1;
static const btScalar cfgVolumeExentsScale = 4; static const btScalar cfgVolumeExentsScale = 4;
static const int cfgLeaves = 8192; static const int cfgLeaves = 8192;
static const bool cfgEnable = false; static const bool cfgEnable = true;
//[1] btDbvtVolume intersections //[1] btDbvtVolume intersections
bool cfgBenchmark1_Enable = cfgEnable; bool cfgBenchmark1_Enable = cfgEnable;
@@ -857,14 +856,10 @@ bool cfgBenchmark16_Enable = cfgEnable;
static const int cfgBenchmark16_BatchCount = 256; static const int cfgBenchmark16_BatchCount = 256;
static const int cfgBenchmark16_Passes = 16384; static const int cfgBenchmark16_Passes = 16384;
static const int cfgBenchmark16_Reference = 5138; static const int cfgBenchmark16_Reference = 5138;
//[17] proximity //[17] select
bool cfgBenchmark17_Enable = cfgEnable; bool cfgBenchmark17_Enable = cfgEnable;
static const int cfgBenchmark17_Iterations = 8; static const int cfgBenchmark17_Iterations = 4;
static const int cfgBenchmark17_Reference = 2842; static const int cfgBenchmark17_Reference = 3390;
//[18] select
bool cfgBenchmark18_Enable = true;
static const int cfgBenchmark18_Iterations = 4;
static const int cfgBenchmark18_Reference = 3390;
btClock wallclock; btClock wallclock;
printf("Benchmarking dbvt...\r\n"); printf("Benchmarking dbvt...\r\n");
@@ -1259,32 +1254,6 @@ if(cfgBenchmark17_Enable)
{// Benchmark 17 {// Benchmark 17
srand(380843); srand(380843);
btAlignedObjectArray<btDbvtVolume> volumes; btAlignedObjectArray<btDbvtVolume> volumes;
btAlignedObjectArray<btScalar> results;
volumes.resize(cfgLeaves);
results.resize(cfgLeaves);
for(int i=0;i<cfgLeaves;++i)
{
volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
}
printf("[17] btDbvtVolume proximity: ");
wallclock.reset();
for(int i=0;i<cfgBenchmark17_Iterations;++i)
{
for(int j=0;j<cfgLeaves;++j)
{
for(int k=0;k<cfgLeaves;++k)
{
results[k]=Proximity(volumes[j],volumes[k]);
}
}
}
const int time=(int)wallclock.getTimeMilliseconds();
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
}
if(cfgBenchmark18_Enable)
{// Benchmark 18
srand(380843);
btAlignedObjectArray<btDbvtVolume> volumes;
btAlignedObjectArray<int> results; btAlignedObjectArray<int> results;
btAlignedObjectArray<int> indices; btAlignedObjectArray<int> indices;
volumes.resize(cfgLeaves); volumes.resize(cfgLeaves);
@@ -1299,9 +1268,9 @@ if(cfgBenchmark18_Enable)
{ {
btSwap(indices[i],indices[rand()%cfgLeaves]); btSwap(indices[i],indices[rand()%cfgLeaves]);
} }
printf("[18] btDbvtVolume select: "); printf("[17] btDbvtVolume select: ");
wallclock.reset(); wallclock.reset();
for(int i=0;i<cfgBenchmark18_Iterations;++i) for(int i=0;i<cfgBenchmark17_Iterations;++i)
{ {
for(int j=0;j<cfgLeaves;++j) for(int j=0;j<cfgLeaves;++j)
{ {
@@ -1313,7 +1282,7 @@ if(cfgBenchmark18_Enable)
} }
} }
const int time=(int)wallclock.getTimeMilliseconds(); const int time=(int)wallclock.getTimeMilliseconds();
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark18_Reference)*100/time); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
} }
printf("\r\n\r\n"); printf("\r\n\r\n");
} }

View File

@@ -64,13 +64,17 @@ subject to the following restrictions:
#ifdef WIN32 #ifdef WIN32
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE #define DBVT_SELECT_IMPL DBVT_IMPL_SSE
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE #define DBVT_MERGE_IMPL DBVT_IMPL_SSE
#define DBVT_INT0_IMPL DBVT_IMPL_SSE
#else #else
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC #define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC #define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
#define DBVT_INT0_IMPL DBVT_IMPL_GENERIC
#endif #endif
#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)||(DBVT_MERGE_IMPL==DBVT_IMPL_SSE) #if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \
#include <xmmintrin.h> (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \
(DBVT_INT0_IMPL==DBVT_IMPL_SSE)
#include <emmintrin.h>
#endif #endif
// //
@@ -118,6 +122,10 @@ subject to the following restrictions:
#error "DBVT_MERGE_IMPL undefined" #error "DBVT_MERGE_IMPL undefined"
#endif #endif
#ifndef DBVT_INT0_IMPL
#error "DBVT_INT0_IMPL undefined"
#endif
// //
// Defaults volumes // Defaults volumes
// //
@@ -135,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx); static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n); static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n); static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
DBVT_INLINE void Expand(const btVector3 e); DBVT_INLINE void Expand(const btVector3& e);
DBVT_INLINE void SignedExpand(const btVector3 e); DBVT_INLINE void SignedExpand(const btVector3& e);
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const; DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const; DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const; DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
@@ -175,8 +183,8 @@ struct btDbvtNode
{ {
btDbvtVolume volume; btDbvtVolume volume;
btDbvtNode* parent; btDbvtNode* parent;
bool isleaf() const { return(childs[1]==0); } DBVT_INLINE bool isleaf() const { return(childs[1]==0); }
bool isinternal() const { return(!isleaf()); } DBVT_INLINE bool isinternal() const { return(!isleaf()); }
union { union {
btDbvtNode* childs[2]; btDbvtNode* childs[2];
void* data; void* data;
@@ -188,8 +196,6 @@ struct btDbvtNode
///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure. ///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
struct btDbvt struct btDbvt
{ {
/* Stack element */ /* Stack element */
struct sStkNN struct sStkNN
{ {
@@ -410,17 +416,17 @@ return(box);
} }
// //
DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e) DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e)
{ {
mi-=e;mx+=e; mi-=e;mx+=e;
} }
// //
DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e) DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e)
{ {
if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x()); if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]);
if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y()); if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]);
if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z()); if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]);
} }
// //
@@ -488,12 +494,18 @@ for(int i=0;i<3;++i)
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
const btDbvtAabbMm& b) const btDbvtAabbMm& b)
{ {
#if DBVT_INT0_IMPL == DBVT_IMPL_SSE
const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
_mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
return((rt.m128_u32[0]|rt.m128_u32[1]|rt.m128_u32[2])==0);
#else
return( (a.mi.x()<=b.mx.x())&& return( (a.mi.x()<=b.mx.x())&&
(a.mx.x()>=b.mi.x())&& (a.mx.x()>=b.mi.x())&&
(a.mi.y()<=b.mx.y())&& (a.mi.y()<=b.mx.y())&&
(a.mx.y()>=b.mi.y())&& (a.mx.y()>=b.mi.y())&&
(a.mi.z()<=b.mx.z())&& (a.mi.z()<=b.mx.z())&&
(a.mx.z()>=b.mi.z())); (a.mx.z()>=b.mi.z()));
#endif
} }
// //
@@ -1092,5 +1104,6 @@ if(root)
#undef DBVT_USE_INTRINSIC_SSE #undef DBVT_USE_INTRINSIC_SSE
#undef DBVT_SELECT_IMPL #undef DBVT_SELECT_IMPL
#undef DBVT_MERGE_IMPL #undef DBVT_MERGE_IMPL
#undef DBVT_INT0_IMPL
#endif #endif