Improved Dbvt speed slightly.
This commit is contained in:
@@ -679,8 +679,7 @@ Benchmarking dbvt...
|
|||||||
[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
|
[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
|
||||||
[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
|
[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
|
||||||
[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
|
[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
|
||||||
[17] btDbvtVolume proximity: 2887 ms (1%)
|
[17] btDbvtVolume select: 3419 ms (0%)
|
||||||
[18] btDbvtVolume select: 3419 ms (0%)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct btDbvtBenchmark
|
struct btDbvtBenchmark
|
||||||
@@ -782,7 +781,7 @@ static const btScalar cfgVolumeCenterScale = 100;
|
|||||||
static const btScalar cfgVolumeExentsBase = 1;
|
static const btScalar cfgVolumeExentsBase = 1;
|
||||||
static const btScalar cfgVolumeExentsScale = 4;
|
static const btScalar cfgVolumeExentsScale = 4;
|
||||||
static const int cfgLeaves = 8192;
|
static const int cfgLeaves = 8192;
|
||||||
static const bool cfgEnable = false;
|
static const bool cfgEnable = true;
|
||||||
|
|
||||||
//[1] btDbvtVolume intersections
|
//[1] btDbvtVolume intersections
|
||||||
bool cfgBenchmark1_Enable = cfgEnable;
|
bool cfgBenchmark1_Enable = cfgEnable;
|
||||||
@@ -857,14 +856,10 @@ bool cfgBenchmark16_Enable = cfgEnable;
|
|||||||
static const int cfgBenchmark16_BatchCount = 256;
|
static const int cfgBenchmark16_BatchCount = 256;
|
||||||
static const int cfgBenchmark16_Passes = 16384;
|
static const int cfgBenchmark16_Passes = 16384;
|
||||||
static const int cfgBenchmark16_Reference = 5138;
|
static const int cfgBenchmark16_Reference = 5138;
|
||||||
//[17] proximity
|
//[17] select
|
||||||
bool cfgBenchmark17_Enable = cfgEnable;
|
bool cfgBenchmark17_Enable = cfgEnable;
|
||||||
static const int cfgBenchmark17_Iterations = 8;
|
static const int cfgBenchmark17_Iterations = 4;
|
||||||
static const int cfgBenchmark17_Reference = 2842;
|
static const int cfgBenchmark17_Reference = 3390;
|
||||||
//[18] select
|
|
||||||
bool cfgBenchmark18_Enable = true;
|
|
||||||
static const int cfgBenchmark18_Iterations = 4;
|
|
||||||
static const int cfgBenchmark18_Reference = 3390;
|
|
||||||
|
|
||||||
btClock wallclock;
|
btClock wallclock;
|
||||||
printf("Benchmarking dbvt...\r\n");
|
printf("Benchmarking dbvt...\r\n");
|
||||||
@@ -1259,32 +1254,6 @@ if(cfgBenchmark17_Enable)
|
|||||||
{// Benchmark 17
|
{// Benchmark 17
|
||||||
srand(380843);
|
srand(380843);
|
||||||
btAlignedObjectArray<btDbvtVolume> volumes;
|
btAlignedObjectArray<btDbvtVolume> volumes;
|
||||||
btAlignedObjectArray<btScalar> results;
|
|
||||||
volumes.resize(cfgLeaves);
|
|
||||||
results.resize(cfgLeaves);
|
|
||||||
for(int i=0;i<cfgLeaves;++i)
|
|
||||||
{
|
|
||||||
volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
|
|
||||||
}
|
|
||||||
printf("[17] btDbvtVolume proximity: ");
|
|
||||||
wallclock.reset();
|
|
||||||
for(int i=0;i<cfgBenchmark17_Iterations;++i)
|
|
||||||
{
|
|
||||||
for(int j=0;j<cfgLeaves;++j)
|
|
||||||
{
|
|
||||||
for(int k=0;k<cfgLeaves;++k)
|
|
||||||
{
|
|
||||||
results[k]=Proximity(volumes[j],volumes[k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const int time=(int)wallclock.getTimeMilliseconds();
|
|
||||||
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
|
|
||||||
}
|
|
||||||
if(cfgBenchmark18_Enable)
|
|
||||||
{// Benchmark 18
|
|
||||||
srand(380843);
|
|
||||||
btAlignedObjectArray<btDbvtVolume> volumes;
|
|
||||||
btAlignedObjectArray<int> results;
|
btAlignedObjectArray<int> results;
|
||||||
btAlignedObjectArray<int> indices;
|
btAlignedObjectArray<int> indices;
|
||||||
volumes.resize(cfgLeaves);
|
volumes.resize(cfgLeaves);
|
||||||
@@ -1299,9 +1268,9 @@ if(cfgBenchmark18_Enable)
|
|||||||
{
|
{
|
||||||
btSwap(indices[i],indices[rand()%cfgLeaves]);
|
btSwap(indices[i],indices[rand()%cfgLeaves]);
|
||||||
}
|
}
|
||||||
printf("[18] btDbvtVolume select: ");
|
printf("[17] btDbvtVolume select: ");
|
||||||
wallclock.reset();
|
wallclock.reset();
|
||||||
for(int i=0;i<cfgBenchmark18_Iterations;++i)
|
for(int i=0;i<cfgBenchmark17_Iterations;++i)
|
||||||
{
|
{
|
||||||
for(int j=0;j<cfgLeaves;++j)
|
for(int j=0;j<cfgLeaves;++j)
|
||||||
{
|
{
|
||||||
@@ -1313,7 +1282,7 @@ if(cfgBenchmark18_Enable)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
const int time=(int)wallclock.getTimeMilliseconds();
|
const int time=(int)wallclock.getTimeMilliseconds();
|
||||||
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark18_Reference)*100/time);
|
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
|
||||||
}
|
}
|
||||||
printf("\r\n\r\n");
|
printf("\r\n\r\n");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,13 +64,17 @@ subject to the following restrictions:
|
|||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE
|
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE
|
||||||
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE
|
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE
|
||||||
|
#define DBVT_INT0_IMPL DBVT_IMPL_SSE
|
||||||
#else
|
#else
|
||||||
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
|
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
|
||||||
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
|
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
|
||||||
|
#define DBVT_INT0_IMPL DBVT_IMPL_GENERIC
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)||(DBVT_MERGE_IMPL==DBVT_IMPL_SSE)
|
#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \
|
||||||
#include <xmmintrin.h>
|
(DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \
|
||||||
|
(DBVT_INT0_IMPL==DBVT_IMPL_SSE)
|
||||||
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -118,6 +122,10 @@ subject to the following restrictions:
|
|||||||
#error "DBVT_MERGE_IMPL undefined"
|
#error "DBVT_MERGE_IMPL undefined"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef DBVT_INT0_IMPL
|
||||||
|
#error "DBVT_INT0_IMPL undefined"
|
||||||
|
#endif
|
||||||
|
|
||||||
//
|
//
|
||||||
// Defaults volumes
|
// Defaults volumes
|
||||||
//
|
//
|
||||||
@@ -135,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
|
|||||||
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
|
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
|
||||||
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
|
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
|
||||||
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
|
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
|
||||||
DBVT_INLINE void Expand(const btVector3 e);
|
DBVT_INLINE void Expand(const btVector3& e);
|
||||||
DBVT_INLINE void SignedExpand(const btVector3 e);
|
DBVT_INLINE void SignedExpand(const btVector3& e);
|
||||||
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
|
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
|
||||||
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
|
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
|
||||||
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
|
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
|
||||||
@@ -175,12 +183,12 @@ struct btDbvtNode
|
|||||||
{
|
{
|
||||||
btDbvtVolume volume;
|
btDbvtVolume volume;
|
||||||
btDbvtNode* parent;
|
btDbvtNode* parent;
|
||||||
bool isleaf() const { return(childs[1]==0); }
|
DBVT_INLINE bool isleaf() const { return(childs[1]==0); }
|
||||||
bool isinternal() const { return(!isleaf()); }
|
DBVT_INLINE bool isinternal() const { return(!isleaf()); }
|
||||||
union {
|
union {
|
||||||
btDbvtNode* childs[2];
|
btDbvtNode* childs[2];
|
||||||
void* data;
|
void* data;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
|
///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
|
||||||
@@ -188,8 +196,6 @@ struct btDbvtNode
|
|||||||
///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
|
///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
|
||||||
struct btDbvt
|
struct btDbvt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
/* Stack element */
|
/* Stack element */
|
||||||
struct sStkNN
|
struct sStkNN
|
||||||
{
|
{
|
||||||
@@ -252,8 +258,8 @@ struct btDbvt
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Fields
|
// Fields
|
||||||
btDbvtNode* m_root;
|
btDbvtNode* m_root;
|
||||||
btDbvtNode* m_free;
|
btDbvtNode* m_free;
|
||||||
int m_lkhd;
|
int m_lkhd;
|
||||||
int m_leaves;
|
int m_leaves;
|
||||||
unsigned m_opath;
|
unsigned m_opath;
|
||||||
@@ -410,17 +416,17 @@ return(box);
|
|||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e)
|
DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e)
|
||||||
{
|
{
|
||||||
mi-=e;mx+=e;
|
mi-=e;mx+=e;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e)
|
DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e)
|
||||||
{
|
{
|
||||||
if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x());
|
if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]);
|
||||||
if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y());
|
if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]);
|
||||||
if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z());
|
if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -488,12 +494,18 @@ for(int i=0;i<3;++i)
|
|||||||
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
||||||
const btDbvtAabbMm& b)
|
const btDbvtAabbMm& b)
|
||||||
{
|
{
|
||||||
|
#if DBVT_INT0_IMPL == DBVT_IMPL_SSE
|
||||||
|
const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
|
||||||
|
_mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
|
||||||
|
return((rt.m128_u32[0]|rt.m128_u32[1]|rt.m128_u32[2])==0);
|
||||||
|
#else
|
||||||
return( (a.mi.x()<=b.mx.x())&&
|
return( (a.mi.x()<=b.mx.x())&&
|
||||||
(a.mx.x()>=b.mi.x())&&
|
(a.mx.x()>=b.mi.x())&&
|
||||||
(a.mi.y()<=b.mx.y())&&
|
(a.mi.y()<=b.mx.y())&&
|
||||||
(a.mx.y()>=b.mi.y())&&
|
(a.mx.y()>=b.mi.y())&&
|
||||||
(a.mi.z()<=b.mx.z())&&
|
(a.mi.z()<=b.mx.z())&&
|
||||||
(a.mx.z()>=b.mi.z()));
|
(a.mx.z()>=b.mi.z()));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -1092,5 +1104,6 @@ if(root)
|
|||||||
#undef DBVT_USE_INTRINSIC_SSE
|
#undef DBVT_USE_INTRINSIC_SSE
|
||||||
#undef DBVT_SELECT_IMPL
|
#undef DBVT_SELECT_IMPL
|
||||||
#undef DBVT_MERGE_IMPL
|
#undef DBVT_MERGE_IMPL
|
||||||
|
#undef DBVT_INT0_IMPL
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user