diff --git a/src/BulletCollision/BroadphaseCollision/btDbvt.cpp b/src/BulletCollision/BroadphaseCollision/btDbvt.cpp index 4c5d451a7..66f97dc14 100644 --- a/src/BulletCollision/BroadphaseCollision/btDbvt.cpp +++ b/src/BulletCollision/BroadphaseCollision/btDbvt.cpp @@ -679,8 +679,7 @@ Benchmarking dbvt... [14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s) [15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s) [16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s) -[17] btDbvtVolume proximity: 2887 ms (1%) -[18] btDbvtVolume select: 3419 ms (0%) +[17] btDbvtVolume select: 3419 ms (0%) */ struct btDbvtBenchmark @@ -782,7 +781,7 @@ static const btScalar cfgVolumeCenterScale = 100; static const btScalar cfgVolumeExentsBase = 1; static const btScalar cfgVolumeExentsScale = 4; static const int cfgLeaves = 8192; -static const bool cfgEnable = false; +static const bool cfgEnable = true; //[1] btDbvtVolume intersections bool cfgBenchmark1_Enable = cfgEnable; @@ -857,14 +856,10 @@ bool cfgBenchmark16_Enable = cfgEnable; static const int cfgBenchmark16_BatchCount = 256; static const int cfgBenchmark16_Passes = 16384; static const int cfgBenchmark16_Reference = 5138; -//[17] proximity +//[17] select bool cfgBenchmark17_Enable = cfgEnable; -static const int cfgBenchmark17_Iterations = 8; -static const int cfgBenchmark17_Reference = 2842; -//[18] select -bool cfgBenchmark18_Enable = true; -static const int cfgBenchmark18_Iterations = 4; -static const int cfgBenchmark18_Reference = 3390; +static const int cfgBenchmark17_Iterations = 4; +static const int cfgBenchmark17_Reference = 3390; btClock wallclock; printf("Benchmarking dbvt...\r\n"); @@ -1259,32 +1254,6 @@ if(cfgBenchmark17_Enable) {// Benchmark 17 srand(380843); btAlignedObjectArray volumes; - btAlignedObjectArray results; - volumes.resize(cfgLeaves); - results.resize(cfgLeaves); - for(int i=0;i volumes; btAlignedObjectArray results; btAlignedObjectArray indices; volumes.resize(cfgLeaves); @@ -1299,9 +1268,9 @@ if(cfgBenchmark18_Enable) { btSwap(indices[i],indices[rand()%cfgLeaves]); } - printf("[18] btDbvtVolume select: "); + printf("[17] btDbvtVolume select: "); wallclock.reset(); - for(int i=0;i +#if (DBVT_SELECT_IMPL==DBVT_IMPL_SSE)|| \ + (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)|| \ + (DBVT_INT0_IMPL==DBVT_IMPL_SSE) +#include #endif // @@ -118,6 +122,10 @@ subject to the following restrictions: #error "DBVT_MERGE_IMPL undefined" #endif +#ifndef DBVT_INT0_IMPL +#error "DBVT_INT0_IMPL undefined" +#endif + // // Defaults volumes // @@ -135,8 +143,8 @@ static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r); static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx); static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n); static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n); -DBVT_INLINE void Expand(const btVector3 e); -DBVT_INLINE void SignedExpand(const btVector3 e); +DBVT_INLINE void Expand(const btVector3& e); +DBVT_INLINE void SignedExpand(const btVector3& e); DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const; DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const; DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const; @@ -175,12 +183,12 @@ struct btDbvtNode { btDbvtVolume volume; btDbvtNode* parent; - bool isleaf() const { return(childs[1]==0); } - bool isinternal() const { return(!isleaf()); } + DBVT_INLINE bool isleaf() const { return(childs[1]==0); } + DBVT_INLINE bool isinternal() const { return(!isleaf()); } union { - btDbvtNode* childs[2]; - void* data; - }; + btDbvtNode* childs[2]; + void* data; + }; }; ///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree). @@ -188,8 +196,6 @@ struct btDbvtNode ///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure. struct btDbvt { - - /* Stack element */ struct sStkNN { @@ -252,8 +258,8 @@ struct btDbvt }; // Fields - btDbvtNode* m_root; - btDbvtNode* m_free; + btDbvtNode* m_root; + btDbvtNode* m_free; int m_lkhd; int m_leaves; unsigned m_opath; @@ -410,17 +416,17 @@ return(box); } // -DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e) { mi-=e;mx+=e; } // -DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e) { -if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x()); -if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y()); -if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z()); +if(e.x()>0) mx.setX(mx.x()+e[0]); else mi.setX(mi.x()+e[0]); +if(e.y()>0) mx.setY(mx.y()+e[1]); else mi.setY(mi.y()+e[1]); +if(e.z()>0) mx.setZ(mx.z()+e[2]); else mi.setZ(mi.z()+e[2]); } // @@ -488,12 +494,18 @@ for(int i=0;i<3;++i) DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { +#if DBVT_INT0_IMPL == DBVT_IMPL_SSE +const __m128 rt(_mm_or_ps( _mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)), + _mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi)))); +return((rt.m128_u32[0]|rt.m128_u32[1]|rt.m128_u32[2])==0); +#else return( (a.mi.x()<=b.mx.x())&& (a.mx.x()>=b.mi.x())&& (a.mi.y()<=b.mx.y())&& (a.mx.y()>=b.mi.y())&& (a.mi.z()<=b.mx.z())&& (a.mx.z()>=b.mi.z())); +#endif } // @@ -710,7 +722,7 @@ if(root0&&root1) int treshold=DOUBLE_STACKSIZE-4; stack.resize(DOUBLE_STACKSIZE); stack[0]=sStkNN(root0,root1); - do { + do { sStkNN p=stack[--depth]; if(depth>treshold) { @@ -1092,5 +1104,6 @@ if(root) #undef DBVT_USE_INTRINSIC_SSE #undef DBVT_SELECT_IMPL #undef DBVT_MERGE_IMPL +#undef DBVT_INT0_IMPL #endif