diff --git a/src/BulletCollision/BroadphaseCollision/btDbvt.cpp b/src/BulletCollision/BroadphaseCollision/btDbvt.cpp index 2630ceb29..15b90d987 100644 --- a/src/BulletCollision/BroadphaseCollision/btDbvt.cpp +++ b/src/BulletCollision/BroadphaseCollision/btDbvt.cpp @@ -28,22 +28,27 @@ void Process(const btDbvtNode* n) { nodes.push_back(n); } }; // -static inline int indexof(const btDbvtNode* node) +static DBVT_INLINE int indexof(const btDbvtNode* node) { return(node->parent->childs[1]==node); } // -static inline btDbvtVolume merge( const btDbvtVolume& a, - const btDbvtVolume& b) +static DBVT_INLINE btDbvtVolume merge( const btDbvtVolume& a, + const btDbvtVolume& b) { +#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE +DBVT_ALIGN char locals[sizeof(btDbvtAabbMm)]; +btDbvtVolume& res=*(btDbvtVolume*)locals; +#else btDbvtVolume res; +#endif Merge(a,b,res); return(res); } // volume+edge lengths -static inline btScalar size(const btDbvtVolume& a) +static DBVT_INLINE btScalar size(const btDbvtVolume& a) { const btVector3 edges=a.Lengths(); return( edges.x()*edges.y()*edges.z()+ @@ -51,7 +56,17 @@ return( edges.x()*edges.y()*edges.z()+ } // -static inline void deletenode( btDbvt* pdbvt, +static void getmaxdepth(const btDbvtNode* node,int depth,int& maxdepth) /* raises maxdepth to the deepest leaf level found below node; depth is node's own level */ +{ +if(node->isinternal()) + { + getmaxdepth(node->childs[0],depth+1,maxdepth); + getmaxdepth(node->childs[1],depth+1,maxdepth); /* both subtrees must be walked */ + } else maxdepth=btMax(maxdepth,depth); +} + +// +static DBVT_INLINE void deletenode( btDbvt* pdbvt, btDbvtNode* node) { btAlignedFree(pdbvt->m_free); @@ -59,7 +74,7 @@ pdbvt->m_free=node; } // -static inline void recursedeletenode( btDbvt* pdbvt, +static void recursedeletenode( btDbvt* pdbvt, btDbvtNode* node) { if(!node->isleaf()) @@ -72,9 +87,8 @@ deletenode(pdbvt,node); } // -static inline btDbvtNode* createnode( btDbvt* pdbvt, +static DBVT_INLINE btDbvtNode* createnode( btDbvt* pdbvt, btDbvtNode* parent, - const btDbvtVolume& volume, void* data) { btDbvtNode* node; @@ -83,14 +97,36 @@ 
if(pdbvt->m_free) else { node=new(btAlignedAlloc(sizeof(btDbvtNode),16)) btDbvtNode(); } node->parent = parent; -node->volume = volume; node->data = data; node->childs[1] = 0; return(node); } // -static inline void insertleaf( btDbvt* pdbvt, +static DBVT_INLINE btDbvtNode* createnode( btDbvt* pdbvt, + btDbvtNode* parent, + const btDbvtVolume& volume, + void* data) +{ +btDbvtNode* node=createnode(pdbvt,parent,data); +node->volume=volume; +return(node); +} + +// +static DBVT_INLINE btDbvtNode* createnode( btDbvt* pdbvt, + btDbvtNode* parent, + const btDbvtVolume& volume0, + const btDbvtVolume& volume1, + void* data) +{ +btDbvtNode* node=createnode(pdbvt,parent,data); +Merge(volume0,volume1,node->volume); +return(node); +} + +// +static void insertleaf( btDbvt* pdbvt, btDbvtNode* root, btDbvtNode* leaf) { @@ -104,15 +140,13 @@ if(!pdbvt->m_root) if(!root->isleaf()) { do { - if( Proximity(root->childs[0]->volume,leaf->volume)< - Proximity(root->childs[1]->volume,leaf->volume)) - root=root->childs[0]; - else - root=root->childs[1]; + root=root->childs[Select( leaf->volume, + root->childs[0]->volume, + root->childs[1]->volume)]; } while(!root->isleaf()); } btDbvtNode* prev=root->parent; - btDbvtNode* node=createnode(pdbvt,prev,merge(leaf->volume,root->volume),0); + btDbvtNode* node=createnode(pdbvt,prev,leaf->volume,root->volume,0); if(prev) { prev->childs[indexof(root)] = node; @@ -136,7 +170,7 @@ if(!pdbvt->m_root) } // -static inline btDbvtNode* removeleaf( btDbvt* pdbvt, +static btDbvtNode* removeleaf( btDbvt* pdbvt, btDbvtNode* leaf) { if(leaf==pdbvt->m_root) @@ -212,12 +246,18 @@ for(int i=0,ni=leaves.size();ivolume; +#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE +DBVT_ALIGN char locals[sizeof(btDbvtVolume)]; +btDbvtVolume& volume=*(btDbvtVolume*)locals; +volume=leaves[0]->volume; +#else +btDbvtVolume volume=leaves[0]->volume; +#endif for(int i=1,ni=leaves.size();ivolume); + Merge(volume,leaves[i]->volume,volume); } return(volume); } @@ -244,7 +284,7 @@ while(leaves.size()>1) 
} } btDbvtNode* n[] = {leaves[minidx[0]],leaves[minidx[1]]}; - btDbvtNode* p = createnode(pdbvt,0,merge(n[0]->volume,n[1]->volume),0); + btDbvtNode* p = createnode(pdbvt,0,n[0]->volume,n[1]->volume,0); p->childs[0] = n[0]; p->childs[1] = n[1]; n[0]->parent = p; @@ -257,8 +297,8 @@ while(leaves.size()>1) // static btDbvtNode* topdown(btDbvt* pdbvt, - tNodeArray& leaves, - int bu_treshold) + tNodeArray& leaves, + int bu_treshold) { static const btVector3 axis[]={btVector3(1,0,0), btVector3(0,1,0), @@ -326,7 +366,7 @@ return(leaves[0]); } // -static inline btDbvtNode* sort(btDbvtNode* n,btDbvtNode*& r) +static DBVT_INLINE btDbvtNode* sort(btDbvtNode* n,btDbvtNode*& r) { btDbvtNode* p=n->parent; btAssert(n->isinternal()); @@ -354,7 +394,7 @@ return(n); } // -static inline btDbvtNode* walkup(btDbvtNode* n,int count) +static DBVT_INLINE btDbvtNode* walkup(btDbvtNode* n,int count) { while(n&&(count--)) n=n->parent; return(n); @@ -569,6 +609,14 @@ if(m_root!=0) } } +// +int btDbvt::maxdepth(const btDbvtNode* node) +{ +int depth=0; +if(node) getmaxdepth(node,1,depth); +return(depth); +} + // int btDbvt::countLeaves(const btDbvtNode* node) { @@ -613,21 +661,26 @@ Benchmarking dbvt... 
Extents base: 1.000000 Extents range: 4.000000 Leaves: 8192 -[1] btDbvtVolume intersections: 3986 ms (0%) -[2] btDbvtVolume merges: 5815 ms (-1%) -[3] btDbvt::collideTT: 3267 ms (0%) -[4] btDbvt::collideTT self: 1657 ms (0%) -[5] btDbvt::collideTT xform: 7201 ms (0%) -[6] btDbvt::collideTT xform,self: 7382 ms (0%) -[7] btDbvt::collideRAY: 8855 ms (-1%),(236832 r/s) -[8] insert/remove: 3574 ms (0%),(586780 ir/s) -[9] updates (teleport): 3281 ms (0%),(639180 u/s) -[10] updates (jitter): 2658 ms (0%),(788996 u/s) -[11] optimize (incremental): 5091 ms (0%),(823000 o/s) -[12] btDbvtVolume notequal: 4151 ms (0%) -[13] culling(OCL): 2486 ms (0%),(411 t/s) -[14] culling(OCL+qsort): 599 ms (-2%),(1709 t/s) -[15] culling(KDOP+qsort): 306 ms (0%),(3346 t/s) + sizeof(btDbvtVolume): 32 bytes + sizeof(btDbvtNode): 44 bytes +[1] btDbvtVolume intersections: 3537 ms (0%) +[2] btDbvtVolume merges: 1945 ms (0%) +[3] btDbvt::collideTT: 6646 ms (0%) +[4] btDbvt::collideTT self: 3389 ms (0%) +[5] btDbvt::collideTT xform: 7505 ms (0%) +[6] btDbvt::collideTT xform,self: 7480 ms (0%) +[7] btDbvt::collideRAY: 6307 ms (0%),(332511 r/s) +[8] insert/remove: 2105 ms (-3%),(996271 ir/s) +[9] updates (teleport): 1943 ms (0%),(1079337 u/s) +[10] updates (jitter): 1301 ms (0%),(1611953 u/s) +[11] optimize (incremental): 2510 ms (0%),(1671000 o/s) +[12] btDbvtVolume notequal: 3677 ms (0%) +[13] culling(OCL+fullsort): 2231 ms (0%),(458 t/s) +[14] culling(OCL+qsort): 3500 ms (0%),(2340 t/s) +[15] culling(KDOP+qsort): 1151 ms (0%),(7117 t/s) +[16] insert/remove batch(256): 5138 ms (0%),(816330 bir/s) +[17] btDbvtVolume proximity: 2842 ms (0%) +[18] btDbvtVolume select: 3390 ms (0%) */ struct btDbvtBenchmark @@ -641,7 +694,7 @@ struct NilPolicy : btDbvt::ICollide { ++m_pcount; if(m_checksort) - { if(depth>=m_depth) m_depth=depth; else printf("wrong depth: %f\r\n",depth); } + { if(depth>=m_depth) m_depth=depth; else printf("wrong depth: %f (should be >= %f)\r\n",depth,m_depth); } } int m_pcount; btScalar 
m_depth; @@ -649,45 +702,45 @@ struct NilPolicy : btDbvt::ICollide }; struct P14 : btDbvt::ICollide { - struct btDbvtNode + struct Node { const btDbvtNode* leaf; btScalar depth; }; void Process(const btDbvtNode* leaf,btScalar depth) { - btDbvtNode n; + Node n; n.leaf = leaf; n.depth = depth; } - static int sortfnc(const btDbvtNode& a,const btDbvtNode& b) + static int sortfnc(const Node& a,const Node& b) { if(a.depthb.depth) return(-1); return(0); } - btAlignedObjectArray m_nodes; + btAlignedObjectArray m_nodes; }; struct P15 : btDbvt::ICollide { - struct btDbvtNode + struct Node { const btDbvtNode* leaf; btScalar depth; }; void Process(const btDbvtNode* leaf) { - btDbvtNode n; + Node n; n.leaf = leaf; n.depth = dot(leaf->volume.Center(),m_axis); } - static int sortfnc(const btDbvtNode& a,const btDbvtNode& b) + static int sortfnc(const Node& a,const Node& b) { if(a.depthb.depth) return(-1); return(0); } - btAlignedObjectArray m_nodes; + btAlignedObjectArray m_nodes; btVector3 m_axis; }; static btScalar RandUnit() @@ -734,71 +787,84 @@ static const bool cfgEnable = true; //[1] btDbvtVolume intersections bool cfgBenchmark1_Enable = cfgEnable; static const int cfgBenchmark1_Iterations = 8; -static const int cfgBenchmark1_Reference = 3980; +static const int cfgBenchmark1_Reference = 3537; //[2] btDbvtVolume merges bool cfgBenchmark2_Enable = cfgEnable; static const int cfgBenchmark2_Iterations = 4; -static const int cfgBenchmark2_Reference = 5924; +static const int cfgBenchmark2_Reference = 1945; //[3] btDbvt::collideTT bool cfgBenchmark3_Enable = cfgEnable; -static const int cfgBenchmark3_Iterations = 256; -static const int cfgBenchmark3_Reference = 3288; +static const int cfgBenchmark3_Iterations = 512; +static const int cfgBenchmark3_Reference = 6646; //[4] btDbvt::collideTT self bool cfgBenchmark4_Enable = cfgEnable; -static const int cfgBenchmark4_Iterations = 256; -static const int cfgBenchmark4_Reference = 1655; +static const int cfgBenchmark4_Iterations = 512; 
+static const int cfgBenchmark4_Reference = 3389; //[5] btDbvt::collideTT xform bool cfgBenchmark5_Enable = cfgEnable; -static const int cfgBenchmark5_Iterations = 256; +static const int cfgBenchmark5_Iterations = 512; static const btScalar cfgBenchmark5_OffsetScale = 2; -static const int cfgBenchmark5_Reference = 7201; +static const int cfgBenchmark5_Reference = 7505; //[6] btDbvt::collideTT xform,self bool cfgBenchmark6_Enable = cfgEnable; -static const int cfgBenchmark6_Iterations = 256; +static const int cfgBenchmark6_Iterations = 512; static const btScalar cfgBenchmark6_OffsetScale = 2; -static const int cfgBenchmark6_Reference = 7382; +static const int cfgBenchmark6_Reference = 7480; //[7] btDbvt::collideRAY bool cfgBenchmark7_Enable = cfgEnable; static const int cfgBenchmark7_Passes = 32; static const int cfgBenchmark7_Iterations = 65536; -static const int cfgBenchmark7_Reference = 8954; +static const int cfgBenchmark7_Reference = 6307; //[8] insert/remove bool cfgBenchmark8_Enable = cfgEnable; static const int cfgBenchmark8_Passes = 32; static const int cfgBenchmark8_Iterations = 65536; -static const int cfgBenchmark8_Reference = 3597; +static const int cfgBenchmark8_Reference = 2105; //[9] updates (teleport) bool cfgBenchmark9_Enable = cfgEnable; static const int cfgBenchmark9_Passes = 32; static const int cfgBenchmark9_Iterations = 65536; -static const int cfgBenchmark9_Reference = 3282; +static const int cfgBenchmark9_Reference = 1943; //[10] updates (jitter) bool cfgBenchmark10_Enable = cfgEnable; static const btScalar cfgBenchmark10_Scale = cfgVolumeCenterScale/10000; static const int cfgBenchmark10_Passes = 32; static const int cfgBenchmark10_Iterations = 65536; -static const int cfgBenchmark10_Reference = 2659; +static const int cfgBenchmark10_Reference = 1301; //[11] optimize (incremental) bool cfgBenchmark11_Enable = cfgEnable; static const int cfgBenchmark11_Passes = 64; static const int cfgBenchmark11_Iterations = 65536; -static const int 
cfgBenchmark11_Reference = 5075; +static const int cfgBenchmark11_Reference = 2510; //[12] btDbvtVolume notequal bool cfgBenchmark12_Enable = cfgEnable; static const int cfgBenchmark12_Iterations = 32; -static const int cfgBenchmark12_Reference = 4118; +static const int cfgBenchmark12_Reference = 3677; //[13] culling(OCL+fullsort) bool cfgBenchmark13_Enable = cfgEnable; static const int cfgBenchmark13_Iterations = 1024; -static const int cfgBenchmark13_Reference = 2483; +static const int cfgBenchmark13_Reference = 2231; //[14] culling(OCL+qsort) bool cfgBenchmark14_Enable = cfgEnable; -static const int cfgBenchmark14_Iterations = 1024; -static const int cfgBenchmark14_Reference = 614; +static const int cfgBenchmark14_Iterations = 8192; +static const int cfgBenchmark14_Reference = 3500; //[15] culling(KDOP+qsort) bool cfgBenchmark15_Enable = cfgEnable; -static const int cfgBenchmark15_Iterations = 1024; -static const int cfgBenchmark15_Reference = 305; +static const int cfgBenchmark15_Iterations = 8192; +static const int cfgBenchmark15_Reference = 1151; +//[16] insert/remove batch +bool cfgBenchmark16_Enable = cfgEnable; +static const int cfgBenchmark16_BatchCount = 256; +static const int cfgBenchmark16_Passes = 16384; +static const int cfgBenchmark16_Reference = 5138; +//[17] proximity +bool cfgBenchmark17_Enable = cfgEnable; +static const int cfgBenchmark17_Iterations = 8; +static const int cfgBenchmark17_Reference = 2842; +//[18] select +bool cfgBenchmark18_Enable = cfgEnable; +static const int cfgBenchmark18_Iterations = 4; +static const int cfgBenchmark18_Reference = 3390; btClock wallclock; printf("Benchmarking dbvt...\r\n"); @@ -806,11 +872,13 @@ printf("\tWorld scale: %f\r\n",cfgVolumeCenterScale); printf("\tExtents base: %f\r\n",cfgVolumeExentsBase); printf("\tExtents range: %f\r\n",cfgVolumeExentsScale); printf("\tLeaves: %u\r\n",cfgLeaves); +printf("\tsizeof(btDbvtVolume): %u bytes\r\n",(unsigned)sizeof(btDbvtVolume)); /* sizeof yields size_t; cast so the argument matches %u */ +printf("\tsizeof(btDbvtNode): %u
bytes\r\n",(unsigned)sizeof(btDbvtNode)); /* sizeof yields size_t; cast so the argument matches %u */ if(cfgBenchmark1_Enable) {// Benchmark 1 srand(380843); btAlignedObjectArray volumes; - btAlignedObjectArray results; + btAlignedObjectArray results; volumes.resize(cfgLeaves); results.resize(cfgLeaves); for(int i=0;i batch; + btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); + dbvt.optimizeTopDown(); + batch.reserve(cfgBenchmark16_BatchCount); + printf("[16] insert/remove batch(%u): ",cfgBenchmark16_BatchCount); + wallclock.reset(); + for(int i=0;i volumes; + btAlignedObjectArray results; + volumes.resize(cfgLeaves); + results.resize(cfgLeaves); + for(int i=0;i volumes; + btAlignedObjectArray results; + btAlignedObjectArray indices; + volumes.resize(cfgLeaves); + results.resize(cfgLeaves); + indices.resize(cfgLeaves); + for(int i=0;i= 1400) -#define DBVT_USE_TEMPLATE 1 // Enable template for ICollide -#else -#define DBVT_USE_TEMPLATE 0 // Don't + #if (defined (_MSC_VER) && _MSC_VER >= 1400) + #define DBVT_USE_TEMPLATE 1 + #else + #define DBVT_USE_TEMPLATE 0 #endif #else -#define DBVT_USE_TEMPLATE 0 // Enable template for ICollide +#define DBVT_USE_TEMPLATE 0 #endif -#define DBVT_USE_MEMMOVE 1 // Enable memmove (collideOCL) -#define DBVT_ENABLE_BENCHMARK 0 // Enable benchmarking code +// Using memmov for collideOCL +#define DBVT_USE_MEMMOVE 1 + +// Enable benchmarking code +#define DBVT_ENABLE_BENCHMARK 0 + +// Inlining +#define DBVT_INLINE SIMD_FORCE_INLINE +// Align +#ifdef WIN32 +#define DBVT_ALIGN __declspec(align(16)) +#else +#define DBVT_ALIGN +#endif + +// Specific methods implementation +#ifdef WIN32 +#define DBVT_PROXIMITY_IMPL DBVT_IMPL_SSE +#define DBVT_SELECT_IMPL DBVT_IMPL_SSE +#define DBVT_MERGE_IMPL DBVT_IMPL_SSE +#else +#define DBVT_PROXIMITY_IMPL DBVT_IMPL_GENERIC +#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC +#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC +#endif // // Auto config and checks @@ -76,6 +104,18 @@ subject to the following restrictions: #error
"DBVT_ENABLE_BENCHMARK undefined" #endif +#ifndef DBVT_PROXIMITY_IMPL +#error "DBVT_PROXIMITY_IMPL undefined" +#endif + +#ifndef DBVT_SELECT_IMPL +#error "DBVT_SELECT_IMPL undefined" +#endif + +#ifndef DBVT_MERGE_IMPL +#error "DBVT_MERGE_IMPL undefined" +#endif + // // Defaults volumes // @@ -83,41 +123,44 @@ subject to the following restrictions: /* btDbvtAabbMm */ struct btDbvtAabbMm { -inline btVector3 Center() const { return((mi+mx)/2); } -inline btVector3 Lengths() const { return(mx-mi); } -inline btVector3 Extents() const { return((mx-mi)/2); } -inline const btVector3& Mins() const { return(mi); } -inline const btVector3& Maxs() const { return(mx); } -static inline btDbvtAabbMm FromCE(const btVector3& c,const btVector3& e); -static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r); -static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx); -static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n); -static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n); -inline void Expand(const btVector3 e); -inline void SignedExpand(const btVector3 e); -inline bool Contain(const btDbvtAabbMm& a) const; -inline int Classify(const btVector3& n,btScalar o,int s) const; -inline btScalar ProjectMinimum(const btVector3& v,unsigned signs) const; -inline friend bool Intersect( const btDbvtAabbMm& a, - const btDbvtAabbMm& b); -inline friend bool Intersect( const btDbvtAabbMm& a, - const btDbvtAabbMm& b, - const btTransform& xform); -inline friend bool Intersect( const btDbvtAabbMm& a, - const btVector3& b); -inline friend bool Intersect( const btDbvtAabbMm& a, - const btVector3& org, - const btVector3& invdir, - const unsigned* signs); -inline friend btScalar Proximity( const btDbvtAabbMm& a, - const btDbvtAabbMm& b); -inline friend void Merge( const btDbvtAabbMm& a, - const btDbvtAabbMm& b, - btDbvtAabbMm& r); -inline friend bool NotEqual( const btDbvtAabbMm& a, - const btDbvtAabbMm& b); +DBVT_INLINE btVector3 Center() const { 
return((mi+mx)/2); } +DBVT_INLINE btVector3 Lengths() const { return(mx-mi); } +DBVT_INLINE btVector3 Extents() const { return((mx-mi)/2); } +DBVT_INLINE const btVector3& Mins() const { return(mi); } +DBVT_INLINE const btVector3& Maxs() const { return(mx); } +static inline btDbvtAabbMm FromCE(const btVector3& c,const btVector3& e); +static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r); +static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx); +static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n); +static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n); +DBVT_INLINE void Expand(const btVector3 e); +DBVT_INLINE void SignedExpand(const btVector3 e); +DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const; +DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const; +DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const; +DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a, + const btDbvtAabbMm& b); +DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a, + const btDbvtAabbMm& b, + const btTransform& xform); +DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a, + const btVector3& b); +DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a, + const btVector3& org, + const btVector3& invdir, + const unsigned* signs); +DBVT_INLINE friend btScalar Proximity( const btDbvtAabbMm& a, + const btDbvtAabbMm& b); +DBVT_INLINE friend int Select( const btDbvtAabbMm& o, + const btDbvtAabbMm& a, + const btDbvtAabbMm& b); +DBVT_INLINE friend void Merge( const btDbvtAabbMm& a, + const btDbvtAabbMm& b, + btDbvtAabbMm& r); +DBVT_INLINE friend bool NotEqual( const btDbvtAabbMm& a, + const btDbvtAabbMm& b); private: -inline void AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const; +DBVT_INLINE void AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const; private: btVector3 mi,mx; }; @@ -129,7 +172,7 @@ typedef btDbvtAabbMm btDbvtVolume; struct btDbvtNode { btDbvtVolume 
volume; - btDbvtNode* parent; + btDbvtNode* parent; bool isleaf() const { return(childs[1]==0); } bool isinternal() const { return(!isleaf()); } union { @@ -150,6 +193,7 @@ struct btDbvt { const btDbvtNode* a; const btDbvtNode* b; + sStkNN() {} sStkNN(const btDbvtNode* na,const btDbvtNode* nb) : a(na),b(nb) {} }; struct sStkNP @@ -219,7 +263,7 @@ struct btDbvt void optimizeBottomUp(); void optimizeTopDown(int bu_treshold=128); void optimizeIncremental(int passes); - btDbvtNode* insert(const btDbvtVolume& box,void* data); + btDbvtNode* insert(const btDbvtVolume& box,void* data); void update(btDbvtNode* leaf,int lookahead=-1); void update(btDbvtNode* leaf,const btDbvtVolume& volume); bool update(btDbvtNode* leaf,btDbvtVolume volume,const btVector3& velocity,btScalar margin); @@ -227,7 +271,8 @@ struct btDbvt bool update(btDbvtNode* leaf,btDbvtVolume volume,btScalar margin); void remove(btDbvtNode* leaf); void write(IWriter* iwriter) const; - void clone(btDbvt& dest,IClone* iclone=0) const; + void clone(btDbvt& dest,IClone* iclone=0) const; + static int maxdepth(const btDbvtNode* node); static int countLeaves(const btDbvtNode* node); static void extractLeaves(const btDbvtNode* node,btAlignedObjectArray& leaves); #if DBVT_ENABLE_BENCHMARK @@ -284,7 +329,7 @@ struct btDbvt static void collideTU( const btDbvtNode* root, DBVT_IPOLICY); // Helpers - static inline int nearest(const int* i,const btDbvt::sStkNPS* a,btScalar v,int l,int h) + static DBVT_INLINE int nearest(const int* i,const btDbvt::sStkNPS* a,btScalar v,int l,int h) { int m=0; while(l& ifree, + static DBVT_INLINE int allocate( btAlignedObjectArray& ifree, btAlignedObjectArray& stock, const sStkNPS& value) { @@ -315,7 +360,7 @@ struct btDbvt // // -inline btDbvtAabbMm btDbvtAabbMm::FromCE(const btVector3& c,const btVector3& e) +inline btDbvtAabbMm btDbvtAabbMm::FromCE(const btVector3& c,const btVector3& e) { btDbvtAabbMm box; box.mi=c-e;box.mx=c+e; @@ -323,13 +368,13 @@ return(box); } // -inline btDbvtAabbMm 
btDbvtAabbMm::FromCR(const btVector3& c,btScalar r) +inline btDbvtAabbMm btDbvtAabbMm::FromCR(const btVector3& c,btScalar r) { return(FromCE(c,btVector3(r,r,r))); } // -inline btDbvtAabbMm btDbvtAabbMm::FromMM(const btVector3& mi,const btVector3& mx) +inline btDbvtAabbMm btDbvtAabbMm::FromMM(const btVector3& mi,const btVector3& mx) { btDbvtAabbMm box; box.mi=mi;box.mx=mx; @@ -337,7 +382,7 @@ return(box); } // -inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3* pts,int n) +inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3* pts,int n) { btDbvtAabbMm box; box.mi=box.mx=pts[0]; @@ -350,7 +395,7 @@ return(box); } // -inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3** ppts,int n) +inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3** ppts,int n) { btDbvtAabbMm box; box.mi=box.mx=*ppts[0]; @@ -363,13 +408,13 @@ return(box); } // -inline void btDbvtAabbMm::Expand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3 e) { mi-=e;mx+=e; } // -inline void btDbvtAabbMm::SignedExpand(const btVector3 e) +DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e) { if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x()); if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y()); @@ -377,7 +422,7 @@ if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z()); } // -inline bool btDbvtAabbMm::Contain(const btDbvtAabbMm& a) const +DBVT_INLINE bool btDbvtAabbMm::Contain(const btDbvtAabbMm& a) const { return( (mi.x()<=a.mi.x())&& (mi.y()<=a.mi.y())&& @@ -388,7 +433,7 @@ return( (mi.x()<=a.mi.x())&& } // -inline int btDbvtAabbMm::Classify(const btVector3& n,btScalar o,int s) const +DBVT_INLINE int btDbvtAabbMm::Classify(const btVector3& n,btScalar o,int s) const { btVector3 pi,px; switch(s) @@ -416,7 +461,7 @@ return(0); } // -inline btScalar btDbvtAabbMm::ProjectMinimum(const btVector3& v,unsigned signs) const +DBVT_INLINE btScalar btDbvtAabbMm::ProjectMinimum(const btVector3& v,unsigned signs) const { const 
btVector3* b[]={&mx,&mi}; const btVector3 p( b[(signs>>0)&1]->x(), @@ -426,7 +471,7 @@ return(dot(p,v)); } // -inline void btDbvtAabbMm::AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const +DBVT_INLINE void btDbvtAabbMm::AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const { for(int i=0;i<3;++i) { @@ -438,7 +483,7 @@ for(int i=0;i<3;++i) } // -inline bool Intersect( const btDbvtAabbMm& a, +DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { return( (a.mi.x()<=b.mx.x())&& @@ -450,7 +495,7 @@ return( (a.mi.x()<=b.mx.x())&& } // -inline bool Intersect( const btDbvtAabbMm& a, +DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btDbvtAabbMm& b, const btTransform& xform) { @@ -466,7 +511,7 @@ return(true); } // -inline bool Intersect( const btDbvtAabbMm& a, +DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btVector3& b) { return( (b.x()>=a.mi.x())&& @@ -478,11 +523,20 @@ return( (b.x()>=a.mi.x())&& } // -inline bool Intersect( const btDbvtAabbMm& a, +DBVT_INLINE bool Intersect( const btDbvtAabbMm& a, const btVector3& org, const btVector3& invdir, const unsigned* signs) { +#if 0 +const btVector3 b0((a.mi-org)*invdir); +const btVector3 b1((a.mx-org)*invdir); +const btVector3 tmin(btMin(b0[0],b1[0]),btMin(b0[1],b1[1]),btMin(b0[2],b1[2])); +const btVector3 tmax(btMax(b0[0],b1[0]),btMax(b0[1],b1[1]),btMax(b0[2],b1[2])); +const btScalar tin=btMax(tmin[0],btMax(tmin[1],tmin[2])); +const btScalar tout=btMin(tmax[0],btMin(tmax[1],tmax[2])); +return(tinx()-org[0])*invdir[0]; btScalar txmax=(bounds[1-signs[0]]->x()-org[0])*invdir[0]; @@ -497,30 +551,113 @@ if((txmin>tzmax)||(tzmin>txmax)) return(false); if(tzmin>txmin) txmin=tzmin; if(tzmax0); +#endif } // -inline btScalar Proximity( const btDbvtAabbMm& a, +DBVT_INLINE btScalar Proximity( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { +#if DBVT_PROXIMITY_IMPL == DBVT_IMPL_SSE +DBVT_ALIGN btScalar r[1]; +static DBVT_ALIGN const unsigned __int32 
mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; +__asm + { + mov eax,a + mov ecx,b + movaps xmm0,[eax] + movaps xmm2,[ecx] + movaps xmm1,[eax+16] + movaps xmm3,[ecx+16] + addps xmm0,xmm1 + addps xmm2,xmm3 + subps xmm0,xmm2 + andps xmm0,mask + movhlps xmm1,xmm0 + addps xmm0,xmm1 + pshufd xmm1,xmm0,1 + addss xmm0,xmm1 + movss r,xmm0 + } +return(r[0]); +#else const btVector3 d=(a.mi+a.mx)-(b.mi+b.mx); return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z())); +#endif } // -inline void Merge( const btDbvtAabbMm& a, +DBVT_INLINE int Select( const btDbvtAabbMm& o, + const btDbvtAabbMm& a, + const btDbvtAabbMm& b) +{ +#if DBVT_SELECT_IMPL == DBVT_IMPL_SSE +DBVT_ALIGN __int32 r[1]; +static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff}; +__asm + { + mov eax,o + mov ecx,a + mov edx,b + movaps xmm0,[eax] + movaps xmm5,mask + addps xmm0,[eax+16] + movaps xmm1,[ecx] + movaps xmm2,[edx] + addps xmm1,[ecx+16] + addps xmm2,[edx+16] + subps xmm1,xmm0 + subps xmm2,xmm0 + andps xmm1,xmm5 + andps xmm2,xmm5 + movhlps xmm3,xmm1 + movhlps xmm4,xmm2 + addps xmm1,xmm3 + addps xmm2,xmm4 + pshufd xmm3,xmm1,1 + pshufd xmm4,xmm2,1 + addss xmm1,xmm3 + addss xmm2,xmm4 + cmpless xmm2,xmm1 + movss r,xmm2 + } +return(r[0]&1); +#else +return(Proximity(o,a)b.mx[i]) r.mx[i]=a.mx[i]; else r.mx[i]=b.mx[i]; } +#endif } // -inline bool NotEqual( const btDbvtAabbMm& a, +DBVT_INLINE bool NotEqual( const btDbvtAabbMm& a, const btDbvtAabbMm& b) { return( (a.mi.x()!=b.mi.x())|| @@ -576,18 +713,24 @@ DBVT_CHECKTYPE if(root0&&root1) { btAlignedObjectArray stack; - stack.reserve(DOUBLE_STACKSIZE); - stack.push_back(sStkNN(root0,root1)); + int depth=1; + int treshold=DOUBLE_STACKSIZE-4; + stack.resize(DOUBLE_STACKSIZE); + stack[0]=sStkNN(root0,root1); do { - sStkNN p=stack[stack.size()-1]; - stack.pop_back(); + sStkNN p=stack[--depth]; + if(depth>treshold) + { + stack.resize(stack.size()*2); + treshold=stack.size()-4; + } if(p.a==p.b) { if(p.a->isinternal()) { - 
stack.push_back(sStkNN(p.a->childs[0],p.a->childs[0])); - stack.push_back(sStkNN(p.a->childs[1],p.a->childs[1])); - stack.push_back(sStkNN(p.a->childs[0],p.a->childs[1])); + stack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]); + stack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]); + stack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]); } } else if(Intersect(p.a->volume,p.b->volume)) @@ -596,23 +739,23 @@ if(root0&&root1) { if(p.b->isinternal()) { - stack.push_back(sStkNN(p.a->childs[0],p.b->childs[0])); - stack.push_back(sStkNN(p.a->childs[1],p.b->childs[0])); - stack.push_back(sStkNN(p.a->childs[0],p.b->childs[1])); - stack.push_back(sStkNN(p.a->childs[1],p.b->childs[1])); + stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); + stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); + stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); + stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); } else { - stack.push_back(sStkNN(p.a->childs[0],p.b)); - stack.push_back(sStkNN(p.a->childs[1],p.b)); + stack[depth++]=sStkNN(p.a->childs[0],p.b); + stack[depth++]=sStkNN(p.a->childs[1],p.b); } } else { if(p.b->isinternal()) { - stack.push_back(sStkNN(p.a,p.b->childs[0])); - stack.push_back(sStkNN(p.a,p.b->childs[1])); + stack[depth++]=sStkNN(p.a,p.b->childs[0]); + stack[depth++]=sStkNN(p.a,p.b->childs[1]); } else { @@ -620,7 +763,7 @@ if(root0&&root1) } } } - } while(stack.size()>0); + } while(depth); } } @@ -635,34 +778,40 @@ DBVT_CHECKTYPE if(root0&&root1) { btAlignedObjectArray stack; - stack.reserve(DOUBLE_STACKSIZE); - stack.push_back(sStkNN(root0,root1)); + int depth=1; + int treshold=DOUBLE_STACKSIZE-4; + stack.resize(DOUBLE_STACKSIZE); + stack[0]=sStkNN(root0,root1); do { - sStkNN p=stack[stack.size()-1]; - stack.pop_back(); + sStkNN p=stack[--depth]; if(Intersect(p.a->volume,p.b->volume,xform)) { + if(depth>treshold) + { + stack.resize(stack.size()*2); + treshold=stack.size()-4; + } if(p.a->isinternal()) { if(p.b->isinternal()) - { - 
stack.push_back(sStkNN(p.a->childs[0],p.b->childs[0])); - stack.push_back(sStkNN(p.a->childs[1],p.b->childs[0])); - stack.push_back(sStkNN(p.a->childs[0],p.b->childs[1])); - stack.push_back(sStkNN(p.a->childs[1],p.b->childs[1])); + { + stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); + stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); + stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); + stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); } else { - stack.push_back(sStkNN(p.a->childs[0],p.b)); - stack.push_back(sStkNN(p.a->childs[1],p.b)); + stack[depth++]=sStkNN(p.a->childs[0],p.b); + stack[depth++]=sStkNN(p.a->childs[1],p.b); } } else { if(p.b->isinternal()) { - stack.push_back(sStkNN(p.a,p.b->childs[0])); - stack.push_back(sStkNN(p.a,p.b->childs[1])); + stack[depth++]=sStkNN(p.a,p.b->childs[0]); + stack[depth++]=sStkNN(p.a,p.b->childs[1]); } else { @@ -670,7 +819,7 @@ if(root0&&root1) } } } - } while(stack.size()>0); + } while(depth); } } @@ -945,5 +1094,8 @@ if(root) #undef DBVT_PREFIX #undef DBVT_IPOLICY #undef DBVT_CHECKTYPE +#undef DBVT_IMPL_GENERIC +#undef DBVT_IMPL_FPU0x86 +#undef DBVT_IMPL_SSE #endif diff --git a/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp b/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp index b24935176..3b3a2ebb9 100644 --- a/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp +++ b/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp @@ -112,7 +112,6 @@ void Process(const btDbvtNode* na,const btDbvtNode* nb) // btDbvtBroadphase::btDbvtBroadphase(btOverlappingPairCache* paircache) { -btDbvt::benchmark(); m_releasepaircache = (paircache!=0)?false:true; m_predictedframes = 2; m_stageCurrent = 0;