- General performance improvement (25%) on insert/update/collideTT.
- ~30% additional performance improvement for win32 using SSE implementation for critical methods.
This commit is contained in:
@@ -28,22 +28,27 @@ void Process(const btDbvtNode* n) { nodes.push_back(n); }
|
||||
};
|
||||
|
||||
//
|
||||
static inline int indexof(const btDbvtNode* node)
|
||||
// Tells which slot of its parent's childs[] array 'node' occupies:
// yields 1 when the parent's childs[1] is 'node', and 0 otherwise.
// 'node->parent' is dereferenced unconditionally, so the node must have a parent.
static DBVT_INLINE int indexof(const btDbvtNode* node)
{
	const btDbvtNode* const	owner=node->parent;
	return((owner->childs[1]==node)?1:0);
}
|
||||
|
||||
//
|
||||
static inline btDbvtVolume merge( const btDbvtVolume& a,
|
||||
const btDbvtVolume& b)
|
||||
// Returns, by value, the volume that Merge(a,b,...) writes for the pair (a,b).
static DBVT_INLINE btDbvtVolume	merge(	const btDbvtVolume& a,
										const btDbvtVolume& b)
{
#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
	// SSE profile: the result is built inside a DBVT_ALIGN-qualified raw byte
	// buffer that is viewed as a btDbvtVolume.
	DBVT_ALIGN char	storage[sizeof(btDbvtAabbMm)];
	btDbvtVolume&	merged=*(btDbvtVolume*)storage;
#else
	btDbvtVolume	merged;
#endif
	Merge(a,b,merged);
	return(merged);
}
|
||||
|
||||
// volume+edge lengths
|
||||
static inline btScalar size(const btDbvtVolume& a)
|
||||
static DBVT_INLINE btScalar size(const btDbvtVolume& a)
|
||||
{
|
||||
const btVector3 edges=a.Lengths();
|
||||
return( edges.x()*edges.y()*edges.z()+
|
||||
@@ -51,7 +56,17 @@ return( edges.x()*edges.y()*edges.z()+
|
||||
}
|
||||
|
||||
//
|
||||
static inline void deletenode( btDbvt* pdbvt,
|
||||
static void getmaxdepth(const btDbvtNode* node,int depth,int& maxdepth)
|
||||
{
|
||||
if(node->isinternal())
|
||||
{
|
||||
getmaxdepth(node->childs[0],depth+1,maxdepth);
|
||||
getmaxdepth(node->childs[0],depth+1,maxdepth);
|
||||
} else maxdepth=btMax(maxdepth,depth);
|
||||
}
|
||||
|
||||
//
|
||||
static DBVT_INLINE void deletenode( btDbvt* pdbvt,
|
||||
btDbvtNode* node)
|
||||
{
|
||||
btAlignedFree(pdbvt->m_free);
|
||||
@@ -59,7 +74,7 @@ pdbvt->m_free=node;
|
||||
}
|
||||
|
||||
//
|
||||
static inline void recursedeletenode( btDbvt* pdbvt,
|
||||
static void recursedeletenode( btDbvt* pdbvt,
|
||||
btDbvtNode* node)
|
||||
{
|
||||
if(!node->isleaf())
|
||||
@@ -72,9 +87,8 @@ deletenode(pdbvt,node);
|
||||
}
|
||||
|
||||
//
|
||||
static inline btDbvtNode* createnode( btDbvt* pdbvt,
|
||||
static DBVT_INLINE btDbvtNode* createnode( btDbvt* pdbvt,
|
||||
btDbvtNode* parent,
|
||||
const btDbvtVolume& volume,
|
||||
void* data)
|
||||
{
|
||||
btDbvtNode* node;
|
||||
@@ -83,14 +97,36 @@ if(pdbvt->m_free)
|
||||
else
|
||||
{ node=new(btAlignedAlloc(sizeof(btDbvtNode),16)) btDbvtNode(); }
|
||||
node->parent = parent;
|
||||
node->volume = volume;
|
||||
node->data = data;
|
||||
node->childs[1] = 0;
|
||||
return(node);
|
||||
}
|
||||
|
||||
//
|
||||
static inline void insertleaf( btDbvt* pdbvt,
|
||||
// Obtains a node from createnode(pdbvt,parent,data) and copies 'volume' into
// its 'volume' field before returning it.
static DBVT_INLINE btDbvtNode*	createnode(	btDbvt* pdbvt,
											btDbvtNode* parent,
											const btDbvtVolume& volume,
											void* data)
{
	btDbvtNode* const	fresh=createnode(pdbvt,parent,data);
	fresh->volume=volume;
	return(fresh);
}
|
||||
|
||||
//
|
||||
// Obtains a node from createnode(pdbvt,parent,data) and lets Merge() write the
// combination of 'volume0' and 'volume1' directly into the node's 'volume'
// field, so no temporary volume is returned by value.
static DBVT_INLINE btDbvtNode*	createnode(	btDbvt* pdbvt,
											btDbvtNode* parent,
											const btDbvtVolume& volume0,
											const btDbvtVolume& volume1,
											void* data)
{
	btDbvtNode* const	fresh=createnode(pdbvt,parent,data);
	Merge(volume0,volume1,fresh->volume);
	return(fresh);
}
|
||||
|
||||
//
|
||||
static void insertleaf( btDbvt* pdbvt,
|
||||
btDbvtNode* root,
|
||||
btDbvtNode* leaf)
|
||||
{
|
||||
@@ -104,15 +140,13 @@ if(!pdbvt->m_root)
|
||||
if(!root->isleaf())
|
||||
{
|
||||
do {
|
||||
if( Proximity(root->childs[0]->volume,leaf->volume)<
|
||||
Proximity(root->childs[1]->volume,leaf->volume))
|
||||
root=root->childs[0];
|
||||
else
|
||||
root=root->childs[1];
|
||||
root=root->childs[Select( leaf->volume,
|
||||
root->childs[0]->volume,
|
||||
root->childs[1]->volume)];
|
||||
} while(!root->isleaf());
|
||||
}
|
||||
btDbvtNode* prev=root->parent;
|
||||
btDbvtNode* node=createnode(pdbvt,prev,merge(leaf->volume,root->volume),0);
|
||||
btDbvtNode* node=createnode(pdbvt,prev,leaf->volume,root->volume,0);
|
||||
if(prev)
|
||||
{
|
||||
prev->childs[indexof(root)] = node;
|
||||
@@ -136,7 +170,7 @@ if(!pdbvt->m_root)
|
||||
}
|
||||
|
||||
//
|
||||
static inline btDbvtNode* removeleaf( btDbvt* pdbvt,
|
||||
static btDbvtNode* removeleaf( btDbvt* pdbvt,
|
||||
btDbvtNode* leaf)
|
||||
{
|
||||
if(leaf==pdbvt->m_root)
|
||||
@@ -212,12 +246,18 @@ for(int i=0,ni=leaves.size();i<ni;++i)
|
||||
}
|
||||
|
||||
//
|
||||
static btDbvtVolume bounds( const tNodeArray& leaves)
|
||||
// Returns the volume obtained by folding Merge() over the volumes of every
// node in 'leaves'.
//   leaves - node array; leaves[0] is read unconditionally, so it must hold
//            at least one element.
static btDbvtVolume				bounds(	const tNodeArray& leaves)
{
	// 'volume' is declared exactly once per build profile.
#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
	// SSE profile: accumulate inside a DBVT_ALIGN-qualified raw byte buffer
	// viewed as a btDbvtVolume.
	DBVT_ALIGN char	locals[sizeof(btDbvtVolume)];
	btDbvtVolume&	volume=*(btDbvtVolume*)locals;
	volume=leaves[0]->volume;
#else
	btDbvtVolume	volume=leaves[0]->volume;
#endif
	for(int i=1,ni=leaves.size();i<ni;++i)
	{
		// In-place accumulation: 'volume' is both an input and the output.
		Merge(volume,leaves[i]->volume,volume);
	}
	return(volume);
}
|
||||
@@ -244,7 +284,7 @@ while(leaves.size()>1)
|
||||
}
|
||||
}
|
||||
btDbvtNode* n[] = {leaves[minidx[0]],leaves[minidx[1]]};
|
||||
btDbvtNode* p = createnode(pdbvt,0,merge(n[0]->volume,n[1]->volume),0);
|
||||
btDbvtNode* p = createnode(pdbvt,0,n[0]->volume,n[1]->volume,0);
|
||||
p->childs[0] = n[0];
|
||||
p->childs[1] = n[1];
|
||||
n[0]->parent = p;
|
||||
@@ -257,8 +297,8 @@ while(leaves.size()>1)
|
||||
|
||||
//
|
||||
static btDbvtNode* topdown(btDbvt* pdbvt,
|
||||
tNodeArray& leaves,
|
||||
int bu_treshold)
|
||||
tNodeArray& leaves,
|
||||
int bu_treshold)
|
||||
{
|
||||
static const btVector3 axis[]={btVector3(1,0,0),
|
||||
btVector3(0,1,0),
|
||||
@@ -326,7 +366,7 @@ return(leaves[0]);
|
||||
}
|
||||
|
||||
//
|
||||
static inline btDbvtNode* sort(btDbvtNode* n,btDbvtNode*& r)
|
||||
static DBVT_INLINE btDbvtNode* sort(btDbvtNode* n,btDbvtNode*& r)
|
||||
{
|
||||
btDbvtNode* p=n->parent;
|
||||
btAssert(n->isinternal());
|
||||
@@ -354,7 +394,7 @@ return(n);
|
||||
}
|
||||
|
||||
//
|
||||
static inline btDbvtNode* walkup(btDbvtNode* n,int count)
|
||||
static DBVT_INLINE btDbvtNode* walkup(btDbvtNode* n,int count)
|
||||
{
|
||||
while(n&&(count--)) n=n->parent;
|
||||
return(n);
|
||||
@@ -569,6 +609,14 @@ if(m_root!=0)
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
int btDbvt::maxdepth(const btDbvtNode* node)
|
||||
{
|
||||
int depth=0;
|
||||
if(node) getmaxdepth(node,1,depth);
|
||||
return(depth);
|
||||
}
|
||||
|
||||
//
|
||||
int btDbvt::countLeaves(const btDbvtNode* node)
|
||||
{
|
||||
@@ -613,21 +661,26 @@ Benchmarking dbvt...
|
||||
Extents base: 1.000000
|
||||
Extents range: 4.000000
|
||||
Leaves: 8192
|
||||
[1] btDbvtVolume intersections: 3986 ms (0%)
|
||||
[2] btDbvtVolume merges: 5815 ms (-1%)
|
||||
[3] btDbvt::collideTT: 3267 ms (0%)
|
||||
[4] btDbvt::collideTT self: 1657 ms (0%)
|
||||
[5] btDbvt::collideTT xform: 7201 ms (0%)
|
||||
[6] btDbvt::collideTT xform,self: 7382 ms (0%)
|
||||
[7] btDbvt::collideRAY: 8855 ms (-1%),(236832 r/s)
|
||||
[8] insert/remove: 3574 ms (0%),(586780 ir/s)
|
||||
[9] updates (teleport): 3281 ms (0%),(639180 u/s)
|
||||
[10] updates (jitter): 2658 ms (0%),(788996 u/s)
|
||||
[11] optimize (incremental): 5091 ms (0%),(823000 o/s)
|
||||
[12] btDbvtVolume notequal: 4151 ms (0%)
|
||||
[13] culling(OCL): 2486 ms (0%),(411 t/s)
|
||||
[14] culling(OCL+qsort): 599 ms (-2%),(1709 t/s)
|
||||
[15] culling(KDOP+qsort): 306 ms (0%),(3346 t/s)
|
||||
sizeof(btDbvtVolume): 32 bytes
|
||||
sizeof(btDbvtNode): 44 bytes
|
||||
[1] btDbvtVolume intersections: 3537 ms (0%)
|
||||
[2] btDbvtVolume merges: 1945 ms (0%)
|
||||
[3] btDbvt::collideTT: 6646 ms (0%)
|
||||
[4] btDbvt::collideTT self: 3389 ms (0%)
|
||||
[5] btDbvt::collideTT xform: 7505 ms (0%)
|
||||
[6] btDbvt::collideTT xform,self: 7480 ms (0%)
|
||||
[7] btDbvt::collideRAY: 6307 ms (0%),(332511 r/s)
|
||||
[8] insert/remove: 2105 ms (-3%),(996271 ir/s)
|
||||
[9] updates (teleport): 1943 ms (0%),(1079337 u/s)
|
||||
[10] updates (jitter): 1301 ms (0%),(1611953 u/s)
|
||||
[11] optimize (incremental): 2510 ms (0%),(1671000 o/s)
|
||||
[12] btDbvtVolume notequal: 3677 ms (0%)
|
||||
[13] culling(OCL+fullsort): 2231 ms (0%),(458 t/s)
|
||||
[14] culling(OCL+qsort): 3500 ms (0%),(2340 t/s)
|
||||
[15] culling(KDOP+qsort): 1151 ms (0%),(7117 t/s)
|
||||
[16] insert/remove batch(256): 5138 ms (0%),(816330 bir/s)
|
||||
[17] btDbvtVolume proximity: 2842 ms (0%)
|
||||
[18] btDbvtVolume select: 3390 ms (0%)
|
||||
*/
|
||||
|
||||
struct btDbvtBenchmark
|
||||
@@ -641,7 +694,7 @@ struct NilPolicy : btDbvt::ICollide
|
||||
{
|
||||
++m_pcount;
|
||||
if(m_checksort)
|
||||
{ if(depth>=m_depth) m_depth=depth; else printf("wrong depth: %f\r\n",depth); }
|
||||
{ if(depth>=m_depth) m_depth=depth; else printf("wrong depth: %f (should be >= %f)\r\n",depth,m_depth); }
|
||||
}
|
||||
int m_pcount;
|
||||
btScalar m_depth;
|
||||
@@ -649,45 +702,45 @@ struct NilPolicy : btDbvt::ICollide
|
||||
};
|
||||
struct P14 : btDbvt::ICollide
|
||||
{
|
||||
struct btDbvtNode
|
||||
struct Node
|
||||
{
|
||||
const btDbvtNode* leaf;
|
||||
btScalar depth;
|
||||
};
|
||||
void Process(const btDbvtNode* leaf,btScalar depth)
|
||||
{
|
||||
btDbvtNode n;
|
||||
Node n;
|
||||
n.leaf = leaf;
|
||||
n.depth = depth;
|
||||
}
|
||||
static int sortfnc(const btDbvtNode& a,const btDbvtNode& b)
|
||||
static int sortfnc(const Node& a,const Node& b)
|
||||
{
|
||||
if(a.depth<b.depth) return(+1);
|
||||
if(a.depth>b.depth) return(-1);
|
||||
return(0);
|
||||
}
|
||||
btAlignedObjectArray<btDbvtNode> m_nodes;
|
||||
btAlignedObjectArray<Node> m_nodes;
|
||||
};
|
||||
struct P15 : btDbvt::ICollide
|
||||
{
|
||||
struct btDbvtNode
|
||||
struct Node
|
||||
{
|
||||
const btDbvtNode* leaf;
|
||||
btScalar depth;
|
||||
};
|
||||
void Process(const btDbvtNode* leaf)
|
||||
{
|
||||
btDbvtNode n;
|
||||
Node n;
|
||||
n.leaf = leaf;
|
||||
n.depth = dot(leaf->volume.Center(),m_axis);
|
||||
}
|
||||
static int sortfnc(const btDbvtNode& a,const btDbvtNode& b)
|
||||
static int sortfnc(const Node& a,const Node& b)
|
||||
{
|
||||
if(a.depth<b.depth) return(+1);
|
||||
if(a.depth>b.depth) return(-1);
|
||||
return(0);
|
||||
}
|
||||
btAlignedObjectArray<btDbvtNode> m_nodes;
|
||||
btAlignedObjectArray<Node> m_nodes;
|
||||
btVector3 m_axis;
|
||||
};
|
||||
static btScalar RandUnit()
|
||||
@@ -734,71 +787,84 @@ static const bool cfgEnable = true;
|
||||
//[1] btDbvtVolume intersections
|
||||
bool cfgBenchmark1_Enable = cfgEnable;
|
||||
static const int cfgBenchmark1_Iterations = 8;
|
||||
static const int cfgBenchmark1_Reference = 3980;
|
||||
static const int cfgBenchmark1_Reference = 3537;
|
||||
//[2] btDbvtVolume merges
|
||||
bool cfgBenchmark2_Enable = cfgEnable;
|
||||
static const int cfgBenchmark2_Iterations = 4;
|
||||
static const int cfgBenchmark2_Reference = 5924;
|
||||
static const int cfgBenchmark2_Reference = 1945;
|
||||
//[3] btDbvt::collideTT
|
||||
bool cfgBenchmark3_Enable = cfgEnable;
|
||||
static const int cfgBenchmark3_Iterations = 256;
|
||||
static const int cfgBenchmark3_Reference = 3288;
|
||||
static const int cfgBenchmark3_Iterations = 512;
|
||||
static const int cfgBenchmark3_Reference = 6646;
|
||||
//[4] btDbvt::collideTT self
|
||||
bool cfgBenchmark4_Enable = cfgEnable;
|
||||
static const int cfgBenchmark4_Iterations = 256;
|
||||
static const int cfgBenchmark4_Reference = 1655;
|
||||
static const int cfgBenchmark4_Iterations = 512;
|
||||
static const int cfgBenchmark4_Reference = 3389;
|
||||
//[5] btDbvt::collideTT xform
|
||||
bool cfgBenchmark5_Enable = cfgEnable;
|
||||
static const int cfgBenchmark5_Iterations = 256;
|
||||
static const int cfgBenchmark5_Iterations = 512;
|
||||
static const btScalar cfgBenchmark5_OffsetScale = 2;
|
||||
static const int cfgBenchmark5_Reference = 7201;
|
||||
static const int cfgBenchmark5_Reference = 7505;
|
||||
//[6] btDbvt::collideTT xform,self
|
||||
bool cfgBenchmark6_Enable = cfgEnable;
|
||||
static const int cfgBenchmark6_Iterations = 256;
|
||||
static const int cfgBenchmark6_Iterations = 512;
|
||||
static const btScalar cfgBenchmark6_OffsetScale = 2;
|
||||
static const int cfgBenchmark6_Reference = 7382;
|
||||
static const int cfgBenchmark6_Reference = 7480;
|
||||
//[7] btDbvt::collideRAY
|
||||
bool cfgBenchmark7_Enable = cfgEnable;
|
||||
static const int cfgBenchmark7_Passes = 32;
|
||||
static const int cfgBenchmark7_Iterations = 65536;
|
||||
static const int cfgBenchmark7_Reference = 8954;
|
||||
static const int cfgBenchmark7_Reference = 6307;
|
||||
//[8] insert/remove
|
||||
bool cfgBenchmark8_Enable = cfgEnable;
|
||||
static const int cfgBenchmark8_Passes = 32;
|
||||
static const int cfgBenchmark8_Iterations = 65536;
|
||||
static const int cfgBenchmark8_Reference = 3597;
|
||||
static const int cfgBenchmark8_Reference = 2105;
|
||||
//[9] updates (teleport)
|
||||
bool cfgBenchmark9_Enable = cfgEnable;
|
||||
static const int cfgBenchmark9_Passes = 32;
|
||||
static const int cfgBenchmark9_Iterations = 65536;
|
||||
static const int cfgBenchmark9_Reference = 3282;
|
||||
static const int cfgBenchmark9_Reference = 1943;
|
||||
//[10] updates (jitter)
|
||||
bool cfgBenchmark10_Enable = cfgEnable;
|
||||
static const btScalar cfgBenchmark10_Scale = cfgVolumeCenterScale/10000;
|
||||
static const int cfgBenchmark10_Passes = 32;
|
||||
static const int cfgBenchmark10_Iterations = 65536;
|
||||
static const int cfgBenchmark10_Reference = 2659;
|
||||
static const int cfgBenchmark10_Reference = 1301;
|
||||
//[11] optimize (incremental)
|
||||
bool cfgBenchmark11_Enable = cfgEnable;
|
||||
static const int cfgBenchmark11_Passes = 64;
|
||||
static const int cfgBenchmark11_Iterations = 65536;
|
||||
static const int cfgBenchmark11_Reference = 5075;
|
||||
static const int cfgBenchmark11_Reference = 2510;
|
||||
//[12] btDbvtVolume notequal
|
||||
bool cfgBenchmark12_Enable = cfgEnable;
|
||||
static const int cfgBenchmark12_Iterations = 32;
|
||||
static const int cfgBenchmark12_Reference = 4118;
|
||||
static const int cfgBenchmark12_Reference = 3677;
|
||||
//[13] culling(OCL+fullsort)
|
||||
bool cfgBenchmark13_Enable = cfgEnable;
|
||||
static const int cfgBenchmark13_Iterations = 1024;
|
||||
static const int cfgBenchmark13_Reference = 2483;
|
||||
static const int cfgBenchmark13_Reference = 2231;
|
||||
//[14] culling(OCL+qsort)
|
||||
bool cfgBenchmark14_Enable = cfgEnable;
|
||||
static const int cfgBenchmark14_Iterations = 1024;
|
||||
static const int cfgBenchmark14_Reference = 614;
|
||||
static const int cfgBenchmark14_Iterations = 8192;
|
||||
static const int cfgBenchmark14_Reference = 3500;
|
||||
//[15] culling(KDOP+qsort)
|
||||
bool cfgBenchmark15_Enable = cfgEnable;
|
||||
static const int cfgBenchmark15_Iterations = 1024;
|
||||
static const int cfgBenchmark15_Reference = 305;
|
||||
static const int cfgBenchmark15_Iterations = 8192;
|
||||
static const int cfgBenchmark15_Reference = 1151;
|
||||
//[16] insert/remove batch
|
||||
bool cfgBenchmark16_Enable = cfgEnable;
|
||||
static const int cfgBenchmark16_BatchCount = 256;
|
||||
static const int cfgBenchmark16_Passes = 16384;
|
||||
static const int cfgBenchmark16_Reference = 5138;
|
||||
//[17] proximity
|
||||
bool cfgBenchmark17_Enable = cfgEnable;
|
||||
static const int cfgBenchmark17_Iterations = 8;
|
||||
static const int cfgBenchmark17_Reference = 2842;
|
||||
//[18] select
|
||||
bool cfgBenchmark18_Enable = cfgEnable;
|
||||
static const int cfgBenchmark18_Iterations = 4;
|
||||
static const int cfgBenchmark18_Reference = 3390;
|
||||
|
||||
btClock wallclock;
|
||||
printf("Benchmarking dbvt...\r\n");
|
||||
@@ -806,11 +872,13 @@ printf("\tWorld scale: %f\r\n",cfgVolumeCenterScale);
|
||||
printf("\tExtents base: %f\r\n",cfgVolumeExentsBase);
|
||||
printf("\tExtents range: %f\r\n",cfgVolumeExentsScale);
|
||||
printf("\tLeaves: %u\r\n",cfgLeaves);
|
||||
printf("\tsizeof(btDbvtVolume): %u bytes\r\n",sizeof(btDbvtVolume));
|
||||
printf("\tsizeof(btDbvtNode): %u bytes\r\n",sizeof(btDbvtNode));
|
||||
if(cfgBenchmark1_Enable)
|
||||
{// Benchmark 1
|
||||
srand(380843);
|
||||
btAlignedObjectArray<btDbvtVolume> volumes;
|
||||
btAlignedObjectArray<bool> results;
|
||||
btAlignedObjectArray<bool> results;
|
||||
volumes.resize(cfgLeaves);
|
||||
results.resize(cfgLeaves);
|
||||
for(int i=0;i<cfgLeaves;++i)
|
||||
@@ -1161,6 +1229,92 @@ if(cfgBenchmark15_Enable)
|
||||
const int t=cfgBenchmark15_Iterations;
|
||||
printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark15_Reference)*100/time,(t*1000)/time);
|
||||
}
|
||||
if(cfgBenchmark16_Enable)
|
||||
{// Benchmark 16
|
||||
srand(380843);
|
||||
btDbvt dbvt;
|
||||
btAlignedObjectArray<btDbvtNode*> batch;
|
||||
btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
|
||||
dbvt.optimizeTopDown();
|
||||
batch.reserve(cfgBenchmark16_BatchCount);
|
||||
printf("[16] insert/remove batch(%u): ",cfgBenchmark16_BatchCount);
|
||||
wallclock.reset();
|
||||
for(int i=0;i<cfgBenchmark16_Passes;++i)
|
||||
{
|
||||
for(int j=0;j<cfgBenchmark16_BatchCount;++j)
|
||||
{
|
||||
batch.push_back(dbvt.insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale),0));
|
||||
}
|
||||
for(int j=0;j<cfgBenchmark16_BatchCount;++j)
|
||||
{
|
||||
dbvt.remove(batch[j]);
|
||||
}
|
||||
batch.resize(0);
|
||||
}
|
||||
const int time=(int)wallclock.getTimeMilliseconds();
|
||||
const int ir=cfgBenchmark16_Passes*cfgBenchmark16_BatchCount;
|
||||
printf("%u ms (%i%%),(%u bir/s)\r\n",time,(time-cfgBenchmark16_Reference)*100/time,int(ir*1000.0/time));
|
||||
}
|
||||
if(cfgBenchmark17_Enable)
|
||||
{// Benchmark 17
|
||||
srand(380843);
|
||||
btAlignedObjectArray<btDbvtVolume> volumes;
|
||||
btAlignedObjectArray<btScalar> results;
|
||||
volumes.resize(cfgLeaves);
|
||||
results.resize(cfgLeaves);
|
||||
for(int i=0;i<cfgLeaves;++i)
|
||||
{
|
||||
volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
|
||||
}
|
||||
printf("[17] btDbvtVolume proximity: ");
|
||||
wallclock.reset();
|
||||
for(int i=0;i<cfgBenchmark17_Iterations;++i)
|
||||
{
|
||||
for(int j=0;j<cfgLeaves;++j)
|
||||
{
|
||||
for(int k=0;k<cfgLeaves;++k)
|
||||
{
|
||||
results[k]=Proximity(volumes[j],volumes[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const int time=(int)wallclock.getTimeMilliseconds();
|
||||
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
|
||||
}
|
||||
if(cfgBenchmark18_Enable)
|
||||
{// Benchmark 18
|
||||
srand(380843);
|
||||
btAlignedObjectArray<btDbvtVolume> volumes;
|
||||
btAlignedObjectArray<int> results;
|
||||
btAlignedObjectArray<int> indices;
|
||||
volumes.resize(cfgLeaves);
|
||||
results.resize(cfgLeaves);
|
||||
indices.resize(cfgLeaves);
|
||||
for(int i=0;i<cfgLeaves;++i)
|
||||
{
|
||||
indices[i]=i;
|
||||
volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
|
||||
}
|
||||
for(int i=0;i<cfgLeaves;++i)
|
||||
{
|
||||
btSwap(indices[i],indices[rand()%cfgLeaves]);
|
||||
}
|
||||
printf("[18] btDbvtVolume select: ");
|
||||
wallclock.reset();
|
||||
for(int i=0;i<cfgBenchmark18_Iterations;++i)
|
||||
{
|
||||
for(int j=0;j<cfgLeaves;++j)
|
||||
{
|
||||
for(int k=0;k<cfgLeaves;++k)
|
||||
{
|
||||
const int idx=indices[k];
|
||||
results[idx]=Select(volumes[idx],volumes[j],volumes[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const int time=(int)wallclock.getTimeMilliseconds();
|
||||
printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark18_Reference)*100/time);
|
||||
}
|
||||
printf("\r\n\r\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -25,19 +25,47 @@ subject to the following restrictions:
|
||||
// Compile time configuration
|
||||
//
|
||||
|
||||
|
||||
// Implementation profiles
|
||||
#define DBVT_IMPL_GENERIC 0 // Generic implementation
|
||||
#define DBVT_IMPL_SSE 1 // SSE
|
||||
|
||||
// Template implementation of ICollide
|
||||
#ifdef WIN32
|
||||
//only define templates for visual studio 2005 and later, it just causes headaches for other compilers
|
||||
#if (defined (_MSC_VER) && _MSC_VER >= 1400)
|
||||
#define DBVT_USE_TEMPLATE 1 // Enable template for ICollide
|
||||
#else
|
||||
#define DBVT_USE_TEMPLATE 0 // Don't
|
||||
#if (defined (_MSC_VER) && _MSC_VER >= 1400)
|
||||
#define DBVT_USE_TEMPLATE 1
|
||||
#else
|
||||
#define DBVT_USE_TEMPLATE 0
|
||||
#endif
|
||||
#else
|
||||
#define DBVT_USE_TEMPLATE 0 // Enable template for ICollide
|
||||
#define DBVT_USE_TEMPLATE 0
|
||||
#endif
|
||||
|
||||
#define DBVT_USE_MEMMOVE 1 // Enable memmove (collideOCL)
|
||||
#define DBVT_ENABLE_BENCHMARK 0 // Enable benchmarking code
|
||||
// Using memmov for collideOCL
|
||||
#define DBVT_USE_MEMMOVE 1
|
||||
|
||||
// Enable benchmarking code
|
||||
#define DBVT_ENABLE_BENCHMARK 0
|
||||
|
||||
// Inlining
|
||||
#define DBVT_INLINE SIMD_FORCE_INLINE
|
||||
// Align
|
||||
#ifdef WIN32
|
||||
#define DBVT_ALIGN __declspec(align(16))
|
||||
#else
|
||||
#define DBVT_ALIGN
|
||||
#endif
|
||||
|
||||
// Specific methods implementation
|
||||
#ifdef WIN32
|
||||
#define DBVT_PROXIMITY_IMPL DBVT_IMPL_SSE
|
||||
#define DBVT_SELECT_IMPL DBVT_IMPL_SSE
|
||||
#define DBVT_MERGE_IMPL DBVT_IMPL_SSE
|
||||
#else
|
||||
#define DBVT_PROXIMITY_IMPL DBVT_IMPL_GENERIC
|
||||
#define DBVT_SELECT_IMPL DBVT_IMPL_GENERIC
|
||||
#define DBVT_MERGE_IMPL DBVT_IMPL_GENERIC
|
||||
#endif
|
||||
|
||||
//
|
||||
// Auto config and checks
|
||||
@@ -76,6 +104,18 @@ subject to the following restrictions:
|
||||
#error "DBVT_ENABLE_BENCHMARK undefined"
|
||||
#endif
|
||||
|
||||
#ifndef DBVT_PROXIMITY_IMPL
|
||||
#error "DBVT_PROXIMITY_IMPL undefined"
|
||||
#endif
|
||||
|
||||
#ifndef DBVT_SELECT_IMPL
|
||||
#error "DBVT_SELECT_IMPL undefined"
|
||||
#endif
|
||||
|
||||
#ifndef DBVT_MERGE_IMPL
|
||||
#error "DBVT_MERGE_IMPL undefined"
|
||||
#endif
|
||||
|
||||
//
|
||||
// Defaults volumes
|
||||
//
|
||||
@@ -83,41 +123,44 @@ subject to the following restrictions:
|
||||
/* btDbvtAabbMm */
|
||||
struct btDbvtAabbMm
|
||||
{
|
||||
inline btVector3 Center() const { return((mi+mx)/2); }
|
||||
inline btVector3 Lengths() const { return(mx-mi); }
|
||||
inline btVector3 Extents() const { return((mx-mi)/2); }
|
||||
inline const btVector3& Mins() const { return(mi); }
|
||||
inline const btVector3& Maxs() const { return(mx); }
|
||||
static inline btDbvtAabbMm FromCE(const btVector3& c,const btVector3& e);
|
||||
static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
|
||||
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
|
||||
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
|
||||
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
|
||||
inline void Expand(const btVector3 e);
|
||||
inline void SignedExpand(const btVector3 e);
|
||||
inline bool Contain(const btDbvtAabbMm& a) const;
|
||||
inline int Classify(const btVector3& n,btScalar o,int s) const;
|
||||
inline btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
|
||||
inline friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
inline friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
const btTransform& xform);
|
||||
inline friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& b);
|
||||
inline friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& org,
|
||||
const btVector3& invdir,
|
||||
const unsigned* signs);
|
||||
inline friend btScalar Proximity( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
inline friend void Merge( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
btDbvtAabbMm& r);
|
||||
inline friend bool NotEqual( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
DBVT_INLINE btVector3 Center() const { return((mi+mx)/2); }
|
||||
DBVT_INLINE btVector3 Lengths() const { return(mx-mi); }
|
||||
DBVT_INLINE btVector3 Extents() const { return((mx-mi)/2); }
|
||||
DBVT_INLINE const btVector3& Mins() const { return(mi); }
|
||||
DBVT_INLINE const btVector3& Maxs() const { return(mx); }
|
||||
static inline btDbvtAabbMm FromCE(const btVector3& c,const btVector3& e);
|
||||
static inline btDbvtAabbMm FromCR(const btVector3& c,btScalar r);
|
||||
static inline btDbvtAabbMm FromMM(const btVector3& mi,const btVector3& mx);
|
||||
static inline btDbvtAabbMm FromPoints(const btVector3* pts,int n);
|
||||
static inline btDbvtAabbMm FromPoints(const btVector3** ppts,int n);
|
||||
DBVT_INLINE void Expand(const btVector3 e);
|
||||
DBVT_INLINE void SignedExpand(const btVector3 e);
|
||||
DBVT_INLINE bool Contain(const btDbvtAabbMm& a) const;
|
||||
DBVT_INLINE int Classify(const btVector3& n,btScalar o,int s) const;
|
||||
DBVT_INLINE btScalar ProjectMinimum(const btVector3& v,unsigned signs) const;
|
||||
DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
const btTransform& xform);
|
||||
DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& b);
|
||||
DBVT_INLINE friend bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& org,
|
||||
const btVector3& invdir,
|
||||
const unsigned* signs);
|
||||
DBVT_INLINE friend btScalar Proximity( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
DBVT_INLINE friend int Select( const btDbvtAabbMm& o,
|
||||
const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
DBVT_INLINE friend void Merge( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
btDbvtAabbMm& r);
|
||||
DBVT_INLINE friend bool NotEqual( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b);
|
||||
private:
|
||||
inline void AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const;
|
||||
DBVT_INLINE void AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const;
|
||||
private:
|
||||
btVector3 mi,mx;
|
||||
};
|
||||
@@ -129,7 +172,7 @@ typedef btDbvtAabbMm btDbvtVolume;
|
||||
struct btDbvtNode
|
||||
{
|
||||
btDbvtVolume volume;
|
||||
btDbvtNode* parent;
|
||||
btDbvtNode* parent;
|
||||
bool isleaf() const { return(childs[1]==0); }
|
||||
bool isinternal() const { return(!isleaf()); }
|
||||
union {
|
||||
@@ -150,6 +193,7 @@ struct btDbvt
|
||||
{
|
||||
const btDbvtNode* a;
|
||||
const btDbvtNode* b;
|
||||
sStkNN() {}
|
||||
sStkNN(const btDbvtNode* na,const btDbvtNode* nb) : a(na),b(nb) {}
|
||||
};
|
||||
struct sStkNP
|
||||
@@ -219,7 +263,7 @@ struct btDbvt
|
||||
void optimizeBottomUp();
|
||||
void optimizeTopDown(int bu_treshold=128);
|
||||
void optimizeIncremental(int passes);
|
||||
btDbvtNode* insert(const btDbvtVolume& box,void* data);
|
||||
btDbvtNode* insert(const btDbvtVolume& box,void* data);
|
||||
void update(btDbvtNode* leaf,int lookahead=-1);
|
||||
void update(btDbvtNode* leaf,const btDbvtVolume& volume);
|
||||
bool update(btDbvtNode* leaf,btDbvtVolume volume,const btVector3& velocity,btScalar margin);
|
||||
@@ -227,7 +271,8 @@ struct btDbvt
|
||||
bool update(btDbvtNode* leaf,btDbvtVolume volume,btScalar margin);
|
||||
void remove(btDbvtNode* leaf);
|
||||
void write(IWriter* iwriter) const;
|
||||
void clone(btDbvt& dest,IClone* iclone=0) const;
|
||||
void clone(btDbvt& dest,IClone* iclone=0) const;
|
||||
static int maxdepth(const btDbvtNode* node);
|
||||
static int countLeaves(const btDbvtNode* node);
|
||||
static void extractLeaves(const btDbvtNode* node,btAlignedObjectArray<const btDbvtNode*>& leaves);
|
||||
#if DBVT_ENABLE_BENCHMARK
|
||||
@@ -284,7 +329,7 @@ struct btDbvt
|
||||
static void collideTU( const btDbvtNode* root,
|
||||
DBVT_IPOLICY);
|
||||
// Helpers
|
||||
static inline int nearest(const int* i,const btDbvt::sStkNPS* a,btScalar v,int l,int h)
|
||||
static DBVT_INLINE int nearest(const int* i,const btDbvt::sStkNPS* a,btScalar v,int l,int h)
|
||||
{
|
||||
int m=0;
|
||||
while(l<h)
|
||||
@@ -294,7 +339,7 @@ struct btDbvt
|
||||
}
|
||||
return(h);
|
||||
}
|
||||
static inline int allocate( btAlignedObjectArray<int>& ifree,
|
||||
static DBVT_INLINE int allocate( btAlignedObjectArray<int>& ifree,
|
||||
btAlignedObjectArray<sStkNPS>& stock,
|
||||
const sStkNPS& value)
|
||||
{
|
||||
@@ -315,7 +360,7 @@ struct btDbvt
|
||||
//
|
||||
|
||||
//
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromCE(const btVector3& c,const btVector3& e)
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromCE(const btVector3& c,const btVector3& e)
|
||||
{
|
||||
btDbvtAabbMm box;
|
||||
box.mi=c-e;box.mx=c+e;
|
||||
@@ -323,13 +368,13 @@ return(box);
|
||||
}
|
||||
|
||||
//
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromCR(const btVector3& c,btScalar r)
|
||||
// Builds a box from a center 'c' and a scalar 'r' by forwarding to FromCE
// with the vector (r,r,r) as its second argument.
inline btDbvtAabbMm			btDbvtAabbMm::FromCR(const btVector3& c,btScalar r)
{
	const btVector3	halfExtents(r,r,r);
	return(FromCE(c,halfExtents));
}
|
||||
|
||||
//
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromMM(const btVector3& mi,const btVector3& mx)
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromMM(const btVector3& mi,const btVector3& mx)
|
||||
{
|
||||
btDbvtAabbMm box;
|
||||
box.mi=mi;box.mx=mx;
|
||||
@@ -337,7 +382,7 @@ return(box);
|
||||
}
|
||||
|
||||
//
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3* pts,int n)
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3* pts,int n)
|
||||
{
|
||||
btDbvtAabbMm box;
|
||||
box.mi=box.mx=pts[0];
|
||||
@@ -350,7 +395,7 @@ return(box);
|
||||
}
|
||||
|
||||
//
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3** ppts,int n)
|
||||
inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3** ppts,int n)
|
||||
{
|
||||
btDbvtAabbMm box;
|
||||
box.mi=box.mx=*ppts[0];
|
||||
@@ -363,13 +408,13 @@ return(box);
|
||||
}
|
||||
|
||||
//
|
||||
inline void btDbvtAabbMm::Expand(const btVector3 e)
|
||||
// Grows the box by 'e' on both sides: 'e' is subtracted from 'mi' and added
// to 'mx'.
DBVT_INLINE void		btDbvtAabbMm::Expand(const btVector3 e)
{
	mi-=e;
	mx+=e;
}
|
||||
|
||||
//
|
||||
inline void btDbvtAabbMm::SignedExpand(const btVector3 e)
|
||||
DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3 e)
|
||||
{
|
||||
if(e.x()>0) mx.setX(mx.x()+e.x()); else mi.setX(mi.x()+e.x());
|
||||
if(e.y()>0) mx.setY(mx.y()+e.y()); else mi.setY(mi.y()+e.y());
|
||||
@@ -377,7 +422,7 @@ if(e.z()>0) mx.setZ(mx.z()+e.z()); else mi.setZ(mi.z()+e.z());
|
||||
}
|
||||
|
||||
//
|
||||
inline bool btDbvtAabbMm::Contain(const btDbvtAabbMm& a) const
|
||||
DBVT_INLINE bool btDbvtAabbMm::Contain(const btDbvtAabbMm& a) const
|
||||
{
|
||||
return( (mi.x()<=a.mi.x())&&
|
||||
(mi.y()<=a.mi.y())&&
|
||||
@@ -388,7 +433,7 @@ return( (mi.x()<=a.mi.x())&&
|
||||
}
|
||||
|
||||
//
|
||||
inline int btDbvtAabbMm::Classify(const btVector3& n,btScalar o,int s) const
|
||||
DBVT_INLINE int btDbvtAabbMm::Classify(const btVector3& n,btScalar o,int s) const
|
||||
{
|
||||
btVector3 pi,px;
|
||||
switch(s)
|
||||
@@ -416,7 +461,7 @@ return(0);
|
||||
}
|
||||
|
||||
//
|
||||
inline btScalar btDbvtAabbMm::ProjectMinimum(const btVector3& v,unsigned signs) const
|
||||
DBVT_INLINE btScalar btDbvtAabbMm::ProjectMinimum(const btVector3& v,unsigned signs) const
|
||||
{
|
||||
const btVector3* b[]={&mx,&mi};
|
||||
const btVector3 p( b[(signs>>0)&1]->x(),
|
||||
@@ -426,7 +471,7 @@ return(dot(p,v));
|
||||
}
|
||||
|
||||
//
|
||||
inline void btDbvtAabbMm::AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const
|
||||
DBVT_INLINE void btDbvtAabbMm::AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const
|
||||
{
|
||||
for(int i=0;i<3;++i)
|
||||
{
|
||||
@@ -438,7 +483,7 @@ for(int i=0;i<3;++i)
|
||||
}
|
||||
|
||||
//
|
||||
inline bool Intersect( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b)
|
||||
{
|
||||
return( (a.mi.x()<=b.mx.x())&&
|
||||
@@ -450,7 +495,7 @@ return( (a.mi.x()<=b.mx.x())&&
|
||||
}
|
||||
|
||||
//
|
||||
inline bool Intersect( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
const btTransform& xform)
|
||||
{
|
||||
@@ -466,7 +511,7 @@ return(true);
|
||||
}
|
||||
|
||||
//
|
||||
inline bool Intersect( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& b)
|
||||
{
|
||||
return( (b.x()>=a.mi.x())&&
|
||||
@@ -478,11 +523,20 @@ return( (b.x()>=a.mi.x())&&
|
||||
}
|
||||
|
||||
//
|
||||
inline bool Intersect( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE bool Intersect( const btDbvtAabbMm& a,
|
||||
const btVector3& org,
|
||||
const btVector3& invdir,
|
||||
const unsigned* signs)
|
||||
{
|
||||
#if 0
|
||||
const btVector3 b0((a.mi-org)*invdir);
|
||||
const btVector3 b1((a.mx-org)*invdir);
|
||||
const btVector3 tmin(btMin(b0[0],b1[0]),btMin(b0[1],b1[1]),btMin(b0[2],b1[2]));
|
||||
const btVector3 tmax(btMax(b0[0],b1[0]),btMax(b0[1],b1[1]),btMax(b0[2],b1[2]));
|
||||
const btScalar tin=btMax(tmin[0],btMax(tmin[1],tmin[2]));
|
||||
const btScalar tout=btMin(tmax[0],btMin(tmax[1],tmax[2]));
|
||||
return(tin<tout);
|
||||
#else
|
||||
const btVector3* bounds[2]={&a.mi,&a.mx};
|
||||
btScalar txmin=(bounds[ signs[0]]->x()-org[0])*invdir[0];
|
||||
btScalar txmax=(bounds[1-signs[0]]->x()-org[0])*invdir[0];
|
||||
@@ -497,30 +551,113 @@ if((txmin>tzmax)||(tzmin>txmax)) return(false);
|
||||
if(tzmin>txmin) txmin=tzmin;
|
||||
if(tzmax<txmax) txmax=tzmax;
|
||||
return(txmax>0);
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
// Returns a distance-like score between boxes a and b: the sum of the
// absolute values of the components of (a.mi+a.mx)-(b.mi+b.mx).
// Two implementations are selected at compile time by DBVT_PROXIMITY_IMPL:
// an x86 inline-assembly SSE path and a generic btVector3 path.
inline btScalar Proximity( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE btScalar Proximity( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b)
|
||||
{
|
||||
#if DBVT_PROXIMITY_IMPL == DBVT_IMPL_SSE
|
||||
// SSE path: adds the 16 bytes at offset 0 and offset 16 of each box,
// subtracts the two sums, clears the sign bit of every lane with the
// 0x7fffffff mask, then horizontally adds the four lanes into r[0].
// movaps is used for the loads, so both boxes must be 16-byte aligned.
// NOTE(review): all four lanes are summed here, while the generic path
// sums only x, y and z - presumably the fourth lane is zero; confirm.
// NOTE(review): presumably offset 0 is mi and offset 16 is mx - verify
// against the btDbvtAabbMm layout.
DBVT_ALIGN btScalar r[1];
|
||||
static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
|
||||
__asm
|
||||
{
|
||||
mov eax,a
|
||||
mov ecx,b
|
||||
movaps xmm0,[eax]
|
||||
movaps xmm2,[ecx]
|
||||
movaps xmm1,[eax+16]
|
||||
movaps xmm3,[ecx+16]
|
||||
addps xmm0,xmm1
|
||||
addps xmm2,xmm3
|
||||
subps xmm0,xmm2
|
||||
andps xmm0,mask
|
||||
movhlps xmm1,xmm0
|
||||
addps xmm0,xmm1
|
||||
pshufd xmm1,xmm0,1
|
||||
addss xmm0,xmm1
|
||||
movss r,xmm0
|
||||
}
|
||||
return(r[0]);
|
||||
#else
|
||||
// Generic path: same quantity computed with btVector3 arithmetic.
const btVector3 d=(a.mi+a.mx)-(b.mi+b.mx);
|
||||
return(btFabs(d.x())+btFabs(d.y())+btFabs(d.z()));
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
inline void Merge( const btDbvtAabbMm& a,
|
||||
// Picks which of the boxes a and b is closer to o, using the same measure
// as Proximity(). Returns 0 when Proximity(o,a) < Proximity(o,b) and 1
// otherwise (ties yield 1).
// DBVT_SELECT_IMPL chooses between an x86 inline-assembly SSE path and a
// generic path that calls Proximity() twice.
DBVT_INLINE int Select( const btDbvtAabbMm& o,
|
||||
const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b)
|
||||
{
|
||||
#if DBVT_SELECT_IMPL == DBVT_IMPL_SSE
|
||||
// SSE path: for each of o, a and b, adds the 16 bytes at offset 0 and
// offset 16; subtracts o's sum from a's and from b's; clears the sign
// bits with the 0x7fffffff mask; horizontally adds each result into its
// low lane. cmpless then sets the low lane of xmm2 to all ones when
// b's score <= a's score, and the low bit of that mask is returned.
// movaps and the addps memory operands require 16-byte aligned boxes.
// NOTE(review): the horizontal add includes the fourth lane, which the
// generic Proximity() path ignores - presumably it is zero; confirm.
DBVT_ALIGN __int32 r[1];
|
||||
static DBVT_ALIGN const unsigned __int32 mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
|
||||
__asm
|
||||
{
|
||||
mov eax,o
|
||||
mov ecx,a
|
||||
mov edx,b
|
||||
movaps xmm0,[eax]
|
||||
movaps xmm5,mask
|
||||
addps xmm0,[eax+16]
|
||||
movaps xmm1,[ecx]
|
||||
movaps xmm2,[edx]
|
||||
addps xmm1,[ecx+16]
|
||||
addps xmm2,[edx+16]
|
||||
subps xmm1,xmm0
|
||||
subps xmm2,xmm0
|
||||
andps xmm1,xmm5
|
||||
andps xmm2,xmm5
|
||||
movhlps xmm3,xmm1
|
||||
movhlps xmm4,xmm2
|
||||
addps xmm1,xmm3
|
||||
addps xmm2,xmm4
|
||||
pshufd xmm3,xmm1,1
|
||||
pshufd xmm4,xmm2,1
|
||||
addss xmm1,xmm3
|
||||
addss xmm2,xmm4
|
||||
cmpless xmm2,xmm1
|
||||
movss r,xmm2
|
||||
}
|
||||
return(r[0]&1);
|
||||
#else
|
||||
// Generic path: compare the two Proximity() scores directly.
return(Proximity(o,a)<Proximity(o,b)?0:1);
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
// Writes into r the smallest box enclosing both a and b: r.mi receives the
// per-component minimum of a.mi and b.mi, and r.mx the per-component
// maximum of a.mx and b.mx.
// DBVT_MERGE_IMPL chooses between an x86 inline-assembly SSE path and a
// generic per-component loop.
DBVT_INLINE void Merge( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b,
|
||||
btDbvtAabbMm& r)
|
||||
{
|
||||
#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
|
||||
// SSE path: minps on the 16 bytes at offset 0 and maxps on the 16 bytes
// at offset 16 of a and b, stored to the same offsets of r. movaps is
// used for every load and store, so a, b and r must be 16-byte aligned.
// All four lanes are processed, whereas the generic loop writes only
// components 0..2.
// NOTE(review): presumably offset 0 is mi and offset 16 is mx - verify
// against the btDbvtAabbMm layout.
__asm
|
||||
{
|
||||
mov eax,a
|
||||
mov edx,b
|
||||
mov ecx,r
|
||||
movaps xmm0,[eax+0]
|
||||
movaps xmm1,[edx+0]
|
||||
movaps xmm2,[eax+16]
|
||||
movaps xmm3,[edx+16]
|
||||
minps xmm0,xmm1
|
||||
maxps xmm2,xmm3
|
||||
movaps [ecx+0],xmm0
|
||||
movaps [ecx+16],xmm2
|
||||
}
|
||||
#else
|
||||
// Generic path: component-wise min of the mi corners and max of the mx
// corners for the three axes.
for(int i=0;i<3;++i)
|
||||
{
|
||||
if(a.mi[i]<b.mi[i]) r.mi[i]=a.mi[i]; else r.mi[i]=b.mi[i];
|
||||
if(a.mx[i]>b.mx[i]) r.mx[i]=a.mx[i]; else r.mx[i]=b.mx[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
inline bool NotEqual( const btDbvtAabbMm& a,
|
||||
DBVT_INLINE bool NotEqual( const btDbvtAabbMm& a,
|
||||
const btDbvtAabbMm& b)
|
||||
{
|
||||
return( (a.mi.x()!=b.mi.x())||
|
||||
@@ -576,18 +713,24 @@ DBVT_CHECKTYPE
|
||||
if(root0&&root1)
|
||||
{
|
||||
btAlignedObjectArray<sStkNN> stack;
|
||||
stack.reserve(DOUBLE_STACKSIZE);
|
||||
stack.push_back(sStkNN(root0,root1));
|
||||
int depth=1;
|
||||
int treshold=DOUBLE_STACKSIZE-4;
|
||||
stack.resize(DOUBLE_STACKSIZE);
|
||||
stack[0]=sStkNN(root0,root1);
|
||||
do {
|
||||
sStkNN p=stack[stack.size()-1];
|
||||
stack.pop_back();
|
||||
sStkNN p=stack[--depth];
|
||||
if(depth>treshold)
|
||||
{
|
||||
stack.resize(stack.size()*2);
|
||||
treshold=stack.size()-4;
|
||||
}
|
||||
if(p.a==p.b)
|
||||
{
|
||||
if(p.a->isinternal())
|
||||
{
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.a->childs[0]));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.a->childs[1]));
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.a->childs[1]));
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]);
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]);
|
||||
}
|
||||
}
|
||||
else if(Intersect(p.a->volume,p.b->volume))
|
||||
@@ -596,23 +739,23 @@ if(root0&&root1)
|
||||
{
|
||||
if(p.b->isinternal())
|
||||
{
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b->childs[1]));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b->childs[1]));
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b));
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(p.b->isinternal())
|
||||
{
|
||||
stack.push_back(sStkNN(p.a,p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a,p.b->childs[1]));
|
||||
stack[depth++]=sStkNN(p.a,p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a,p.b->childs[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -620,7 +763,7 @@ if(root0&&root1)
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(stack.size()>0);
|
||||
} while(depth);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -635,34 +778,40 @@ DBVT_CHECKTYPE
|
||||
if(root0&&root1)
|
||||
{
|
||||
btAlignedObjectArray<sStkNN> stack;
|
||||
stack.reserve(DOUBLE_STACKSIZE);
|
||||
stack.push_back(sStkNN(root0,root1));
|
||||
int depth=1;
|
||||
int treshold=DOUBLE_STACKSIZE-4;
|
||||
stack.resize(DOUBLE_STACKSIZE);
|
||||
stack[0]=sStkNN(root0,root1);
|
||||
do {
|
||||
sStkNN p=stack[stack.size()-1];
|
||||
stack.pop_back();
|
||||
sStkNN p=stack[--depth];
|
||||
if(Intersect(p.a->volume,p.b->volume,xform))
|
||||
{
|
||||
if(depth>treshold)
|
||||
{
|
||||
stack.resize(stack.size()*2);
|
||||
treshold=stack.size()-4;
|
||||
}
|
||||
if(p.a->isinternal())
|
||||
{
|
||||
if(p.b->isinternal())
|
||||
{
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b->childs[1]));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b->childs[1]));
|
||||
{
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
stack.push_back(sStkNN(p.a->childs[0],p.b));
|
||||
stack.push_back(sStkNN(p.a->childs[1],p.b));
|
||||
stack[depth++]=sStkNN(p.a->childs[0],p.b);
|
||||
stack[depth++]=sStkNN(p.a->childs[1],p.b);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(p.b->isinternal())
|
||||
{
|
||||
stack.push_back(sStkNN(p.a,p.b->childs[0]));
|
||||
stack.push_back(sStkNN(p.a,p.b->childs[1]));
|
||||
stack[depth++]=sStkNN(p.a,p.b->childs[0]);
|
||||
stack[depth++]=sStkNN(p.a,p.b->childs[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -670,7 +819,7 @@ if(root0&&root1)
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(stack.size()>0);
|
||||
} while(depth);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -945,5 +1094,8 @@ if(root)
|
||||
#undef DBVT_PREFIX
|
||||
#undef DBVT_IPOLICY
|
||||
#undef DBVT_CHECKTYPE
|
||||
#undef DBVT_IMPL_GENERIC
|
||||
#undef DBVT_IMPL_FPU0x86
|
||||
#undef DBVT_IMPL_SSE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -112,7 +112,6 @@ void Process(const btDbvtNode* na,const btDbvtNode* nb)
|
||||
//
|
||||
btDbvtBroadphase::btDbvtBroadphase(btOverlappingPairCache* paircache)
|
||||
{
|
||||
btDbvt::benchmark();
|
||||
m_releasepaircache = (paircache!=0)?false:true;
|
||||
m_predictedframes = 2;
|
||||
m_stageCurrent = 0;
|
||||
|
||||
Reference in New Issue
Block a user