only do gl2cl once

add contact stats option
re-enable a few more demos
This commit is contained in:
erwin coumans
2013-04-19 18:30:05 -07:00
parent b883283460
commit d086090c77
9 changed files with 106 additions and 65 deletions

View File

@@ -97,10 +97,10 @@
-- include "../opencl/vector_add"
include "../opencl/basic_initialize"
include "../demo/gpu_initialize"
-- include "../opencl/parallel_primitives/host"
-- include "../opencl/parallel_primitives/test"
-- include "../opencl/parallel_primitives/benchmark"
-- include "../opencl/lds_bank_conflict"
include "../opencl/parallel_primitives/host"
include "../opencl/parallel_primitives/test"
include "../opencl/parallel_primitives/benchmark"
include "../opencl/lds_bank_conflict"
-- include "../opencl/reduce"
-- include "../opencl/gpu_broadphase/test"
-- include "../opencl/gpu_narrowphase/test"

View File

@@ -67,9 +67,6 @@ GpuDemo::CreateFunc* allDemos[]=
{
// ConcaveCompound2Scene::MyCreateFunc,
ConcaveCompoundScene::MyCreateFunc,
GpuCompoundPlaneScene::MyCreateFunc,
GpuBoxPlaneScene::MyCreateFunc,
@@ -84,10 +81,14 @@ GpuDemo::CreateFunc* allDemos[]=
ConcaveScene::MyCreateFunc,
GpuSphereScene::MyCreateFunc,
ConcaveCompoundScene::MyCreateFunc,
GpuCompoundPlaneScene::MyCreateFunc,
GpuSphereScene::MyCreateFunc,
PairBench::MyCreateFunc,

View File

@@ -549,7 +549,7 @@ void ConcaveCompound2Scene::createDynamicObjects(const ConstructionInfo& ci)
float mass = 1;//j==0? 0.f : 1.f;
//b3Vector3 position(i*10*ci.gapX,j*ci.gapY,k*10*ci.gapZ);
b3Vector3 position(i*10*ci.gapX,50+j*ci.gapY,k*10*ci.gapZ);
b3Vector3 position(i*10*ci.gapX,10+j*ci.gapY,k*10*ci.gapZ);
// b3Quaternion orn(0,0,0,1);
b3Quaternion orn(b3Vector3(0,0,1),1.8);
@@ -665,7 +665,7 @@ b3Vector3 childPositions[3] = {
{
float mass = 1;//j==0? 0.f : 1.f;
b3Vector3 position(i*ci.gapX,150+j*ci.gapY,k*ci.gapZ);
b3Vector3 position(i*ci.gapX,50+j*ci.gapY,k*ci.gapZ);
//b3Quaternion orn(0,0,0,1);
b3Quaternion orn(b3Vector3(1,0,0),0.7);

View File

@@ -158,18 +158,22 @@ void GpuRigidBodyDemo::clientMoveAndDisplay()
if (animate && numObjects)
{
BT_PROFILE("gl2cl");
GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo;
int arraySizeInBytes = numObjects * (3)*sizeof(btVector4);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
cl_bool blocking= CL_TRUE;
positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
GLint err = glGetError();
assert(err==GL_NO_ERROR);
if (!m_data->m_instancePosOrnColor)
{
GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo;
int arraySizeInBytes = numObjects * (3)*sizeof(btVector4);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
cl_bool blocking= CL_TRUE;
positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
GLint err = glGetError();
assert(err==GL_NO_ERROR);
m_data->m_instancePosOrnColor = new btOpenCLArray<btVector4>(m_clData->m_clContext,m_clData->m_clQueue);
m_data->m_instancePosOrnColor->resize(3*numObjects);
m_data->m_instancePosOrnColor->copyFromHostPointer(positions,3*numObjects,0);
glUnmapBuffer( GL_ARRAY_BUFFER);
err = glGetError();
assert(err==GL_NO_ERROR);
}
}
@@ -196,6 +200,13 @@ void GpuRigidBodyDemo::clientMoveAndDisplay()
BT_PROFILE("cl2gl_upload");
GLint err = glGetError();
assert(err==GL_NO_ERROR);
GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo;
int arraySizeInBytes = numObjects * (3)*sizeof(btVector4);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
cl_bool blocking= CL_TRUE;
positions= (btVector4*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),arraySizeInBytes, GL_MAP_WRITE_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
err = glGetError();
assert(err==GL_NO_ERROR);
m_data->m_instancePosOrnColor->copyToHostPointer(positions,3*numObjects,0);
glUnmapBuffer( GL_ARRAY_BUFFER);
err = glGetError();

View File

@@ -591,7 +591,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (gpuBatchContacts)
{
BT_PROFILE("gpu batchContacts");
maxNumBatches = 25;//250;
maxNumBatches = 50;//250;
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
} else
{
@@ -629,10 +629,17 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
int simdWidth =64;//-1;//32;
//int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
maxNumBatches = btMax(numBatches,maxNumBatches);
static int globalMaxBatch = 0;
if (maxNumBatches>globalMaxBatch )
{
globalMaxBatch = maxNumBatches;
printf("maxNumBatches = %d\n",maxNumBatches);
}
clFinish(m_data->m_queue);
@@ -724,7 +731,7 @@ static bool sortfnc(const btSortData& a,const btSortData& b)
b3AlignedObjectArray<int> bodyUsed;
@@ -736,11 +743,7 @@ b3AlignedObjectArray<b3Contact4> old;
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
{
b3AlignedObjectArray<int> bodyUsed;
bodyUsed.resize(numBodies);
for (int q=0;q<numBodies;q++)
bodyUsed[q]=0;
BT_PROFILE("sortConstraintByBatch");
int numIter = 0;
@@ -759,7 +762,8 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
for(int i=0; i<n; i++)
cs[i].getBatchIdx() = -1;
#endif
for(int i=0; i<n; i++) idxSrc[i] = i;
for(int i=0; i<n; i++)
idxSrc[i] = i;
nIdxSrc = n;
int batchIdx = 0;
@@ -803,7 +807,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
if( aUnavailable==0 && bUnavailable==0 ) // ok
{
if (!!aIsStatic)
if (!aIsStatic)
flg[ aIdx/32 ] |= (1<<(aIdx&31));
if (!bIsStatic)
flg[ bIdx/32 ] |= (1<<(bIdx&31));
@@ -861,17 +865,19 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n,
}
b3AlignedObjectArray<int> bodyUsed2;
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
BT_PROFILE("sortConstraintByBatch");
BT_PROFILE("sortConstraintByBatch2");
bodyUsed.resize(2*simdWidth);
bodyUsed2.resize(2*simdWidth);
for (int q=0;q<2*simdWidth;q++)
bodyUsed[q]=0;
bodyUsed2[q]=0;
int curBodyUsed = 0;
@@ -905,7 +911,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
int nCurrentBatch = 0;
// clear flag
for(int i=0; i<curBodyUsed; i++)
bodyUsed[i] = 0;
bodyUsed2[i] = 0;
curBodyUsed = 0;
for(int i=numValidConstraints; i<numConstraints; i++)
@@ -925,7 +931,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
{
for (int j=0;j<curBodyUsed;j++)
{
if (bodyA == bodyUsed[j])
if (bodyA == bodyUsed2[j])
{
aUnavailable=1;
break;
@@ -937,7 +943,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
{
for (int j=0;j<curBodyUsed;j++)
{
if (bodyB == bodyUsed[j])
if (bodyB == bodyUsed2[j])
{
bUnavailable=1;
break;
@@ -949,11 +955,11 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
{
if (!aIsStatic)
{
bodyUsed[curBodyUsed++] = bodyA;
bodyUsed2[curBodyUsed++] = bodyA;
}
if (!bIsStatic)
{
bodyUsed[curBodyUsed++] = bodyB;
bodyUsed2[curBodyUsed++] = bodyB;
}
cs[idx].getBatchIdx() = batchIdx;
@@ -972,7 +978,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
{
nCurrentBatch = 0;
for(int i=0; i<curBodyUsed; i++)
bodyUsed[i] = 0;
bodyUsed2[i] = 0;
curBodyUsed = 0;
@@ -1016,26 +1022,34 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int n
}
b3AlignedObjectArray<int> bodyUsed;
b3AlignedObjectArray<int> curUsed;
inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
BT_PROFILE("sortConstraintByBatch");
BT_PROFILE("sortConstraintByBatch3");
static int maxSwaps = 0;
int numSwaps = 0;
curUsed.resize(2*simdWidth);
static int maxNumConstraints = 0;
if (maxNumConstraints<numConstraints)
{
maxNumConstraints = numConstraints;
printf("maxNumConstraints = %d\n",maxNumConstraints );
//printf("maxNumConstraints = %d\n",maxNumConstraints );
}
bodyUsed.resize(2*simdWidth);
int numUsedArray = numBodies/32+1;
bodyUsed.resize(numUsedArray);
for (int q=0;q<2*simdWidth;q++)
for (int q=0;q<numUsedArray;q++)
bodyUsed[q]=0;
int curBodyUsed = 0;
int numIter = 0;
@@ -1065,7 +1079,8 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
int nCurrentBatch = 0;
// clear flag
for(int i=0; i<curBodyUsed; i++)
bodyUsed[i] = 0;
bodyUsed[curUsed[i]/32] = 0;
curBodyUsed = 0;
for(int i=numValidConstraints; i<numConstraints; i++)
@@ -1083,37 +1098,25 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
int bUnavailable = 0;
if (!aIsStatic)
{
for (int j=0;j<curBodyUsed;j++)
{
if (bodyA == bodyUsed[j])
{
aUnavailable=1;
break;
}
}
aUnavailable = bodyUsed[ bodyA/32 ] & (1<<(bodyA&31));
}
if (!aUnavailable)
if (!bIsStatic)
{
for (int j=0;j<curBodyUsed;j++)
{
if (bodyB == bodyUsed[j])
{
bUnavailable=1;
break;
}
}
bUnavailable = bodyUsed[ bodyB/32 ] & (1<<(bodyB&31));
}
if( aUnavailable==0 && bUnavailable==0 ) // ok
{
if (!aIsStatic)
{
bodyUsed[curBodyUsed++] = bodyA;
bodyUsed[ bodyA/32 ] |= (1<<(bodyA&31));
curUsed[curBodyUsed++]=bodyA;
}
if (!bIsStatic)
{
bodyUsed[curBodyUsed++] = bodyB;
bodyUsed[ bodyB/32 ] |= (1<<(bodyB&31));
curUsed[curBodyUsed++]=bodyB;
}
cs[idx].getBatchIdx() = batchIdx;
@@ -1131,7 +1134,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
{
nCurrentBatch = 0;
for(int i=0; i<curBodyUsed; i++)
bodyUsed[i] = 0;
bodyUsed[curUsed[i]/32] = 0;
curBodyUsed = 0;
}
}
@@ -1152,7 +1155,7 @@ inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int n
if (maxSwaps<numSwaps)
{
maxSwaps = numSwaps;
printf("maxSwaps = %d\n", maxSwaps);
//printf("maxSwaps = %d\n", maxSwaps);
}
return batchIdx;

View File

@@ -698,6 +698,11 @@ cl_mem b3GpuNarrowPhase::getContactsGpu()
return m_data->m_pBufContactOutGPU->getBufferCL();
}
/// Copies m_pBufContactOutGPU into m_pBufContactOutCPU via copyToHost and
/// returns the address of element 0 of the CPU-side array.
///
/// The copy is performed on every call, so the returned data reflects the
/// contents of the GPU contact buffer at the time of the call.
///
/// NOTE(review): at(0) presumably requires a non-empty array; callers should
/// check the contact count before calling -- confirm against the container.
const b3Contact4* b3GpuNarrowPhase::getContactsCPU() const
{
	// Refresh the CPU-side array from the GPU buffer before exposing it.
	m_data->m_pBufContactOutGPU->copyToHost(*m_data->m_pBufContactOutCPU);

	const b3Contact4& firstContact = m_data->m_pBufContactOutCPU->at(0);
	return &firstContact;
}
void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
{

View File

@@ -64,6 +64,9 @@ public:
cl_mem getCollidablesGpu();
int getNumCollidablesGpu() const;
const struct b3Contact4* getContactsCPU() const;
cl_mem getContactsGpu();
int getNumContactsGpu() const;

View File

@@ -23,6 +23,8 @@
#include "Bullet3Common/b3Quickprof.h"
#include "b3Config.h"
// Debug switch: when true, b3GpuRigidBodyPipeline::stepSimulation prints the
// number of contacts and the total number of contact points for every step
// that produces at least one contact. Off by default.
bool dumpContactStats = false;
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap )
{
m_data = new b3GpuRigidBodyPipelineInternalData;
@@ -95,6 +97,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
int numPairs = m_data->m_broadphaseSap->getNumOverlap();
int numContacts = 0;
int numBodies = m_data->m_narrowphase->getNumBodiesGpu();
if (numPairs)
@@ -105,8 +108,23 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies);
numContacts = m_data->m_narrowphase->getNumContactsGpu();
//if (numContacts)
// printf("numContacts = %d\n", numContacts);
// Optional diagnostics: print the contact count and the sum of the
// per-contact point counts for this step. Skipped when there are no contacts.
if (dumpContactStats && numContacts)
{
	printf("numContacts = %d\n", numContacts);

	// getContactsCPU() copies the GPU contact buffer to the CPU-side array.
	const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU();

	int totalPoints = 0;
	for (int i=0;i<numContacts;i++)
	{
		// Index each contact; dereferencing the base pointer would add the
		// first contact's point count numContacts times.
		totalPoints += contacts[i].getNPoints();
	}
	printf("totalPoints=%d\n",totalPoints);
}
}

View File

@@ -641,7 +641,7 @@ extern bool gDebugSkipLoadingBinary;
int main( int argc, char** argv)
{
gDebugSkipLoadingBinary = true;
//gDebugSkipLoadingBinary = true;
cl_int ciErrNum;
b3CommandLineArgs args(argc,argv);