Split the unit-sphere search into a separate kernel, m_findSeparatingAxisUnitSphereKernel; hopefully it works on Mac OSX OpenCL now.

This commit is contained in:
Erwin Coumans
2014-01-16 14:19:43 -08:00
parent b53cc5edb8
commit f14ec7b870
6 changed files with 126 additions and 62 deletions

View File

@@ -148,6 +148,10 @@ m_unitSphereDirections(m_context,m_queue)
b3Assert(m_mprPenetrationKernel); b3Assert(m_mprPenetrationKernel);
b3Assert(errNum==CL_SUCCESS); b3Assert(errNum==CL_SUCCESS);
m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg );
b3Assert(m_findSeparatingAxisUnitSphereKernel);
b3Assert(errNum==CL_SUCCESS);
int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3);
m_unitSphereDirections.resize(numDirections); m_unitSphereDirections.resize(numDirections);
@@ -285,6 +289,8 @@ GpuSatCollision::~GpuSatCollision()
if (m_findSeparatingAxisEdgeEdgeKernel) if (m_findSeparatingAxisEdgeEdgeKernel)
clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel);
if (m_findSeparatingAxisUnitSphereKernel)
clReleaseKernel(m_findSeparatingAxisUnitSphereKernel);
if (m_mprPenetrationKernel) if (m_mprPenetrationKernel)
clReleaseKernel(m_mprPenetrationKernel); clReleaseKernel(m_mprPenetrationKernel);
@@ -3200,34 +3206,59 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3);
{ {
B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); B3_PROFILE("findSeparatingAxisEdgeEdgeKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( pairs->getBufferCL(), true ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true),
b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true),
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true),
b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true),
b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true),
b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_sepNormals.getBufferCL()),
b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( m_dmins.getBufferCL()), b3BufferInfoCL( m_dmins.getBufferCL()),
b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true)
}; };
b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numDirections); launcher.setConst( numDirections);
launcher.setConst( nPairs ); launcher.setConst( nPairs );
int num = nPairs;
launcher.launch1D( num);
clFinish(m_queue);
int num = nPairs; }
launcher.launch1D( num);
clFinish(m_queue);
} {
B3_PROFILE("findSeparatingAxisUnitSphereKernel");
b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( pairs->getBufferCL(), true ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
b3BufferInfoCL( convexData.getBufferCL(),true),
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true),
b3BufferInfoCL( m_sepNormals.getBufferCL()),
b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( m_dmins.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3);
launcher.setConst( numDirections);
launcher.setConst( nPairs );
int num = nPairs;
launcher.launch1D( num);
clFinish(m_queue);
}
} }

View File

@@ -27,6 +27,7 @@ struct GpuSatCollision
cl_command_queue m_queue; cl_command_queue m_queue;
cl_kernel m_findSeparatingAxisKernel; cl_kernel m_findSeparatingAxisKernel;
cl_kernel m_mprPenetrationKernel; cl_kernel m_mprPenetrationKernel;
cl_kernel m_findSeparatingAxisUnitSphereKernel;
cl_kernel m_findSeparatingAxisVertexFaceKernel; cl_kernel m_findSeparatingAxisVertexFaceKernel;

View File

@@ -290,17 +290,21 @@ __kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs,
const float4 DeltaC2 = c0 - c1; const float4 DeltaC2 = c0 - c1;
float4 sepNormal = separatingNormals[i]; float4 sepNormal = separatingNormals[i];
bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;
posB,ornB, if (numEdgeEdgeDirections>numUnitSphereDirections)
DeltaC2,
vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);
if (!sepEE)
{ {
hasSeparatingAxis[i] = 0; bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
} else posB,ornB,
{ DeltaC2,
hasSeparatingAxis[i] = 1; vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);
separatingNormals[i] = sepNormal; if (!sepEE)
{
hasSeparatingAxis[i] = 0;
} else
{
hasSeparatingAxis[i] = 1;
separatingNormals[i] = sepNormal;
}
} }
} //if (hasSeparatingAxis[i]) } //if (hasSeparatingAxis[i])
}//(i<numPairs) }//(i<numPairs)

View File

@@ -1418,17 +1418,21 @@ static const char* mprKernelsCL= \
" const float4 DeltaC2 = c0 - c1;\n" " const float4 DeltaC2 = c0 - c1;\n"
" float4 sepNormal = separatingNormals[i];\n" " float4 sepNormal = separatingNormals[i];\n"
" \n" " \n"
" bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" " int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n"
" posB,ornB,\n" " if (numEdgeEdgeDirections>numUnitSphereDirections)\n"
" DeltaC2,\n"
" vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n"
" if (!sepEE)\n"
" {\n" " {\n"
" hasSeparatingAxis[i] = 0;\n" " bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n"
" } else\n" " posB,ornB,\n"
" {\n" " DeltaC2,\n"
" hasSeparatingAxis[i] = 1;\n" " vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n"
" separatingNormals[i] = sepNormal;\n" " if (!sepEE)\n"
" {\n"
" hasSeparatingAxis[i] = 0;\n"
" } else\n"
" {\n"
" hasSeparatingAxis[i] = 1;\n"
" separatingNormals[i] = sepNormal;\n"
" }\n"
" }\n" " }\n"
" } //if (hasSeparatingAxis[i])\n" " } //if (hasSeparatingAxis[i])\n"
" }//(i<numPairs)\n" " }//(i<numPairs)\n"

View File

@@ -1566,14 +1566,25 @@ __kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs,
bool sepEE = false; bool sepEE = false;
int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;
if (numEdgeEdgeDirections<numUnitSphereDirections) if (numEdgeEdgeDirections<=numUnitSphereDirections)
{ {
sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
posB,ornB, posB,ornB,
DeltaC2, DeltaC2,
vertices,uniqueEdges,faces, vertices,uniqueEdges,faces,
indices,&sepNormal,&dmin); indices,&sepNormal,&dmin);
if (!sepEE)
{
hasSeparatingAxis[i] = 0;
} else
{
hasSeparatingAxis[i] = 1;
separatingNormals[i] = sepNormal;
}
} }
/*
///else case is a separate kernel, to make Mac OSX OpenCL compiler happy
else else
{ {
sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
@@ -1581,15 +1592,16 @@ __kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs,
DeltaC2, DeltaC2,
vertices,unitSphereDirections,numUnitSphereDirections, vertices,unitSphereDirections,numUnitSphereDirections,
&sepNormal,&dmin); &sepNormal,&dmin);
if (!sepEE)
{
hasSeparatingAxis[i] = 0;
} else
{
hasSeparatingAxis[i] = 1;
separatingNormals[i] = sepNormal;
}
} }
if (!sepEE) */
{
hasSeparatingAxis[i] = 0;
} else
{
hasSeparatingAxis[i] = 1;
separatingNormals[i] = sepNormal;
}
} //if (hasSeparatingAxis[i]) } //if (hasSeparatingAxis[i])
}//(i<numPairs) }//(i<numPairs)
} }

View File

@@ -1700,14 +1700,25 @@ static const char* satKernelsCL= \
" \n" " \n"
" bool sepEE = false;\n" " bool sepEE = false;\n"
" int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" " int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n"
" if (numEdgeEdgeDirections<numUnitSphereDirections)\n" " if (numEdgeEdgeDirections<=numUnitSphereDirections)\n"
" {\n" " {\n"
" sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" " sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n"
" posB,ornB,\n" " posB,ornB,\n"
" DeltaC2,\n" " DeltaC2,\n"
" vertices,uniqueEdges,faces,\n" " vertices,uniqueEdges,faces,\n"
" indices,&sepNormal,&dmin);\n" " indices,&sepNormal,&dmin);\n"
" \n"
" if (!sepEE)\n"
" {\n"
" hasSeparatingAxis[i] = 0;\n"
" } else\n"
" {\n"
" hasSeparatingAxis[i] = 1;\n"
" separatingNormals[i] = sepNormal;\n"
" }\n"
" }\n" " }\n"
" /*\n"
" ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n"
" else\n" " else\n"
" {\n" " {\n"
" sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" " sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n"
@@ -1715,15 +1726,16 @@ static const char* satKernelsCL= \
" DeltaC2,\n" " DeltaC2,\n"
" vertices,unitSphereDirections,numUnitSphereDirections,\n" " vertices,unitSphereDirections,numUnitSphereDirections,\n"
" &sepNormal,&dmin);\n" " &sepNormal,&dmin);\n"
" if (!sepEE)\n"
" {\n"
" hasSeparatingAxis[i] = 0;\n"
" } else\n"
" {\n"
" hasSeparatingAxis[i] = 1;\n"
" separatingNormals[i] = sepNormal;\n"
" }\n"
" }\n" " }\n"
" if (!sepEE)\n" " */\n"
" {\n"
" hasSeparatingAxis[i] = 0;\n"
" } else\n"
" {\n"
" hasSeparatingAxis[i] = 1;\n"
" separatingNormals[i] = sepNormal;\n"
" }\n"
" } //if (hasSeparatingAxis[i])\n" " } //if (hasSeparatingAxis[i])\n"
" }//(i<numPairs)\n" " }//(i<numPairs)\n"
"}\n" "}\n"