Synchronize changes from branches/GpuClothAMD to trunk

Main improvements are: GPU cloth collision detection against a capsule shape,
OpenCL-OpenGL interoperability (keeping data buffers on the GPU), and bug fixes.
Thanks to Lee Howes.
This commit is contained in:
erwin.coumans
2011-02-27 09:07:07 +00:00
parent ec1bd45f4f
commit d52f58edd8
37 changed files with 3267 additions and 2481 deletions

View File

@@ -11,15 +11,21 @@ ADD_DEFINITIONS(-DCL_PLATFORM_AMD)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
../btSoftBodySolverOutputCLtoGL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolver_OpenCL.h
../btSoftBodySolver_OpenCLSIMDAware.h
../../CPU/btSoftBodySolverData.h
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverLinkData_OpenCLSIMDAware.h
../btSoftBodySolverBuffer_OpenCL.h
../btSoftBodySolverVertexBuffer_OpenGL.h
../btSoftBodySolverOutputCLtoGL.h
)
# OpenCL and HLSL Shaders.

View File

@@ -5,8 +5,11 @@ INCLUDE_DIRECTORIES(
)
SET(BulletSoftBodyOpenCLSolvers_SRCS
../btSoftBodySolver_OpenCL.cpp
../btSoftBodySolver_OpenCLSIMDAware.cpp
../btSoftBodySolverOutputCLtoGL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
@@ -15,7 +18,10 @@ SET(BulletSoftBodyOpenCLSolvers_HDRS
../btSoftBodySolverVertexData_OpenCL.h
../btSoftBodySolverTriangleData_OpenCL.h
../btSoftBodySolverLinkData_OpenCL.h
../btSoftBodySolverLinkData_OpenCLSIMDAware.h
../btSoftBodySolverBuffer_OpenCL.h
../btSoftBodySolverVertexBuffer_OpenGL.h
../btSoftBodySolverOutputCLtoGL.h
)
# OpenCL and HLSL Shaders.

View File

@@ -65,6 +65,9 @@ public:
cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
// At a minimum the buffer must exist
if( size == 0 )
size = sizeof(ElementType);
m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
if( err != CL_SUCCESS )
{
@@ -81,6 +84,7 @@ public:
btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
:m_cqCommandQue(commandQue),
m_clContext(ctx),
m_buffer(0),
m_CPUBuffer(CPUBuffer),
m_gpuSize(0),
m_onGPU(false),
@@ -91,6 +95,7 @@ public:
~btOpenCLBuffer()
{
	// m_buffer is initialised to 0 by the constructor and only becomes a
	// real cl_mem once a device buffer has been created. Releasing a null
	// handle is an error in OpenCL (CL_INVALID_MEM_OBJECT), so only release
	// a buffer that actually exists.
	if( m_buffer )
	{
		clReleaseMemObject(m_buffer);
	}
}
@@ -105,6 +110,16 @@ public:
m_onGPU = false;
}
if( !m_allocated && m_CPUBuffer->size() == 0 )
{
// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
// We should create *something* on the device side
if (!createBuffer()) {
return false;
}
m_allocated = true;
}
if( !m_onGPU && m_CPUBuffer->size() > 0 )
{
if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {

View File

@@ -20,10 +20,26 @@ subject to the following restrictions:
#include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "BulletSoftBody/btSoftBody.h"
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
#include "LinearMath/btQuickprof.h"
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif //__APPLE__
#endif//USE_MINICL
#define BT_DEFAULT_WORKGROUPSIZE 128
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
#if (0)//CL_VERSION_1_1 == 1)
@@ -49,6 +65,10 @@ static char* UpdateNormalsCLString =
#include "OpenCLC/UpdateNormals.cl"
static char* VSolveLinksCLString =
#include "OpenCLC/VSolveLinks.cl"
static char* ComputeBoundsCLString =
#include "OpenCLC/ComputeBounds.cl"
static char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC/SolveCollisionsAndUpdateVelocities.cl"
#else
////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A
@@ -72,6 +92,10 @@ static char* UpdateNormalsCLString =
#include "OpenCLC10/UpdateNormals.cl"
static char* VSolveLinksCLString =
#include "OpenCLC10/VSolveLinks.cl"
static char* ComputeBoundsCLString =
#include "OpenCLC10/ComputeBounds.cl"
static char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
#endif //CL_VERSION_1_1
@@ -583,6 +607,7 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
m_linkData(queue, ctx),
m_vertexData(queue, ctx),
m_triangleData(queue, ctx),
clFunctions(queue, ctx),
m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
@@ -590,6 +615,11 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ),
m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ),
m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true ),
m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ),
m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ),
m_clPerClothMinBounds( queue, ctx, &m_perClothMinBounds, false ),
m_clPerClothMaxBounds( queue, ctx, &m_perClothMaxBounds, false ),
m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ),
m_cqCommandQue( queue ),
m_cxMainContext(ctx),
m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE)
@@ -600,15 +630,85 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
m_updateSolverConstants = true;
m_shadersInitialized = false;
prepareLinksKernel = 0;
solvePositionsFromLinksKernel = 0;
updateConstantsKernel = 0;
integrateKernel = 0;
addVelocityKernel = 0;
updatePositionsFromVelocitiesKernel = 0;
updateVelocitiesFromPositionsWithoutVelocitiesKernel = 0;
updateVelocitiesFromPositionsWithVelocitiesKernel = 0;
vSolveLinksKernel = 0;
solveCollisionsAndUpdateVelocitiesKernel = 0;
resetNormalsAndAreasKernel = 0;
resetNormalsAndAreasKernel = 0;
normalizeNormalsAndAreasKernel = 0;
computeBoundsKernel = 0;
outputToVertexArrayKernel = 0;
applyForcesKernel = 0;
}
btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver()
{
	// Drop every compiled kernel handle owned by this solver.
	this->releaseKernels();
}
void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies )
void btOpenCLSoftBodySolver::releaseKernels()
{
if( m_softBodySet.size() != softBodies.size() )
RELEASE_CL_KERNEL( prepareLinksKernel );
RELEASE_CL_KERNEL( solvePositionsFromLinksKernel );
RELEASE_CL_KERNEL( updateConstantsKernel );
RELEASE_CL_KERNEL( integrateKernel );
RELEASE_CL_KERNEL( addVelocityKernel );
RELEASE_CL_KERNEL( updatePositionsFromVelocitiesKernel );
RELEASE_CL_KERNEL( updateVelocitiesFromPositionsWithoutVelocitiesKernel );
RELEASE_CL_KERNEL( updateVelocitiesFromPositionsWithVelocitiesKernel );
RELEASE_CL_KERNEL( vSolveLinksKernel );
RELEASE_CL_KERNEL( solveCollisionsAndUpdateVelocitiesKernel );
RELEASE_CL_KERNEL( resetNormalsAndAreasKernel );
RELEASE_CL_KERNEL( normalizeNormalsAndAreasKernel );
RELEASE_CL_KERNEL( computeBoundsKernel );
RELEASE_CL_KERNEL( outputToVertexArrayKernel );
RELEASE_CL_KERNEL( applyForcesKernel );
m_shadersInitialized = false;
}
void btOpenCLSoftBodySolver::copyBackToSoftBodies()
{
// Move the vertex data back to the host first
m_vertexData.moveFromAccelerator();
// Loop over soft bodies, copying all the vertex positions back for each body in turn
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btOpenCLAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[ softBodyIndex ];
btSoftBody *softBody = softBodyInterface->getSoftBody();
int firstVertex = softBodyInterface->getFirstVertex();
int numVertices = softBodyInterface->getNumVertices();
// Copy vertices from solver back into the softbody
for( int vertex = 0; vertex < numVertices; ++vertex )
{
using Vectormath::Aos::Point3;
Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] );
softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() );
softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() );
softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() );
softBody->m_nodes[vertex].m_n.setX( vertexPosition.getX() );
softBody->m_nodes[vertex].m_n.setY( vertexPosition.getY() );
softBody->m_nodes[vertex].m_n.setZ( vertexPosition.getZ() );
}
}
} // btOpenCLSoftBodySolver::copyBackToSoftBodies
void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate )
{
if( forceUpdate || m_softBodySet.size() != softBodies.size() )
{
// Have a change in the soft body set so update, reloading all the data
getVertexData().clear();
@@ -633,6 +733,11 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
m_perClothMinBounds.push_back( UIntVector3(UINT_MAX, UINT_MAX, UINT_MAX) );
m_perClothMaxBounds.push_back( UIntVector3(0, 0, 0) );
m_perClothFriction.push_back( softBody->getFriction() );
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
// Add space for new vertices and triangles in the default solver for now
// TODO: Include space here for tearing too later
@@ -738,12 +843,6 @@ btSoftBodyTriangleData &btOpenCLSoftBodySolver::getTriangleData()
return m_triangleData;
}
bool btOpenCLSoftBodySolver::checkInitialized()
{
return buildShaders();
}
void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
{
cl_int ciErrNum;
@@ -751,11 +850,15 @@ void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 1, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexNormal.m_buffer);//oclCHECKERROR(ciErrNum, CL_SUCCESS);
ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 2, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexArea.m_buffer); //oclCHECKERROR(ciErrNum, CL_SUCCESS);
size_t numWorkItems = m_defaultWorkGroupSize*((numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, resetNormalsAndAreasKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0 );
if( ciErrNum != CL_SUCCESS )
if (numWorkItems)
{
btAssert( 0 && "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" );
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, resetNormalsAndAreasKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0 );
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" );
}
}
}
@@ -770,10 +873,13 @@ void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
size_t numWorkItems = m_defaultWorkGroupSize*((numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, normalizeNormalsAndAreasKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
if (numWorkItems)
{
btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)");
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, normalizeNormalsAndAreasKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)");
}
}
}
@@ -875,10 +981,13 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
ciErrNum = clSetKernelArg(applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,applyForcesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
if (numWorkItems)
{
btAssert( 0 && "enqueueNDRangeKernel(applyForcesKernel)");
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,applyForcesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(applyForcesKernel)");
}
}
}
@@ -904,10 +1013,13 @@ void btOpenCLSoftBodySolver::integrate( float solverdt )
ciErrNum = clSetKernelArg(integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,integrateKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
if (numWorkItems)
{
btAssert( 0 && "enqueueNDRangeKernel(integrateKernel)");
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,integrateKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(integrateKernel)");
}
}
}
@@ -924,6 +1036,102 @@ float btOpenCLSoftBodySolver::computeTriangleArea(
return area;
}
// Refresh the bounding box of every soft body handled by this solver.
//
// Two implementations are selected at compile time by
// USE_GPU_BOUNDS_COMPUTATION. The #define below is commented out, so as
// written only the #else branch is compiled: every soft body receives the
// fixed box (-1e30 .. 1e30) on all three axes.
void btOpenCLSoftBodySolver::updateBounds()
{
//#define USE_GPU_BOUNDS_COMPUTATION
#ifdef USE_GPU_BOUNDS_COMPUTATION
using Vectormath::Aos::Point3;
// Interpretation structure for float and int
// (FPRep and the intValue member are not referenced by the code below.)
struct FPRep {
unsigned int mantissa : 23;
unsigned int exponent : 8;
unsigned int sign : 1;
};
// Union used to reinterpret the same 32 bits as float / int / unsigned int.
union FloatAsInt
{
float floatValue;
int intValue;
unsigned int uintValue;
FPRep fpRep;
};
// Update bounds array to min and max int values to allow easy atomics
// (min starts at UINT_MAX and max at 0, so any real value replaces them).
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
m_perClothMinBounds[softBodyIndex] = UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX );
m_perClothMaxBounds[softBodyIndex] = UIntVector3( 0, 0, 0 );
}
// Upload vertex data and the reset bounds, run the bounds kernel, then
// read the per-cloth results back to the host arrays.
m_vertexData.moveToAccelerator();
m_clPerClothMinBounds.moveToGPU();
m_clPerClothMaxBounds.moveToGPU();
computeBounds( );
m_clPerClothMinBounds.moveFromGPU();
m_clPerClothMaxBounds.moveFromGPU();
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
UIntVector3 minBoundUInt = m_perClothMinBounds[softBodyIndex];
UIntVector3 maxBoundUInt = m_perClothMaxBounds[softBodyIndex];
/*UIntVector3 minBoundUInt;
minBoundUInt.x = m_perClothMinBounds[softBodyIndex*4];
minBoundUInt.y = m_perClothMinBounds[softBodyIndex*4+1];
minBoundUInt.z = m_perClothMinBounds[softBodyIndex*4+2];
UIntVector3 maxBoundUInt;
maxBoundUInt.x = m_perClothMaxBounds[softBodyIndex*4];
maxBoundUInt.y = m_perClothMaxBounds[softBodyIndex*4+1];
maxBoundUInt.z = m_perClothMaxBounds[softBodyIndex*4+2];*/
// Convert back to float
// The XOR mask is 0x80000000 when the top bit of the stored value is set
// (only the top bit is flipped) and 0xFFFFFFFF when it is clear (all bits
// are flipped). Presumably this undoes an order-preserving float-to-uint
// encoding applied in the ComputeBounds kernel - TODO confirm in the .cl source.
FloatAsInt fai;
btVector3 minBound;
fai.uintValue = minBoundUInt.x;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
minBound.setX( fai.floatValue );
fai.uintValue = minBoundUInt.y;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
minBound.setY( fai.floatValue );
fai.uintValue = minBoundUInt.z;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
minBound.setZ( fai.floatValue );
btVector3 maxBound;
fai.uintValue = maxBoundUInt.x;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
maxBound.setX( fai.floatValue );
fai.uintValue = maxBoundUInt.y;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
maxBound.setY( fai.floatValue );
fai.uintValue = maxBoundUInt.z;
fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
maxBound.setZ( fai.floatValue );
// And finally assign to the soft body
m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound );
}
#else
// Fallback used while the GPU path is disabled: hand every soft body the same
// very large box instead of computing real bounds.
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btVector3 minBound(-1e30,-1e30,-1e30), maxBound(1e30,1e30,1e30);
m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound );
}
#endif//USE_GPU_BOUNDS_COMPUTATION
} // btOpenCLSoftBodySolver::updateBounds
void btOpenCLSoftBodySolver::updateConstants( float timeStep )
{
@@ -954,6 +1162,66 @@ void btOpenCLSoftBodySolver::updateConstants( float timeStep )
}
class QuickSortCompare
{
public:
bool operator() ( const CollisionShapeDescription& a, const CollisionShapeDescription& b )
{
return ( a.softBodyIdentifier < b.softBodyIdentifier );
}
};
/**
 * Sort the collision object details array and generate indexing into it for the per-cloth collision object array.
 *
 * After sorting by soft body identifier, all collision objects belonging to
 * one cloth are contiguous. For each cloth that has objects, the half-open
 * range [firstObject, endObject) into m_collisionObjectDetails is stored in
 * m_perClothCollisionObjects. Cloths with no entry in the sorted array keep
 * the (-1, -1) "no collision object" marker, except that the cloth the scan
 * starts on (index 0) and the last cloth visited always get a range written,
 * which may be empty.
 */
void btOpenCLSoftBodySolver::prepareCollisionConstraints()
{
	// First do a simple sort on the collision objects.
	// (Two per-cloth scratch arrays used to be allocated here on every call;
	// they were never read, so they have been removed.)
	m_collisionObjectDetails.quickSort( QuickSortCompare() );

	if (!m_perClothCollisionObjects.size())
		return;

	// Generating indexing for perClothCollisionObjects
	// First clear the previous values with the "no collision object for cloth" constant
	for( int clothIndex = 0; clothIndex < m_perClothCollisionObjects.size(); ++clothIndex )
	{
		m_perClothCollisionObjects[clothIndex].firstObject = -1;
		m_perClothCollisionObjects[clothIndex].endObject = -1;
	}

	int currentCloth = 0;
	int startIndex = 0;
	for( int collisionObject = 0; collisionObject < m_collisionObjectDetails.size(); ++collisionObject )
	{
		int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier;
		if( nextCloth != currentCloth )
		{
			// Changed cloth in the array
			// Set the end index and the range is what we need for currentCloth
			m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
			m_perClothCollisionObjects[currentCloth].endObject = collisionObject;
			currentCloth = nextCloth;
			startIndex = collisionObject;
		}
	}

	// And update last cloth
	m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
	m_perClothCollisionObjects[currentCloth].endObject = m_collisionObjectDetails.size();

} // btOpenCLSoftBodySolver::prepareCollisionConstraints
void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
{
@@ -993,6 +1261,9 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
}
}
prepareCollisionConstraints();
// Compute new positions from velocity
// Also update the previous position so that our position computation is now based on the new position from the velocity solution
// rather than based directly on the original positions
@@ -1016,8 +1287,9 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt );
// At this point assume that the force array is blank - we will overwrite it
solveCollisionsAndUpdateVelocities( 1.f/solverdt );
}
@@ -1158,19 +1430,88 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( flo
} // updateVelocitiesFromPositionsWithoutVelocities
void btOpenCLSoftBodySolver::computeBounds( )
{
m_vertexData.moveToAccelerator();
cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices();
int numSoftBodies = m_softBodySet.size();
ciErrNum = clSetKernelArg(computeBoundsKernel, 0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(computeBoundsKernel, 1, sizeof(int), &numSoftBodies);
ciErrNum = clSetKernelArg(computeBoundsKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
ciErrNum = clSetKernelArg(computeBoundsKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(computeBoundsKernel, 4, sizeof(cl_mem),&m_clPerClothMinBounds.m_buffer);
ciErrNum = clSetKernelArg(computeBoundsKernel, 5, sizeof(cl_mem),&m_clPerClothMaxBounds.m_buffer);
ciErrNum = clSetKernelArg(computeBoundsKernel, 6, sizeof(cl_uint4)*256,0);
ciErrNum = clSetKernelArg(computeBoundsKernel, 7, sizeof(cl_uint4)*256,0);
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
if (numWorkItems)
{
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,computeBoundsKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(computeBoundsKernel)");
}
}
clFinish(m_cqCommandQue);
} // btOpenCLSoftBodySolver::computeBounds
/**
 * Dispatch the combined collision-solve / velocity-update kernel.
 *
 * Uploads the per-cloth friction, damping and collision-object data together
 * with the vertex data, binds the kernel arguments and enqueues the kernel
 * over all vertices (rounded up to a whole number of work groups). Nothing is
 * enqueued when there are no vertices.
 *
 * @param isolverdt value forwarded to the kernel as argument 1; the caller
 *                  visible in this file passes 1.f/solverdt.
 */
void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
{
	// Copy kernel parameters to GPU
	m_vertexData.moveToAccelerator();
	m_clPerClothFriction.moveToGPU();
	m_clPerClothDampingFactor.moveToGPU();
	m_clPerClothCollisionObjects.moveToGPU();
	m_clCollisionObjectDetails.moveToGPU();

	cl_int ciErrNum;
	int numVerts = m_vertexData.getNumVertices();
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
	// isolverdt is a float, so its size must be given as sizeof(float)
	// (previously sizeof(int), which only worked where the two sizes coincide).
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(float), &isolverdt);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
	ciErrNum = clSetKernelArg(solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);

	// Round the vertex count up to a multiple of the work group size
	size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
	if (numWorkItems)
	{
		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
		if( ciErrNum != CL_SUCCESS )
		{
			// Message now names the kernel that actually failed
			btAssert( 0 && "enqueueNDRangeKernel(solveCollisionsAndUpdateVelocitiesKernel)");
		}
	}

} // btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities
// End kernel dispatches
/////////////////////////////////////
void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
{
// Currently only support CPU output buffers
// TODO: check for DX11 buffers. Take all offsets into the same DX11 buffer
// and use them together on a single kernel call if possible by setting up a
// per-cloth target buffer array for the copy kernel.
btSoftBodySolver *solver = softBody->getSoftBodySolver();
btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
btOpenCLSoftBodySolver *dxSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
btOpenCLAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody );
btOpenCLAcceleratedSoftBodyInterface* currentCloth = dxSolver->findSoftBodyInterface( softBody );
btSoftBodyVertexDataOpenCL &vertexData( dxSolver->m_vertexData );
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
@@ -1180,8 +1521,8 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
float *basePointer = cpuVertexBuffer->getBasePointer();
m_vertexData.m_clVertexPosition.copyFromGPU();
m_vertexData.m_clVertexNormal.copyFromGPU();
vertexData.m_clVertexPosition.copyFromGPU();
vertexData.m_clVertexNormal.copyFromGPU();
if( vertexBuffer->hasVertexPositions() )
{
@@ -1191,7 +1532,7 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
{
Vectormath::Aos::Point3 position = m_vertexData.getPosition(vertexIndex);
Vectormath::Aos::Point3 position = vertexData.getPosition(vertexIndex);
*(vertexPointer + 0) = position.getX();
*(vertexPointer + 1) = position.getY();
*(vertexPointer + 2) = position.getZ();
@@ -1206,7 +1547,7 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
{
Vectormath::Aos::Vector3 normal = m_vertexData.getNormal(vertexIndex);
Vectormath::Aos::Vector3 normal = vertexData.getNormal(vertexIndex);
*(normalPointer + 0) = normal.getX();
*(normalPointer + 1) = normal.getY();
*(normalPointer + 2) = normal.getZ();
@@ -1215,10 +1556,11 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
}
}
} // btCPUSoftBodySolver::outputToVertexBuffers
} // btSoftBodySolverOutputCLtoCPU::outputToVertexBuffers
cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelSource, const char* kernelName )
cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros )
{
printf("compiling kernelName: %s ",kernelName);
cl_kernel kernel;
@@ -1229,19 +1571,45 @@ cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelS
// oclCHECKERROR(ciErrNum, CL_SUCCESS);
// Build the program with 'mad' Optimization option
#ifdef MAC
char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
#else
const char* flags = "-DGUID_ARG=";
//const char* flags = "-DGUID_ARG= -fno-alias";
const char* flags = "-DGUID_ARG= ";
#endif
ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, flags, NULL, NULL);
char* compileFlags = new char[strlen(additionalMacros) + strlen(flags) + 5];
sprintf(compileFlags, "%s %s", flags, additionalMacros);
ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, compileFlags, NULL, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
size_t numDevices;
clGetProgramInfo( m_cpProgram, CL_PROGRAM_DEVICES, 0, 0, &numDevices );
cl_device_id *devices = new cl_device_id[numDevices];
clGetProgramInfo( m_cpProgram, CL_PROGRAM_DEVICES, numDevices, devices, &numDevices );
for( int i = 0; i < 2; ++i )
{
char *build_log;
size_t ret_val_size;
clGetProgramBuildInfo(m_cpProgram, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
build_log = new char[ret_val_size+1];
clGetProgramBuildInfo(m_cpProgram, devices[i], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
// to be carefully, terminate with \0
// there's no information in the reference whether the string is 0 terminated or not
build_log[ret_val_size] = '\0';
printf("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
delete[] build_log;
}
btAssert(0);
exit(0);
}
// Create the kernel
kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
@@ -1252,37 +1620,123 @@ cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelS
}
printf("ready. \n");
delete [] compileFlags;
return kernel;
}
void btOpenCLSoftBodySolver::predictMotion( float timeStep )
{
// Fill the force arrays with current acceleration data etc
m_perClothWindVelocity.resize( m_softBodySet.size() );
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
// Clear the collision shape array for the next frame
// Ensure that the DX11 ones are moved off the device so they will be updated correctly
m_clCollisionObjectDetails.changedOnCPU();
m_clPerClothCollisionObjects.changedOnCPU();
m_collisionObjectDetails.clear();
{
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
BT_PROFILE("perClothWindVelocity");
// Fill the force arrays with current acceleration data etc
m_perClothWindVelocity.resize( m_softBodySet.size() );
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
}
}
{
BT_PROFILE("changedOnCPU");
m_clPerClothWindVelocity.changedOnCPU();
}
m_clPerClothWindVelocity.changedOnCPU();
// Apply forces that we know about to the cloths
applyForces( timeStep * getTimeScale() );
{
BT_PROFILE("applyForces");
// Apply forces that we know about to the cloths
applyForces( timeStep * getTimeScale() );
}
// Itegrate motion for all soft bodies dealt with by the solver
integrate( timeStep * getTimeScale() );
{
BT_PROFILE("integrate");
// Itegrate motion for all soft bodies dealt with by the solver
integrate( timeStep * getTimeScale() );
}
{
BT_PROFILE("updateBounds");
updateBounds();
}
// End prediction work for solvers
}
// Convert a btTransform into a Vectormath Transform3: columns 0..2 are taken
// from the basis matrix columns and column 3 from the origin.
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
{
	Vectormath::Aos::Transform3 result;

	for( int column = 0; column < 3; ++column )
	{
		result.setCol(column, toVector3(transform.getBasis().getColumn(column)));
	}
	result.setCol(3, toVector3(transform.getOrigin()));

	return result;
}
void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
{
float scalarMargin = this->getSoftBody()->getCollisionShape()->getMargin();
btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin );
m_softBody->m_bounds[0] = lowerBound - vectorMargin;
m_softBody->m_bounds[1] = upperBound + vectorMargin;
} // btOpenCLSoftBodySolver::btDX11AcceleratedSoftBodyInterface::updateBounds
// Soft body versus soft body: this overload performs no work.
void btOpenCLSoftBodySolver::processCollision( btSoftBody*, btSoftBody* )
{
	// Intentionally empty.
}
// Add the collision object to the set to deal with for a particular soft body.
//
// Only capsule shapes are recorded. For a capsule, a CollisionShapeDescription
// holding the owning soft body index, shape type, world transform, capsule
// dimensions, friction and body velocities is appended to
// m_collisionObjectDetails. Other shape types and soft bodies for which
// findSoftBodyIndex() returns a negative index are ignored.
void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollisionObject* collisionObject )
{
	int softBodyIndex = findSoftBodyIndex( softBody );

	if( softBodyIndex >= 0 )
	{
		btCollisionShape *collisionShape = collisionObject->getCollisionShape();
		float friction = collisionObject->getFriction();
		int shapeType = collisionShape->getShapeType();
		if( shapeType == CAPSULE_SHAPE_PROXYTYPE )
		{
			// Add to the list of expected collision objects
			CollisionShapeDescription newCollisionShapeDescription;
			newCollisionShapeDescription.softBodyIdentifier = softBodyIndex;
			newCollisionShapeDescription.collisionShapeType = shapeType;
			// TODO: May need to transpose this matrix either here or in HLSL
			newCollisionShapeDescription.shapeTransform = toTransform3(collisionObject->getWorldTransform());
			btCapsuleShape *capsule = static_cast<btCapsuleShape*>( collisionShape );
			newCollisionShapeDescription.radius = capsule->getRadius();
			newCollisionShapeDescription.halfHeight = capsule->getHalfHeight();
			newCollisionShapeDescription.margin = capsule->getMargin();
			newCollisionShapeDescription.upAxis = capsule->getUpAxis();
			newCollisionShapeDescription.friction = friction;

			// The collision object is not guaranteed to be a rigid body, so use
			// the checked upcast rather than an unconditional static_cast and
			// fall back to zero velocity for anything else.
			btRigidBody* body = btRigidBody::upcast( collisionObject );
			if( body )
			{
				newCollisionShapeDescription.linearVelocity = toVector3(body->getLinearVelocity());
				newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity());
			} else {
				newCollisionShapeDescription.linearVelocity = toVector3(btVector3(0.f, 0.f, 0.f));
				newCollisionShapeDescription.angularVelocity = toVector3(btVector3(0.f, 0.f, 0.f));
			}
			m_collisionObjectDetails.push_back( newCollisionShapeDescription );
		} else {
			// NOTE(review): the assert condition is a non-null string literal,
			// so this never fires; other shape types are silently skipped.
			btAssert("Unsupported collision shape type\n");
		}
	} else {
		// NOTE(review): as above, this condition is always true.
		btAssert("Unknown soft body");
	}
} // btOpenCLSoftBodySolver::processCollision
btOpenCLAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
btOpenCLAcceleratedSoftBodyInterface* btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
{
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btOpenCLAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex];
btOpenCLAcceleratedSoftBodyInterface* softBodyInterface = m_softBodySet[softBodyIndex];
if( softBodyInterface->getSoftBody() == softBody )
return softBodyInterface;
}
@@ -1290,27 +1744,50 @@ btOpenCLAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterf
}
/**
 * Find the position of a soft body within m_softBodySet.
 *
 * Walks m_softBodySet and compares each interface's getSoftBody() against
 * the supplied pointer.
 *
 * Returns the index of the matching entry, or -1 when no entry matches.
 * The previous not-found value was 1, which is itself a valid index and so
 * could not be told apart from "found at slot 1".
 * NOTE(review): callers are expected to treat a negative result as
 * "unknown soft body" - confirm against the call sites.
 */
int btOpenCLSoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody )
{
	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
	{
		if( m_softBodySet[softBodyIndex]->getSoftBody() == softBody )
			return softBodyIndex;
	}

	// No interface wraps this soft body: report an out-of-range sentinel.
	return -1;
}
/**
 * Ensure the solver's kernels are available.
 *
 * buildShaders() is only invoked while m_shadersInitialized is still false;
 * a successful build sets the flag.
 * Returns the value of m_shadersInitialized after the attempt.
 */
bool btOpenCLSoftBodySolver::checkInitialized()
{
	// Short-circuit keeps buildShaders() from running once the flag is set.
	if( !m_shadersInitialized && buildShaders() )
	{
		m_shadersInitialized = true;
	}

	return m_shadersInitialized;
}
bool btOpenCLSoftBodySolver::buildShaders()
{
// Ensure current kernels are released first
releaseKernels();
bool returnVal = true;
if( m_shadersInitialized )
return true;
prepareLinksKernel = compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel" );
updatePositionsFromVelocitiesKernel = compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" );
solvePositionsFromLinksKernel = compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" );
updateVelocitiesFromPositionsWithVelocitiesKernel = compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" );
updateVelocitiesFromPositionsWithoutVelocitiesKernel = compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" );
integrateKernel = compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" );
applyForcesKernel = compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" );
prepareLinksKernel = clFunctions.compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel" );
updatePositionsFromVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" );
solvePositionsFromLinksKernel = clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" );
updateVelocitiesFromPositionsWithVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" );
updateVelocitiesFromPositionsWithoutVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" );
computeBoundsKernel = clFunctions.compileCLKernelFromString( ComputeBoundsCLString, "ComputeBoundsKernel" );
solveCollisionsAndUpdateVelocitiesKernel = clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" );
integrateKernel = clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" );
applyForcesKernel = clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" );
// TODO: Rename to UpdateSoftBodies
resetNormalsAndAreasKernel = compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" );
normalizeNormalsAndAreasKernel = compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" );
updateSoftBodiesKernel = compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" );
//outputToVertexArrayWithNormalsKernel = compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" );
//outputToVertexArrayWithoutNormalsKernel = compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" );
resetNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" );
normalizeNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" );
updateSoftBodiesKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" );
if( returnVal )

View File

@@ -25,12 +25,60 @@ subject to the following restrictions:
#include "btSoftBodySolverVertexData_OpenCL.h"
#include "btSoftBodySolverTriangleData_OpenCL.h"
/**
 * Small helper that stores an OpenCL command queue and context and declares
 * the kernel compilation entry point.
 */
class CLFunctions
{
public:
	/** Store the supplied command queue and context handles. */
	CLFunctions( cl_command_queue cqCommandQue, cl_context cxMainContext )
		: m_cqCommandQue( cqCommandQue )
		, m_cxMainContext( cxMainContext )
	{
	}

	/**
	 * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
	 * additionalMacros defaults to an empty string.
	 */
	cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros = "" );

protected:
	// Handles passed in at construction; declaration order matches the
	// constructor's initialiser order.
	cl_command_queue m_cqCommandQue;
	cl_context m_cxMainContext;
};
/**
 * Entry in the collision shape array.
 * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
 *
 * NOTE(review): the solver keeps these in a btOpenCLBuffer, so the member
 * order presumably has to match the device-side structure - confirm before
 * reordering or inserting members.
 */
struct CollisionShapeDescription
{
	Vectormath::Aos::Transform3 shapeTransform;
	Vectormath::Aos::Vector3 linearVelocity;
	Vectormath::Aos::Vector3 angularVelocity;

	int softBodyIdentifier;
	int collisionShapeType;

	// Both needed for capsule
	float radius;
	float halfHeight;
	int upAxis;

	float margin;
	float friction;

	/**
	 * Zero every scalar member so that a default-constructed description
	 * never carries indeterminate values.
	 * shapeTransform, linearVelocity and angularVelocity are left to their
	 * own default constructors.
	 */
	CollisionShapeDescription() :
		softBodyIdentifier( 0 ),
		collisionShapeType( 0 ),
		radius( 0.f ),
		halfHeight( 0.f ),
		upAxis( 0 ),
		margin( 0.f ),
		friction( 0.f )
	{
	}
};
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btOpenCLAcceleratedSoftBodyInterface
{
protected:
@@ -100,6 +148,11 @@ public:
{
return m_firstTriangle;
}
/**
* Update the bounds in the btSoftBody object
*/
void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
// TODO: All of these set functions will have to do checks and
// update the world because restructuring of the arrays will be necessary
@@ -108,7 +161,7 @@ public:
{
m_numVertices = numVertices;
}
void setNumTriangles( int numTriangles )
{
m_numTriangles = numTriangles;
@@ -172,20 +225,61 @@ public:
};
class btOpenCLSoftBodySolver : public btSoftBodySolver
{
private:
public:
/**
 * Three unsigned components plus one padding word (four unsigned ints in total).
 */
struct UIntVector3
{
	/** All components, including the padding word, start at zero. */
	UIntVector3() :
		x( 0 ),
		y( 0 ),
		z( 0 ),
		_padding( 0 )
	{
	}

	/** Set the three components explicitly; the padding word is zeroed. */
	UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ ) :
		x( x_ ),
		y( y_ ),
		z( z_ ),
		_padding( 0 )
	{
	}

	unsigned int x;
	unsigned int y;
	unsigned int z;
	unsigned int _padding;
};
/**
 * Pair of integer indices, firstObject and endObject, stored per cloth.
 */
struct CollisionObjectIndices
{
	/** f becomes firstObject, e becomes endObject. */
	CollisionObjectIndices( int f, int e ) :
		firstObject( f ),
		endObject( e )
	{
	}

	int firstObject;
	int endObject;
};
btSoftBodyLinkDataOpenCL m_linkData;
btSoftBodyVertexDataOpenCL m_vertexData;
btSoftBodyTriangleDataOpenCL m_triangleData;
protected:
CLFunctions clFunctions;
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
bool m_shadersInitialized;
/**
* Cloths owned by this solver.
* Only our cloths are in this array.
@@ -224,6 +318,46 @@ private:
btAlignedObjectArray< float > m_perClothMediumDensity;
btOpenCLBuffer<float> m_clPerClothMediumDensity;
/**
* Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
*/
btAlignedObjectArray< CollisionObjectIndices > m_perClothCollisionObjects;
btOpenCLBuffer<CollisionObjectIndices> m_clPerClothCollisionObjects;
/**
* Collision shapes being passed across to the cloths in this solver.
*/
btAlignedObjectArray< CollisionShapeDescription > m_collisionObjectDetails;
btOpenCLBuffer< CollisionShapeDescription > m_clCollisionObjectDetails;
/**
* Minimum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted so that floats are stored with int-sortable values.
* This is really a uint4 array but thanks to a limitation of OpenCL atomics we are using uints.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMinBounds;
btOpenCLBuffer< UIntVector3 > m_clPerClothMinBounds;
/**
* Maximum bounds for each cloth.
* Updated by GPU and returned for use by broad phase.
* These are int vectors as a reminder that they store the int representation of a float, not a float.
* Bit 31 is inverted so that floats are stored with int-sortable values.
*/
btAlignedObjectArray< UIntVector3 > m_perClothMaxBounds;
btOpenCLBuffer< UIntVector3 > m_clPerClothMaxBounds;
/**
* Friction coefficient for each cloth
*/
btAlignedObjectArray< float > m_perClothFriction;
btOpenCLBuffer< float > m_clPerClothFriction;
cl_kernel prepareLinksKernel;
cl_kernel solvePositionsFromLinksKernel;
cl_kernel updateConstantsKernel;
@@ -233,41 +367,37 @@ private:
cl_kernel updateVelocitiesFromPositionsWithoutVelocitiesKernel;
cl_kernel updateVelocitiesFromPositionsWithVelocitiesKernel;
cl_kernel vSolveLinksKernel;
cl_kernel solveCollisionsAndUpdateVelocitiesKernel;
cl_kernel resetNormalsAndAreasKernel;
cl_kernel normalizeNormalsAndAreasKernel;
cl_kernel computeBoundsKernel;
cl_kernel updateSoftBodiesKernel;
cl_kernel outputToVertexArrayWithNormalsKernel;
cl_kernel outputToVertexArrayWithoutNormalsKernel;
cl_kernel outputToVertexArrayKernel;
cl_kernel applyForcesKernel;
cl_kernel collideSphereKernel;
cl_kernel collideCylinderKernel;
cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
size_t m_defaultWorkGroupSize;
/**
* Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
*/
cl_kernel compileCLKernelFromString( const char *shaderString, const char *shaderName );
bool buildShaders();
virtual bool buildShaders();
void resetNormalsAndAreas( int numVertices );
void normalizeNormalsAndAreas( int numVertices );
void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
void prepareCollisionConstraints();
Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
int findSoftBodyIndex( const btSoftBody* const softBody );
virtual void applyForces( float solverdt );
@@ -276,7 +406,7 @@ private:
*/
virtual void integrate( float solverdt );
void updateConstants( float timeStep );
virtual void updateConstants( float timeStep );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
@@ -292,15 +422,20 @@ private:
void updatePositionsFromVelocities( float solverdt );
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
void computeBounds( );
virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
void updateBounds();
void releaseKernels();
public:
btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx);
@@ -308,7 +443,8 @@ public:
virtual ~btOpenCLSoftBodySolver();
btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
virtual btSoftBodyLinkData &getLinkData();
@@ -316,20 +452,27 @@ public:
virtual btSoftBodyTriangleData &getTriangleData();
/** Report this solver's type; this implementation always returns CL_SOLVER. */
virtual SolverTypes getSolverType() const
{
return CL_SOLVER;
}
virtual bool checkInitialized();
virtual void updateSoftBodies( );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
virtual void copyBackToSoftBodies();
virtual void solveConstraints( float solverdt );
virtual void predictMotion( float solverdt );
virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer );
virtual void processCollision( btSoftBody *, btCollisionObject* );
virtual void processCollision( btSoftBody*, btSoftBody* );
virtual void setDefaultWorkgroupSize(size_t workGroupSize)
{
@@ -339,6 +482,27 @@ public:
{
return m_defaultWorkGroupSize;
}
}; // btOpenCLSoftBodySolver
/**
 * Class to manage movement of data from a solver to a given target.
 * This version is the CL to CPU version.
 */
class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
{
public:
	/** Nothing to set up: the class declares no data members of its own. */
	btSoftBodySolverOutputCLtoCPU() {}

	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
	virtual void copySoftBodyToVertexBuffer( const btSoftBody* const softBody, btVertexBufferDescriptor* vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H