Removed the unused OpenCL kernels Add example how to cache binary kernels, see SoftDemo compiled with OpenCL AMD using msvc/vs2008_opencl.bat
1097 lines
45 KiB
C++
1097 lines
45 KiB
C++
/*
|
|
Bullet Continuous Collision Detection and Physics Library
|
|
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it freely,
|
|
subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
|
|
|
|
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
|
|
#include "vectormath/vmInclude.h"
|
|
#include <stdio.h> //@todo: remove the debugging printf at some stage
|
|
#include "btSoftBodySolver_OpenCLSIMDAware.h"
|
|
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
|
|
#include "BulletSoftBody/btSoftBody.h"
|
|
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
|
|
#include <limits.h>
|
|
|
|
#define WAVEFRONT_SIZE 32
|
|
#define WAVEFRONT_BLOCK_MULTIPLIER 2
|
|
#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
|
|
#define LINKS_PER_SIMD_LANE 16
|
|
|
|
static const size_t workGroupSize = GROUP_SIZE;
|
|
|
|
|
|
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
|
|
|
|
////OpenCL 1.0 kernels don't use float3
|
|
#define MSTRINGIFY(A) #A
|
|
static const char* UpdatePositionsFromVelocitiesCLString =
|
|
#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
|
|
static const char* SolvePositionsCLString =
|
|
#include "OpenCLC10/SolvePositionsSIMDBatched.cl"
|
|
static const char* UpdateNodesCLString =
|
|
#include "OpenCLC10/UpdateNodes.cl"
|
|
static const char* UpdatePositionsCLString =
|
|
#include "OpenCLC10/UpdatePositions.cl"
|
|
static const char* UpdateConstantsCLString =
|
|
#include "OpenCLC10/UpdateConstants.cl"
|
|
static const char* IntegrateCLString =
|
|
#include "OpenCLC10/Integrate.cl"
|
|
static const char* ApplyForcesCLString =
|
|
#include "OpenCLC10/ApplyForces.cl"
|
|
static const char* UpdateFixedVertexPositionsCLString =
|
|
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
|
|
static const char* UpdateNormalsCLString =
|
|
#include "OpenCLC10/UpdateNormals.cl"
|
|
static const char* VSolveLinksCLString =
|
|
#include "OpenCLC10/VSolveLinks.cl"
|
|
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
|
|
#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
|
|
static const char* OutputToVertexArrayCLString =
|
|
#include "OpenCLC10/OutputToVertexArray.cl"
|
|
|
|
|
|
|
|
btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx) :
|
|
m_cqCommandQue(queue),
|
|
m_wavefrontSize( WAVEFRONT_SIZE ),
|
|
m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
|
|
m_maxBatchesWithinWave( 0 ),
|
|
m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
|
|
m_numWavefronts( 0 ),
|
|
m_maxVertex( 0 ),
|
|
m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),
|
|
m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
|
|
m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
|
|
m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
|
|
m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
|
|
m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
|
|
m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
|
|
m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
|
|
{
|
|
}
|
|
|
|
btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
|
|
{
|
|
}
|
|
|
|
static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
|
|
{
|
|
Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
|
|
return outVec;
|
|
}
|
|
|
|
/** Allocate enough space in all link-related arrays to fit numLinks links */
|
|
void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
|
|
{
|
|
int previousSize = m_links.size();
|
|
int newSize = previousSize + numLinks;
|
|
|
|
btSoftBodyLinkData::createLinks( numLinks );
|
|
|
|
// Resize the link addresses array as well
|
|
m_linkAddresses.resize( newSize );
|
|
}
|
|
|
|
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
|
|
void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt(
|
|
const LinkDescription &link,
|
|
int linkIndex )
|
|
{
|
|
btSoftBodyLinkData::setLinkAt( link, linkIndex );
|
|
|
|
if( link.getVertex0() > m_maxVertex )
|
|
m_maxVertex = link.getVertex0();
|
|
if( link.getVertex1() > m_maxVertex )
|
|
m_maxVertex = link.getVertex1();
|
|
|
|
// Set the link index correctly for initialisation
|
|
m_linkAddresses[linkIndex] = linkIndex;
|
|
}
|
|
|
|
bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
|
|
{
|
|
return m_onGPU;
|
|
}
|
|
|
|
bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
|
|
{
|
|
bool success = true;
|
|
success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
|
|
success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
|
|
success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
|
|
success = success && m_clLinkStrength.moveToGPU();
|
|
success = success && m_clLinksMassLSC.moveToGPU();
|
|
success = success && m_clLinksRestLengthSquared.moveToGPU();
|
|
success = success && m_clLinksRestLength.moveToGPU();
|
|
success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
|
|
|
|
if( success ) {
|
|
m_onGPU = true;
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
|
|
{
|
|
bool success = true;
|
|
success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
|
|
success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
|
|
success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
|
|
success = success && m_clLinkStrength.moveFromGPU();
|
|
success = success && m_clLinksMassLSC.moveFromGPU();
|
|
success = success && m_clLinksRestLengthSquared.moveFromGPU();
|
|
success = success && m_clLinksRestLength.moveFromGPU();
|
|
success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
|
|
|
|
if( success ) {
|
|
m_onGPU = false;
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
|
|
btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
|
|
m_linkData(queue, ctx)
|
|
{
|
|
// Initial we will clearly need to update solver constants
|
|
// For now this is global for the cloths linked with this solver - we should probably make this body specific
|
|
// for performance in future once we understand more clearly when constants need to be updated
|
|
m_updateSolverConstants = true;
|
|
|
|
m_shadersInitialized = false;
|
|
}
|
|
|
|
btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
|
|
{
|
|
releaseKernels();
|
|
}
|
|
|
|
void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
|
|
{
|
|
if( forceUpdate || m_softBodySet.size() != softBodies.size() )
|
|
{
|
|
// Have a change in the soft body set so update, reloading all the data
|
|
getVertexData().clear();
|
|
getTriangleData().clear();
|
|
getLinkData().clear();
|
|
m_softBodySet.resize(0);
|
|
m_anchorIndex.clear();
|
|
|
|
int maxPiterations = 0;
|
|
int maxViterations = 0;
|
|
|
|
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
|
|
{
|
|
btSoftBody *softBody = softBodies[ softBodyIndex ];
|
|
using Vectormath::Aos::Matrix3;
|
|
using Vectormath::Aos::Point3;
|
|
|
|
// Create SoftBody that will store the information within the solver
|
|
btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
|
|
m_softBodySet.push_back( newSoftBody );
|
|
|
|
m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
|
|
m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
|
|
m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
|
|
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
|
|
m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
|
|
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
|
|
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
|
|
m_perClothFriction.push_back( softBody->getFriction() );
|
|
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
|
|
|
|
// Add space for new vertices and triangles in the default solver for now
|
|
// TODO: Include space here for tearing too later
|
|
int firstVertex = getVertexData().getNumVertices();
|
|
int numVertices = softBody->m_nodes.size();
|
|
// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
|
|
// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
|
|
int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
|
|
// Allocate space for new vertices in all the vertex arrays
|
|
getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
|
|
|
|
|
|
int firstTriangle = getTriangleData().getNumTriangles();
|
|
int numTriangles = softBody->m_faces.size();
|
|
int maxTriangles = numTriangles;
|
|
getTriangleData().createTriangles( maxTriangles );
|
|
|
|
// Copy vertices from softbody into the solver
|
|
for( int vertex = 0; vertex < numVertices; ++vertex )
|
|
{
|
|
Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
|
|
btSoftBodyVertexData::VertexDescription desc;
|
|
|
|
// TODO: Position in the softbody might be pre-transformed
|
|
// or we may need to adapt for the pose.
|
|
//desc.setPosition( cloth.getMeshTransform()*multPoint );
|
|
desc.setPosition( multPoint );
|
|
|
|
float vertexInverseMass = softBody->m_nodes[vertex].m_im;
|
|
desc.setInverseMass(vertexInverseMass);
|
|
getVertexData().setVertexAt( desc, firstVertex + vertex );
|
|
|
|
m_anchorIndex.push_back(-1.0);
|
|
}
|
|
|
|
// Copy triangles similarly
|
|
// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
|
|
for( int triangle = 0; triangle < numTriangles; ++triangle )
|
|
{
|
|
// Note that large array storage is relative to the array not to the cloth
|
|
// So we need to add firstVertex to each value
|
|
int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
|
|
int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
|
|
int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
|
|
btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
|
|
getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
|
|
|
|
// Increase vertex triangle counts for this triangle
|
|
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
|
|
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
|
|
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
|
|
}
|
|
|
|
int firstLink = getLinkData().getNumLinks();
|
|
int numLinks = softBody->m_links.size();
|
|
int maxLinks = numLinks;
|
|
|
|
// Allocate space for the links
|
|
getLinkData().createLinks( numLinks );
|
|
|
|
// Add the links
|
|
for( int link = 0; link < numLinks; ++link )
|
|
{
|
|
int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
|
|
int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
|
|
|
|
btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
|
|
newLink.setLinkStrength(1.f);
|
|
getLinkData().setLinkAt(newLink, firstLink + link);
|
|
}
|
|
|
|
newSoftBody->setFirstVertex( firstVertex );
|
|
newSoftBody->setFirstTriangle( firstTriangle );
|
|
newSoftBody->setNumVertices( numVertices );
|
|
newSoftBody->setMaxVertices( maxVertices );
|
|
newSoftBody->setNumTriangles( numTriangles );
|
|
newSoftBody->setMaxTriangles( maxTriangles );
|
|
newSoftBody->setFirstLink( firstLink );
|
|
newSoftBody->setNumLinks( numLinks );
|
|
|
|
// Find maximum piterations and viterations
|
|
int piterations = softBody->m_cfg.piterations;
|
|
|
|
if ( piterations > maxPiterations )
|
|
maxPiterations = piterations;
|
|
|
|
int viterations = softBody->m_cfg.viterations;
|
|
|
|
if ( viterations > maxViterations )
|
|
maxViterations = viterations;
|
|
|
|
// zero mass
|
|
for( int vertex = 0; vertex < numVertices; ++vertex )
|
|
{
|
|
if ( softBody->m_nodes[vertex].m_im == 0 )
|
|
{
|
|
AnchorNodeInfoCL nodeInfo;
|
|
nodeInfo.clVertexIndex = firstVertex + vertex;
|
|
nodeInfo.pNode = &softBody->m_nodes[vertex];
|
|
|
|
m_anchorNodeInfoArray.push_back(nodeInfo);
|
|
}
|
|
}
|
|
|
|
// anchor position
|
|
if ( numVertices > 0 )
|
|
{
|
|
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
|
|
{
|
|
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
|
|
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
|
|
|
|
AnchorNodeInfoCL nodeInfo;
|
|
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
|
|
nodeInfo.pNode = anchorNode;
|
|
|
|
m_anchorNodeInfoArray.push_back(nodeInfo);
|
|
}
|
|
}
|
|
}
|
|
|
|
m_anchorPosition.clear();
|
|
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
|
|
|
|
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
|
|
{
|
|
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
|
|
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
|
|
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
|
|
}
|
|
|
|
updateConstants(0.f);
|
|
|
|
// set position and velocity iterations
|
|
setNumberOfPositionIterations(maxPiterations);
|
|
setNumberOfVelocityIterations(maxViterations);
|
|
|
|
// set wind velocity
|
|
m_perClothWindVelocity.resize( m_softBodySet.size() );
|
|
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
|
{
|
|
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
|
|
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
|
|
}
|
|
|
|
m_clPerClothWindVelocity.changedOnCPU();
|
|
|
|
// generate batches
|
|
m_linkData.generateBatches();
|
|
m_triangleData.generateBatches();
|
|
|
|
// Build the shaders to match the batching parameters
|
|
buildShaders();
|
|
}
|
|
}
|
|
|
|
|
|
btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
|
|
{
|
|
// TODO: Consider setting link data to "changed" here
|
|
return m_linkData;
|
|
}
|
|
|
|
|
|
|
|
|
|
void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
|
|
{
|
|
|
|
using namespace Vectormath::Aos;
|
|
|
|
if( m_updateSolverConstants )
|
|
{
|
|
m_updateSolverConstants = false;
|
|
|
|
// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
|
|
|
|
// Initialise link constants
|
|
const int numLinks = m_linkData.getNumLinks();
|
|
for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
|
|
m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
|
|
float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
|
|
float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
|
|
float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
|
|
float massLSC = (invMass0 + invMass1)/linearStiffness;
|
|
m_linkData.getMassLSC(linkIndex) = massLSC;
|
|
float restLength = m_linkData.getRestLength(linkIndex);
|
|
float restLengthSquared = restLength*restLength;
|
|
m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
|
|
{
|
|
|
|
using Vectormath::Aos::Vector3;
|
|
using Vectormath::Aos::Point3;
|
|
using Vectormath::Aos::lengthSqr;
|
|
using Vectormath::Aos::dot;
|
|
|
|
// Prepare links
|
|
int numLinks = m_linkData.getNumLinks();
|
|
int numVertices = m_vertexData.getNumVertices();
|
|
|
|
float kst = 1.f;
|
|
float ti = 0.f;
|
|
|
|
|
|
m_clPerClothDampingFactor.moveToGPU();
|
|
m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
|
|
|
|
|
|
// Ensure data is on accelerator
|
|
m_linkData.moveToAccelerator();
|
|
m_vertexData.moveToAccelerator();
|
|
|
|
|
|
//prepareLinks();
|
|
|
|
prepareCollisionConstraints();
|
|
|
|
// Solve drift
|
|
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
|
|
{
|
|
|
|
for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
|
|
{
|
|
int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
|
|
int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
|
|
solveLinksForPosition( startWave, numWaves, kst, ti );
|
|
}
|
|
} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
|
|
|
|
|
|
// At this point assume that the force array is blank - we will overwrite it
|
|
solveCollisionsAndUpdateVelocities( 1.f/solverdt );
|
|
}
|
|
|
|
|
|
//////////////////////////////////////
|
|
// Kernel dispatches
|
|
|
|
|
|
void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
|
|
{
|
|
cl_int ciErrNum;
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
|
|
|
|
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
|
|
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
|
|
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
|
|
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
|
|
|
|
size_t numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
|
|
|
|
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
|
|
|
|
if( ciErrNum!= CL_SUCCESS )
|
|
{
|
|
btAssert( 0 && "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
|
|
}
|
|
|
|
} // solveLinksForPosition
|
|
|
|
void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
|
|
{
|
|
// Copy kernel parameters to GPU
|
|
m_vertexData.moveToAccelerator();
|
|
m_clPerClothFriction.moveToGPU();
|
|
m_clPerClothDampingFactor.moveToGPU();
|
|
m_clPerClothCollisionObjects.moveToGPU();
|
|
m_clCollisionObjectDetails.moveToGPU();
|
|
|
|
cl_int ciErrNum;
|
|
int numVerts = m_vertexData.getNumVertices();
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
|
|
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
|
|
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
|
|
|
|
if (numWorkItems)
|
|
{
|
|
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
|
|
|
|
if( ciErrNum != CL_SUCCESS )
|
|
{
|
|
btAssert( 0 && "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
|
|
}
|
|
}
|
|
|
|
} // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities
|
|
|
|
// End kernel dispatches
|
|
/////////////////////////////////////
|
|
|
|
|
|
|
|
bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
|
|
{
|
|
releaseKernels();
|
|
|
|
if( m_shadersInitialized )
|
|
return true;
|
|
|
|
const char* additionalMacros="";
|
|
|
|
m_currentCLFunctions->clearKernelCompilationFailures();
|
|
|
|
char *wavefrontMacros = new char[256];
|
|
|
|
sprintf(
|
|
wavefrontMacros,
|
|
"-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d",
|
|
m_linkData.getMaxVerticesPerWavefront(),
|
|
m_linkData.getMaxBatchesPerWavefront(),
|
|
m_linkData.getWavefrontSize(),
|
|
WAVEFRONT_BLOCK_MULTIPLIER,
|
|
WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
|
|
|
|
m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
|
|
m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
|
|
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
|
|
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
|
|
m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
|
|
m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
|
|
m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
|
|
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
|
|
|
|
// TODO: Rename to UpdateSoftBodies
|
|
m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
|
m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
|
m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
|
|
|
delete [] wavefrontMacros;
|
|
|
|
if( m_currentCLFunctions->getKernelCompilationFailures()==0)
|
|
{
|
|
m_shadersInitialized = true;
|
|
}
|
|
|
|
return m_shadersInitialized;
|
|
}
|
|
|
|
|
|
|
|
|
|
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
|
|
{
|
|
Vectormath::Aos::Transform3 outTransform;
|
|
outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
|
|
outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
|
|
outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
|
|
outTransform.setCol(3, toVector3(transform.getOrigin()));
|
|
return outTransform;
|
|
}
|
|
|
|
|
|
static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
|
|
{
|
|
// A per-batch map of truth values stating whether a given vertex is in that batch
|
|
// This allows us to significantly optimize the batching
|
|
btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
|
|
|
|
for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
|
|
{
|
|
btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
|
|
|
|
int batch = 0;
|
|
bool placed = false;
|
|
while( batch < wavefrontBatches.size() && !placed )
|
|
{
|
|
// Test the current batch, see if this wave shares any vertex with the waves in the batch
|
|
bool foundSharedVertex = false;
|
|
for( int link = 0; link < wavefront.size(); ++link )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
|
|
if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
|
|
{
|
|
foundSharedVertex = true;
|
|
}
|
|
}
|
|
|
|
if( !foundSharedVertex )
|
|
{
|
|
wavefrontBatches[batch].push_back( waveIndex );
|
|
// Insert vertices into this batch too
|
|
for( int link = 0; link < wavefront.size(); ++link )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
|
|
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
|
|
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
|
|
}
|
|
placed = true;
|
|
}
|
|
batch++;
|
|
}
|
|
if( batch == wavefrontBatches.size() && !placed )
|
|
{
|
|
wavefrontBatches.resize( batch + 1 );
|
|
wavefrontBatches[batch].push_back( waveIndex );
|
|
|
|
// And resize map as well
|
|
mapOfVerticesInBatches.resize( batch + 1 );
|
|
|
|
// Resize maps with total number of vertices
|
|
mapOfVerticesInBatches[batch].resize( numVertices+1, false );
|
|
|
|
// Insert vertices into this batch too
|
|
for( int link = 0; link < wavefront.size(); ++link )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
|
|
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
|
|
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
|
|
}
|
|
}
|
|
}
|
|
mapOfVerticesInBatches.clear();
|
|
}
|
|
|
|
// Function to remove an object from a vector maintaining correct ordering of the vector
|
|
template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
|
|
{
|
|
int currentSize = vectorToUpdate.size();
|
|
for( int i = indexToRemove; i < (currentSize-1); ++i )
|
|
{
|
|
vectorToUpdate[i] = vectorToUpdate[i+1];
|
|
}
|
|
if( currentSize > 0 )
|
|
vectorToUpdate.resize( currentSize - 1 );
|
|
}
|
|
|
|
/**
|
|
* Insert element into vectorToUpdate at index index.
|
|
*/
|
|
template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
|
|
{
|
|
vectorToUpdate.resize( vectorToUpdate.size() + 1 );
|
|
for( int i = (vectorToUpdate.size() - 1); i > index; --i )
|
|
{
|
|
vectorToUpdate[i] = vectorToUpdate[i-1];
|
|
}
|
|
vectorToUpdate[index] = element;
|
|
}
|
|
|
|
/**
|
|
* Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
|
|
* ie it treats vectorToUpdate as an ordered set.
|
|
*/
|
|
template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
|
|
{
|
|
int index = 0;
|
|
while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
|
|
{
|
|
index++;
|
|
}
|
|
if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
|
|
insertAtIndex( vectorToUpdate, index, element );
|
|
}
|
|
|
|
static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
|
|
{
|
|
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
|
|
numLinksPerVertex[nodes.vertex0]++;
|
|
numLinksPerVertex[nodes.vertex1]++;
|
|
}
|
|
int maxLinksPerVertex = 0;
|
|
for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
|
|
{
|
|
maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
|
|
}
|
|
maxLinks = maxLinksPerVertex;
|
|
|
|
btAlignedObjectArray< int > linksFoundPerVertex;
|
|
linksFoundPerVertex.resize( numVertices, 0 );
|
|
|
|
listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
|
|
|
|
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
|
|
{
|
|
// Do vertex 0
|
|
int vertexIndex = nodes.vertex0;
|
|
int linkForVertex = linksFoundPerVertex[nodes.vertex0];
|
|
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
|
|
|
|
listOfLinksPerVertex[linkAddress] = linkIndex;
|
|
|
|
linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
|
|
}
|
|
{
|
|
// Do vertex 1
|
|
int vertexIndex = nodes.vertex1;
|
|
int linkForVertex = linksFoundPerVertex[nodes.vertex1];
|
|
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
|
|
|
|
listOfLinksPerVertex[linkAddress] = linkIndex;
|
|
|
|
linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void computeBatchingIntoWavefronts(
|
|
btSoftBodyLinkData &linkData,
|
|
int wavefrontSize,
|
|
int linksPerWorkItem,
|
|
int maxLinksPerWavefront,
|
|
btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts,
|
|
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
|
|
btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
|
|
)
|
|
{
|
|
|
|
|
|
// Attempt generation of larger batches of links.
|
|
btAlignedObjectArray< bool > processedLink;
|
|
processedLink.resize( linkData.getNumLinks() );
|
|
btAlignedObjectArray< int > listOfLinksPerVertex;
|
|
int maxLinksPerVertex = 0;
|
|
|
|
// Count num vertices
|
|
int numVertices = 0;
|
|
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
|
|
numVertices = btMax( numVertices, nodes.vertex0 + 1 );
|
|
numVertices = btMax( numVertices, nodes.vertex1 + 1 );
|
|
}
|
|
|
|
// Need list of links per vertex
|
|
// Compute valence of each vertex
|
|
btAlignedObjectArray <int> numLinksPerVertex;
|
|
numLinksPerVertex.resize(0);
|
|
numLinksPerVertex.resize( numVertices, 0 );
|
|
|
|
generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
|
|
|
|
if (!numVertices)
|
|
return;
|
|
|
|
for( int vertex = 0; vertex < 10; ++vertex )
|
|
{
|
|
for( int link = 0; link < numLinksPerVertex[vertex]; ++link )
|
|
{
|
|
int linkAddress = vertex * maxLinksPerVertex + link;
|
|
}
|
|
}
|
|
|
|
|
|
// At this point we know what links we have for each vertex so we can start batching
|
|
|
|
// We want a vertex to start with, let's go with 0
|
|
int currentVertex = 0;
|
|
int linksProcessed = 0;
|
|
|
|
btAlignedObjectArray <int> verticesToProcess;
|
|
|
|
while( linksProcessed < linkData.getNumLinks() )
|
|
{
|
|
// Next wavefront
|
|
int nextWavefront = linksForWavefronts.size();
|
|
linksForWavefronts.resize( nextWavefront + 1 );
|
|
btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
|
|
verticesForWavefronts.resize( nextWavefront + 1 );
|
|
btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
|
|
|
|
linksForWavefront.resize(0);
|
|
|
|
// Loop to find enough links to fill the wavefront
|
|
// Stopping if we either run out of links, or fill it
|
|
while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
|
|
{
|
|
// Go through the links for the current vertex
|
|
for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
|
|
{
|
|
int linkAddress = currentVertex * maxLinksPerVertex + link;
|
|
int linkIndex = listOfLinksPerVertex[linkAddress];
|
|
|
|
// If we have not already processed this link, add it to the wavefront
|
|
// Claim it as another processed link
|
|
// Add the vertex at the far end to the list of vertices to process.
|
|
if( !processedLink[linkIndex] )
|
|
{
|
|
linksForWavefront.push_back( linkIndex );
|
|
linksProcessed++;
|
|
processedLink[linkIndex] = true;
|
|
int v0 = linkData.getVertexPair(linkIndex).vertex0;
|
|
int v1 = linkData.getVertexPair(linkIndex).vertex1;
|
|
if( v0 == currentVertex )
|
|
verticesToProcess.push_back( v1 );
|
|
else
|
|
verticesToProcess.push_back( v0 );
|
|
}
|
|
}
|
|
if( verticesToProcess.size() > 0 )
|
|
{
|
|
// Get the element on the front of the queue and remove it
|
|
currentVertex = verticesToProcess[0];
|
|
removeFromVector( verticesToProcess, 0 );
|
|
} else {
|
|
// If we've not yet processed all the links, find the first unprocessed one
|
|
// and select one of its vertices as the current vertex
|
|
if( linksProcessed < linkData.getNumLinks() )
|
|
{
|
|
int searchLink = 0;
|
|
while( processedLink[searchLink] )
|
|
searchLink++;
|
|
currentVertex = linkData.getVertexPair(searchLink).vertex0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// We have either finished or filled a wavefront
|
|
for( int link = 0; link < linksForWavefront.size(); ++link )
|
|
{
|
|
int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
|
|
int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
|
|
insertUniqueAndOrderedIntoVector( vertexSet, v0 );
|
|
insertUniqueAndOrderedIntoVector( vertexSet, v1 );
|
|
}
|
|
// Iterate over links mapped to the wave and batch those
|
|
// We can run a batch on each cycle trivially
|
|
|
|
batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
|
|
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
|
|
|
|
|
|
for( int link = 0; link < linksForWavefront.size(); ++link )
|
|
{
|
|
int linkIndex = linksForWavefront[link];
|
|
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
|
|
|
|
int batch = 0;
|
|
bool placed = false;
|
|
while( batch < batchesWithinWave.size() && !placed )
|
|
{
|
|
bool foundSharedVertex = false;
|
|
if( batchesWithinWave[batch].size() >= wavefrontSize )
|
|
{
|
|
// If we have already filled this batch, move on to another
|
|
foundSharedVertex = true;
|
|
} else {
|
|
for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
|
|
{
|
|
btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
|
|
|
|
if( vertices.vertex0 == vertices2.vertex0 ||
|
|
vertices.vertex1 == vertices2.vertex0 ||
|
|
vertices.vertex0 == vertices2.vertex1 ||
|
|
vertices.vertex1 == vertices2.vertex1 )
|
|
{
|
|
foundSharedVertex = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if( !foundSharedVertex )
|
|
{
|
|
batchesWithinWave[batch].push_back( linkIndex );
|
|
placed = true;
|
|
} else {
|
|
++batch;
|
|
}
|
|
}
|
|
if( batch == batchesWithinWave.size() && !placed )
|
|
{
|
|
batchesWithinWave.resize( batch + 1 );
|
|
batchesWithinWave[batch].push_back( linkIndex );
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
|
|
{
|
|
btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
|
|
btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
|
|
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
|
|
btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
|
|
|
|
// Group the links into wavefronts
|
|
computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
|
|
|
|
|
|
// Batch the wavefronts
|
|
generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
|
|
|
|
m_numWavefronts = linksForWavefronts.size();
|
|
|
|
// At this point we have a description of which links we need to process in each wavefront
|
|
|
|
// First correctly fill the batch ranges vector
|
|
int numBatches = wavefrontBatches.size();
|
|
m_wavefrontBatchStartLengths.resize(0);
|
|
int prefixSum = 0;
|
|
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
|
|
{
|
|
int wavesInBatch = wavefrontBatches[batchIndex].size();
|
|
int nextPrefixSum = prefixSum + wavesInBatch;
|
|
m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
|
|
|
|
prefixSum += wavesInBatch;
|
|
}
|
|
|
|
// Also find max number of batches within a wave
|
|
m_maxBatchesWithinWave = 0;
|
|
m_maxVerticesWithinWave = 0;
|
|
m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
|
|
for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
|
|
{
|
|
// See if the number of batches in this wave is greater than the current maxium
|
|
int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
|
|
int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
|
|
m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
|
|
m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
|
|
}
|
|
|
|
// Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
|
|
m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
|
|
|
|
// Now we know the maximum number of vertices per-wave we can resize the global vertices array
|
|
m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
|
|
|
|
// Grab backup copies of all the link data arrays for the sorting process
|
|
btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair> m_links_Backup(m_links);
|
|
btAlignedObjectArray<float> m_linkStrength_Backup(m_linkStrength);
|
|
btAlignedObjectArray<float> m_linksMassLSC_Backup(m_linksMassLSC);
|
|
btAlignedObjectArray<float> m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
|
|
//btAlignedObjectArray<Vectormath::Aos::Vector3> m_linksCLength_Backup(m_linksCLength);
|
|
//btAlignedObjectArray<float> m_linksLengthRatio_Backup(m_linksLengthRatio);
|
|
btAlignedObjectArray<float> m_linksRestLength_Backup(m_linksRestLength);
|
|
btAlignedObjectArray<float> m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
|
|
|
|
// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
|
|
m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
|
|
|
|
// Then re-order links into wavefront blocks
|
|
|
|
// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
|
|
int wavefrontCount = 0;
|
|
|
|
// Iterate over batches of wavefronts, then wavefronts in the batch
|
|
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
|
|
{
|
|
btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
|
|
int wavefrontsInBatch = batch.size();
|
|
|
|
|
|
for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
|
|
{
|
|
|
|
int originalWavefrontIndex = batch[wavefrontIndex];
|
|
btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
|
|
int verticesUsedByWavefront = wavefrontVertices.size();
|
|
|
|
// Copy the set of vertices into the correctly structured array for use on the device
|
|
// Fill the non-vertices with -1s
|
|
// so we can mask out those reads
|
|
for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
|
|
{
|
|
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
|
|
}
|
|
for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
|
|
{
|
|
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
|
|
}
|
|
|
|
// Obtain the set of batches within the current wavefront
|
|
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
|
|
// Set the size of the batches for use in the solver, correctly ordered
|
|
NumBatchesVerticesPair batchesAndVertices;
|
|
batchesAndVertices.numBatches = batchesWithinWavefront.size();
|
|
batchesAndVertices.numVertices = verticesUsedByWavefront;
|
|
m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
|
|
|
|
|
|
// Now iterate over batches within the wavefront to structure the links correctly
|
|
for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
|
|
{
|
|
btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
|
|
int wavefrontBatchSize = linksInBatch.size();
|
|
|
|
int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
|
|
|
|
for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
|
|
{
|
|
int originalLinkAddress = linksInBatch[linkIndex];
|
|
// Reorder simple arrays trivially
|
|
m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
|
|
m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
|
|
m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
|
|
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
|
|
m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
|
|
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
|
|
|
|
// The local address is more complicated. We need to work out where a given vertex will end up
|
|
// by searching the set of vertices for this link and using the index as the local address
|
|
btSoftBodyLinkData::LinkNodePair localPair;
|
|
btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
|
|
localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
|
|
localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
|
|
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
|
|
}
|
|
for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
|
|
{
|
|
// Put 0s into these arrays for padding for cleanliness
|
|
m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
|
|
m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
|
|
m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
|
|
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
|
|
m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
|
|
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
|
|
|
|
|
|
// For local addresses of junk data choose a set of addresses just above the range of valid ones
|
|
// and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
|
|
// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
|
|
// off the end of that range so we need no control
|
|
btSoftBodyLinkData::LinkNodePair localPair;
|
|
localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
|
|
localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
|
|
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
wavefrontCount++;
|
|
}
|
|
|
|
|
|
}
|
|
|
|
} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
|
|
|
|
|
|
|