Files
bullet3/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
erwin.coumans 1f7a67b7da Apply a patch that updates vertex position on GPU for the OpenCL version, by Dongsoo Han (saggita), work-in-progress
Removed the unused OpenCL kernels
Add example how to cache binary kernels, see SoftDemo compiled with OpenCL AMD using msvc/vs2008_opencl.bat
2011-12-20 18:03:24 +00:00

1097 lines
45 KiB
C++

/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
#include "vectormath/vmInclude.h"
#include <stdio.h> //@todo: remove the debugging printf at some stage
#include "btSoftBodySolver_OpenCLSIMDAware.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "BulletSoftBody/btSoftBody.h"
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
#include <limits.h>
#define WAVEFRONT_SIZE 32
#define WAVEFRONT_BLOCK_MULTIPLIER 2
#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
#define LINKS_PER_SIMD_LANE 16
static const size_t workGroupSize = GROUP_SIZE;
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A
static const char* UpdatePositionsFromVelocitiesCLString =
#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
static const char* SolvePositionsCLString =
#include "OpenCLC10/SolvePositionsSIMDBatched.cl"
static const char* UpdateNodesCLString =
#include "OpenCLC10/UpdateNodes.cl"
static const char* UpdatePositionsCLString =
#include "OpenCLC10/UpdatePositions.cl"
static const char* UpdateConstantsCLString =
#include "OpenCLC10/UpdateConstants.cl"
static const char* IntegrateCLString =
#include "OpenCLC10/Integrate.cl"
static const char* ApplyForcesCLString =
#include "OpenCLC10/ApplyForces.cl"
static const char* UpdateFixedVertexPositionsCLString =
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
static const char* UpdateNormalsCLString =
#include "OpenCLC10/UpdateNormals.cl"
static const char* VSolveLinksCLString =
#include "OpenCLC10/VSolveLinks.cl"
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
static const char* OutputToVertexArrayCLString =
#include "OpenCLC10/OutputToVertexArray.cl"
btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx) :
m_cqCommandQue(queue),
m_wavefrontSize( WAVEFRONT_SIZE ),
m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
m_maxBatchesWithinWave( 0 ),
m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
m_numWavefronts( 0 ),
m_maxVertex( 0 ),
m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),
m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
{
}
btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
{
}
static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
{
Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
return outVec;
}
/** Allocate enough space in all link-related arrays to fit numLinks links */
void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
{
int previousSize = m_links.size();
int newSize = previousSize + numLinks;
btSoftBodyLinkData::createLinks( numLinks );
// Resize the link addresses array as well
m_linkAddresses.resize( newSize );
}
/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt(
const LinkDescription &link,
int linkIndex )
{
btSoftBodyLinkData::setLinkAt( link, linkIndex );
if( link.getVertex0() > m_maxVertex )
m_maxVertex = link.getVertex0();
if( link.getVertex1() > m_maxVertex )
m_maxVertex = link.getVertex1();
// Set the link index correctly for initialisation
m_linkAddresses[linkIndex] = linkIndex;
}
bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
{
return m_onGPU;
}
bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
{
bool success = true;
success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
success = success && m_clLinkStrength.moveToGPU();
success = success && m_clLinksMassLSC.moveToGPU();
success = success && m_clLinksRestLengthSquared.moveToGPU();
success = success && m_clLinksRestLength.moveToGPU();
success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
if( success ) {
m_onGPU = true;
}
return success;
}
bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
{
bool success = true;
success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
success = success && m_clLinkStrength.moveFromGPU();
success = success && m_clLinksMassLSC.moveFromGPU();
success = success && m_clLinksRestLengthSquared.moveFromGPU();
success = success && m_clLinksRestLength.moveFromGPU();
success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
if( success ) {
m_onGPU = false;
}
return success;
}
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
m_linkData(queue, ctx)
{
// Initial we will clearly need to update solver constants
// For now this is global for the cloths linked with this solver - we should probably make this body specific
// for performance in future once we understand more clearly when constants need to be updated
m_updateSolverConstants = true;
m_shadersInitialized = false;
}
btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
{
releaseKernels();
}
void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
{
if( forceUpdate || m_softBodySet.size() != softBodies.size() )
{
// Have a change in the soft body set so update, reloading all the data
getVertexData().clear();
getTriangleData().clear();
getLinkData().clear();
m_softBodySet.resize(0);
m_anchorIndex.clear();
int maxPiterations = 0;
int maxViterations = 0;
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
{
btSoftBody *softBody = softBodies[ softBodyIndex ];
using Vectormath::Aos::Matrix3;
using Vectormath::Aos::Point3;
// Create SoftBody that will store the information within the solver
btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
m_softBodySet.push_back( newSoftBody );
m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
m_perClothFriction.push_back( softBody->getFriction() );
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
// Add space for new vertices and triangles in the default solver for now
// TODO: Include space here for tearing too later
int firstVertex = getVertexData().getNumVertices();
int numVertices = softBody->m_nodes.size();
// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
// Allocate space for new vertices in all the vertex arrays
getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
int firstTriangle = getTriangleData().getNumTriangles();
int numTriangles = softBody->m_faces.size();
int maxTriangles = numTriangles;
getTriangleData().createTriangles( maxTriangles );
// Copy vertices from softbody into the solver
for( int vertex = 0; vertex < numVertices; ++vertex )
{
Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
btSoftBodyVertexData::VertexDescription desc;
// TODO: Position in the softbody might be pre-transformed
// or we may need to adapt for the pose.
//desc.setPosition( cloth.getMeshTransform()*multPoint );
desc.setPosition( multPoint );
float vertexInverseMass = softBody->m_nodes[vertex].m_im;
desc.setInverseMass(vertexInverseMass);
getVertexData().setVertexAt( desc, firstVertex + vertex );
m_anchorIndex.push_back(-1.0);
}
// Copy triangles similarly
// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
for( int triangle = 0; triangle < numTriangles; ++triangle )
{
// Note that large array storage is relative to the array not to the cloth
// So we need to add firstVertex to each value
int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
// Increase vertex triangle counts for this triangle
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
}
int firstLink = getLinkData().getNumLinks();
int numLinks = softBody->m_links.size();
int maxLinks = numLinks;
// Allocate space for the links
getLinkData().createLinks( numLinks );
// Add the links
for( int link = 0; link < numLinks; ++link )
{
int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
newLink.setLinkStrength(1.f);
getLinkData().setLinkAt(newLink, firstLink + link);
}
newSoftBody->setFirstVertex( firstVertex );
newSoftBody->setFirstTriangle( firstTriangle );
newSoftBody->setNumVertices( numVertices );
newSoftBody->setMaxVertices( maxVertices );
newSoftBody->setNumTriangles( numTriangles );
newSoftBody->setMaxTriangles( maxTriangles );
newSoftBody->setFirstLink( firstLink );
newSoftBody->setNumLinks( numLinks );
// Find maximum piterations and viterations
int piterations = softBody->m_cfg.piterations;
if ( piterations > maxPiterations )
maxPiterations = piterations;
int viterations = softBody->m_cfg.viterations;
if ( viterations > maxViterations )
maxViterations = viterations;
// zero mass
for( int vertex = 0; vertex < numVertices; ++vertex )
{
if ( softBody->m_nodes[vertex].m_im == 0 )
{
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + vertex;
nodeInfo.pNode = &softBody->m_nodes[vertex];
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
// anchor position
if ( numVertices > 0 )
{
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
{
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
AnchorNodeInfoCL nodeInfo;
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
nodeInfo.pNode = anchorNode;
m_anchorNodeInfoArray.push_back(nodeInfo);
}
}
}
m_anchorPosition.clear();
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
{
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
}
updateConstants(0.f);
// set position and velocity iterations
setNumberOfPositionIterations(maxPiterations);
setNumberOfVelocityIterations(maxViterations);
// set wind velocity
m_perClothWindVelocity.resize( m_softBodySet.size() );
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
}
m_clPerClothWindVelocity.changedOnCPU();
// generate batches
m_linkData.generateBatches();
m_triangleData.generateBatches();
// Build the shaders to match the batching parameters
buildShaders();
}
}
btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
{
// TODO: Consider setting link data to "changed" here
return m_linkData;
}
void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
{
using namespace Vectormath::Aos;
if( m_updateSolverConstants )
{
m_updateSolverConstants = false;
// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
// Initialise link constants
const int numLinks = m_linkData.getNumLinks();
for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
{
btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
float massLSC = (invMass0 + invMass1)/linearStiffness;
m_linkData.getMassLSC(linkIndex) = massLSC;
float restLength = m_linkData.getRestLength(linkIndex);
float restLengthSquared = restLength*restLength;
m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
}
}
}
void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
{
using Vectormath::Aos::Vector3;
using Vectormath::Aos::Point3;
using Vectormath::Aos::lengthSqr;
using Vectormath::Aos::dot;
// Prepare links
int numLinks = m_linkData.getNumLinks();
int numVertices = m_vertexData.getNumVertices();
float kst = 1.f;
float ti = 0.f;
m_clPerClothDampingFactor.moveToGPU();
m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
// Ensure data is on accelerator
m_linkData.moveToAccelerator();
m_vertexData.moveToAccelerator();
//prepareLinks();
prepareCollisionConstraints();
// Solve drift
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
{
for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
{
int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
solveLinksForPosition( startWave, numWaves, kst, ti );
}
} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
// At this point assume that the force array is blank - we will overwrite it
solveCollisionsAndUpdateVelocities( 1.f/solverdt );
}
//////////////////////////////////////
// Kernel dispatches
void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
{
cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
size_t numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
if( ciErrNum!= CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
}
} // solveLinksForPosition
void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
{
// Copy kernel parameters to GPU
m_vertexData.moveToAccelerator();
m_clPerClothFriction.moveToGPU();
m_clPerClothDampingFactor.moveToGPU();
m_clPerClothCollisionObjects.moveToGPU();
m_clCollisionObjectDetails.moveToGPU();
cl_int ciErrNum;
int numVerts = m_vertexData.getNumVertices();
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
if (numWorkItems)
{
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{
btAssert( 0 && "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
}
}
} // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities
// End kernel dispatches
/////////////////////////////////////
bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
{
releaseKernels();
if( m_shadersInitialized )
return true;
const char* additionalMacros="";
m_currentCLFunctions->clearKernelCompilationFailures();
char *wavefrontMacros = new char[256];
sprintf(
wavefrontMacros,
"-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d",
m_linkData.getMaxVerticesPerWavefront(),
m_linkData.getMaxBatchesPerWavefront(),
m_linkData.getWavefrontSize(),
WAVEFRONT_BLOCK_MULTIPLIER,
WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
// TODO: Rename to UpdateSoftBodies
m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
delete [] wavefrontMacros;
if( m_currentCLFunctions->getKernelCompilationFailures()==0)
{
m_shadersInitialized = true;
}
return m_shadersInitialized;
}
static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
{
Vectormath::Aos::Transform3 outTransform;
outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
outTransform.setCol(3, toVector3(transform.getOrigin()));
return outTransform;
}
static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
{
// A per-batch map of truth values stating whether a given vertex is in that batch
// This allows us to significantly optimize the batching
btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
{
btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
int batch = 0;
bool placed = false;
while( batch < wavefrontBatches.size() && !placed )
{
// Test the current batch, see if this wave shares any vertex with the waves in the batch
bool foundSharedVertex = false;
for( int link = 0; link < wavefront.size(); ++link )
{
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
{
foundSharedVertex = true;
}
}
if( !foundSharedVertex )
{
wavefrontBatches[batch].push_back( waveIndex );
// Insert vertices into this batch too
for( int link = 0; link < wavefront.size(); ++link )
{
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
}
placed = true;
}
batch++;
}
if( batch == wavefrontBatches.size() && !placed )
{
wavefrontBatches.resize( batch + 1 );
wavefrontBatches[batch].push_back( waveIndex );
// And resize map as well
mapOfVerticesInBatches.resize( batch + 1 );
// Resize maps with total number of vertices
mapOfVerticesInBatches[batch].resize( numVertices+1, false );
// Insert vertices into this batch too
for( int link = 0; link < wavefront.size(); ++link )
{
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
}
}
}
mapOfVerticesInBatches.clear();
}
// Function to remove an object from a vector maintaining correct ordering of the vector
template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
{
int currentSize = vectorToUpdate.size();
for( int i = indexToRemove; i < (currentSize-1); ++i )
{
vectorToUpdate[i] = vectorToUpdate[i+1];
}
if( currentSize > 0 )
vectorToUpdate.resize( currentSize - 1 );
}
/**
* Insert element into vectorToUpdate at index index.
*/
template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
{
vectorToUpdate.resize( vectorToUpdate.size() + 1 );
for( int i = (vectorToUpdate.size() - 1); i > index; --i )
{
vectorToUpdate[i] = vectorToUpdate[i-1];
}
vectorToUpdate[index] = element;
}
/**
* Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
* ie it treats vectorToUpdate as an ordered set.
*/
template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
{
int index = 0;
while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
{
index++;
}
if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
insertAtIndex( vectorToUpdate, index, element );
}
static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
{
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
{
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
numLinksPerVertex[nodes.vertex0]++;
numLinksPerVertex[nodes.vertex1]++;
}
int maxLinksPerVertex = 0;
for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
{
maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
}
maxLinks = maxLinksPerVertex;
btAlignedObjectArray< int > linksFoundPerVertex;
linksFoundPerVertex.resize( numVertices, 0 );
listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
{
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
{
// Do vertex 0
int vertexIndex = nodes.vertex0;
int linkForVertex = linksFoundPerVertex[nodes.vertex0];
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
listOfLinksPerVertex[linkAddress] = linkIndex;
linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
}
{
// Do vertex 1
int vertexIndex = nodes.vertex1;
int linkForVertex = linksFoundPerVertex[nodes.vertex1];
int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
listOfLinksPerVertex[linkAddress] = linkIndex;
linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
}
}
}
static void computeBatchingIntoWavefronts(
btSoftBodyLinkData &linkData,
int wavefrontSize,
int linksPerWorkItem,
int maxLinksPerWavefront,
btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts,
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
)
{
// Attempt generation of larger batches of links.
btAlignedObjectArray< bool > processedLink;
processedLink.resize( linkData.getNumLinks() );
btAlignedObjectArray< int > listOfLinksPerVertex;
int maxLinksPerVertex = 0;
// Count num vertices
int numVertices = 0;
for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
{
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
numVertices = btMax( numVertices, nodes.vertex0 + 1 );
numVertices = btMax( numVertices, nodes.vertex1 + 1 );
}
// Need list of links per vertex
// Compute valence of each vertex
btAlignedObjectArray <int> numLinksPerVertex;
numLinksPerVertex.resize(0);
numLinksPerVertex.resize( numVertices, 0 );
generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
if (!numVertices)
return;
for( int vertex = 0; vertex < 10; ++vertex )
{
for( int link = 0; link < numLinksPerVertex[vertex]; ++link )
{
int linkAddress = vertex * maxLinksPerVertex + link;
}
}
// At this point we know what links we have for each vertex so we can start batching
// We want a vertex to start with, let's go with 0
int currentVertex = 0;
int linksProcessed = 0;
btAlignedObjectArray <int> verticesToProcess;
while( linksProcessed < linkData.getNumLinks() )
{
// Next wavefront
int nextWavefront = linksForWavefronts.size();
linksForWavefronts.resize( nextWavefront + 1 );
btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
verticesForWavefronts.resize( nextWavefront + 1 );
btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
linksForWavefront.resize(0);
// Loop to find enough links to fill the wavefront
// Stopping if we either run out of links, or fill it
while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
{
// Go through the links for the current vertex
for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
{
int linkAddress = currentVertex * maxLinksPerVertex + link;
int linkIndex = listOfLinksPerVertex[linkAddress];
// If we have not already processed this link, add it to the wavefront
// Claim it as another processed link
// Add the vertex at the far end to the list of vertices to process.
if( !processedLink[linkIndex] )
{
linksForWavefront.push_back( linkIndex );
linksProcessed++;
processedLink[linkIndex] = true;
int v0 = linkData.getVertexPair(linkIndex).vertex0;
int v1 = linkData.getVertexPair(linkIndex).vertex1;
if( v0 == currentVertex )
verticesToProcess.push_back( v1 );
else
verticesToProcess.push_back( v0 );
}
}
if( verticesToProcess.size() > 0 )
{
// Get the element on the front of the queue and remove it
currentVertex = verticesToProcess[0];
removeFromVector( verticesToProcess, 0 );
} else {
// If we've not yet processed all the links, find the first unprocessed one
// and select one of its vertices as the current vertex
if( linksProcessed < linkData.getNumLinks() )
{
int searchLink = 0;
while( processedLink[searchLink] )
searchLink++;
currentVertex = linkData.getVertexPair(searchLink).vertex0;
}
}
}
// We have either finished or filled a wavefront
for( int link = 0; link < linksForWavefront.size(); ++link )
{
int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
insertUniqueAndOrderedIntoVector( vertexSet, v0 );
insertUniqueAndOrderedIntoVector( vertexSet, v1 );
}
// Iterate over links mapped to the wave and batch those
// We can run a batch on each cycle trivially
batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
for( int link = 0; link < linksForWavefront.size(); ++link )
{
int linkIndex = linksForWavefront[link];
btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
int batch = 0;
bool placed = false;
while( batch < batchesWithinWave.size() && !placed )
{
bool foundSharedVertex = false;
if( batchesWithinWave[batch].size() >= wavefrontSize )
{
// If we have already filled this batch, move on to another
foundSharedVertex = true;
} else {
for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
{
btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
if( vertices.vertex0 == vertices2.vertex0 ||
vertices.vertex1 == vertices2.vertex0 ||
vertices.vertex0 == vertices2.vertex1 ||
vertices.vertex1 == vertices2.vertex1 )
{
foundSharedVertex = true;
break;
}
}
}
if( !foundSharedVertex )
{
batchesWithinWave[batch].push_back( linkIndex );
placed = true;
} else {
++batch;
}
}
if( batch == batchesWithinWave.size() && !placed )
{
batchesWithinWave.resize( batch + 1 );
batchesWithinWave[batch].push_back( linkIndex );
}
}
}
}
void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
{
btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
// Group the links into wavefronts
computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
// Batch the wavefronts
generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
m_numWavefronts = linksForWavefronts.size();
// At this point we have a description of which links we need to process in each wavefront
// First correctly fill the batch ranges vector
int numBatches = wavefrontBatches.size();
m_wavefrontBatchStartLengths.resize(0);
int prefixSum = 0;
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
{
int wavesInBatch = wavefrontBatches[batchIndex].size();
int nextPrefixSum = prefixSum + wavesInBatch;
m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
prefixSum += wavesInBatch;
}
// Also find max number of batches within a wave
m_maxBatchesWithinWave = 0;
m_maxVerticesWithinWave = 0;
m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
{
// See if the number of batches in this wave is greater than the current maxium
int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
}
// Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
// Now we know the maximum number of vertices per-wave we can resize the global vertices array
m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
// Grab backup copies of all the link data arrays for the sorting process
btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair> m_links_Backup(m_links);
btAlignedObjectArray<float> m_linkStrength_Backup(m_linkStrength);
btAlignedObjectArray<float> m_linksMassLSC_Backup(m_linksMassLSC);
btAlignedObjectArray<float> m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
//btAlignedObjectArray<Vectormath::Aos::Vector3> m_linksCLength_Backup(m_linksCLength);
//btAlignedObjectArray<float> m_linksLengthRatio_Backup(m_linksLengthRatio);
btAlignedObjectArray<float> m_linksRestLength_Backup(m_linksRestLength);
btAlignedObjectArray<float> m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
// Then re-order links into wavefront blocks
// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
int wavefrontCount = 0;
// Iterate over batches of wavefronts, then wavefronts in the batch
for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
{
btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
int wavefrontsInBatch = batch.size();
for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
{
int originalWavefrontIndex = batch[wavefrontIndex];
btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
int verticesUsedByWavefront = wavefrontVertices.size();
// Copy the set of vertices into the correctly structured array for use on the device
// Fill the non-vertices with -1s
// so we can mask out those reads
for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
{
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
}
for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
{
m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
}
// Obtain the set of batches within the current wavefront
btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
// Set the size of the batches for use in the solver, correctly ordered
NumBatchesVerticesPair batchesAndVertices;
batchesAndVertices.numBatches = batchesWithinWavefront.size();
batchesAndVertices.numVertices = verticesUsedByWavefront;
m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
// Now iterate over batches within the wavefront to structure the links correctly
for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
{
btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
int wavefrontBatchSize = linksInBatch.size();
int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
{
int originalLinkAddress = linksInBatch[linkIndex];
// Reorder simple arrays trivially
m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
// The local address is more complicated. We need to work out where a given vertex will end up
// by searching the set of vertices for this link and using the index as the local address
btSoftBodyLinkData::LinkNodePair localPair;
btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
}
for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
{
// Put 0s into these arrays for padding for cleanliness
m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
// For local addresses of junk data choose a set of addresses just above the range of valid ones
// and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
// off the end of that range so we need no control
btSoftBodyLinkData::LinkNodePair localPair;
localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
}
}
wavefrontCount++;
}
}
} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()