Apply a patch that updates vertex position on GPU for the OpenCL version, by Dongsoo Han (saggita), work-in-progress
Removed the unused OpenCL kernels Add example how to cache binary kernels, see SoftDemo compiled with OpenCL AMD using msvc/vs2008_opencl.bat
This commit is contained in:
@@ -55,7 +55,7 @@ public:
|
||||
virtual bool onAccelerator();
|
||||
virtual bool moveToAccelerator();
|
||||
|
||||
virtual bool moveFromAccelerator();
|
||||
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -308,18 +308,42 @@ bool btSoftBodyVertexDataDX11::moveToAccelerator()
|
||||
return success;
|
||||
}
|
||||
|
||||
bool btSoftBodyVertexDataDX11::moveFromAccelerator()
|
||||
bool btSoftBodyVertexDataDX11::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
|
||||
{
|
||||
bool success = true;
|
||||
success = success && m_dx11ClothIdentifier.moveFromGPU();
|
||||
success = success && m_dx11VertexPosition.moveFromGPU();
|
||||
success = success && m_dx11VertexPreviousPosition.moveFromGPU();
|
||||
success = success && m_dx11VertexVelocity.moveFromGPU();
|
||||
success = success && m_dx11VertexForceAccumulator.moveFromGPU();
|
||||
success = success && m_dx11VertexNormal.moveFromGPU();
|
||||
success = success && m_dx11VertexInverseMass.moveFromGPU();
|
||||
success = success && m_dx11VertexArea.moveFromGPU();
|
||||
success = success && m_dx11VertexTriangleCount.moveFromGPU();
|
||||
|
||||
if (!bCopy)
|
||||
{
|
||||
success = success && m_dx11ClothIdentifier.moveFromGPU();
|
||||
success = success && m_dx11VertexPosition.moveFromGPU();
|
||||
success = success && m_dx11VertexPreviousPosition.moveFromGPU();
|
||||
success = success && m_dx11VertexVelocity.moveFromGPU();
|
||||
success = success && m_dx11VertexForceAccumulator.moveFromGPU();
|
||||
success = success && m_dx11VertexNormal.moveFromGPU();
|
||||
success = success && m_dx11VertexInverseMass.moveFromGPU();
|
||||
success = success && m_dx11VertexArea.moveFromGPU();
|
||||
success = success && m_dx11VertexTriangleCount.moveFromGPU();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bCopyMinimum)
|
||||
{
|
||||
success = success && m_dx11VertexPosition.copyFromGPU();
|
||||
success = success && m_dx11VertexNormal.copyFromGPU();
|
||||
}
|
||||
else
|
||||
{
|
||||
success = success && m_dx11ClothIdentifier.copyFromGPU();
|
||||
success = success && m_dx11VertexPosition.copyFromGPU();
|
||||
success = success && m_dx11VertexPreviousPosition.copyFromGPU();
|
||||
success = success && m_dx11VertexVelocity.copyFromGPU();
|
||||
success = success && m_dx11VertexForceAccumulator.copyFromGPU();
|
||||
success = success && m_dx11VertexNormal.copyFromGPU();
|
||||
success = success && m_dx11VertexInverseMass.copyFromGPU();
|
||||
success = success && m_dx11VertexArea.copyFromGPU();
|
||||
success = success && m_dx11VertexTriangleCount.copyFromGPU();
|
||||
}
|
||||
}
|
||||
|
||||
if( success )
|
||||
m_onGPU = true;
|
||||
@@ -619,10 +643,10 @@ void btDX11SoftBodySolver::releaseKernels()
|
||||
}
|
||||
|
||||
|
||||
void btDX11SoftBodySolver::copyBackToSoftBodies()
|
||||
void btDX11SoftBodySolver::copyBackToSoftBodies(bool bMove)
|
||||
{
|
||||
// Move the vertex data back to the host first
|
||||
m_vertexData.moveFromAccelerator();
|
||||
m_vertexData.moveFromAccelerator(!bMove);
|
||||
|
||||
// Loop over soft bodies, copying all the vertex positions back for each body in turn
|
||||
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
||||
|
||||
@@ -607,7 +607,7 @@ public:
|
||||
|
||||
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
|
||||
|
||||
virtual void copyBackToSoftBodies();
|
||||
virtual void copyBackToSoftBodies(bool bMove = true);
|
||||
|
||||
virtual void solveConstraints( float solverdt );
|
||||
|
||||
|
||||
@@ -34,7 +34,6 @@ SET(BulletSoftBodyOpenCLSolvers_HDRS
|
||||
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD
|
||||
${BulletSoftBodyOpenCLSolvers_SRCS}
|
||||
${BulletSoftBodyOpenCLSolvers_HDRS}
|
||||
${BulletSoftBodyOpenCLSolvers_OpenCLC}
|
||||
)
|
||||
|
||||
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
|
||||
|
||||
@@ -45,7 +45,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders
|
||||
)
|
||||
|
||||
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
|
||||
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl")
|
||||
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
|
||||
endforeach(f)
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ subject to the following restrictions:
|
||||
#include "../OpenCLC10/UpdatePositions.cl"
|
||||
#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
|
||||
#include "../OpenCLC10/VSolveLinks.cl"
|
||||
#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
|
||||
//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
|
||||
|
||||
|
||||
@@ -39,7 +40,7 @@ MINICL_REGISTER(ApplyForcesKernel)
|
||||
MINICL_REGISTER(ResetNormalsAndAreasKernel)
|
||||
MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
|
||||
MINICL_REGISTER(UpdateSoftBodiesKernel)
|
||||
|
||||
MINICL_REGISTER(UpdateFixedVertexPositions)
|
||||
|
||||
float mydot3a(float4 a, float4 b)
|
||||
{
|
||||
|
||||
@@ -44,7 +44,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders
|
||||
)
|
||||
|
||||
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
|
||||
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl")
|
||||
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
|
||||
endforeach(f)
|
||||
|
||||
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
/*#define float3 float4
|
||||
float dot3(float3 a, float3 b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}*/
|
||||
|
||||
float3 projectOnAxis( float3 v, float3 a )
|
||||
{
|
||||
return (a*dot(v, a));
|
||||
}
|
||||
|
||||
__kernel void
|
||||
ApplyForcesKernel(
|
||||
const uint numNodes,
|
||||
const float solverdt,
|
||||
const float epsilon,
|
||||
__global int * g_vertexClothIdentifier,
|
||||
__global float4 * g_vertexNormal,
|
||||
__global float * g_vertexArea,
|
||||
__global float * g_vertexInverseMass,
|
||||
__global float * g_clothLiftFactor,
|
||||
__global float * g_clothDragFactor,
|
||||
__global float4 * g_clothWindVelocity,
|
||||
__global float4 * g_clothAcceleration,
|
||||
__global float * g_clothMediumDensity,
|
||||
__global float4 * g_vertexForceAccumulator,
|
||||
__global float4 * g_vertexVelocity)
|
||||
{
|
||||
unsigned int nodeID = get_global_id(0);
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
int clothId = g_vertexClothIdentifier[nodeID];
|
||||
float nodeIM = g_vertexInverseMass[nodeID];
|
||||
|
||||
if( nodeIM > 0.0f )
|
||||
{
|
||||
float3 nodeV = g_vertexVelocity[nodeID].xyz;
|
||||
float3 normal = g_vertexNormal[nodeID].xyz;
|
||||
float area = g_vertexArea[nodeID];
|
||||
float3 nodeF = g_vertexForceAccumulator[nodeID].xyz;
|
||||
|
||||
// Read per-cloth values
|
||||
float3 clothAcceleration = g_clothAcceleration[clothId].xyz;
|
||||
float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz;
|
||||
float liftFactor = g_clothLiftFactor[clothId];
|
||||
float dragFactor = g_clothDragFactor[clothId];
|
||||
float mediumDensity = g_clothMediumDensity[clothId];
|
||||
|
||||
// Apply the acceleration to the cloth rather than do this via a force
|
||||
nodeV += (clothAcceleration*solverdt);
|
||||
|
||||
g_vertexVelocity[nodeID] = (float4)(nodeV, 0.f);
|
||||
|
||||
float3 relativeWindVelocity = nodeV - clothWindVelocity;
|
||||
float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
|
||||
|
||||
if( relativeSpeedSquared > epsilon )
|
||||
{
|
||||
// Correct direction of normal relative to wind direction and get dot product
|
||||
normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
|
||||
float dvNormal = dot(normal, relativeWindVelocity);
|
||||
if( dvNormal > 0 )
|
||||
{
|
||||
float3 force = (float3)(0.f, 0.f, 0.f);
|
||||
float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
|
||||
float c1 = c0 * mediumDensity;
|
||||
force += normal * (-c1 * liftFactor);
|
||||
force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
|
||||
|
||||
float dtim = solverdt * nodeIM;
|
||||
float3 forceDTIM = force * dtim;
|
||||
|
||||
float3 nodeFPlusForce = nodeF + force;
|
||||
|
||||
// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
|
||||
float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
|
||||
|
||||
nodeF = nodeFPlusForce;
|
||||
if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
|
||||
nodeF = nodeFMinus;
|
||||
|
||||
g_vertexForceAccumulator[nodeID] = (float4)(nodeF, 0.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,80 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable \n
|
||||
|
||||
|
||||
|
||||
__kernel void
|
||||
ComputeBoundsKernel(
|
||||
int numNodes,
|
||||
int numSoftBodies,
|
||||
__global int * g_vertexClothIdentifier,
|
||||
__global float4 * g_vertexPositions,
|
||||
volatile __global uint * g_clothMinBounds,
|
||||
volatile __global uint * g_clothMaxBounds,
|
||||
volatile __local uint * clothMinBounds,
|
||||
volatile __local uint * clothMaxBounds)
|
||||
{
|
||||
// Init min and max bounds arrays
|
||||
if( get_local_id(0) < numSoftBodies )
|
||||
{
|
||||
|
||||
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
|
||||
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
|
||||
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
|
||||
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
|
||||
clothMaxBounds[get_local_id(0)*4] = 0;
|
||||
clothMaxBounds[get_local_id(0)*4+1] = 0;
|
||||
clothMaxBounds[get_local_id(0)*4+2] = 0;
|
||||
clothMaxBounds[get_local_id(0)*4+3] = 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
int nodeID = get_global_id(0);
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)];
|
||||
if( clothIdentifier >= 0 )
|
||||
{
|
||||
float3 position = g_vertexPositions[get_global_id(0)].xyz;
|
||||
|
||||
/* Reinterpret position as uint */
|
||||
uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z));
|
||||
|
||||
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
|
||||
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
|
||||
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
|
||||
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
|
||||
|
||||
/* Min/max with the LDS values */
|
||||
atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
|
||||
atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||
atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||
|
||||
atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
|
||||
atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
|
||||
atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
|
||||
/* Use global atomics to update the global versions of the data*/
|
||||
if( get_local_id(0) < numSoftBodies )
|
||||
{
|
||||
/*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
|
||||
atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
|
||||
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
|
||||
atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
|
||||
|
||||
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
|
||||
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
|
||||
atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
);
|
||||
@@ -1,35 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Node indices for each link
|
||||
|
||||
//#define float3 float4
|
||||
|
||||
__kernel void
|
||||
IntegrateKernel(
|
||||
const int numNodes,
|
||||
const float solverdt,
|
||||
__global float * g_vertexInverseMasses,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float4 * g_vertexVelocity,
|
||||
__global float4 * g_vertexPreviousPositions,
|
||||
__global float4 * g_vertexForceAccumulator)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
float3 position = g_vertexPositions[nodeID].xyz;
|
||||
float3 velocity = g_vertexVelocity[nodeID].xyz;
|
||||
float3 force = g_vertexForceAccumulator[nodeID].xyz;
|
||||
float inverseMass = g_vertexInverseMasses[nodeID];
|
||||
|
||||
g_vertexPreviousPositions[nodeID] = (float4)(position, 0.f);
|
||||
velocity += force * inverseMass * solverdt;
|
||||
position += velocity * solverdt;
|
||||
|
||||
g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
|
||||
g_vertexPositions[nodeID] = (float4)(position, 0.f);
|
||||
g_vertexVelocity[nodeID] = (float4)(velocity, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,46 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
cbuffer OutputToVertexArrayCB : register( b0 )
|
||||
{
|
||||
int startNode;
|
||||
int numNodes;
|
||||
int offsetX;
|
||||
int strideX;
|
||||
|
||||
int offsetN;
|
||||
int strideN;
|
||||
int padding1;
|
||||
int padding2;
|
||||
};
|
||||
|
||||
|
||||
StructuredBuffer<float4> g_nodesx : register( t0 );
|
||||
StructuredBuffer<float4> g_nodesn : register( t1 );
|
||||
|
||||
RWStructuredBuffer<float> g_vertexBuffer : register( u0 );
|
||||
|
||||
|
||||
[numthreads(128, 1, 1)]
|
||||
void
|
||||
OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
|
||||
{
|
||||
int nodeID = DTid.x;
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
float4 nodeX = g_nodesx[nodeID + startNode];
|
||||
float4 nodeN = g_nodesn[nodeID + startNode];
|
||||
|
||||
// Stride should account for the float->float4 conversion
|
||||
int positionDestination = nodeID * strideX + offsetX;
|
||||
g_vertexBuffer[positionDestination] = nodeX.x;
|
||||
g_vertexBuffer[positionDestination+1] = nodeX.y;
|
||||
g_vertexBuffer[positionDestination+2] = nodeX.z;
|
||||
|
||||
int normalDestination = nodeID * strideN + offsetN;
|
||||
g_vertexBuffer[normalDestination] = nodeN.x;
|
||||
g_vertexBuffer[normalDestination+1] = nodeN.y;
|
||||
g_vertexBuffer[normalDestination+2] = nodeN.z;
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,34 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
__kernel void
|
||||
PrepareLinksKernel(
|
||||
const int numLinks,
|
||||
__global int2 * g_linksVertexIndices,
|
||||
__global float * g_linksMassLSC,
|
||||
__global float4 * g_nodesPreviousPosition,
|
||||
__global float * g_linksLengthRatio,
|
||||
__global float4 * g_linksCurrentLength)
|
||||
{
|
||||
int linkID = get_global_id(0);
|
||||
if( linkID < numLinks )
|
||||
{
|
||||
int2 nodeIndices = g_linksVertexIndices[linkID];
|
||||
int node0 = nodeIndices.x;
|
||||
int node1 = nodeIndices.y;
|
||||
|
||||
float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
|
||||
float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
|
||||
|
||||
float massLSC = g_linksMassLSC[linkID];
|
||||
|
||||
float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
|
||||
|
||||
float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC;
|
||||
linkLengthRatio = 1.0f/linkLengthRatio;
|
||||
|
||||
g_linksCurrentLength[linkID] = linkCurrentLength;
|
||||
g_linksLengthRatio[linkID] = linkLengthRatio;
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,195 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int firstObject;
|
||||
int endObject;
|
||||
} CollisionObjectIndices;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 shapeTransform[4]; // column major 4x4 matrix
|
||||
float4 linearVelocity;
|
||||
float4 angularVelocity;
|
||||
|
||||
int softBodyIdentifier;
|
||||
int collisionShapeType;
|
||||
|
||||
|
||||
// Shape information
|
||||
// Compressed from the union
|
||||
float radius;
|
||||
float halfHeight;
|
||||
int upAxis;
|
||||
|
||||
float margin;
|
||||
float friction;
|
||||
|
||||
int padding0;
|
||||
|
||||
} CollisionShapeDescription;
|
||||
|
||||
/* From btBroadphaseProxy.h */
|
||||
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||
|
||||
/* Multiply column-major matrix against vector */
|
||||
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||
{
|
||||
float4 returnVector;
|
||||
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||
returnVector.x = dot(row0, vector);
|
||||
returnVector.y = dot(row1, vector);
|
||||
returnVector.z = dot(row2, vector);
|
||||
returnVector.w = dot(row3, vector);
|
||||
return returnVector;
|
||||
}
|
||||
|
||||
__kernel void
|
||||
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||
const int numNodes,
|
||||
const float isolverdt,
|
||||
__global int *g_vertexClothIdentifier,
|
||||
__global float4 *g_vertexPreviousPositions,
|
||||
__global float * g_perClothFriction,
|
||||
__global float * g_clothDampingFactor,
|
||||
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||
__global float4 * g_vertexForces,
|
||||
__global float4 *g_vertexVelocities,
|
||||
__global float4 *g_vertexPositions)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
|
||||
if( get_global_id(0) < numNodes )
|
||||
{
|
||||
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||
|
||||
// Abort if this is not a valid cloth
|
||||
if( clothIdentifier < 0 )
|
||||
return;
|
||||
|
||||
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||
float3 velocity;
|
||||
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||
float velocityCoefficient = (1.f - dampingFactor);
|
||||
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||
|
||||
if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
|
||||
{
|
||||
velocity = (float3)(15, 0, 0);
|
||||
|
||||
/* We have some possible collisions to deal with */
|
||||
for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
|
||||
{
|
||||
CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
|
||||
float colliderFriction = shapeDescription.friction;
|
||||
|
||||
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||
{
|
||||
/* Colliding with a capsule */
|
||||
|
||||
float capsuleHalfHeight = shapeDescription.halfHeight;
|
||||
float capsuleRadius = shapeDescription.radius;
|
||||
float capsuleMargin = shapeDescription.margin;
|
||||
int capsuleupAxis = shapeDescription.upAxis;
|
||||
|
||||
/* Four columns of worldTransform matrix */
|
||||
float4 worldTransform[4];
|
||||
worldTransform[0] = shapeDescription.shapeTransform[0];
|
||||
worldTransform[1] = shapeDescription.shapeTransform[1];
|
||||
worldTransform[2] = shapeDescription.shapeTransform[2];
|
||||
worldTransform[3] = shapeDescription.shapeTransform[3];
|
||||
|
||||
// Correctly define capsule centerline vector
|
||||
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||
c2.x = -c1.x;
|
||||
c2.y = -c1.y;
|
||||
c2.z = -c1.z;
|
||||
|
||||
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||
float3 segment = (worldC2 - worldC1).xyz;
|
||||
|
||||
/* compute distance of tangent to vertex along line segment in capsule */
|
||||
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||
|
||||
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
|
||||
float distanceFromLine = length(position - closestPoint);
|
||||
float distanceFromC1 = length(worldC1 - position);
|
||||
float distanceFromC2 = length(worldC2 - position);
|
||||
|
||||
/* Final distance from collision, point to push from, direction to push in
|
||||
for impulse force */
|
||||
float dist;
|
||||
float3 normalVector;
|
||||
if( distanceAlongSegment < 0 )
|
||||
{
|
||||
dist = distanceFromC1;
|
||||
normalVector = normalize(position - worldC1).xyz;
|
||||
} else if( distanceAlongSegment > 1.f ) {
|
||||
dist = distanceFromC2;
|
||||
normalVector = normalize(position - worldC2).xyz;
|
||||
} else {
|
||||
dist = distanceFromLine;
|
||||
normalVector = normalize(position - closestPoint).xyz;
|
||||
}
|
||||
|
||||
float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
|
||||
float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
|
||||
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
|
||||
|
||||
float minDistance = capsuleRadius + capsuleMargin;
|
||||
|
||||
/* In case of no collision, this is the value of velocity */
|
||||
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||
|
||||
|
||||
// Check for a collision
|
||||
if( dist < minDistance )
|
||||
{
|
||||
/* Project back to surface along normal */
|
||||
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
|
||||
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||
|
||||
float3 p1 = normalize(cross(normalVector, segment));
|
||||
float3 p2 = normalize(cross(p1, normalVector));
|
||||
/* Full friction is sum of velocities in each direction of plane */
|
||||
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||
|
||||
/* Real friction is peak friction corrected by friction coefficients */
|
||||
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||
|
||||
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||
|
||||
if( approachSpeed <= 0.0f )
|
||||
forceOnVertex -= frictionVector;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Update velocity */
|
||||
float3 difference = position.xyz - previousPosition.xyz;
|
||||
velocity = difference*velocityCoefficient*isolverdt;
|
||||
}
|
||||
|
||||
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||
|
||||
/* Update external force */
|
||||
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
|
||||
|
||||
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,213 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int firstObject;
|
||||
int endObject;
|
||||
} CollisionObjectIndices;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float4 shapeTransform[4]; /* column major 4x4 matrix */
|
||||
float4 linearVelocity;
|
||||
float4 angularVelocity;
|
||||
|
||||
int softBodyIdentifier;
|
||||
int collisionShapeType;
|
||||
|
||||
|
||||
// Shape information
|
||||
// Compressed from the union
|
||||
float radius;
|
||||
float halfHeight;
|
||||
int upAxis;
|
||||
|
||||
float margin;
|
||||
float friction;
|
||||
|
||||
int padding0;
|
||||
|
||||
} CollisionShapeDescription;
|
||||
|
||||
/* From btBroadphaseProxy.h */
|
||||
__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
|
||||
|
||||
|
||||
/* Multiply column-major matrix against vector */
|
||||
float4 matrixVectorMul( float4 matrix[4], float4 vector )
|
||||
{
|
||||
float4 returnVector;
|
||||
float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
|
||||
float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
|
||||
float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
|
||||
float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
|
||||
returnVector.x = dot(row0, vector);
|
||||
returnVector.y = dot(row1, vector);
|
||||
returnVector.z = dot(row2, vector);
|
||||
returnVector.w = dot(row3, vector);
|
||||
return returnVector;
|
||||
}
|
||||
|
||||
__kernel void
|
||||
SolveCollisionsAndUpdateVelocitiesKernel(
|
||||
const int numNodes,
|
||||
const float isolverdt,
|
||||
__global int *g_vertexClothIdentifier,
|
||||
__global float4 *g_vertexPreviousPositions,
|
||||
__global float * g_perClothFriction,
|
||||
__global float * g_clothDampingFactor,
|
||||
__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
|
||||
__global CollisionShapeDescription * g_collisionObjectDetails,
|
||||
__global float4 * g_vertexForces,
|
||||
__global float4 *g_vertexVelocities,
|
||||
__global float4 *g_vertexPositions,
|
||||
__local CollisionShapeDescription *localCollisionShapes)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
|
||||
|
||||
int clothIdentifier = g_vertexClothIdentifier[nodeID];
|
||||
|
||||
// Abort if this is not a valid cloth
|
||||
if( clothIdentifier < 0 )
|
||||
return;
|
||||
|
||||
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
|
||||
float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
|
||||
float3 velocity;
|
||||
float clothFriction = g_perClothFriction[clothIdentifier];
|
||||
float dampingFactor = g_clothDampingFactor[clothIdentifier];
|
||||
float velocityCoefficient = (1.f - dampingFactor);
|
||||
CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
|
||||
|
||||
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||
if( numObjects > 0 )
|
||||
{
|
||||
/* We have some possible collisions to deal with */
|
||||
|
||||
/* First load all of the collision objects into LDS */
|
||||
int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
|
||||
if( get_local_id(0) < numObjects )
|
||||
{
|
||||
localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
|
||||
}
|
||||
}
|
||||
|
||||
/* Safe as the vertices are padded so that not more than one soft body is in a group */
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
/* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */
|
||||
if( numObjects > 0 )
|
||||
{
|
||||
velocity = (float3)(0, 0, 0);
|
||||
|
||||
|
||||
// We have some possible collisions to deal with
|
||||
for( int collision = 0; collision < numObjects; ++collision )
|
||||
{
|
||||
CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
|
||||
float colliderFriction = shapeDescription.friction;
|
||||
|
||||
if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
|
||||
{
|
||||
/* Colliding with a capsule */
|
||||
|
||||
float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
|
||||
float capsuleRadius = localCollisionShapes[collision].radius;
|
||||
float capsuleMargin = localCollisionShapes[collision].margin;
|
||||
int capsuleupAxis = localCollisionShapes[collision].upAxis;
|
||||
|
||||
float4 worldTransform[4];
|
||||
worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
|
||||
worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
|
||||
worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
|
||||
worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
|
||||
|
||||
// Correctly define capsule centerline vector
|
||||
float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||
float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
|
||||
c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
|
||||
c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
|
||||
c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
|
||||
c2.x = -c1.x;
|
||||
c2.y = -c1.y;
|
||||
c2.z = -c1.z;
|
||||
|
||||
float4 worldC1 = matrixVectorMul(worldTransform, c1);
|
||||
float4 worldC2 = matrixVectorMul(worldTransform, c2);
|
||||
float3 segment = (worldC2 - worldC1).xyz;
|
||||
|
||||
|
||||
/* compute distance of tangent to vertex along line segment in capsule */
|
||||
float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
|
||||
|
||||
float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
|
||||
float distanceFromLine = length(position - closestPoint);
|
||||
float distanceFromC1 = length(worldC1 - position);
|
||||
float distanceFromC2 = length(worldC2 - position);
|
||||
|
||||
/* Final distance from collision, point to push from, direction to push in
|
||||
for impulse force */
|
||||
float dist;
|
||||
float3 normalVector;
|
||||
if( distanceAlongSegment < 0 )
|
||||
{
|
||||
dist = distanceFromC1;
|
||||
normalVector = normalize(position - worldC1).xyz;
|
||||
} else if( distanceAlongSegment > 1.f ) {
|
||||
dist = distanceFromC2;
|
||||
normalVector = normalize(position - worldC2).xyz;
|
||||
} else {
|
||||
dist = distanceFromLine;
|
||||
normalVector = normalize(position - closestPoint).xyz;
|
||||
}
|
||||
|
||||
float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
|
||||
float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
|
||||
float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
|
||||
|
||||
float minDistance = capsuleRadius + capsuleMargin;
|
||||
|
||||
/* In case of no collision, this is the value of velocity */
|
||||
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||
|
||||
|
||||
/* Check for a collision */
|
||||
if( dist < minDistance )
|
||||
{
|
||||
/* Project back to surface along normal */
|
||||
position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
|
||||
velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
|
||||
float3 relativeVelocity = velocity - velocityOfSurfacePoint;
|
||||
|
||||
float3 p1 = normalize(cross(normalVector, segment));
|
||||
float3 p2 = normalize(cross(p1, normalVector));
|
||||
/* Full friction is sum of velocities in each direction of plane */
|
||||
float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
|
||||
|
||||
/* Real friction is peak friction corrected by friction coefficients */
|
||||
frictionVector = frictionVector * (colliderFriction*clothFriction);
|
||||
|
||||
float approachSpeed = dot(relativeVelocity, normalVector);
|
||||
|
||||
if( approachSpeed <= 0.0f )
|
||||
forceOnVertex -= frictionVector;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Update velocity */
|
||||
float3 difference = position.xyz - previousPosition.xyz;
|
||||
velocity = difference*velocityCoefficient*isolverdt;
|
||||
}
|
||||
|
||||
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||
|
||||
/* Update external force */
|
||||
g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
|
||||
|
||||
g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,55 +0,0 @@
|
||||
|
||||
MSTRINGIFY(
|
||||
|
||||
/*#define float3 float4
|
||||
|
||||
float dot3(float3 a, float3 b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}*/
|
||||
|
||||
__kernel void
|
||||
SolvePositionsFromLinksKernel(
|
||||
const int startLink,
|
||||
const int numLinks,
|
||||
const float kst,
|
||||
const float ti,
|
||||
__global int2 * g_linksVertexIndices,
|
||||
__global float * g_linksMassLSC,
|
||||
__global float * g_linksRestLengthSquared,
|
||||
__global float * g_verticesInverseMass,
|
||||
__global float4 * g_vertexPositions)
|
||||
|
||||
{
|
||||
int linkID = get_global_id(0) + startLink;
|
||||
if( get_global_id(0) < numLinks )
|
||||
{
|
||||
float massLSC = g_linksMassLSC[linkID];
|
||||
float restLengthSquared = g_linksRestLengthSquared[linkID];
|
||||
|
||||
if( massLSC > 0.0f )
|
||||
{
|
||||
int2 nodeIndices = g_linksVertexIndices[linkID];
|
||||
int node0 = nodeIndices.x;
|
||||
int node1 = nodeIndices.y;
|
||||
|
||||
float3 position0 = g_vertexPositions[node0].xyz;
|
||||
float3 position1 = g_vertexPositions[node1].xyz;
|
||||
|
||||
float inverseMass0 = g_verticesInverseMass[node0];
|
||||
float inverseMass1 = g_verticesInverseMass[node1];
|
||||
|
||||
float3 del = position1 - position0;
|
||||
float len = dot(del, del);
|
||||
float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
|
||||
position0 = position0 - del*(k*inverseMass0);
|
||||
position1 = position1 + del*(k*inverseMass1);
|
||||
|
||||
g_vertexPositions[node0] = (float4)(position0, 0.f);
|
||||
g_vertexPositions[node1] = (float4)(position1, 0.f);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,129 +0,0 @@
|
||||
/*
|
||||
Bullet Continuous Collision Detection and Physics Library
|
||||
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
MSTRINGIFY(
|
||||
|
||||
float mydot3(float4 a, float4 b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}
|
||||
|
||||
__kernel void
|
||||
SolvePositionsFromLinksKernel(
|
||||
const int startWaveInBatch,
|
||||
const int numWaves,
|
||||
const float kst,
|
||||
const float ti,
|
||||
__global int2 *g_wavefrontBatchCountsVertexCounts,
|
||||
__global int *g_vertexAddressesPerWavefront,
|
||||
__global int2 * g_linksVertexIndices,
|
||||
__global float * g_linksMassLSC,
|
||||
__global float * g_linksRestLengthSquared,
|
||||
__global float * g_verticesInverseMass,
|
||||
__global float4 * g_vertexPositions,
|
||||
__local int2 *wavefrontBatchCountsVertexCounts,
|
||||
__local float4 *vertexPositionSharedData,
|
||||
__local float *vertexInverseMassSharedData)
|
||||
{
|
||||
const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
|
||||
const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
|
||||
const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
|
||||
const int localWavefront = wavefront - firstWavefrontInBlock;
|
||||
|
||||
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
|
||||
if( wavefront < (startWaveInBatch + numWaves) )
|
||||
{
|
||||
// Load the batch counts for the wavefronts
|
||||
|
||||
int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
|
||||
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
|
||||
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
|
||||
|
||||
// Load the vertices for the wavefronts
|
||||
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||
{
|
||||
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||
|
||||
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
|
||||
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
|
||||
}
|
||||
|
||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Loop through the batches performing the solve on each in LDS
|
||||
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
|
||||
|
||||
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
|
||||
|
||||
int batch = 0;
|
||||
do
|
||||
{
|
||||
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
|
||||
int locationOfValue = baseDataLocation + laneInWavefront;
|
||||
|
||||
|
||||
// These loads should all be perfectly linear across the WF
|
||||
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
|
||||
float massLSC = g_linksMassLSC[locationOfValue];
|
||||
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
|
||||
|
||||
// LDS vertex addresses based on logical wavefront number in block and loaded index
|
||||
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
|
||||
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
|
||||
|
||||
float4 position0 = vertexPositionSharedData[vertexAddress0];
|
||||
float4 position1 = vertexPositionSharedData[vertexAddress1];
|
||||
|
||||
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
|
||||
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
|
||||
|
||||
float4 del = position1 - position0;
|
||||
float len = mydot3(del, del);
|
||||
|
||||
float k = 0;
|
||||
if( massLSC > 0.0f )
|
||||
{
|
||||
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
|
||||
}
|
||||
|
||||
position0 = position0 - del*(k*inverseMass0);
|
||||
position1 = position1 + del*(k*inverseMass1);
|
||||
|
||||
// Ensure compiler does not re-order memory operations
|
||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
vertexPositionSharedData[vertexAddress0] = position0;
|
||||
vertexPositionSharedData[vertexAddress1] = position1;
|
||||
|
||||
// Ensure compiler does not re-order memory operations
|
||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
++batch;
|
||||
} while( batch < batchesWithinWavefront );
|
||||
|
||||
// Update the global memory vertices for the wavefronts
|
||||
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
|
||||
{
|
||||
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
|
||||
|
||||
g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,44 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
/*#define float3 float4
|
||||
|
||||
float dot3(float3 a, float3 b)
|
||||
{
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}*/
|
||||
|
||||
__kernel void
|
||||
UpdateConstantsKernel(
|
||||
const int numLinks,
|
||||
__global int2 * g_linksVertexIndices,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float * g_vertexInverseMasses,
|
||||
__global float * g_linksMaterialLSC,
|
||||
__global float * g_linksMassLSC,
|
||||
__global float * g_linksRestLengthSquared,
|
||||
__global float * g_linksRestLengths)
|
||||
{
|
||||
int linkID = get_global_id(0);
|
||||
if( linkID < numLinks )
|
||||
{
|
||||
int2 nodeIndices = g_linksVertexIndices[linkID];
|
||||
int node0 = nodeIndices.x;
|
||||
int node1 = nodeIndices.y;
|
||||
float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
|
||||
|
||||
float3 position0 = g_vertexPositions[node0].xyz;
|
||||
float3 position1 = g_vertexPositions[node1].xyz;
|
||||
float inverseMass0 = g_vertexInverseMasses[node0];
|
||||
float inverseMass1 = g_vertexInverseMasses[node1];
|
||||
|
||||
float3 difference = position0 - position1;
|
||||
float length2 = dot(difference, difference);
|
||||
float length = sqrt(length2);
|
||||
|
||||
g_linksRestLengths[linkID] = length;
|
||||
g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
|
||||
g_linksRestLengthSquared[linkID] = length*length;
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,40 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
//#define float3 float4
|
||||
|
||||
__kernel void
|
||||
updateVelocitiesFromPositionsWithVelocitiesKernel(
|
||||
int numNodes,
|
||||
float isolverdt,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float4 * g_vertexPreviousPositions,
|
||||
__global int * g_vertexClothIndices,
|
||||
__global float *g_clothVelocityCorrectionCoefficients,
|
||||
__global float * g_clothDampingFactor,
|
||||
__global float4 * g_vertexVelocities,
|
||||
__global float4 * g_vertexForces)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
float3 position = g_vertexPositions[nodeID].xyz;
|
||||
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
|
||||
float3 velocity = g_vertexVelocities[nodeID].xyz;
|
||||
int clothIndex = g_vertexClothIndices[nodeID];
|
||||
float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
|
||||
float dampingFactor = g_clothDampingFactor[clothIndex];
|
||||
float velocityCoefficient = (1.f - dampingFactor);
|
||||
|
||||
float3 difference = position - previousPosition;
|
||||
|
||||
velocity += difference*velocityCorrectionCoefficient*isolverdt;
|
||||
|
||||
// Damp the velocity
|
||||
velocity *= velocityCoefficient;
|
||||
|
||||
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,103 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
//#define float3 float4
|
||||
|
||||
/*float length3(float3 a)
|
||||
{
|
||||
a.w = 0;
|
||||
return length(a);
|
||||
}
|
||||
|
||||
float normalize3(float3 a)
|
||||
{
|
||||
a.w = 0;
|
||||
return normalize(a);
|
||||
}*/
|
||||
|
||||
__kernel void
|
||||
ResetNormalsAndAreasKernel(
|
||||
const unsigned int numNodes,
|
||||
__global float4 * g_vertexNormals,
|
||||
__global float * g_vertexArea)
|
||||
{
|
||||
if( get_global_id(0) < numNodes )
|
||||
{
|
||||
g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
|
||||
g_vertexArea[get_global_id(0)] = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void
|
||||
UpdateSoftBodiesKernel(
|
||||
const unsigned int startFace,
|
||||
const unsigned int numFaces,
|
||||
__global int4 * g_triangleVertexIndexSet,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float4 * g_vertexNormals,
|
||||
__global float * g_vertexArea,
|
||||
__global float4 * g_triangleNormals,
|
||||
__global float * g_triangleArea)
|
||||
{
|
||||
int faceID = get_global_id(0) + startFace;
|
||||
if( get_global_id(0) < numFaces )
|
||||
{
|
||||
int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
|
||||
int nodeIndex0 = triangleIndexSet.x;
|
||||
int nodeIndex1 = triangleIndexSet.y;
|
||||
int nodeIndex2 = triangleIndexSet.z;
|
||||
|
||||
float3 node0 = g_vertexPositions[nodeIndex0].xyz;
|
||||
float3 node1 = g_vertexPositions[nodeIndex1].xyz;
|
||||
float3 node2 = g_vertexPositions[nodeIndex2].xyz;
|
||||
float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
|
||||
float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
|
||||
float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
|
||||
float vertexArea0 = g_vertexArea[nodeIndex0];
|
||||
float vertexArea1 = g_vertexArea[nodeIndex1];
|
||||
float vertexArea2 = g_vertexArea[nodeIndex2];
|
||||
|
||||
float3 vector0 = node1 - node0;
|
||||
float3 vector1 = node2 - node0;
|
||||
|
||||
float3 faceNormal = cross(vector0.xyz, vector1.xyz);
|
||||
float triangleArea = length(faceNormal);
|
||||
|
||||
nodeNormal0 = nodeNormal0 + faceNormal;
|
||||
nodeNormal1 = nodeNormal1 + faceNormal;
|
||||
nodeNormal2 = nodeNormal2 + faceNormal;
|
||||
vertexArea0 = vertexArea0 + triangleArea;
|
||||
vertexArea1 = vertexArea1 + triangleArea;
|
||||
vertexArea2 = vertexArea2 + triangleArea;
|
||||
|
||||
g_triangleNormals[faceID] = (float4)(normalize(faceNormal), 0.f);
|
||||
g_vertexNormals[nodeIndex0] = (float4)(nodeNormal0, 0.f);
|
||||
g_vertexNormals[nodeIndex1] = (float4)(nodeNormal1, 0.f);
|
||||
g_vertexNormals[nodeIndex2] = (float4)(nodeNormal2, 0.f);
|
||||
g_triangleArea[faceID] = triangleArea;
|
||||
g_vertexArea[nodeIndex0] = vertexArea0;
|
||||
g_vertexArea[nodeIndex1] = vertexArea1;
|
||||
g_vertexArea[nodeIndex2] = vertexArea2;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void
|
||||
NormalizeNormalsAndAreasKernel(
|
||||
const unsigned int numNodes,
|
||||
__global int * g_vertexTriangleCount,
|
||||
__global float4 * g_vertexNormals,
|
||||
__global float * g_vertexArea)
|
||||
{
|
||||
if( get_global_id(0) < numNodes )
|
||||
{
|
||||
float4 normal = g_vertexNormals[get_global_id(0)];
|
||||
float area = g_vertexArea[get_global_id(0)];
|
||||
int numTriangles = g_vertexTriangleCount[get_global_id(0)];
|
||||
|
||||
float vectorLength = length(normal);
|
||||
|
||||
g_vertexNormals[get_global_id(0)] = normalize(normal);
|
||||
g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,36 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
//#define float3 float4
|
||||
|
||||
__kernel void
|
||||
updateVelocitiesFromPositionsWithoutVelocitiesKernel(
|
||||
const int numNodes,
|
||||
const float isolverdt,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float4 * g_vertexPreviousPositions,
|
||||
__global int * g_vertexClothIndices,
|
||||
__global float * g_clothDampingFactor,
|
||||
__global float4 * g_vertexVelocities,
|
||||
__global float4 * g_vertexForces)
|
||||
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
float3 position = g_vertexPositions[nodeID].xyz;
|
||||
float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
|
||||
float3 velocity = g_vertexVelocities[nodeID].xyz;
|
||||
int clothIndex = g_vertexClothIndices[nodeID];
|
||||
float dampingFactor = g_clothDampingFactor[clothIndex];
|
||||
float velocityCoefficient = (1.f - dampingFactor);
|
||||
|
||||
float3 difference = position - previousPosition;
|
||||
|
||||
velocity = difference*velocityCoefficient*isolverdt;
|
||||
|
||||
g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
|
||||
g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,26 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
//#define float3 float4
|
||||
|
||||
__kernel void
|
||||
UpdatePositionsFromVelocitiesKernel(
|
||||
const int numNodes,
|
||||
const float solverSDT,
|
||||
__global float4 * g_vertexVelocities,
|
||||
__global float4 * g_vertexPreviousPositions,
|
||||
__global float4 * g_vertexCurrentPosition)
|
||||
{
|
||||
int vertexID = get_global_id(0);
|
||||
if( vertexID < numNodes )
|
||||
{
|
||||
float3 previousPosition = g_vertexPreviousPositions[vertexID].xyz;
|
||||
float3 velocity = g_vertexVelocities[vertexID].xyz;
|
||||
|
||||
float3 newPosition = previousPosition + velocity*solverSDT;
|
||||
|
||||
g_vertexCurrentPosition[vertexID] = (float4)(newPosition, 0.f);
|
||||
g_vertexPreviousPositions[vertexID] = (float4)(newPosition, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -1,45 +0,0 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
__kernel void
|
||||
VSolveLinksKernel(
|
||||
int startLink,
|
||||
int numLinks,
|
||||
float kst,
|
||||
__global int2 * g_linksVertexIndices,
|
||||
__global float * g_linksLengthRatio,
|
||||
__global float4 * g_linksCurrentLength,
|
||||
__global float * g_vertexInverseMass,
|
||||
__global float4 * g_vertexVelocity)
|
||||
{
|
||||
int linkID = get_global_id(0) + startLink;
|
||||
if( get_global_id(0) < numLinks )
|
||||
{
|
||||
int2 nodeIndices = g_linksVertexIndices[linkID];
|
||||
int node0 = nodeIndices.x;
|
||||
int node1 = nodeIndices.y;
|
||||
|
||||
float linkLengthRatio = g_linksLengthRatio[linkID];
|
||||
float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz;
|
||||
|
||||
float3 vertexVelocity0 = g_vertexVelocity[node0].xyz;
|
||||
float3 vertexVelocity1 = g_vertexVelocity[node1].xyz;
|
||||
|
||||
float vertexInverseMass0 = g_vertexInverseMass[node0];
|
||||
float vertexInverseMass1 = g_vertexInverseMass[node1];
|
||||
|
||||
float3 nodeDifference = vertexVelocity0 - vertexVelocity1;
|
||||
float dotResult = dot(linkCurrentLength, nodeDifference);
|
||||
float j = -dotResult*linkLengthRatio*kst;
|
||||
|
||||
float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
|
||||
float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
|
||||
|
||||
vertexVelocity0 += velocityChange0;
|
||||
vertexVelocity1 -= velocityChange1;
|
||||
|
||||
g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f);
|
||||
g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -6,6 +6,18 @@ float adot3(float4 a, float4 b)
|
||||
return a.x*b.x + a.y*b.y + a.z*b.z;
|
||||
}
|
||||
|
||||
float alength3(float4 a)
|
||||
{
|
||||
a.w = 0;
|
||||
return length(a);
|
||||
}
|
||||
|
||||
float4 anormalize3(float4 a)
|
||||
{
|
||||
a.w = 0;
|
||||
return normalize(a);
|
||||
}
|
||||
|
||||
float4 projectOnAxis( float4 v, float4 a )
|
||||
{
|
||||
return (a*adot3(v, a));
|
||||
@@ -53,36 +65,35 @@ ApplyForcesKernel(
|
||||
|
||||
g_vertexVelocity[nodeID] = nodeV;
|
||||
|
||||
float4 relativeWindVelocity = nodeV - clothWindVelocity;
|
||||
float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
|
||||
// Aerodynamics
|
||||
float4 rel_v = nodeV - clothWindVelocity;
|
||||
float rel_v_len = alength3(rel_v);
|
||||
float rel_v2 = dot(rel_v, rel_v);
|
||||
|
||||
if( relativeSpeedSquared > epsilon )
|
||||
if( rel_v2 > epsilon )
|
||||
{
|
||||
// Correct direction of normal relative to wind direction and get dot product
|
||||
normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
|
||||
float dvNormal = dot(normal, relativeWindVelocity);
|
||||
if( dvNormal > 0 )
|
||||
{
|
||||
float4 force = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||
float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
|
||||
float c1 = c0 * mediumDensity;
|
||||
force += normal * (-c1 * liftFactor);
|
||||
force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
|
||||
float4 rel_v_nrm = anormalize3(rel_v);
|
||||
float4 nrm = normal;
|
||||
|
||||
float dtim = solverdt * nodeIM;
|
||||
float4 forceDTIM = force * dtim;
|
||||
nrm = nrm * (dot(nrm, rel_v) < 0 ? -1.f : 1.f);
|
||||
|
||||
float4 nodeFPlusForce = nodeF + force;
|
||||
float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||
float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f);
|
||||
|
||||
// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
|
||||
float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
|
||||
float n_dot_v = dot(nrm, rel_v_nrm);
|
||||
|
||||
nodeF = nodeFPlusForce;
|
||||
//if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
|
||||
// nodeF = nodeFMinus;
|
||||
// drag force
|
||||
if ( dragFactor > 0.f )
|
||||
fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm;
|
||||
|
||||
// lift force
|
||||
// Check angle of attack
|
||||
// cos(10<EFBFBD>) = 0.98480
|
||||
if ( 0 < n_dot_v && n_dot_v < 0.98480f)
|
||||
fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm));
|
||||
|
||||
nodeF += fDrag + fLift;
|
||||
g_vertexForceAccumulator[nodeID] = nodeF;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
__kernel void
|
||||
UpdateFixedVertexPositions(
|
||||
const uint numNodes,
|
||||
__global int * g_anchorIndex,
|
||||
__global float4 * g_vertexPositions,
|
||||
__global float4 * g_anchorPositions GUID_ARG)
|
||||
{
|
||||
unsigned int nodeID = get_global_id(0);
|
||||
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
int anchorIndex = g_anchorIndex[nodeID];
|
||||
float4 position = g_vertexPositions[nodeID];
|
||||
|
||||
if ( anchorIndex >= 0 )
|
||||
{
|
||||
float4 anchorPosition = g_anchorPositions[anchorIndex];
|
||||
g_vertexPositions[nodeID] = anchorPosition;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
);
|
||||
@@ -5,17 +5,10 @@
|
||||
#include "btSoftBodySolverVertexBuffer_OpenGL.h"
|
||||
#include "BulletSoftBody/btSoftBody.h"
|
||||
|
||||
#if (0)//CL_VERSION_1_1 == 1)
|
||||
//OpenCL 1.1 kernels use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static char* OutputToVertexArrayCLString =
|
||||
#include "OpenCLC/OutputToVertexArray.cl"
|
||||
#else
|
||||
////OpenCL 1.0 kernels don't use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static char* OutputToVertexArrayCLString =
|
||||
#include "OpenCLC10/OutputToVertexArray.cl"
|
||||
#endif //CL_VERSION_1_1
|
||||
|
||||
|
||||
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
|
||||
@@ -105,8 +98,8 @@ bool btSoftBodySolverOutputCLtoGL::buildShaders()
|
||||
if( m_shadersInitialized )
|
||||
return true;
|
||||
|
||||
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" );
|
||||
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" );
|
||||
outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
|
||||
outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
|
||||
|
||||
|
||||
if( returnVal )
|
||||
|
||||
@@ -45,7 +45,7 @@ public:
|
||||
|
||||
virtual bool moveToAccelerator();
|
||||
|
||||
virtual bool moveFromAccelerator();
|
||||
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -20,10 +20,13 @@ subject to the following restrictions:
|
||||
#include "btSoftBodySolver_OpenCL.h"
|
||||
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
|
||||
#include "BulletSoftBody/btSoftBody.h"
|
||||
#include "BulletSoftBody/btSoftBodyInternals.h"
|
||||
#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
|
||||
#include "BulletCollision/COllisionShapes/btSphereShape.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include <limits.h>
|
||||
|
||||
|
||||
#define BT_SUPPRESS_OPENCL_ASSERTS
|
||||
|
||||
#ifdef USE_MINICL
|
||||
@@ -36,8 +39,7 @@ subject to the following restrictions:
|
||||
#endif //__APPLE__
|
||||
#endif//USE_MINICL
|
||||
|
||||
#define BT_DEFAULT_WORKGROUPSIZE 128
|
||||
|
||||
#define BT_DEFAULT_WORKGROUPSIZE 64
|
||||
|
||||
|
||||
#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
|
||||
@@ -45,32 +47,6 @@ subject to the following restrictions:
|
||||
|
||||
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
|
||||
|
||||
#if (0)//CL_VERSION_1_1 == 1)
|
||||
//OpenCL 1.1 kernels use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* PrepareLinksCLString =
|
||||
#include "OpenCLC/PrepareLinks.cl"
|
||||
static const char* UpdatePositionsFromVelocitiesCLString =
|
||||
#include "OpenCLC/UpdatePositionsFromVelocities.cl"
|
||||
static const char* SolvePositionsCLString =
|
||||
#include "OpenCLC/SolvePositions.cl"
|
||||
static const char* UpdateNodesCLString =
|
||||
#include "OpenCLC/UpdateNodes.cl"
|
||||
static const char* UpdatePositionsCLString =
|
||||
#include "OpenCLC/UpdatePositions.cl"
|
||||
static const char* UpdateConstantsCLString =
|
||||
#include "OpenCLC/UpdateConstants.cl"
|
||||
static const char* IntegrateCLString =
|
||||
#include "OpenCLC/Integrate.cl"
|
||||
static const char* ApplyForcesCLString =
|
||||
#include "OpenCLC/ApplyForces.cl"
|
||||
static const char* UpdateNormalsCLString =
|
||||
#include "OpenCLC/UpdateNormals.cl"
|
||||
static const char* VSolveLinksCLString =
|
||||
#include "OpenCLC/VSolveLinks.cl"
|
||||
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
|
||||
#include "OpenCLC/SolveCollisionsAndUpdateVelocities.cl"
|
||||
#else
|
||||
////OpenCL 1.0 kernels don't use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* PrepareLinksCLString =
|
||||
@@ -89,13 +65,14 @@ static const char* IntegrateCLString =
|
||||
#include "OpenCLC10/Integrate.cl"
|
||||
static const char* ApplyForcesCLString =
|
||||
#include "OpenCLC10/ApplyForces.cl"
|
||||
static const char* UpdateFixedVertexPositionsCLString =
|
||||
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
|
||||
static const char* UpdateNormalsCLString =
|
||||
#include "OpenCLC10/UpdateNormals.cl"
|
||||
static const char* VSolveLinksCLString =
|
||||
#include "OpenCLC10/VSolveLinks.cl"
|
||||
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
|
||||
#include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
|
||||
#endif //CL_VERSION_1_1
|
||||
|
||||
|
||||
btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) :
|
||||
@@ -141,18 +118,42 @@ bool btSoftBodyVertexDataOpenCL::moveToAccelerator()
|
||||
return success;
|
||||
}
|
||||
|
||||
bool btSoftBodyVertexDataOpenCL::moveFromAccelerator()
|
||||
bool btSoftBodyVertexDataOpenCL::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
|
||||
{
|
||||
bool success = true;
|
||||
success = success && m_clClothIdentifier.moveFromGPU();
|
||||
success = success && m_clVertexPosition.moveFromGPU();
|
||||
success = success && m_clVertexPreviousPosition.moveFromGPU();
|
||||
success = success && m_clVertexVelocity.moveFromGPU();
|
||||
success = success && m_clVertexForceAccumulator.moveFromGPU();
|
||||
success = success && m_clVertexNormal.moveFromGPU();
|
||||
success = success && m_clVertexInverseMass.moveFromGPU();
|
||||
success = success && m_clVertexArea.moveFromGPU();
|
||||
success = success && m_clVertexTriangleCount.moveFromGPU();
|
||||
|
||||
if (!bCopy)
|
||||
{
|
||||
success = success && m_clClothIdentifier.moveFromGPU();
|
||||
success = success && m_clVertexPosition.moveFromGPU();
|
||||
success = success && m_clVertexPreviousPosition.moveFromGPU();
|
||||
success = success && m_clVertexVelocity.moveFromGPU();
|
||||
success = success && m_clVertexForceAccumulator.moveFromGPU();
|
||||
success = success && m_clVertexNormal.moveFromGPU();
|
||||
success = success && m_clVertexInverseMass.moveFromGPU();
|
||||
success = success && m_clVertexArea.moveFromGPU();
|
||||
success = success && m_clVertexTriangleCount.moveFromGPU();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bCopyMinimum)
|
||||
{
|
||||
success = success && m_clVertexPosition.copyFromGPU();
|
||||
success = success && m_clVertexNormal.copyFromGPU();
|
||||
}
|
||||
else
|
||||
{
|
||||
success = success && m_clClothIdentifier.copyFromGPU();
|
||||
success = success && m_clVertexPosition.copyFromGPU();
|
||||
success = success && m_clVertexPreviousPosition.copyFromGPU();
|
||||
success = success && m_clVertexVelocity.copyFromGPU();
|
||||
success = success && m_clVertexForceAccumulator.copyFromGPU();
|
||||
success = success && m_clVertexNormal.copyFromGPU();
|
||||
success = success && m_clVertexInverseMass.copyFromGPU();
|
||||
success = success && m_clVertexArea.copyFromGPU();
|
||||
success = success && m_clVertexTriangleCount.copyFromGPU();
|
||||
}
|
||||
}
|
||||
|
||||
if( success )
|
||||
m_onGPU = true;
|
||||
@@ -160,9 +161,6 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator()
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx)
|
||||
:m_cqCommandQue(queue),
|
||||
m_clLinks( queue, ctx, &m_links, false ),
|
||||
@@ -602,11 +600,12 @@ void btSoftBodyTriangleDataOpenCL::generateBatches()
|
||||
|
||||
|
||||
|
||||
btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) :
|
||||
btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
|
||||
m_linkData(queue, ctx),
|
||||
m_vertexData(queue, ctx),
|
||||
m_triangleData(queue, ctx),
|
||||
m_clFunctions(queue, ctx),
|
||||
m_defaultCLFunctions(queue, ctx),
|
||||
m_currentCLFunctions(&m_defaultCLFunctions),
|
||||
m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
|
||||
m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
|
||||
m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
|
||||
@@ -617,10 +616,14 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
|
||||
m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ),
|
||||
m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ),
|
||||
m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ),
|
||||
m_clAnchorPosition( queue, ctx, &m_anchorPosition, true ),
|
||||
m_clAnchorIndex( queue, ctx, &m_anchorIndex, true),
|
||||
m_cqCommandQue( queue ),
|
||||
m_cxMainContext(ctx),
|
||||
m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE)
|
||||
m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE),
|
||||
m_bUpdateAnchoredNodePos(bUpdateAchchoredNodePos)
|
||||
{
|
||||
|
||||
// Initial we will clearly need to update solver constants
|
||||
// For now this is global for the cloths linked with this solver - we should probably make this body specific
|
||||
// for performance in future once we understand more clearly when constants need to be updated
|
||||
@@ -643,6 +646,7 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex
|
||||
m_normalizeNormalsAndAreasKernel = 0;
|
||||
m_outputToVertexArrayKernel = 0;
|
||||
m_applyForcesKernel = 0;
|
||||
m_updateFixedVertexPositionsKernel = 0;
|
||||
}
|
||||
|
||||
btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver()
|
||||
@@ -666,14 +670,16 @@ void btOpenCLSoftBodySolver::releaseKernels()
|
||||
RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel );
|
||||
RELEASE_CL_KERNEL( m_outputToVertexArrayKernel );
|
||||
RELEASE_CL_KERNEL( m_applyForcesKernel );
|
||||
RELEASE_CL_KERNEL( m_updateFixedVertexPositionsKernel );
|
||||
|
||||
m_shadersInitialized = false;
|
||||
}
|
||||
|
||||
void btOpenCLSoftBodySolver::copyBackToSoftBodies()
|
||||
void btOpenCLSoftBodySolver::copyBackToSoftBodies(bool bMove)
|
||||
{
|
||||
|
||||
// Move the vertex data back to the host first
|
||||
m_vertexData.moveFromAccelerator();
|
||||
m_vertexData.moveFromAccelerator(!bMove);
|
||||
|
||||
// Loop over soft bodies, copying all the vertex positions back for each body in turn
|
||||
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
||||
@@ -688,15 +694,16 @@ void btOpenCLSoftBodySolver::copyBackToSoftBodies()
|
||||
for( int vertex = 0; vertex < numVertices; ++vertex )
|
||||
{
|
||||
using Vectormath::Aos::Point3;
|
||||
Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] );
|
||||
Point3 vertexPosition( m_vertexData.getVertexPositions()[firstVertex + vertex] );
|
||||
Point3 normal(m_vertexData.getNormal(firstVertex + vertex));
|
||||
|
||||
softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() );
|
||||
softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() );
|
||||
softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() );
|
||||
|
||||
softBody->m_nodes[vertex].m_n.setX( vertexPosition.getX() );
|
||||
softBody->m_nodes[vertex].m_n.setY( vertexPosition.getY() );
|
||||
softBody->m_nodes[vertex].m_n.setZ( vertexPosition.getZ() );
|
||||
softBody->m_nodes[vertex].m_n.setX( normal.getX() );
|
||||
softBody->m_nodes[vertex].m_n.setY( normal.getY() );
|
||||
softBody->m_nodes[vertex].m_n.setZ( normal.getZ() );
|
||||
}
|
||||
}
|
||||
} // btOpenCLSoftBodySolver::copyBackToSoftBodies
|
||||
@@ -710,7 +717,10 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
|
||||
getTriangleData().clear();
|
||||
getLinkData().clear();
|
||||
m_softBodySet.resize(0);
|
||||
m_anchorIndex.clear();
|
||||
|
||||
int maxPiterations = 0;
|
||||
int maxViterations = 0;
|
||||
|
||||
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
|
||||
{
|
||||
@@ -759,6 +769,8 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
|
||||
float vertexInverseMass = softBody->m_nodes[vertex].m_im;
|
||||
desc.setInverseMass(vertexInverseMass);
|
||||
getVertexData().setVertexAt( desc, firstVertex + vertex );
|
||||
|
||||
m_anchorIndex.push_back(-1.0);
|
||||
}
|
||||
|
||||
// Copy triangles similarly
|
||||
@@ -805,13 +817,76 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
|
||||
newSoftBody->setMaxTriangles( maxTriangles );
|
||||
newSoftBody->setFirstLink( firstLink );
|
||||
newSoftBody->setNumLinks( numLinks );
|
||||
|
||||
// Find maximum piterations and viterations
|
||||
int piterations = softBody->m_cfg.piterations;
|
||||
|
||||
if ( piterations > maxPiterations )
|
||||
maxPiterations = piterations;
|
||||
|
||||
int viterations = softBody->m_cfg.viterations;
|
||||
|
||||
if ( viterations > maxViterations )
|
||||
maxViterations = viterations;
|
||||
|
||||
// zero mass
|
||||
for( int vertex = 0; vertex < numVertices; ++vertex )
|
||||
{
|
||||
if ( softBody->m_nodes[vertex].m_im == 0 )
|
||||
{
|
||||
AnchorNodeInfoCL nodeInfo;
|
||||
nodeInfo.clVertexIndex = firstVertex + vertex;
|
||||
nodeInfo.pNode = &softBody->m_nodes[vertex];
|
||||
|
||||
m_anchorNodeInfoArray.push_back(nodeInfo);
|
||||
}
|
||||
}
|
||||
|
||||
// anchor position
|
||||
if ( numVertices > 0 )
|
||||
{
|
||||
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
|
||||
{
|
||||
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
|
||||
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
|
||||
|
||||
AnchorNodeInfoCL nodeInfo;
|
||||
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
|
||||
nodeInfo.pNode = anchorNode;
|
||||
|
||||
m_anchorNodeInfoArray.push_back(nodeInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
m_anchorPosition.clear();
|
||||
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
|
||||
|
||||
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
|
||||
{
|
||||
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
|
||||
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
|
||||
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
|
||||
}
|
||||
|
||||
updateConstants(0.f);
|
||||
|
||||
// set position and velocity iterations
|
||||
setNumberOfPositionIterations(maxPiterations);
|
||||
setNumberOfVelocityIterations(maxViterations);
|
||||
|
||||
// set wind velocity
|
||||
m_perClothWindVelocity.resize( m_softBodySet.size() );
|
||||
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
||||
{
|
||||
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
|
||||
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
|
||||
}
|
||||
|
||||
m_clPerClothWindVelocity.changedOnCPU();
|
||||
|
||||
// generate batches
|
||||
m_linkData.generateBatches();
|
||||
m_triangleData.generateBatches();
|
||||
|
||||
@@ -861,7 +936,6 @@ void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
|
||||
|
||||
void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
|
||||
ciErrNum = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices);
|
||||
@@ -882,7 +956,6 @@ void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
|
||||
|
||||
void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle);
|
||||
ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles);
|
||||
@@ -948,9 +1021,34 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath
|
||||
}
|
||||
}
|
||||
|
||||
void btOpenCLSoftBodySolver::updateFixedVertexPositions()
|
||||
{
|
||||
// Ensure data is on accelerator
|
||||
m_vertexData.moveToAccelerator();
|
||||
m_clAnchorPosition.moveToGPU();
|
||||
m_clAnchorIndex.moveToGPU();
|
||||
|
||||
cl_int ciErrNum ;
|
||||
int numVerts = m_vertexData.getNumVertices();
|
||||
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel, 0, sizeof(int), &numVerts);
|
||||
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,1, sizeof(cl_mem), &m_clAnchorIndex.m_buffer);
|
||||
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
|
||||
ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,3, sizeof(cl_mem), &m_clAnchorPosition.m_buffer);
|
||||
|
||||
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
|
||||
if (numWorkItems)
|
||||
{
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateFixedVertexPositionsKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0);
|
||||
if( ciErrNum != CL_SUCCESS )
|
||||
{
|
||||
btAssert( 0 && "enqueueNDRangeKernel(m_updateFixedVertexPositionsKernel)");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void btOpenCLSoftBodySolver::applyForces( float solverdt )
|
||||
{
|
||||
|
||||
// Ensure data is on accelerator
|
||||
m_vertexData.moveToAccelerator();
|
||||
m_clPerClothAcceleration.moveToGPU();
|
||||
@@ -976,6 +1074,7 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
|
||||
ciErrNum = clSetKernelArg(m_applyForcesKernel,11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer);
|
||||
ciErrNum = clSetKernelArg(m_applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
|
||||
ciErrNum = clSetKernelArg(m_applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
|
||||
|
||||
size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
|
||||
if (numWorkItems)
|
||||
{
|
||||
@@ -993,8 +1092,6 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
|
||||
*/
|
||||
void btOpenCLSoftBodySolver::integrate( float solverdt )
|
||||
{
|
||||
|
||||
|
||||
// Ensure data is on accelerator
|
||||
m_vertexData.moveToAccelerator();
|
||||
|
||||
@@ -1186,7 +1283,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
|
||||
updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt );
|
||||
}
|
||||
|
||||
// Solve drift
|
||||
// Solve position
|
||||
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
|
||||
{
|
||||
for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
|
||||
@@ -1210,7 +1307,6 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
|
||||
// Kernel dispatches
|
||||
void btOpenCLSoftBodySolver::prepareLinks()
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
int numLinks = m_linkData.getNumLinks();
|
||||
ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks);
|
||||
@@ -1231,7 +1327,6 @@ void btOpenCLSoftBodySolver::prepareLinks()
|
||||
|
||||
void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
int numVerts = m_vertexData.getNumVertices();
|
||||
ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts);
|
||||
@@ -1251,7 +1346,6 @@ void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
|
||||
|
||||
void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startLink);
|
||||
ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numLinks);
|
||||
@@ -1275,7 +1369,6 @@ void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks,
|
||||
|
||||
void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 0, sizeof(int), &startLink);
|
||||
ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 1, sizeof(int), &numLinks);
|
||||
@@ -1297,7 +1390,6 @@ void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks,
|
||||
|
||||
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
int numVerts = m_vertexData.getNumVertices();
|
||||
ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts);
|
||||
@@ -1322,7 +1414,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float
|
||||
|
||||
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
|
||||
{
|
||||
|
||||
cl_int ciErrNum;
|
||||
int numVerts = m_vertexData.getNumVertices();
|
||||
ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts);
|
||||
@@ -1347,7 +1438,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( flo
|
||||
|
||||
void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
|
||||
{
|
||||
|
||||
// Copy kernel parameters to GPU
|
||||
m_vertexData.moveToAccelerator();
|
||||
m_clPerClothFriction.moveToGPU();
|
||||
@@ -1355,7 +1445,6 @@ void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt
|
||||
m_clPerClothCollisionObjects.moveToGPU();
|
||||
m_clCollisionObjectDetails.moveToGPU();
|
||||
|
||||
|
||||
cl_int ciErrNum;
|
||||
int numVerts = m_vertexData.getNumVertices();
|
||||
ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
|
||||
@@ -1446,7 +1535,7 @@ void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody
|
||||
|
||||
|
||||
|
||||
cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros )
|
||||
cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ,const char* orgSrcFileNameForCaching)
|
||||
{
|
||||
printf("compiling kernelName: %s ",kernelName);
|
||||
cl_kernel kernel=0;
|
||||
@@ -1554,20 +1643,25 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep )
|
||||
m_clPerClothCollisionObjects.changedOnCPU();
|
||||
m_collisionObjectDetails.clear();
|
||||
|
||||
if ( m_bUpdateAnchoredNodePos )
|
||||
{
|
||||
BT_PROFILE("perClothWindVelocity");
|
||||
// Fill the force arrays with current acceleration data etc
|
||||
m_perClothWindVelocity.resize( m_softBodySet.size() );
|
||||
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
||||
// In OpenCL cloth solver, if softbody node has zero inverse mass(infinite mass) or anchor attached,
|
||||
// we need to update the node position in case the node or anchor is animated externally.
|
||||
// If there is no such node, we can eliminate the unnecessary CPU-to-GPU data trasferring.
|
||||
for ( int i = 0; i < m_anchorNodeInfoArray.size(); i++ )
|
||||
{
|
||||
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
|
||||
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[i];
|
||||
btSoftBody::Node* node = anchorNodeInfo.pNode;
|
||||
|
||||
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
|
||||
using Vectormath::Aos::Point3;
|
||||
Point3 pos((float)node->m_x.getX(), (float)node->m_x.getY(), (float)node->m_x.getZ());
|
||||
m_anchorPosition[i] = pos;
|
||||
}
|
||||
}
|
||||
{
|
||||
BT_PROFILE("changedOnCPU");
|
||||
m_clPerClothWindVelocity.changedOnCPU();
|
||||
|
||||
if ( m_anchorNodeInfoArray.size() > 0 )
|
||||
m_clAnchorPosition.changedOnCPU();
|
||||
|
||||
updateFixedVertexPositions();
|
||||
}
|
||||
|
||||
{
|
||||
@@ -1601,7 +1695,7 @@ static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
|
||||
|
||||
void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
|
||||
{
|
||||
float scalarMargin = this->getSoftBody()->getCollisionShape()->getMargin();
|
||||
float scalarMargin = (float)getSoftBody()->getCollisionShape()->getMargin();
|
||||
btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin );
|
||||
m_softBody->m_bounds[0] = lowerBound - vectorMargin;
|
||||
m_softBody->m_bounds[1] = upperBound + vectorMargin;
|
||||
@@ -1641,7 +1735,8 @@ void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollision
|
||||
newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity());
|
||||
m_collisionObjectDetails.push_back( newCollisionShapeDescription );
|
||||
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
printf("Unsupported collision shape type\n");
|
||||
//btAssert(0 && "Unsupported collision shape type\n");
|
||||
}
|
||||
@@ -1688,32 +1783,36 @@ bool btOpenCLSoftBodySolver::checkInitialized()
|
||||
|
||||
bool btOpenCLSoftBodySolver::buildShaders()
|
||||
{
|
||||
// Ensure current kernels are released first
|
||||
releaseKernels();
|
||||
|
||||
if( m_shadersInitialized )
|
||||
return true;
|
||||
|
||||
m_clFunctions.clearKernelCompilationFailures();
|
||||
const char* additionalMacros="";
|
||||
|
||||
m_prepareLinksKernel = m_clFunctions.compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel" );
|
||||
m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" );
|
||||
m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" );
|
||||
m_vSolveLinksKernel = m_clFunctions.compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" );
|
||||
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" );
|
||||
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" );
|
||||
m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" );
|
||||
m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" );
|
||||
m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" );
|
||||
// Ensure current kernels are released first
|
||||
releaseKernels();
|
||||
|
||||
m_currentCLFunctions->clearKernelCompilationFailures();
|
||||
|
||||
m_prepareLinksKernel = m_currentCLFunctions->compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel",additionalMacros,"OpenCLC10/PrepareLinks.cl" );
|
||||
m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
|
||||
m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel",additionalMacros,"OpenCLC10/SolvePositions.cl" );
|
||||
m_vSolveLinksKernel = m_currentCLFunctions->compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" ,additionalMacros,"OpenCLC10/VSolveLinks.cl");
|
||||
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdateNodes.cl");
|
||||
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositions.cl");
|
||||
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ,additionalMacros,"OpenCLC10/SolveCollisionsAndUpdateVelocities.cl");
|
||||
m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ,additionalMacros,"OpenCLC10/Integrate.cl");
|
||||
m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ,additionalMacros,"OpenCLC10/ApplyForces.cl");
|
||||
m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" , additionalMacros, "OpenCLC10/UpdateFixedVertexPositions.cl");
|
||||
|
||||
// TODO: Rename to UpdateSoftBodies
|
||||
m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" );
|
||||
m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" );
|
||||
m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" );
|
||||
m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
|
||||
m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
|
||||
m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl");
|
||||
|
||||
|
||||
if( m_clFunctions.getKernelCompilationFailures()==0 )
|
||||
if( m_currentCLFunctions->getKernelCompilationFailures()==0 )
|
||||
m_shadersInitialized = true;
|
||||
|
||||
return m_shadersInitialized;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ subject to the following restrictions:
|
||||
#include "vectormath/vmInclude.h"
|
||||
|
||||
#include "BulletSoftBody/btSoftBodySolvers.h"
|
||||
#include "BulletSoftBody/btSoftBody.h"
|
||||
#include "btSoftBodySolverBuffer_OpenCL.h"
|
||||
#include "btSoftBodySolverLinkData_OpenCL.h"
|
||||
#include "btSoftBodySolverVertexData_OpenCL.h"
|
||||
@@ -33,6 +34,7 @@ protected:
|
||||
|
||||
int m_kernelCompilationFailures;
|
||||
|
||||
|
||||
public:
|
||||
CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
|
||||
m_cqCommandQue( cqCommandQue ),
|
||||
@@ -49,7 +51,7 @@ public:
|
||||
/**
|
||||
* Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
|
||||
*/
|
||||
cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros = "" );
|
||||
virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
|
||||
|
||||
void clearKernelCompilationFailures()
|
||||
{
|
||||
@@ -285,7 +287,8 @@ public:
|
||||
|
||||
protected:
|
||||
|
||||
CLFunctions m_clFunctions;
|
||||
CLFunctions m_defaultCLFunctions;
|
||||
CLFunctions* m_currentCLFunctions;
|
||||
|
||||
/** Variable to define whether we need to update solver constants on the next iteration */
|
||||
bool m_updateSolverConstants;
|
||||
@@ -350,7 +353,20 @@ protected:
|
||||
btAlignedObjectArray< float > m_perClothFriction;
|
||||
btOpenCLBuffer< float > m_clPerClothFriction;
|
||||
|
||||
// anchor node info
|
||||
struct AnchorNodeInfoCL
|
||||
{
|
||||
int clVertexIndex;
|
||||
btSoftBody::Node* pNode;
|
||||
};
|
||||
|
||||
btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
|
||||
btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
|
||||
btOpenCLBuffer<Vectormath::Aos::Point3> m_clAnchorPosition;
|
||||
btAlignedObjectArray<int> m_anchorIndex;
|
||||
btOpenCLBuffer<int> m_clAnchorIndex;
|
||||
|
||||
bool m_bUpdateAnchoredNodePos;
|
||||
|
||||
cl_kernel m_prepareLinksKernel;
|
||||
cl_kernel m_solvePositionsFromLinksKernel;
|
||||
@@ -368,6 +384,7 @@ protected:
|
||||
|
||||
cl_kernel m_outputToVertexArrayKernel;
|
||||
cl_kernel m_applyForcesKernel;
|
||||
cl_kernel m_updateFixedVertexPositionsKernel;
|
||||
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_context m_cxMainContext;
|
||||
@@ -394,6 +411,8 @@ protected:
|
||||
|
||||
virtual void applyForces( float solverdt );
|
||||
|
||||
void updateFixedVertexPositions();
|
||||
|
||||
/**
|
||||
* Integrate motion on the solver.
|
||||
*/
|
||||
@@ -430,7 +449,7 @@ protected:
|
||||
void releaseKernels();
|
||||
|
||||
public:
|
||||
btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx);
|
||||
btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
|
||||
|
||||
virtual ~btOpenCLSoftBodySolver();
|
||||
|
||||
@@ -456,7 +475,7 @@ public:
|
||||
|
||||
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
|
||||
|
||||
virtual void copyBackToSoftBodies();
|
||||
virtual void copyBackToSoftBodies(bool bMove = true);
|
||||
|
||||
virtual void solveConstraints( float solverdt );
|
||||
|
||||
@@ -475,6 +494,14 @@ public:
|
||||
return m_defaultWorkGroupSize;
|
||||
}
|
||||
|
||||
void setCLFunctions(CLFunctions* funcs)
|
||||
{
|
||||
if (funcs)
|
||||
m_currentCLFunctions = funcs;
|
||||
else
|
||||
m_currentCLFunctions = &m_defaultCLFunctions;
|
||||
}
|
||||
|
||||
}; // btOpenCLSoftBodySolver
|
||||
|
||||
|
||||
|
||||
@@ -33,32 +33,6 @@ static const size_t workGroupSize = GROUP_SIZE;
|
||||
|
||||
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
|
||||
|
||||
#if (0)//CL_VERSION_1_1 == 1)
|
||||
//OpenCL 1.1 kernels use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* UpdatePositionsFromVelocitiesCLString =
|
||||
#include "OpenCLC/UpdatePositionsFromVelocities.cl"
|
||||
static const char* SolvePositionsCLString =
|
||||
#include "OpenCLC/SolvePositionsSIMDBatched.cl"
|
||||
static const char* UpdateNodesCLString =
|
||||
#include "OpenCLC/UpdateNodes.cl"
|
||||
static const char* UpdatePositionsCLString =
|
||||
#include "OpenCLC/UpdatePositions.cl"
|
||||
static const char* UpdateConstantsCLString =
|
||||
#include "OpenCLC/UpdateConstants.cl"
|
||||
static const char* IntegrateCLString =
|
||||
#include "OpenCLC/Integrate.cl"
|
||||
static const char* ApplyForcesCLString =
|
||||
#include "OpenCLC/ApplyForces.cl"
|
||||
static const char* UpdateNormalsCLString =
|
||||
#include "OpenCLC/UpdateNormals.cl"
|
||||
static const char* VSolveLinksCLString =
|
||||
#include "OpenCLC/VSolveLinks.cl"
|
||||
static const char* SolveCollisionsAndUpdateVelocitiesCLString =
|
||||
#include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
|
||||
static const char* OutputToVertexArrayCLString =
|
||||
#include "OpenCLC/OutputToVertexArray.cl"
|
||||
#else
|
||||
////OpenCL 1.0 kernels don't use float3
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* UpdatePositionsFromVelocitiesCLString =
|
||||
@@ -75,6 +49,8 @@ static const char* IntegrateCLString =
|
||||
#include "OpenCLC10/Integrate.cl"
|
||||
static const char* ApplyForcesCLString =
|
||||
#include "OpenCLC10/ApplyForces.cl"
|
||||
static const char* UpdateFixedVertexPositionsCLString =
|
||||
#include "OpenCLC10/UpdateFixedVertexPositions.cl"
|
||||
static const char* UpdateNormalsCLString =
|
||||
#include "OpenCLC10/UpdateNormals.cl"
|
||||
static const char* VSolveLinksCLString =
|
||||
@@ -83,7 +59,6 @@ static const char* SolveCollisionsAndUpdateVelocitiesCLString =
|
||||
#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
|
||||
static const char* OutputToVertexArrayCLString =
|
||||
#include "OpenCLC10/OutputToVertexArray.cl"
|
||||
#endif //CL_VERSION_1_1
|
||||
|
||||
|
||||
|
||||
@@ -194,8 +169,8 @@ bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
|
||||
|
||||
|
||||
|
||||
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx) :
|
||||
btOpenCLSoftBodySolver( queue, ctx ),
|
||||
btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
|
||||
btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
|
||||
m_linkData(queue, ctx)
|
||||
{
|
||||
// Initial we will clearly need to update solver constants
|
||||
@@ -213,14 +188,17 @@ btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
|
||||
|
||||
void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
|
||||
{
|
||||
if( forceUpdate|| m_softBodySet.size() != softBodies.size() )
|
||||
if( forceUpdate || m_softBodySet.size() != softBodies.size() )
|
||||
{
|
||||
// Have a change in the soft body set so update, reloading all the data
|
||||
getVertexData().clear();
|
||||
getTriangleData().clear();
|
||||
getLinkData().clear();
|
||||
m_softBodySet.resize(0);
|
||||
m_anchorIndex.clear();
|
||||
|
||||
int maxPiterations = 0;
|
||||
int maxViterations = 0;
|
||||
|
||||
for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
|
||||
{
|
||||
@@ -238,8 +216,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
|
||||
m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
|
||||
m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
|
||||
m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
|
||||
|
||||
|
||||
// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
|
||||
m_perClothFriction.push_back( softBody->getFriction() );
|
||||
m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
|
||||
|
||||
@@ -253,6 +230,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
|
||||
// Allocate space for new vertices in all the vertex arrays
|
||||
getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
|
||||
|
||||
|
||||
int firstTriangle = getTriangleData().getNumTriangles();
|
||||
int numTriangles = softBody->m_faces.size();
|
||||
int maxTriangles = numTriangles;
|
||||
@@ -272,6 +250,8 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
|
||||
float vertexInverseMass = softBody->m_nodes[vertex].m_im;
|
||||
desc.setInverseMass(vertexInverseMass);
|
||||
getVertexData().setVertexAt( desc, firstVertex + vertex );
|
||||
|
||||
m_anchorIndex.push_back(-1.0);
|
||||
}
|
||||
|
||||
// Copy triangles similarly
|
||||
@@ -318,17 +298,78 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody
|
||||
newSoftBody->setMaxTriangles( maxTriangles );
|
||||
newSoftBody->setFirstLink( firstLink );
|
||||
newSoftBody->setNumLinks( numLinks );
|
||||
|
||||
// Find maximum piterations and viterations
|
||||
int piterations = softBody->m_cfg.piterations;
|
||||
|
||||
if ( piterations > maxPiterations )
|
||||
maxPiterations = piterations;
|
||||
|
||||
int viterations = softBody->m_cfg.viterations;
|
||||
|
||||
if ( viterations > maxViterations )
|
||||
maxViterations = viterations;
|
||||
|
||||
// zero mass
|
||||
for( int vertex = 0; vertex < numVertices; ++vertex )
|
||||
{
|
||||
if ( softBody->m_nodes[vertex].m_im == 0 )
|
||||
{
|
||||
AnchorNodeInfoCL nodeInfo;
|
||||
nodeInfo.clVertexIndex = firstVertex + vertex;
|
||||
nodeInfo.pNode = &softBody->m_nodes[vertex];
|
||||
|
||||
m_anchorNodeInfoArray.push_back(nodeInfo);
|
||||
}
|
||||
}
|
||||
|
||||
// anchor position
|
||||
if ( numVertices > 0 )
|
||||
{
|
||||
for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
|
||||
{
|
||||
btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
|
||||
btSoftBody::Node* firstNode = &softBody->m_nodes[0];
|
||||
|
||||
AnchorNodeInfoCL nodeInfo;
|
||||
nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
|
||||
nodeInfo.pNode = anchorNode;
|
||||
|
||||
m_anchorNodeInfoArray.push_back(nodeInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_anchorPosition.clear();
|
||||
m_anchorPosition.resize(m_anchorNodeInfoArray.size());
|
||||
|
||||
for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
|
||||
{
|
||||
const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
|
||||
m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
|
||||
getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
|
||||
}
|
||||
|
||||
updateConstants(0.f);
|
||||
|
||||
// set position and velocity iterations
|
||||
setNumberOfPositionIterations(maxPiterations);
|
||||
setNumberOfVelocityIterations(maxViterations);
|
||||
|
||||
// set wind velocity
|
||||
m_perClothWindVelocity.resize( m_softBodySet.size() );
|
||||
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
|
||||
{
|
||||
btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
|
||||
m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
|
||||
}
|
||||
|
||||
m_clPerClothWindVelocity.changedOnCPU();
|
||||
|
||||
// generate batches
|
||||
m_linkData.generateBatches();
|
||||
m_triangleData.generateBatches();
|
||||
|
||||
|
||||
// Build the shaders to match the batching parameters
|
||||
buildShaders();
|
||||
}
|
||||
@@ -509,7 +550,9 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
|
||||
if( m_shadersInitialized )
|
||||
return true;
|
||||
|
||||
m_clFunctions.clearKernelCompilationFailures();
|
||||
const char* additionalMacros="";
|
||||
|
||||
m_currentCLFunctions->clearKernelCompilationFailures();
|
||||
|
||||
char *wavefrontMacros = new char[256];
|
||||
|
||||
@@ -522,22 +565,23 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
|
||||
WAVEFRONT_BLOCK_MULTIPLIER,
|
||||
WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
|
||||
|
||||
m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", "" );
|
||||
m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros );
|
||||
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", "" );
|
||||
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", "" );
|
||||
m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" );
|
||||
m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" );
|
||||
m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" );
|
||||
m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
|
||||
m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
|
||||
m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
|
||||
m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
|
||||
m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
|
||||
m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
|
||||
m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
|
||||
m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
|
||||
|
||||
// TODO: Rename to UpdateSoftBodies
|
||||
m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" );
|
||||
m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", "" );
|
||||
m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", "" );
|
||||
m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
||||
m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
||||
m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
|
||||
|
||||
delete [] wavefrontMacros;
|
||||
|
||||
if( m_clFunctions.getKernelCompilationFailures()==0)
|
||||
if( m_currentCLFunctions->getKernelCompilationFailures()==0)
|
||||
{
|
||||
m_shadersInitialized = true;
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ protected:
|
||||
|
||||
|
||||
|
||||
bool buildShaders();
|
||||
virtual bool buildShaders();
|
||||
|
||||
|
||||
void updateConstants( float timeStep );
|
||||
@@ -59,7 +59,7 @@ protected:
|
||||
/////////////////////////////////////
|
||||
|
||||
public:
|
||||
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx);
|
||||
btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false);
|
||||
|
||||
virtual ~btOpenCLSoftBodySolverSIMDAware();
|
||||
|
||||
|
||||
@@ -561,10 +561,14 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* Move data from host memory from the accelerator.
|
||||
* Move data to host memory from the accelerator if bCopy is false.
|
||||
* If bCopy is true, copy data to host memory from the accelerator so that data
|
||||
* won't be moved to accelerator when moveToAccelerator() is called next time.
|
||||
* If bCopyMinimum is true, only vertex position and normal are copied.
|
||||
* bCopyMinimum will be meaningful only if bCopy is true.
|
||||
* The CPU version will always return that it has moved it.
|
||||
*/
|
||||
virtual bool moveFromAccelerator()
|
||||
virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user