diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h index 2d460174a..1374f3ab3 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h @@ -55,7 +55,7 @@ public: virtual bool onAccelerator(); virtual bool moveToAccelerator(); - virtual bool moveFromAccelerator(); + virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true); }; diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp index 10f437738..f1f8e576f 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp @@ -308,18 +308,42 @@ bool btSoftBodyVertexDataDX11::moveToAccelerator() return success; } -bool btSoftBodyVertexDataDX11::moveFromAccelerator() +bool btSoftBodyVertexDataDX11::moveFromAccelerator(bool bCopy, bool bCopyMinimum) { bool success = true; - success = success && m_dx11ClothIdentifier.moveFromGPU(); - success = success && m_dx11VertexPosition.moveFromGPU(); - success = success && m_dx11VertexPreviousPosition.moveFromGPU(); - success = success && m_dx11VertexVelocity.moveFromGPU(); - success = success && m_dx11VertexForceAccumulator.moveFromGPU(); - success = success && m_dx11VertexNormal.moveFromGPU(); - success = success && m_dx11VertexInverseMass.moveFromGPU(); - success = success && m_dx11VertexArea.moveFromGPU(); - success = success && m_dx11VertexTriangleCount.moveFromGPU(); + + if (!bCopy) + { + success = success && m_dx11ClothIdentifier.moveFromGPU(); + success = success && m_dx11VertexPosition.moveFromGPU(); + success = success && m_dx11VertexPreviousPosition.moveFromGPU(); + success = success && m_dx11VertexVelocity.moveFromGPU(); + success = success && m_dx11VertexForceAccumulator.moveFromGPU(); + success = success && m_dx11VertexNormal.moveFromGPU(); + success = success && m_dx11VertexInverseMass.moveFromGPU(); + success = success && m_dx11VertexArea.moveFromGPU(); + success = success && m_dx11VertexTriangleCount.moveFromGPU(); + } + else + { + if (bCopyMinimum) + { + success = success && m_dx11VertexPosition.copyFromGPU(); + success = success && m_dx11VertexNormal.copyFromGPU(); + } + else + { + success = success && m_dx11ClothIdentifier.copyFromGPU(); + success = success && m_dx11VertexPosition.copyFromGPU(); + success = success && m_dx11VertexPreviousPosition.copyFromGPU(); + success = success && m_dx11VertexVelocity.copyFromGPU(); + success = success && m_dx11VertexForceAccumulator.copyFromGPU(); + success = success && m_dx11VertexNormal.copyFromGPU(); + success = success && m_dx11VertexInverseMass.copyFromGPU(); + success = success && m_dx11VertexArea.copyFromGPU(); + success = success && m_dx11VertexTriangleCount.copyFromGPU(); + } + } if( success ) m_onGPU = true; @@ -619,10 +643,10 @@ void btDX11SoftBodySolver::releaseKernels() } -void btDX11SoftBodySolver::copyBackToSoftBodies() +void btDX11SoftBodySolver::copyBackToSoftBodies(bool bMove) { // Move the vertex data back to the host first - m_vertexData.moveFromAccelerator(); + m_vertexData.moveFromAccelerator(!bMove); // Loop over soft bodies, copying all the vertex positions back for each body in turn for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h index 94520ca09..62e629bed 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h @@ -607,7 +607,7 @@ public: virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false); - virtual void copyBackToSoftBodies(); + virtual void copyBackToSoftBodies(bool bMove = true); virtual void solveConstraints( float solverdt ); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt index 995e275b7..d2a56ba31 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt @@ -34,7 +34,6 @@ SET(BulletSoftBodyOpenCLSolvers_HDRS ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD ${BulletSoftBodyOpenCLSolvers_SRCS} ${BulletSoftBodyOpenCLSolvers_HDRS} - ${BulletSoftBodyOpenCLSolvers_OpenCLC} ) SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION}) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt index 3b77c2100..27d57f718 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt @@ -45,7 +45,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders ) foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) - LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl") endforeach(f) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp index 0c4da2c15..49ca12d76 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp @@ -25,6 +25,7 @@ subject to the following restrictions: #include "../OpenCLC10/UpdatePositions.cl" #include "../OpenCLC10/UpdatePositionsFromVelocities.cl" #include "../OpenCLC10/VSolveLinks.cl" +#include "../OpenCLC10/UpdateFixedVertexPositions.cl" //#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" @@ -39,7 +40,7 @@ MINICL_REGISTER(ApplyForcesKernel) MINICL_REGISTER(ResetNormalsAndAreasKernel) MINICL_REGISTER(NormalizeNormalsAndAreasKernel) MINICL_REGISTER(UpdateSoftBodiesKernel) - +MINICL_REGISTER(UpdateFixedVertexPositions) float mydot3a(float4 a, float4 b) { diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt index 5dcba5723..10ed90f2b 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt @@ -44,7 +44,7 @@ SET(BulletSoftBodyOpenCLSolvers_Shaders ) foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) - LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl") endforeach(f) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl deleted file mode 100644 index 7204a80c6..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl +++ /dev/null @@ -1,91 +0,0 @@ -MSTRINGIFY( - -/*#define float3 float4 -float dot3(float3 a, float3 b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -}*/ - -float3 projectOnAxis( float3 v, float3 a ) -{ - return (a*dot(v, a)); -} - -__kernel void -ApplyForcesKernel( - const uint numNodes, - const float solverdt, - const float epsilon, - __global int * g_vertexClothIdentifier, - __global float4 * g_vertexNormal, - __global float * g_vertexArea, - __global float * g_vertexInverseMass, - __global float * g_clothLiftFactor, - __global float * g_clothDragFactor, - __global float4 * g_clothWindVelocity, - __global float4 * g_clothAcceleration, - __global float * g_clothMediumDensity, - __global float4 * g_vertexForceAccumulator, - __global float4 * g_vertexVelocity) -{ - unsigned int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - int clothId = g_vertexClothIdentifier[nodeID]; - float nodeIM = g_vertexInverseMass[nodeID]; - - if( nodeIM > 0.0f ) - { - float3 nodeV = g_vertexVelocity[nodeID].xyz; - float3 normal = g_vertexNormal[nodeID].xyz; - float area = g_vertexArea[nodeID]; - float3 nodeF = g_vertexForceAccumulator[nodeID].xyz; - - // Read per-cloth values - float3 clothAcceleration = g_clothAcceleration[clothId].xyz; - float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz; - float liftFactor = g_clothLiftFactor[clothId]; - float dragFactor = g_clothDragFactor[clothId]; - float mediumDensity = g_clothMediumDensity[clothId]; - - // Apply the acceleration to the cloth rather than do this via a force - nodeV += (clothAcceleration*solverdt); - - g_vertexVelocity[nodeID] = (float4)(nodeV, 0.f); - - float3 relativeWindVelocity = nodeV - clothWindVelocity; - float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity); - - if( relativeSpeedSquared > epsilon ) - { - // Correct direction of normal relative to wind direction and get dot product - normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f); - float dvNormal = dot(normal, relativeWindVelocity); - if( dvNormal > 0 ) - { - float3 force = (float3)(0.f, 0.f, 0.f); - float c0 = area * dvNormal * relativeSpeedSquared / 2.f; - float c1 = c0 * mediumDensity; - force += normal * (-c1 * liftFactor); - force += normalize(relativeWindVelocity)*(-c1 * dragFactor); - - float dtim = solverdt * nodeIM; - float3 forceDTIM = force * dtim; - - float3 nodeFPlusForce = nodeF + force; - - // m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim; - float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim); - - nodeF = nodeFPlusForce; - if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) - nodeF = nodeFMinus; - - g_vertexForceAccumulator[nodeID] = (float4)(nodeF, 0.0f); - } - } - } - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl deleted file mode 100644 index 15c0cdc67..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl +++ /dev/null @@ -1,80 +0,0 @@ -MSTRINGIFY( -#pragma OPENCL EXTENSION cl_amd_printf : enable \n - - - -__kernel void -ComputeBoundsKernel( - int numNodes, - int numSoftBodies, - __global int * g_vertexClothIdentifier, - __global float4 * g_vertexPositions, - volatile __global uint * g_clothMinBounds, - volatile __global uint * g_clothMaxBounds, - volatile __local uint * clothMinBounds, - volatile __local uint * clothMaxBounds) -{ - // Init min and max bounds arrays - if( get_local_id(0) < numSoftBodies ) - { - - clothMinBounds[get_local_id(0)*4] = UINT_MAX; - clothMinBounds[get_local_id(0)*4+1] = UINT_MAX; - clothMinBounds[get_local_id(0)*4+2] = UINT_MAX; - clothMinBounds[get_local_id(0)*4+3] = UINT_MAX; - clothMaxBounds[get_local_id(0)*4] = 0; - clothMaxBounds[get_local_id(0)*4+1] = 0; - clothMaxBounds[get_local_id(0)*4+2] = 0; - clothMaxBounds[get_local_id(0)*4+3] = 0; - - } - - - barrier(CLK_GLOBAL_MEM_FENCE); - - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)]; - if( clothIdentifier >= 0 ) - { - float3 position = g_vertexPositions[get_global_id(0)].xyz; - - /* Reinterpret position as uint */ - uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z)); - - /* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */ - positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000); - positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000); - positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000); - - /* Min/max with the LDS values */ - atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x); - atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y); - atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z); - - atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x); - atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y); - atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z); - } - } - - barrier(CLK_GLOBAL_MEM_FENCE); - - - /* Use global atomics to update the global versions of the data*/ - if( get_local_id(0) < numSoftBodies ) - { - /*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/ - atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]); - atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]); - atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]); - - atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]); - atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]); - atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]); - } -} - - -); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl deleted file mode 100644 index 4a2c9f2fc..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl +++ /dev/null @@ -1,35 +0,0 @@ -MSTRINGIFY( - -// Node indices for each link - -//#define float3 float4 - -__kernel void -IntegrateKernel( - const int numNodes, - const float solverdt, - __global float * g_vertexInverseMasses, - __global float4 * g_vertexPositions, - __global float4 * g_vertexVelocity, - __global float4 * g_vertexPreviousPositions, - __global float4 * g_vertexForceAccumulator) -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - float3 position = g_vertexPositions[nodeID].xyz; - float3 velocity = g_vertexVelocity[nodeID].xyz; - float3 force = g_vertexForceAccumulator[nodeID].xyz; - float inverseMass = g_vertexInverseMasses[nodeID]; - - g_vertexPreviousPositions[nodeID] = (float4)(position, 0.f); - velocity += force * inverseMass * solverdt; - position += velocity * solverdt; - - g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f); - g_vertexPositions[nodeID] = (float4)(position, 0.f); - g_vertexVelocity[nodeID] = (float4)(velocity, 0.f); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl deleted file mode 100644 index 435abef16..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl +++ /dev/null @@ -1,46 +0,0 @@ -MSTRINGIFY( - -cbuffer OutputToVertexArrayCB : register( b0 ) -{ - int startNode; - int numNodes; - int offsetX; - int strideX; - - int offsetN; - int strideN; - int padding1; - int padding2; -}; - - -StructuredBuffer g_nodesx : register( t0 ); -StructuredBuffer g_nodesn : register( t1 ); - -RWStructuredBuffer g_vertexBuffer : register( u0 ); - - -[numthreads(128, 1, 1)] -void -OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) -{ - int nodeID = DTid.x; - if( nodeID < numNodes ) - { - float4 nodeX = g_nodesx[nodeID + startNode]; - float4 nodeN = g_nodesn[nodeID + startNode]; - - // Stride should account for the float->float4 conversion - int positionDestination = nodeID * strideX + offsetX; - g_vertexBuffer[positionDestination] = nodeX.x; - g_vertexBuffer[positionDestination+1] = nodeX.y; - g_vertexBuffer[positionDestination+2] = nodeX.z; - - int normalDestination = nodeID * strideN + offsetN; - g_vertexBuffer[normalDestination] = nodeN.x; - g_vertexBuffer[normalDestination+1] = nodeN.y; - g_vertexBuffer[normalDestination+2] = nodeN.z; - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl deleted file mode 100644 index f37a2f359..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl +++ /dev/null @@ -1,34 +0,0 @@ -MSTRINGIFY( - -__kernel void -PrepareLinksKernel( - const int numLinks, - __global int2 * g_linksVertexIndices, - __global float * g_linksMassLSC, - __global float4 * g_nodesPreviousPosition, - __global float * g_linksLengthRatio, - __global float4 * g_linksCurrentLength) -{ - int linkID = get_global_id(0); - if( linkID < numLinks ) - { - int2 nodeIndices = g_linksVertexIndices[linkID]; - int node0 = nodeIndices.x; - int node1 = nodeIndices.y; - - float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0]; - float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1]; - - float massLSC = g_linksMassLSC[linkID]; - - float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0; - - float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC; - linkLengthRatio = 1.0f/linkLengthRatio; - - g_linksCurrentLength[linkID] = linkCurrentLength; - g_linksLengthRatio[linkID] = linkLengthRatio; - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl deleted file mode 100644 index 9f50da8a4..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl +++ /dev/null @@ -1,195 +0,0 @@ -MSTRINGIFY( - -typedef struct -{ - int firstObject; - int endObject; -} CollisionObjectIndices; - -typedef struct -{ - float4 shapeTransform[4]; // column major 4x4 matrix - float4 linearVelocity; - float4 angularVelocity; - - int softBodyIdentifier; - int collisionShapeType; - - - // Shape information - // Compressed from the union - float radius; - float halfHeight; - int upAxis; - - float margin; - float friction; - - int padding0; - -} CollisionShapeDescription; - -/* From btBroadphaseProxy.h */ -__constant int CAPSULE_SHAPE_PROXYTYPE = 10; - -/* Multiply column-major matrix against vector */ -float4 matrixVectorMul( float4 matrix[4], float4 vector ) -{ - float4 returnVector; - float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x); - float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y); - float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z); - float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w); - returnVector.x = dot(row0, vector); - returnVector.y = dot(row1, vector); - returnVector.z = dot(row2, vector); - returnVector.w = dot(row3, vector); - return returnVector; -} - -__kernel void -SolveCollisionsAndUpdateVelocitiesKernel( - const int numNodes, - const float isolverdt, - __global int *g_vertexClothIdentifier, - __global float4 *g_vertexPreviousPositions, - __global float * g_perClothFriction, - __global float * g_clothDampingFactor, - __global CollisionObjectIndices * g_perClothCollisionObjectIndices, - __global CollisionShapeDescription * g_collisionObjectDetails, - __global float4 * g_vertexForces, - __global float4 *g_vertexVelocities, - __global float4 *g_vertexPositions) -{ - int nodeID = get_global_id(0); - float3 forceOnVertex = (float3)(0.f, 0.f, 0.f); - if( get_global_id(0) < numNodes ) - { - int clothIdentifier = g_vertexClothIdentifier[nodeID]; - - // Abort if this is not a valid cloth - if( clothIdentifier < 0 ) - return; - - float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f); - float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f); - float3 velocity; - float clothFriction = g_perClothFriction[clothIdentifier]; - float dampingFactor = g_clothDampingFactor[clothIdentifier]; - float velocityCoefficient = (1.f - dampingFactor); - CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier]; - - if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject ) - { - velocity = (float3)(15, 0, 0); - - /* We have some possible collisions to deal with */ - for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision ) - { - CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision]; - float colliderFriction = shapeDescription.friction; - - if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE ) - { - /* Colliding with a capsule */ - - float capsuleHalfHeight = shapeDescription.halfHeight; - float capsuleRadius = shapeDescription.radius; - float capsuleMargin = shapeDescription.margin; - int capsuleupAxis = shapeDescription.upAxis; - - /* Four columns of worldTransform matrix */ - float4 worldTransform[4]; - worldTransform[0] = shapeDescription.shapeTransform[0]; - worldTransform[1] = shapeDescription.shapeTransform[1]; - worldTransform[2] = shapeDescription.shapeTransform[2]; - worldTransform[3] = shapeDescription.shapeTransform[3]; - - // Correctly define capsule centerline vector - float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); - float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f); - c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 ); - c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 ); - c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 ); - c2.x = -c1.x; - c2.y = -c1.y; - c2.z = -c1.z; - - float4 worldC1 = matrixVectorMul(worldTransform, c1); - float4 worldC2 = matrixVectorMul(worldTransform, c2); - float3 segment = (worldC2 - worldC1).xyz; - - /* compute distance of tangent to vertex along line segment in capsule */ - float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) ); - - float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f)); - float distanceFromLine = length(position - closestPoint); - float distanceFromC1 = length(worldC1 - position); - float distanceFromC2 = length(worldC2 - position); - - /* Final distance from collision, point to push from, direction to push in - for impulse force */ - float dist; - float3 normalVector; - if( distanceAlongSegment < 0 ) - { - dist = distanceFromC1; - normalVector = normalize(position - worldC1).xyz; - } else if( distanceAlongSegment > 1.f ) { - dist = distanceFromC2; - normalVector = normalize(position - worldC2).xyz; - } else { - dist = distanceFromLine; - normalVector = normalize(position - closestPoint).xyz; - } - - float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz; - float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz; - float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w)); - - float minDistance = capsuleRadius + capsuleMargin; - - /* In case of no collision, this is the value of velocity */ - velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt; - - - // Check for a collision - if( dist < minDistance ) - { - /* Project back to surface along normal */ - position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f); - velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt; - float3 relativeVelocity = velocity - velocityOfSurfacePoint; - - float3 p1 = normalize(cross(normalVector, segment)); - float3 p2 = normalize(cross(p1, normalVector)); - /* Full friction is sum of velocities in each direction of plane */ - float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2); - - /* Real friction is peak friction corrected by friction coefficients */ - frictionVector = frictionVector * (colliderFriction*clothFriction); - - float approachSpeed = dot(relativeVelocity, normalVector); - - if( approachSpeed <= 0.0f ) - forceOnVertex -= frictionVector; - } - - } - } - } else { - /* Update velocity */ - float3 difference = position.xyz - previousPosition.xyz; - velocity = difference*velocityCoefficient*isolverdt; - } - - g_vertexVelocities[nodeID] = (float4)(velocity, 0.f); - - /* Update external force */ - g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f); - - g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f); - } -} - -); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl deleted file mode 100644 index 5ab2a6208..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl +++ /dev/null @@ -1,213 +0,0 @@ -MSTRINGIFY( - -typedef struct -{ - int firstObject; - int endObject; -} CollisionObjectIndices; - -typedef struct -{ - float4 shapeTransform[4]; /* column major 4x4 matrix */ - float4 linearVelocity; - float4 angularVelocity; - - int softBodyIdentifier; - int collisionShapeType; - - - // Shape information - // Compressed from the union - float radius; - float halfHeight; - int upAxis; - - float margin; - float friction; - - int padding0; - -} CollisionShapeDescription; - -/* From btBroadphaseProxy.h */ -__constant int CAPSULE_SHAPE_PROXYTYPE = 10; - - -/* Multiply column-major matrix against vector */ -float4 matrixVectorMul( float4 matrix[4], float4 vector ) -{ - float4 returnVector; - float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x); - float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y); - float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z); - float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w); - returnVector.x = dot(row0, vector); - returnVector.y = dot(row1, vector); - returnVector.z = dot(row2, vector); - returnVector.w = dot(row3, vector); - return returnVector; -} - -__kernel void -SolveCollisionsAndUpdateVelocitiesKernel( - const int numNodes, - const float isolverdt, - __global int *g_vertexClothIdentifier, - __global float4 *g_vertexPreviousPositions, - __global float * g_perClothFriction, - __global float * g_clothDampingFactor, - __global CollisionObjectIndices * g_perClothCollisionObjectIndices, - __global CollisionShapeDescription * g_collisionObjectDetails, - __global float4 * g_vertexForces, - __global float4 *g_vertexVelocities, - __global float4 *g_vertexPositions, - __local CollisionShapeDescription *localCollisionShapes) -{ - int nodeID = get_global_id(0); - float3 forceOnVertex = (float3)(0.f, 0.f, 0.f); - - int clothIdentifier = g_vertexClothIdentifier[nodeID]; - - // Abort if this is not a valid cloth - if( clothIdentifier < 0 ) - return; - - float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f); - float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f); - float3 velocity; - float clothFriction = g_perClothFriction[clothIdentifier]; - float dampingFactor = g_clothDampingFactor[clothIdentifier]; - float velocityCoefficient = (1.f - dampingFactor); - CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier]; - - int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject; - if( numObjects > 0 ) - { - /* We have some possible collisions to deal with */ - - /* First load all of the collision objects into LDS */ - int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject; - if( get_local_id(0) < numObjects ) - { - localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ]; - } - } - - /* Safe as the vertices are padded so that not more than one soft body is in a group */ - barrier(CLK_LOCAL_MEM_FENCE); - - /* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */ - if( numObjects > 0 ) - { - velocity = (float3)(0, 0, 0); - - - // We have some possible collisions to deal with - for( int collision = 0; collision < numObjects; ++collision ) - { - CollisionShapeDescription shapeDescription = localCollisionShapes[collision]; - float colliderFriction = shapeDescription.friction; - - if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE ) - { - /* Colliding with a capsule */ - - float capsuleHalfHeight = localCollisionShapes[collision].halfHeight; - float capsuleRadius = localCollisionShapes[collision].radius; - float capsuleMargin = localCollisionShapes[collision].margin; - int capsuleupAxis = localCollisionShapes[collision].upAxis; - - float4 worldTransform[4]; - worldTransform[0] = localCollisionShapes[collision].shapeTransform[0]; - worldTransform[1] = localCollisionShapes[collision].shapeTransform[1]; - worldTransform[2] = localCollisionShapes[collision].shapeTransform[2]; - worldTransform[3] = localCollisionShapes[collision].shapeTransform[3]; - - // Correctly define capsule centerline vector - float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); - float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f); - c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 ); - c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 ); - c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 ); - c2.x = -c1.x; - c2.y = -c1.y; - c2.z = -c1.z; - - float4 worldC1 = matrixVectorMul(worldTransform, c1); - float4 worldC2 = matrixVectorMul(worldTransform, c2); - float3 segment = (worldC2 - worldC1).xyz; - - - /* compute distance of tangent to vertex along line segment in capsule */ - float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) ); - - float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f)); - float distanceFromLine = length(position - closestPoint); - float distanceFromC1 = length(worldC1 - position); - float distanceFromC2 = length(worldC2 - position); - - /* Final distance from collision, point to push from, direction to push in - for impulse force */ - float dist; - float3 normalVector; - if( distanceAlongSegment < 0 ) - { - dist = distanceFromC1; - normalVector = normalize(position - worldC1).xyz; - } else if( distanceAlongSegment > 1.f ) { - dist = distanceFromC2; - normalVector = normalize(position - worldC2).xyz; - } else { - dist = distanceFromLine; - normalVector = normalize(position - closestPoint).xyz; - } - - float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz; - float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz; - float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w)); - - float minDistance = capsuleRadius + capsuleMargin; - - /* In case of no collision, this is the value of velocity */ - velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt; - - - /* Check for a collision */ - if( dist < minDistance ) - { - /* Project back to surface along normal */ - position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f); - velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt; - float3 relativeVelocity = velocity - velocityOfSurfacePoint; - - float3 p1 = normalize(cross(normalVector, segment)); - float3 p2 = normalize(cross(p1, normalVector)); - /* Full friction is sum of velocities in each direction of plane */ - float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2); - - /* Real friction is peak friction corrected by friction coefficients */ - frictionVector = frictionVector * (colliderFriction*clothFriction); - - float approachSpeed = dot(relativeVelocity, normalVector); - - if( approachSpeed <= 0.0f ) - forceOnVertex -= frictionVector; - } - - } - } - } else { - /* Update velocity */ - float3 difference = position.xyz - previousPosition.xyz; - velocity = difference*velocityCoefficient*isolverdt; - } - - g_vertexVelocities[nodeID] = (float4)(velocity, 0.f); - - /* Update external force */ - g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f); - - g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f); -} - -); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl deleted file mode 100644 index 4a08a56c3..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl +++ /dev/null @@ -1,55 +0,0 @@ - -MSTRINGIFY( - -/*#define float3 float4 - -float dot3(float3 a, float3 b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -}*/ - -__kernel void -SolvePositionsFromLinksKernel( - const int startLink, - const int numLinks, - const float kst, - const float ti, - __global int2 * g_linksVertexIndices, - __global float * g_linksMassLSC, - __global float * g_linksRestLengthSquared, - __global float * g_verticesInverseMass, - __global float4 * g_vertexPositions) - -{ - int linkID = get_global_id(0) + startLink; - if( get_global_id(0) < numLinks ) - { - float massLSC = g_linksMassLSC[linkID]; - float restLengthSquared = g_linksRestLengthSquared[linkID]; - - if( massLSC > 0.0f ) - { - int2 nodeIndices = g_linksVertexIndices[linkID]; - int node0 = nodeIndices.x; - int node1 = nodeIndices.y; - - float3 position0 = g_vertexPositions[node0].xyz; - float3 position1 = g_vertexPositions[node1].xyz; - - float inverseMass0 = g_verticesInverseMass[node0]; - float inverseMass1 = g_verticesInverseMass[node1]; - - float3 del = position1 - position0; - float len = dot(del, del); - float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst; - position0 = position0 - del*(k*inverseMass0); - position1 = position1 + del*(k*inverseMass1); - - g_vertexPositions[node0] = (float4)(position0, 0.f); - g_vertexPositions[node1] = (float4)(position1, 0.f); - - } - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl deleted file mode 100644 index 9a45570ac..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl +++ /dev/null @@ -1,129 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -MSTRINGIFY( - -float mydot3(float4 a, float4 b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -} - -__kernel void -SolvePositionsFromLinksKernel( - const int startWaveInBatch, - const int numWaves, - const float kst, - const float ti, - __global int2 *g_wavefrontBatchCountsVertexCounts, - __global int *g_vertexAddressesPerWavefront, - __global int2 * g_linksVertexIndices, - __global float * g_linksMassLSC, - __global float * g_linksRestLengthSquared, - __global float * g_verticesInverseMass, - __global float4 * g_vertexPositions, - __local int2 *wavefrontBatchCountsVertexCounts, - __local float4 *vertexPositionSharedData, - __local float *vertexInverseMassSharedData) -{ - const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1)); - const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE); - const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER; - const int localWavefront = wavefront - firstWavefrontInBlock; - - // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier - if( wavefront < (startWaveInBatch + numWaves) ) - { - // Load the batch counts for the wavefronts - - int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront]; - int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; - int verticesUsedByWave = batchesAndVerticesWithinWavefront.y; - - // Load the vertices for the wavefronts - for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) - { - int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; - - vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress]; - vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress]; - } - - mem_fence(CLK_LOCAL_MEM_FENCE); - - // Loop through the batches performing the solve on each in LDS - int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE; - - //for( int batch = 0; batch < batchesWithinWavefront; ++batch ) - - int batch = 0; - do - { - int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch; - int locationOfValue = baseDataLocation + laneInWavefront; - - - // These loads should all be perfectly linear across the WF - int2 localVertexIndices = g_linksVertexIndices[locationOfValue]; - float massLSC = g_linksMassLSC[locationOfValue]; - float restLengthSquared = g_linksRestLengthSquared[locationOfValue]; - - // LDS vertex addresses based on logical wavefront number in block and loaded index - int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x; - int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y; - - float4 position0 = vertexPositionSharedData[vertexAddress0]; - float4 position1 = vertexPositionSharedData[vertexAddress1]; - - float inverseMass0 = vertexInverseMassSharedData[vertexAddress0]; - float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; - - float4 del = position1 - position0; - float len = mydot3(del, del); - - float k = 0; - if( massLSC > 0.0f ) - { - k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst; - } - - position0 = position0 - del*(k*inverseMass0); - position1 = position1 + del*(k*inverseMass1); - - // Ensure compiler does not re-order memory operations - mem_fence(CLK_LOCAL_MEM_FENCE); - - vertexPositionSharedData[vertexAddress0] = position0; - vertexPositionSharedData[vertexAddress1] = position1; - - // Ensure compiler does not re-order memory operations - mem_fence(CLK_LOCAL_MEM_FENCE); - - - ++batch; - } while( batch < batchesWithinWavefront ); - - // Update the global memory vertices for the wavefronts - for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) - { - int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; - - g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f); - } - - } - -} - -); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl deleted file mode 100644 index 488a58479..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl +++ /dev/null @@ -1,44 +0,0 @@ -MSTRINGIFY( - -/*#define float3 float4 - -float dot3(float3 a, float3 b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -}*/ - -__kernel void -UpdateConstantsKernel( - const int numLinks, - __global int2 * g_linksVertexIndices, - __global float4 * g_vertexPositions, - __global float * g_vertexInverseMasses, - __global float * g_linksMaterialLSC, - __global float * g_linksMassLSC, - __global float * g_linksRestLengthSquared, - __global float * g_linksRestLengths) -{ - int linkID = get_global_id(0); - if( linkID < numLinks ) - { - int2 nodeIndices = g_linksVertexIndices[linkID]; - int node0 = nodeIndices.x; - int node1 = nodeIndices.y; - float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ]; - - float3 position0 = g_vertexPositions[node0].xyz; - float3 position1 = g_vertexPositions[node1].xyz; - float inverseMass0 = g_vertexInverseMasses[node0]; - float inverseMass1 = g_vertexInverseMasses[node1]; - - float3 difference = position0 - position1; - float length2 = dot(difference, difference); - float length = sqrt(length2); - - g_linksRestLengths[linkID] = length; - g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient; - g_linksRestLengthSquared[linkID] = length*length; - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl deleted file mode 100644 index cad4b8ad7..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl +++ /dev/null @@ -1,40 +0,0 @@ -MSTRINGIFY( - -//#define float3 float4 - -__kernel void -updateVelocitiesFromPositionsWithVelocitiesKernel( - int numNodes, - float isolverdt, - __global float4 * g_vertexPositions, - __global float4 * g_vertexPreviousPositions, - __global int * g_vertexClothIndices, - __global float *g_clothVelocityCorrectionCoefficients, - __global float * g_clothDampingFactor, - __global float4 * g_vertexVelocities, - __global float4 * g_vertexForces) -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - float3 position = g_vertexPositions[nodeID].xyz; - float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz; - float3 velocity = g_vertexVelocities[nodeID].xyz; - int clothIndex = g_vertexClothIndices[nodeID]; - float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex]; - float dampingFactor = g_clothDampingFactor[clothIndex]; - float velocityCoefficient = (1.f - dampingFactor); - - float3 difference = position - previousPosition; - - velocity += difference*velocityCorrectionCoefficient*isolverdt; - - // Damp the velocity - velocity *= velocityCoefficient; - - g_vertexVelocities[nodeID] = (float4)(velocity, 0.f); - g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl deleted file mode 100644 index 37c8b3fad..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl +++ /dev/null @@ -1,103 +0,0 @@ -MSTRINGIFY( - -//#define float3 float4 - -/*float length3(float3 a) -{ - a.w = 0; - return length(a); -} - -float normalize3(float3 a) -{ - a.w = 0; - return normalize(a); -}*/ - -__kernel void -ResetNormalsAndAreasKernel( - const unsigned int numNodes, - __global float4 * g_vertexNormals, - __global float * g_vertexArea) -{ - if( get_global_id(0) < numNodes ) - { - g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f); - g_vertexArea[get_global_id(0)] = 0.0f; - } -} - -__kernel void -UpdateSoftBodiesKernel( - const unsigned int startFace, - const unsigned int numFaces, - __global int4 * g_triangleVertexIndexSet, - __global float4 * g_vertexPositions, - __global float4 * g_vertexNormals, - __global float * g_vertexArea, - __global float4 * g_triangleNormals, - __global float * g_triangleArea) -{ - int faceID = get_global_id(0) + startFace; - if( get_global_id(0) < numFaces ) - { - int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ]; - int nodeIndex0 = triangleIndexSet.x; - int nodeIndex1 = triangleIndexSet.y; - int nodeIndex2 = triangleIndexSet.z; - - float3 node0 = g_vertexPositions[nodeIndex0].xyz; - float3 node1 = g_vertexPositions[nodeIndex1].xyz; - float3 node2 = g_vertexPositions[nodeIndex2].xyz; - float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz; - float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz; - float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz; - float vertexArea0 = g_vertexArea[nodeIndex0]; - float vertexArea1 = g_vertexArea[nodeIndex1]; - float vertexArea2 = g_vertexArea[nodeIndex2]; - - float3 vector0 = node1 - node0; - float3 vector1 = node2 - node0; - - float3 faceNormal = cross(vector0.xyz, vector1.xyz); - float triangleArea = length(faceNormal); - - nodeNormal0 = nodeNormal0 + faceNormal; - nodeNormal1 = nodeNormal1 + faceNormal; - nodeNormal2 = nodeNormal2 + faceNormal; - vertexArea0 = vertexArea0 + triangleArea; - vertexArea1 = vertexArea1 + triangleArea; - vertexArea2 = vertexArea2 + triangleArea; - - g_triangleNormals[faceID] = (float4)(normalize(faceNormal), 0.f); - g_vertexNormals[nodeIndex0] = (float4)(nodeNormal0, 0.f); - g_vertexNormals[nodeIndex1] = (float4)(nodeNormal1, 0.f); - g_vertexNormals[nodeIndex2] = (float4)(nodeNormal2, 0.f); - g_triangleArea[faceID] = triangleArea; - g_vertexArea[nodeIndex0] = vertexArea0; - g_vertexArea[nodeIndex1] = vertexArea1; - g_vertexArea[nodeIndex2] = vertexArea2; - } -} - -__kernel void -NormalizeNormalsAndAreasKernel( - const unsigned int numNodes, - __global int * g_vertexTriangleCount, - __global float4 * g_vertexNormals, - __global float * g_vertexArea) -{ - if( get_global_id(0) < numNodes ) - { - float4 normal = g_vertexNormals[get_global_id(0)]; - float area = g_vertexArea[get_global_id(0)]; - int numTriangles = g_vertexTriangleCount[get_global_id(0)]; - - float vectorLength = length(normal); - - g_vertexNormals[get_global_id(0)] = normalize(normal); - g_vertexArea[get_global_id(0)] = area/(float)(numTriangles); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl deleted file mode 100644 index ae7599a81..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl +++ /dev/null @@ -1,36 +0,0 @@ -MSTRINGIFY( - -//#define float3 float4 - -__kernel void -updateVelocitiesFromPositionsWithoutVelocitiesKernel( - const int numNodes, - const float isolverdt, - __global float4 * g_vertexPositions, - __global float4 * g_vertexPreviousPositions, - __global int * g_vertexClothIndices, - __global float * g_clothDampingFactor, - __global float4 * g_vertexVelocities, - __global float4 * g_vertexForces) - -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - float3 position = g_vertexPositions[nodeID].xyz; - float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz; - float3 velocity = g_vertexVelocities[nodeID].xyz; - int clothIndex = g_vertexClothIndices[nodeID]; - float dampingFactor = g_clothDampingFactor[clothIndex]; - float velocityCoefficient = (1.f - dampingFactor); - - float3 difference = position - previousPosition; - - velocity = difference*velocityCoefficient*isolverdt; - - g_vertexVelocities[nodeID] = (float4)(velocity, 0.f); - g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl deleted file mode 100644 index a3c945186..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl +++ /dev/null @@ -1,26 +0,0 @@ -MSTRINGIFY( - -//#define float3 float4 - -__kernel void -UpdatePositionsFromVelocitiesKernel( - const int numNodes, - const float solverSDT, - __global float4 * g_vertexVelocities, - __global float4 * g_vertexPreviousPositions, - __global float4 * g_vertexCurrentPosition) -{ - int vertexID = get_global_id(0); - if( vertexID < numNodes ) - { - float3 previousPosition = g_vertexPreviousPositions[vertexID].xyz; - float3 velocity = g_vertexVelocities[vertexID].xyz; - - float3 newPosition = previousPosition + velocity*solverSDT; - - g_vertexCurrentPosition[vertexID] = (float4)(newPosition, 0.f); - g_vertexPreviousPositions[vertexID] = (float4)(newPosition, 0.f); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl deleted file mode 100644 index b7345e379..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl +++ /dev/null @@ -1,45 +0,0 @@ -MSTRINGIFY( - -__kernel void -VSolveLinksKernel( - int startLink, - int numLinks, - float kst, - __global int2 * g_linksVertexIndices, - __global float * g_linksLengthRatio, - __global float4 * g_linksCurrentLength, - __global float * g_vertexInverseMass, - __global float4 * g_vertexVelocity) -{ - int linkID = get_global_id(0) + startLink; - if( get_global_id(0) < numLinks ) - { - int2 nodeIndices = g_linksVertexIndices[linkID]; - int node0 = nodeIndices.x; - int node1 = nodeIndices.y; - - float linkLengthRatio = g_linksLengthRatio[linkID]; - float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz; - - float3 vertexVelocity0 = g_vertexVelocity[node0].xyz; - float3 vertexVelocity1 = g_vertexVelocity[node1].xyz; - - float vertexInverseMass0 = g_vertexInverseMass[node0]; - float vertexInverseMass1 = g_vertexInverseMass[node1]; - - float3 nodeDifference = vertexVelocity0 - vertexVelocity1; - float dotResult = dot(linkCurrentLength, nodeDifference); - float j = -dotResult*linkLengthRatio*kst; - - float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0); - float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1); - - vertexVelocity0 += velocityChange0; - vertexVelocity1 -= velocityChange1; - - g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f); - g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f); - } -} - -); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl index 8f472d121..3d4d61097 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl @@ -6,6 +6,18 @@ float adot3(float4 a, float4 b) return a.x*b.x + a.y*b.y + a.z*b.z; } +float alength3(float4 a) +{ + a.w = 0; + return length(a); +} + +float4 anormalize3(float4 a) +{ + a.w = 0; + return normalize(a); +} + float4 projectOnAxis( float4 v, float4 a ) { return (a*adot3(v, a)); @@ -53,36 +65,35 @@ ApplyForcesKernel( g_vertexVelocity[nodeID] = nodeV; - float4 relativeWindVelocity = nodeV - clothWindVelocity; - float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity); + // Aerodynamics + float4 rel_v = nodeV - clothWindVelocity; + float rel_v_len = alength3(rel_v); + float rel_v2 = dot(rel_v, rel_v); - if( relativeSpeedSquared > epsilon ) + if( rel_v2 > epsilon ) { - // Correct direction of normal relative to wind direction and get dot product - normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f); - float dvNormal = dot(normal, relativeWindVelocity); - if( dvNormal > 0 ) - { - float4 force = (float4)(0.f, 0.f, 0.f, 0.f); - float c0 = area * dvNormal * relativeSpeedSquared / 2.f; - float c1 = c0 * mediumDensity; - force += normal * (-c1 * liftFactor); - force += normalize(relativeWindVelocity)*(-c1 * dragFactor); - - float dtim = solverdt * nodeIM; - float4 forceDTIM = force * dtim; - - float4 nodeFPlusForce = nodeF + force; - - // m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim; - float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim); - - nodeF = nodeFPlusForce; - //if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) - // nodeF = nodeFMinus; + float4 rel_v_nrm = anormalize3(rel_v); + float4 nrm = normal; + nrm = nrm * (dot(nrm, rel_v) < 0 ? -1.f : 1.f); + + float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f); + float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f); + + float n_dot_v = dot(nrm, rel_v_nrm); + + // drag force + if ( dragFactor > 0.f ) + fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm; + + // lift force + // Check angle of attack + // cos(10º) = 0.98480 + if ( 0 < n_dot_v && n_dot_v < 0.98480f) + fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm)); + + nodeF += fDrag + fLift; g_vertexForceAccumulator[nodeID] = nodeF; - } } } } diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl new file mode 100644 index 000000000..c631f1662 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl @@ -0,0 +1,25 @@ +MSTRINGIFY( + +__kernel void +UpdateFixedVertexPositions( + const uint numNodes, + __global int * g_anchorIndex, + __global float4 * g_vertexPositions, + __global float4 * g_anchorPositions GUID_ARG) +{ + unsigned int nodeID = get_global_id(0); + + if( nodeID < numNodes ) + { + int anchorIndex = g_anchorIndex[nodeID]; + float4 position = g_vertexPositions[nodeID]; + + if ( anchorIndex >= 0 ) + { + float4 anchorPosition = g_anchorPositions[anchorIndex]; + g_vertexPositions[nodeID] = anchorPosition; + } + } +} + +); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp index b00795a9a..e18b575cb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp @@ -5,17 +5,10 @@ #include "btSoftBodySolverVertexBuffer_OpenGL.h" #include "BulletSoftBody/btSoftBody.h" -#if (0)//CL_VERSION_1_1 == 1) - //OpenCL 1.1 kernels use float3 -#define MSTRINGIFY(A) #A -static char* OutputToVertexArrayCLString = -#include "OpenCLC/OutputToVertexArray.cl" -#else ////OpenCL 1.0 kernels don't use float3 #define MSTRINGIFY(A) #A static char* OutputToVertexArrayCLString = #include "OpenCLC10/OutputToVertexArray.cl" -#endif //CL_VERSION_1_1 #define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }} @@ -105,8 +98,8 @@ bool btSoftBodySolverOutputCLtoGL::buildShaders() if( m_shadersInitialized ) return true; - outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ); - outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ); + outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl"); + outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl"); if( returnVal ) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h index a077978cb..3420c3f02 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h @@ -45,7 +45,7 @@ public: virtual bool moveToAccelerator(); - virtual bool moveFromAccelerator(); + virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true); }; diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp index 5099c79af..b45cc6bce 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp @@ -20,10 +20,13 @@ subject to the following restrictions: #include "btSoftBodySolver_OpenCL.h" #include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" #include "BulletSoftBody/btSoftBody.h" +#include "BulletSoftBody/btSoftBodyInternals.h" #include "BulletCollision/CollisionShapes/btCapsuleShape.h" +#include "BulletCollision/COllisionShapes/btSphereShape.h" #include "LinearMath/btQuickprof.h" #include + #define BT_SUPPRESS_OPENCL_ASSERTS #ifdef USE_MINICL @@ -36,8 +39,7 @@ subject to the following restrictions: #endif //__APPLE__ #endif//USE_MINICL -#define BT_DEFAULT_WORKGROUPSIZE 128 - +#define BT_DEFAULT_WORKGROUPSIZE 64 #define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }} @@ -45,32 +47,6 @@ subject to the following restrictions: //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it -#if (0)//CL_VERSION_1_1 == 1) - //OpenCL 1.1 kernels use float3 -#define MSTRINGIFY(A) #A -static const char* PrepareLinksCLString = -#include "OpenCLC/PrepareLinks.cl" -static const char* UpdatePositionsFromVelocitiesCLString = -#include "OpenCLC/UpdatePositionsFromVelocities.cl" -static const char* SolvePositionsCLString = -#include "OpenCLC/SolvePositions.cl" -static const char* UpdateNodesCLString = -#include "OpenCLC/UpdateNodes.cl" -static const char* UpdatePositionsCLString = -#include "OpenCLC/UpdatePositions.cl" -static const char* UpdateConstantsCLString = -#include "OpenCLC/UpdateConstants.cl" -static const char* IntegrateCLString = -#include "OpenCLC/Integrate.cl" -static const char* ApplyForcesCLString = -#include "OpenCLC/ApplyForces.cl" -static const char* UpdateNormalsCLString = -#include "OpenCLC/UpdateNormals.cl" -static const char* VSolveLinksCLString = -#include "OpenCLC/VSolveLinks.cl" -static const char* SolveCollisionsAndUpdateVelocitiesCLString = -#include "OpenCLC/SolveCollisionsAndUpdateVelocities.cl" -#else ////OpenCL 1.0 kernels don't use float3 #define MSTRINGIFY(A) #A static const char* PrepareLinksCLString = @@ -89,13 +65,14 @@ static const char* IntegrateCLString = #include "OpenCLC10/Integrate.cl" static const char* ApplyForcesCLString = #include "OpenCLC10/ApplyForces.cl" +static const char* UpdateFixedVertexPositionsCLString = +#include "OpenCLC10/UpdateFixedVertexPositions.cl" static const char* UpdateNormalsCLString = #include "OpenCLC10/UpdateNormals.cl" static const char* VSolveLinksCLString = #include "OpenCLC10/VSolveLinks.cl" static const char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" -#endif //CL_VERSION_1_1 btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) : @@ -141,18 +118,42 @@ bool btSoftBodyVertexDataOpenCL::moveToAccelerator() return success; } -bool btSoftBodyVertexDataOpenCL::moveFromAccelerator() +bool btSoftBodyVertexDataOpenCL::moveFromAccelerator(bool bCopy, bool bCopyMinimum) { bool success = true; - success = success && m_clClothIdentifier.moveFromGPU(); - success = success && m_clVertexPosition.moveFromGPU(); - success = success && m_clVertexPreviousPosition.moveFromGPU(); - success = success && m_clVertexVelocity.moveFromGPU(); - success = success && m_clVertexForceAccumulator.moveFromGPU(); - success = success && m_clVertexNormal.moveFromGPU(); - success = success && m_clVertexInverseMass.moveFromGPU(); - success = success && m_clVertexArea.moveFromGPU(); - success = success && m_clVertexTriangleCount.moveFromGPU(); + + if (!bCopy) + { + success = success && m_clClothIdentifier.moveFromGPU(); + success = success && m_clVertexPosition.moveFromGPU(); + success = success && m_clVertexPreviousPosition.moveFromGPU(); + success = success && m_clVertexVelocity.moveFromGPU(); + success = success && m_clVertexForceAccumulator.moveFromGPU(); + success = success && m_clVertexNormal.moveFromGPU(); + success = success && m_clVertexInverseMass.moveFromGPU(); + success = success && m_clVertexArea.moveFromGPU(); + success = success && m_clVertexTriangleCount.moveFromGPU(); + } + else + { + if (bCopyMinimum) + { + success = success && m_clVertexPosition.copyFromGPU(); + success = success && m_clVertexNormal.copyFromGPU(); + } + else + { + success = success && m_clClothIdentifier.copyFromGPU(); + success = success && m_clVertexPosition.copyFromGPU(); + success = success && m_clVertexPreviousPosition.copyFromGPU(); + success = success && m_clVertexVelocity.copyFromGPU(); + success = success && m_clVertexForceAccumulator.copyFromGPU(); + success = success && m_clVertexNormal.copyFromGPU(); + success = success && m_clVertexInverseMass.copyFromGPU(); + success = success && m_clVertexArea.copyFromGPU(); + success = success && m_clVertexTriangleCount.copyFromGPU(); + } + } if( success ) m_onGPU = true; @@ -160,9 +161,6 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator() return success; } - - - btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx) :m_cqCommandQue(queue), m_clLinks( queue, ctx, &m_links, false ), @@ -602,11 +600,12 @@ void btSoftBodyTriangleDataOpenCL::generateBatches() -btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) : +btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) : m_linkData(queue, ctx), m_vertexData(queue, ctx), m_triangleData(queue, ctx), - m_clFunctions(queue, ctx), + m_defaultCLFunctions(queue, ctx), + m_currentCLFunctions(&m_defaultCLFunctions), m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ), m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ), m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ), @@ -617,10 +616,14 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ), m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ), m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ), + m_clAnchorPosition( queue, ctx, &m_anchorPosition, true ), + m_clAnchorIndex( queue, ctx, &m_anchorIndex, true), m_cqCommandQue( queue ), m_cxMainContext(ctx), - m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE) + m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE), + m_bUpdateAnchoredNodePos(bUpdateAchchoredNodePos) { + // Initial we will clearly need to update solver constants // For now this is global for the cloths linked with this solver - we should probably make this body specific // for performance in future once we understand more clearly when constants need to be updated @@ -643,6 +646,7 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex m_normalizeNormalsAndAreasKernel = 0; m_outputToVertexArrayKernel = 0; m_applyForcesKernel = 0; + m_updateFixedVertexPositionsKernel = 0; } btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver() @@ -666,14 +670,16 @@ void btOpenCLSoftBodySolver::releaseKernels() RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel ); RELEASE_CL_KERNEL( m_outputToVertexArrayKernel ); RELEASE_CL_KERNEL( m_applyForcesKernel ); + RELEASE_CL_KERNEL( m_updateFixedVertexPositionsKernel ); m_shadersInitialized = false; } -void btOpenCLSoftBodySolver::copyBackToSoftBodies() +void btOpenCLSoftBodySolver::copyBackToSoftBodies(bool bMove) { + // Move the vertex data back to the host first - m_vertexData.moveFromAccelerator(); + m_vertexData.moveFromAccelerator(!bMove); // Loop over soft bodies, copying all the vertex positions back for each body in turn for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) @@ -688,17 +694,18 @@ void btOpenCLSoftBodySolver::copyBackToSoftBodies() for( int vertex = 0; vertex < numVertices; ++vertex ) { using Vectormath::Aos::Point3; - Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] ); + Point3 vertexPosition( m_vertexData.getVertexPositions()[firstVertex + vertex] ); + Point3 normal(m_vertexData.getNormal(firstVertex + vertex)); softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() ); softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() ); softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() ); - softBody->m_nodes[vertex].m_n.setX( vertexPosition.getX() ); - softBody->m_nodes[vertex].m_n.setY( vertexPosition.getY() ); - softBody->m_nodes[vertex].m_n.setZ( vertexPosition.getZ() ); + softBody->m_nodes[vertex].m_n.setX( normal.getX() ); + softBody->m_nodes[vertex].m_n.setY( normal.getY() ); + softBody->m_nodes[vertex].m_n.setZ( normal.getZ() ); } - } + } } // btOpenCLSoftBodySolver::copyBackToSoftBodies void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate ) @@ -710,7 +717,10 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof getTriangleData().clear(); getLinkData().clear(); m_softBodySet.resize(0); + m_anchorIndex.clear(); + int maxPiterations = 0; + int maxViterations = 0; for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) { @@ -759,6 +769,8 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof float vertexInverseMass = softBody->m_nodes[vertex].m_im; desc.setInverseMass(vertexInverseMass); getVertexData().setVertexAt( desc, firstVertex + vertex ); + + m_anchorIndex.push_back(-1.0); } // Copy triangles similarly @@ -805,13 +817,76 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof newSoftBody->setMaxTriangles( maxTriangles ); newSoftBody->setFirstLink( firstLink ); newSoftBody->setNumLinks( numLinks ); + + // Find maximum piterations and viterations + int piterations = softBody->m_cfg.piterations; + + if ( piterations > maxPiterations ) + maxPiterations = piterations; + + int viterations = softBody->m_cfg.viterations; + + if ( viterations > maxViterations ) + maxViterations = viterations; + + // zero mass + for( int vertex = 0; vertex < numVertices; ++vertex ) + { + if ( softBody->m_nodes[vertex].m_im == 0 ) + { + AnchorNodeInfoCL nodeInfo; + nodeInfo.clVertexIndex = firstVertex + vertex; + nodeInfo.pNode = &softBody->m_nodes[vertex]; + + m_anchorNodeInfoArray.push_back(nodeInfo); + } + } + + // anchor position + if ( numVertices > 0 ) + { + for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ ) + { + btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node; + btSoftBody::Node* firstNode = &softBody->m_nodes[0]; + + AnchorNodeInfoCL nodeInfo; + nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode); + nodeInfo.pNode = anchorNode; + + m_anchorNodeInfoArray.push_back(nodeInfo); + } + } } + + m_anchorPosition.clear(); + m_anchorPosition.resize(m_anchorNodeInfoArray.size()); - + for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ ) + { + const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode]; + m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode; + getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f; + } + updateConstants(0.f); + // set position and velocity iterations + setNumberOfPositionIterations(maxPiterations); + setNumberOfVelocityIterations(maxViterations); + // set wind velocity + m_perClothWindVelocity.resize( m_softBodySet.size() ); + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); + m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); + } + + m_clPerClothWindVelocity.changedOnCPU(); + + // generate batches m_linkData.generateBatches(); m_triangleData.generateBatches(); @@ -861,7 +936,6 @@ void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices ) void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) { - cl_int ciErrNum; ciErrNum = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices); @@ -882,7 +956,6 @@ void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles ) { - cl_int ciErrNum; ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle); ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles); @@ -948,16 +1021,41 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath } } +void btOpenCLSoftBodySolver::updateFixedVertexPositions() +{ + // Ensure data is on accelerator + m_vertexData.moveToAccelerator(); + m_clAnchorPosition.moveToGPU(); + m_clAnchorIndex.moveToGPU(); + + cl_int ciErrNum ; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,1, sizeof(cl_mem), &m_clAnchorIndex.m_buffer); + ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(m_updateFixedVertexPositionsKernel,3, sizeof(cl_mem), &m_clAnchorPosition.m_buffer); + + size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize); + if (numWorkItems) + { + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateFixedVertexPositionsKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize, 0,0,0); + if( ciErrNum != CL_SUCCESS ) + { + btAssert( 0 && "enqueueNDRangeKernel(m_updateFixedVertexPositionsKernel)"); + } + } + +} + void btOpenCLSoftBodySolver::applyForces( float solverdt ) { - // Ensure data is on accelerator m_vertexData.moveToAccelerator(); m_clPerClothAcceleration.moveToGPU(); m_clPerClothLiftFactor.moveToGPU(); m_clPerClothDragFactor.moveToGPU(); m_clPerClothMediumDensity.moveToGPU(); - m_clPerClothWindVelocity.moveToGPU(); + m_clPerClothWindVelocity.moveToGPU(); cl_int ciErrNum ; int numVerts = m_vertexData.getNumVertices(); @@ -976,6 +1074,7 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) ciErrNum = clSetKernelArg(m_applyForcesKernel,11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer); ciErrNum = clSetKernelArg(m_applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); ciErrNum = clSetKernelArg(m_applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize); if (numWorkItems) { @@ -993,8 +1092,6 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) */ void btOpenCLSoftBodySolver::integrate( float solverdt ) { - - // Ensure data is on accelerator m_vertexData.moveToAccelerator(); @@ -1186,7 +1283,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); } - // Solve drift + // Solve position for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) { for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) @@ -1210,7 +1307,6 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) // Kernel dispatches void btOpenCLSoftBodySolver::prepareLinks() { - cl_int ciErrNum; int numLinks = m_linkData.getNumLinks(); ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks); @@ -1231,7 +1327,6 @@ void btOpenCLSoftBodySolver::prepareLinks() void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) { - cl_int ciErrNum; int numVerts = m_vertexData.getNumVertices(); ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts); @@ -1251,7 +1346,6 @@ void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti ) { - cl_int ciErrNum; ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startLink); ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numLinks); @@ -1275,7 +1369,6 @@ void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst ) { - cl_int ciErrNum; ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 0, sizeof(int), &startLink); ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 1, sizeof(int), &numLinks); @@ -1297,7 +1390,6 @@ void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) { - cl_int ciErrNum; int numVerts = m_vertexData.getNumVertices(); ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts); @@ -1322,7 +1414,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) { - cl_int ciErrNum; int numVerts = m_vertexData.getNumVertices(); ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts); @@ -1347,7 +1438,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( flo void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt ) { - // Copy kernel parameters to GPU m_vertexData.moveToAccelerator(); m_clPerClothFriction.moveToGPU(); @@ -1355,7 +1445,6 @@ void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt m_clPerClothCollisionObjects.moveToGPU(); m_clCollisionObjectDetails.moveToGPU(); - cl_int ciErrNum; int numVerts = m_vertexData.getNumVertices(); ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts); @@ -1446,7 +1535,7 @@ void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody -cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ) +cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ,const char* orgSrcFileNameForCaching) { printf("compiling kernelName: %s ",kernelName); cl_kernel kernel=0; @@ -1552,24 +1641,29 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep ) // Ensure that the DX11 ones are moved off the device so they will be updated correctly m_clCollisionObjectDetails.changedOnCPU(); m_clPerClothCollisionObjects.changedOnCPU(); - m_collisionObjectDetails.clear(); - - { - BT_PROFILE("perClothWindVelocity"); - // Fill the force arrays with current acceleration data etc - m_perClothWindVelocity.resize( m_softBodySet.size() ); - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); - - m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); - } - } - { - BT_PROFILE("changedOnCPU"); - m_clPerClothWindVelocity.changedOnCPU(); - } + m_collisionObjectDetails.clear(); + if ( m_bUpdateAnchoredNodePos ) + { + // In OpenCL cloth solver, if softbody node has zero inverse mass(infinite mass) or anchor attached, + // we need to update the node position in case the node or anchor is animated externally. + // If there is no such node, we can eliminate the unnecessary CPU-to-GPU data trasferring. + for ( int i = 0; i < m_anchorNodeInfoArray.size(); i++ ) + { + const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[i]; + btSoftBody::Node* node = anchorNodeInfo.pNode; + + using Vectormath::Aos::Point3; + Point3 pos((float)node->m_x.getX(), (float)node->m_x.getY(), (float)node->m_x.getZ()); + m_anchorPosition[i] = pos; + } + + if ( m_anchorNodeInfoArray.size() > 0 ) + m_clAnchorPosition.changedOnCPU(); + + updateFixedVertexPositions(); + } + { BT_PROFILE("applyForces"); // Apply forces that we know about to the cloths @@ -1601,7 +1695,7 @@ static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform ) void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound ) { - float scalarMargin = this->getSoftBody()->getCollisionShape()->getMargin(); + float scalarMargin = (float)getSoftBody()->getCollisionShape()->getMargin(); btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin ); m_softBody->m_bounds[0] = lowerBound - vectorMargin; m_softBody->m_bounds[1] = upperBound + vectorMargin; @@ -1641,7 +1735,8 @@ void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollision newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity()); m_collisionObjectDetails.push_back( newCollisionShapeDescription ); - } else { + } + else { printf("Unsupported collision shape type\n"); //btAssert(0 && "Unsupported collision shape type\n"); } @@ -1688,32 +1783,36 @@ bool btOpenCLSoftBodySolver::checkInitialized() bool btOpenCLSoftBodySolver::buildShaders() { + if( m_shadersInitialized ) + return true; + + const char* additionalMacros=""; + // Ensure current kernels are released first releaseKernels(); - if( m_shadersInitialized ) - return true; - - m_clFunctions.clearKernelCompilationFailures(); + m_currentCLFunctions->clearKernelCompilationFailures(); - m_prepareLinksKernel = m_clFunctions.compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel" ); - m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" ); - m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" ); - m_vSolveLinksKernel = m_clFunctions.compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" ); - m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" ); - m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ); - m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ); - m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ); - m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ); + m_prepareLinksKernel = m_currentCLFunctions->compileCLKernelFromString( PrepareLinksCLString, "PrepareLinksKernel",additionalMacros,"OpenCLC10/PrepareLinks.cl" ); + m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl"); + m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel",additionalMacros,"OpenCLC10/SolvePositions.cl" ); + m_vSolveLinksKernel = m_currentCLFunctions->compileCLKernelFromString( VSolveLinksCLString, "VSolveLinksKernel" ,additionalMacros,"OpenCLC10/VSolveLinks.cl"); + m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdateNodes.cl"); + m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ,additionalMacros,"OpenCLC10/UpdatePositions.cl"); + m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ,additionalMacros,"OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"); + m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ,additionalMacros,"OpenCLC10/Integrate.cl"); + m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ,additionalMacros,"OpenCLC10/ApplyForces.cl"); + m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" , additionalMacros, "OpenCLC10/UpdateFixedVertexPositions.cl"); // TODO: Rename to UpdateSoftBodies - m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" ); - m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" ); - m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" ); + m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl"); + m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl"); + m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel" ,additionalMacros,"OpenCLC10/UpdateNormals.cl"); - if( m_clFunctions.getKernelCompilationFailures()==0 ) + if( m_currentCLFunctions->getKernelCompilationFailures()==0 ) m_shadersInitialized = true; return m_shadersInitialized; } + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h index d1c4940d3..4c9c8e90e 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h @@ -20,6 +20,7 @@ subject to the following restrictions: #include "vectormath/vmInclude.h" #include "BulletSoftBody/btSoftBodySolvers.h" +#include "BulletSoftBody/btSoftBody.h" #include "btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverLinkData_OpenCL.h" #include "btSoftBodySolverVertexData_OpenCL.h" @@ -33,6 +34,7 @@ protected: int m_kernelCompilationFailures; + public: CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) : m_cqCommandQue( cqCommandQue ), @@ -49,7 +51,7 @@ public: /** * Compile a compute shader kernel from a string and return the appropriate cl_kernel object. */ - cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros = "" ); + virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching); void clearKernelCompilationFailures() { @@ -285,7 +287,8 @@ public: protected: - CLFunctions m_clFunctions; + CLFunctions m_defaultCLFunctions; + CLFunctions* m_currentCLFunctions; /** Variable to define whether we need to update solver constants on the next iteration */ bool m_updateSolverConstants; @@ -350,7 +353,20 @@ protected: btAlignedObjectArray< float > m_perClothFriction; btOpenCLBuffer< float > m_clPerClothFriction; + // anchor node info + struct AnchorNodeInfoCL + { + int clVertexIndex; + btSoftBody::Node* pNode; + }; + btAlignedObjectArray m_anchorNodeInfoArray; + btAlignedObjectArray m_anchorPosition; + btOpenCLBuffer m_clAnchorPosition; + btAlignedObjectArray m_anchorIndex; + btOpenCLBuffer m_clAnchorIndex; + + bool m_bUpdateAnchoredNodePos; cl_kernel m_prepareLinksKernel; cl_kernel m_solvePositionsFromLinksKernel; @@ -368,6 +384,7 @@ protected: cl_kernel m_outputToVertexArrayKernel; cl_kernel m_applyForcesKernel; + cl_kernel m_updateFixedVertexPositionsKernel; cl_command_queue m_cqCommandQue; cl_context m_cxMainContext; @@ -394,6 +411,8 @@ protected: virtual void applyForces( float solverdt ); + void updateFixedVertexPositions(); + /** * Integrate motion on the solver. */ @@ -430,7 +449,7 @@ protected: void releaseKernels(); public: - btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx); + btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false); virtual ~btOpenCLSoftBodySolver(); @@ -456,7 +475,7 @@ public: virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false); - virtual void copyBackToSoftBodies(); + virtual void copyBackToSoftBodies(bool bMove = true); virtual void solveConstraints( float solverdt ); @@ -474,7 +493,15 @@ public: { return m_defaultWorkGroupSize; } - + + void setCLFunctions(CLFunctions* funcs) + { + if (funcs) + m_currentCLFunctions = funcs; + else + m_currentCLFunctions = &m_defaultCLFunctions; + } + }; // btOpenCLSoftBodySolver diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp index b320926c5..e9c6f7de4 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp @@ -33,32 +33,6 @@ static const size_t workGroupSize = GROUP_SIZE; //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it -#if (0)//CL_VERSION_1_1 == 1) - //OpenCL 1.1 kernels use float3 -#define MSTRINGIFY(A) #A -static const char* UpdatePositionsFromVelocitiesCLString = -#include "OpenCLC/UpdatePositionsFromVelocities.cl" -static const char* SolvePositionsCLString = -#include "OpenCLC/SolvePositionsSIMDBatched.cl" -static const char* UpdateNodesCLString = -#include "OpenCLC/UpdateNodes.cl" -static const char* UpdatePositionsCLString = -#include "OpenCLC/UpdatePositions.cl" -static const char* UpdateConstantsCLString = -#include "OpenCLC/UpdateConstants.cl" -static const char* IntegrateCLString = -#include "OpenCLC/Integrate.cl" -static const char* ApplyForcesCLString = -#include "OpenCLC/ApplyForces.cl" -static const char* UpdateNormalsCLString = -#include "OpenCLC/UpdateNormals.cl" -static const char* VSolveLinksCLString = -#include "OpenCLC/VSolveLinks.cl" -static const char* SolveCollisionsAndUpdateVelocitiesCLString = -#include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl" -static const char* OutputToVertexArrayCLString = -#include "OpenCLC/OutputToVertexArray.cl" -#else ////OpenCL 1.0 kernels don't use float3 #define MSTRINGIFY(A) #A static const char* UpdatePositionsFromVelocitiesCLString = @@ -75,6 +49,8 @@ static const char* IntegrateCLString = #include "OpenCLC10/Integrate.cl" static const char* ApplyForcesCLString = #include "OpenCLC10/ApplyForces.cl" +static const char* UpdateFixedVertexPositionsCLString = +#include "OpenCLC10/UpdateFixedVertexPositions.cl" static const char* UpdateNormalsCLString = #include "OpenCLC10/UpdateNormals.cl" static const char* VSolveLinksCLString = @@ -83,7 +59,6 @@ static const char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl" static const char* OutputToVertexArrayCLString = #include "OpenCLC10/OutputToVertexArray.cl" -#endif //CL_VERSION_1_1 @@ -194,8 +169,8 @@ bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator() -btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx) : - btOpenCLSoftBodySolver( queue, ctx ), +btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) : + btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ), m_linkData(queue, ctx) { // Initial we will clearly need to update solver constants @@ -213,14 +188,17 @@ btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware() void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate) { - if( forceUpdate|| m_softBodySet.size() != softBodies.size() ) + if( forceUpdate || m_softBodySet.size() != softBodies.size() ) { // Have a change in the soft body set so update, reloading all the data getVertexData().clear(); getTriangleData().clear(); getLinkData().clear(); m_softBodySet.resize(0); + m_anchorIndex.clear(); + int maxPiterations = 0; + int maxViterations = 0; for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) { @@ -238,8 +216,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); - - + // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time m_perClothFriction.push_back( softBody->getFriction() ); m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) ); @@ -253,6 +230,7 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody // Allocate space for new vertices in all the vertex arrays getVertexData().createVertices( numVertices, softBodyIndex, maxVertices ); + int firstTriangle = getTriangleData().getNumTriangles(); int numTriangles = softBody->m_faces.size(); int maxTriangles = numTriangles; @@ -272,6 +250,8 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody float vertexInverseMass = softBody->m_nodes[vertex].m_im; desc.setInverseMass(vertexInverseMass); getVertexData().setVertexAt( desc, firstVertex + vertex ); + + m_anchorIndex.push_back(-1.0); } // Copy triangles similarly @@ -318,17 +298,78 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody newSoftBody->setMaxTriangles( maxTriangles ); newSoftBody->setFirstLink( firstLink ); newSoftBody->setNumLinks( numLinks ); + + // Find maximum piterations and viterations + int piterations = softBody->m_cfg.piterations; + + if ( piterations > maxPiterations ) + maxPiterations = piterations; + + int viterations = softBody->m_cfg.viterations; + + if ( viterations > maxViterations ) + maxViterations = viterations; + + // zero mass + for( int vertex = 0; vertex < numVertices; ++vertex ) + { + if ( softBody->m_nodes[vertex].m_im == 0 ) + { + AnchorNodeInfoCL nodeInfo; + nodeInfo.clVertexIndex = firstVertex + vertex; + nodeInfo.pNode = &softBody->m_nodes[vertex]; + + m_anchorNodeInfoArray.push_back(nodeInfo); + } + } + + // anchor position + if ( numVertices > 0 ) + { + for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ ) + { + btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node; + btSoftBody::Node* firstNode = &softBody->m_nodes[0]; + + AnchorNodeInfoCL nodeInfo; + nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode); + nodeInfo.pNode = anchorNode; + + m_anchorNodeInfoArray.push_back(nodeInfo); + } + } } + m_anchorPosition.clear(); + m_anchorPosition.resize(m_anchorNodeInfoArray.size()); - + for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ ) + { + const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode]; + m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode; + getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f; + } + updateConstants(0.f); + // set position and velocity iterations + setNumberOfPositionIterations(maxPiterations); + setNumberOfVelocityIterations(maxViterations); + // set wind velocity + m_perClothWindVelocity.resize( m_softBodySet.size() ); + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); + m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); + } + + m_clPerClothWindVelocity.changedOnCPU(); + + // generate batches m_linkData.generateBatches(); m_triangleData.generateBatches(); - // Build the shaders to match the batching parameters buildShaders(); } @@ -509,7 +550,9 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders() if( m_shadersInitialized ) return true; - m_clFunctions.clearKernelCompilationFailures(); + const char* additionalMacros=""; + + m_currentCLFunctions->clearKernelCompilationFailures(); char *wavefrontMacros = new char[256]; @@ -522,22 +565,23 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders() WAVEFRONT_BLOCK_MULTIPLIER, WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize()); - m_updatePositionsFromVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", "" ); - m_solvePositionsFromLinksKernel = m_clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ); - m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", "" ); - m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", "" ); - m_integrateKernel = m_clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" ); - m_applyForcesKernel = m_clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" ); - m_solveCollisionsAndUpdateVelocitiesKernel = m_clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" ); + m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl"); + m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl"); + m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl"); + m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl"); + m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl"); + m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" ); + m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl"); + m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"); // TODO: Rename to UpdateSoftBodies - m_resetNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" ); - m_normalizeNormalsAndAreasKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", "" ); - m_updateSoftBodiesKernel = m_clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", "" ); + m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl"); + m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl"); + m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl"); delete [] wavefrontMacros; - if( m_clFunctions.getKernelCompilationFailures()==0) + if( m_currentCLFunctions->getKernelCompilationFailures()==0) { m_shadersInitialized = true; } diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h index 507a4d354..9dda3ed97 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h @@ -39,7 +39,7 @@ protected: - bool buildShaders(); + virtual bool buildShaders(); void updateConstants( float timeStep ); @@ -59,7 +59,7 @@ protected: ///////////////////////////////////// public: - btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx); + btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context ctx, bool bUpdateAchchoredNodePos = false); virtual ~btOpenCLSoftBodySolverSIMDAware(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h index eb4e98b08..e7d715b7b 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h @@ -561,10 +561,14 @@ public: } /** - * Move data from host memory from the accelerator. + * Move data to host memory from the accelerator if bCopy is false. + * If bCopy is true, copy data to host memory from the accelerator so that data + * won't be moved to accelerator when moveToAccelerator() is called next time. + * If bCopyMinimum is true, only vertex position and normal are copied. + * bCopyMinimum will be meaningful only if bCopy is true. * The CPU version will always return that it has moved it. */ - virtual bool moveFromAccelerator() + virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true) { return true; }