added OpenCL cloth demo, contributed by AMD.

updated GpuSoftBodySolvers
updated DirectCompute cloth demo
This commit is contained in:
erwin.coumans
2010-08-14 00:56:17 +00:00
parent 40958f2b4a
commit 4f9b450200
72 changed files with 7524 additions and 843 deletions

View File

@@ -14,7 +14,7 @@ IF(BUILD_CPU_DEMOS)
CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo
DoublePrecisionDemo ConcaveDemo CollisionDemo DoublePrecisionDemo ConcaveDemo CollisionDemo
ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo
MultiMaterialDemo SerializeDemo InternalEdgeDemo MultiMaterialDemo SerializeDemo InternalEdgeDemo
) )
ELSE() ELSE()
SET(SharedDemoSubdirs SET(SharedDemoSubdirs
@@ -28,6 +28,7 @@ ENDIF()
MultiThreadedDemo MultiThreadedDemo
VectorAdd_OpenCL VectorAdd_OpenCL
ParticlesOpenCL ParticlesOpenCL
OpenCLClothDemo
) )
ELSE (USE_GLUT) ELSE (USE_GLUT)

View File

@@ -1,6 +1,6 @@
/* /*
Bullet Continuous Collision Detection and Physics Library Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty. This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software. In no event will the authors be held liable for any damages arising from the use of this software.
@@ -13,6 +13,8 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
#ifndef BT_DIRECT_COMPUTE_SUPPORT_HPP #ifndef BT_DIRECT_COMPUTE_SUPPORT_HPP
#define BT_DIRECT_COMPUTE_SUPPORT_HPP #define BT_DIRECT_COMPUTE_SUPPORT_HPP

View File

@@ -1,3 +1,18 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
class cap class cap
{ {

View File

@@ -1,4 +1,22 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <fstream>
#include <iostream>
#include <iomanip>
class piece_of_cloth class piece_of_cloth
{ {
@@ -171,7 +189,8 @@ public:
pd3dImmediateContext->PSSetShaderResources(0,1,&texture2D_view); pd3dImmediateContext->PSSetShaderResources(0,1,&texture2D_view);
pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart ); //pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart );
pd3dImmediateContext->DrawIndexed( ((height-1)*(width-1)*3*2), 0, ( UINT )pSubset->VertexStart );
} }
SAFE_RELEASE(pd3dImmediateContext); SAFE_RELEASE(pd3dImmediateContext);
@@ -246,7 +265,7 @@ public:
//unsigned int indices[] = {0,1,2, 1,3,2}; //unsigned int indices[] = {0,1,2, 1,3,2};
unsigned int* indices = new unsigned int[width*3*2+2 + height*width*3*2]; unsigned int* indices = new unsigned int[(height-1)*(width-1)*3*2];
for(int y = 0; y < height-1; y++) for(int y = 0; y < height-1; y++)
{ {
@@ -265,7 +284,8 @@ public:
} }
} }
bufferDesc.ByteWidth = sizeof(unsigned int)*(width*3*2+2 + height*width*3*2);
bufferDesc.ByteWidth = sizeof(unsigned int)*((height-1)*(width-1)*3*2);
bufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; bufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
InitData.pSysMem = indices; InitData.pSysMem = indices;

View File

@@ -32,18 +32,15 @@ class btDX11SIMDAwareSoftBodySolver;
#include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btSoftBodySolvers.h"
#include "BulletSoftBody/btDefaultSoftBodySolver.h" #include "BulletSoftBody/btDefaultSoftBodySolver.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h"
//#include "BulletSoftBody/Solvers/CPU/btAcceleratedSoftBody_CPUVertexSolver.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h"
//#include "BulletSoftBody/Solvers/DX11/btAcceleratedSoftBody_DX11SIMDAwareSolver.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h"
//#include "BulletSoftBody/btAcceleratedSoftBody_DXVertexBuffers.h"
#include "BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h" #include "BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h"
//#define USE_SIMDAWARE_SOLVER #define USE_SIMDAWARE_SOLVER
#define USE_GPU_SOLVER //#define USE_GPU_SOLVER
//#define USE_VERTEX_SOLVER
#define USE_GPU_COPY #define USE_GPU_COPY
const int numFlags = 2; const int numFlags = 5;
const int clothWidth = 40; const int clothWidth = 40;
const int clothHeight = 60;//60; const int clothHeight = 60;//60;
float _windAngle = 1.0;//0.4; float _windAngle = 1.0;//0.4;
@@ -206,6 +203,7 @@ btSoftRigidDynamicsWorld* m_dynamicsWorld;
btDefaultSoftBodySolver *g_defaultSolver = NULL; btDefaultSoftBodySolver *g_defaultSolver = NULL;
btCPUSoftBodySolver *g_cpuSolver = NULL; btCPUSoftBodySolver *g_cpuSolver = NULL;
btDX11SoftBodySolver *g_dx11Solver = NULL; btDX11SoftBodySolver *g_dx11Solver = NULL;
btDX11SIMDAwareSoftBodySolver *g_dx11SIMDSolver = NULL;
btSoftBodySolver *g_solver = NULL; btSoftBodySolver *g_solver = NULL;
@@ -454,12 +452,17 @@ void initBullet(void)
#ifdef USE_GPU_SOLVER #ifdef USE_GPU_SOLVER
g_dx11Solver = new btDX11SoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() ); g_dx11Solver = new btDX11SoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() );
g_solver = g_dx11Solver; g_solver = g_dx11Solver;
#else
#ifdef USE_SIMDAWARE_SOLVER
g_dx11SIMDSolver = new btDX11SIMDAwareSoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() );
g_solver = g_dx11SIMDSolver;
#else #else
g_cpuSolver = new btCPUSoftBodySolver; g_cpuSolver = new btCPUSoftBodySolver;
g_solver = g_cpuSolver; g_solver = g_cpuSolver;
//g_defaultSolver = new btDefaultSoftBodySolver; //g_defaultSolver = new btDefaultSoftBodySolver;
//g_solver = g_defaultSolver; //g_solver = g_defaultSolver;
#endif #endif
#endif
@@ -1260,6 +1263,9 @@ void CALLBACK OnD3D11DestroyDevice( void* pUserContext )
delete g_cpuSolver; delete g_cpuSolver;
if( g_dx11Solver ) if( g_dx11Solver )
delete g_dx11Solver; delete g_dx11Solver;
if( g_dx11SIMDSolver )
delete g_dx11SIMDSolver;
for(int i=0; i< m_collisionShapes.size(); i++) for(int i=0; i< m_collisionShapes.size(); i++)
delete m_collisionShapes[i]; delete m_collisionShapes[i];

View File

@@ -1,3 +1,18 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
class cylinder class cylinder
{ {

View File

@@ -0,0 +1,102 @@
# Build script for the AMD (ATI Stream SDK) flavour of the OpenCL cloth demo.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL
)

ADD_DEFINITIONS(-DUSE_AMD_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_AMD)

# Locate the ATI Stream SDK headers and import libraries.
# NOTE(review): the "==ATISTREAMSDKROOT=" spelling is presumably a placeholder
# rewritten when distributable MSVC project files are generated - confirm.
IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
	INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include )
	IF (CMAKE_CL_64)
		SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86_64 )
	ELSE(CMAKE_CL_64)
		SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86 )
	ENDIF(CMAKE_CL_64)
ELSE()
	INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include )
	IF (CMAKE_CL_64)
		SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86_64 )
	ELSE(CMAKE_CL_64)
		SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86 )
	ENDIF(CMAKE_CL_64)
ENDIF()

# Pick the GLEW import library that matches the compiler's bitness.
IF (CMAKE_CL_64)
	SET(CMAK_GLEW_LIBRARY
		${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib )
ELSE(CMAKE_CL_64)
	SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib )
ENDIF(CMAKE_CL_64)

# The executable only exists when GLUT is available, so every command that
# names the target lives inside this block; referring to a target that was
# never created is a configure-time error.
IF (USE_GLUT)
	LINK_LIBRARIES(
		OpenGLSupport
		BulletSoftBodySolvers_OpenCL_AMD
		BulletSoftBodySolvers_CPU
		BulletMultiThreaded
		BulletSoftBody
		BulletDynamics
		BulletCollision
		LinearMath
		${GLUT_glut_LIBRARY}
		${OPENGL_gl_LIBRARY}
		${OPENGL_glu_LIBRARY}
		${CMAK_GLEW_LIBRARY}
		${CMAK_ATISTREAMSDK_LIBPATH}/OpenCL.lib
	)

	ADD_EXECUTABLE(AppOpenCLClothDemo_AMD
		../cl_cloth_demo.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
		../gl_win.cpp
		../clstuff.cpp
		../bmpLoader.cpp
		../bmpLoader.h
		../clstuff.h
		../gl_win.h
	)

	# On Windows, copy the GLUT/GLEW runtime DLLs next to the executable.
	IF(WIN32)
		IF (CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ELSE(CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ENDIF(CMAKE_CL_64)
	ENDIF(WIN32)

	# Copy the flag bitmaps into the build directory after each build.
	ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
	)

	IF (UNIX)
		TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_AMD pthread)
	ENDIF(UNIX)
ENDIF (USE_GLUT)

View File

@@ -0,0 +1,60 @@
# Build script for the Apple OpenCL flavour of the OpenCL cloth demo.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL
)

# Locate the system OpenCL framework on OS X.
IF (APPLE)
	FIND_LIBRARY(OPENCL_LIBRARY OpenCL DOC "OpenCL lib for OSX")
	FIND_PATH(OPENCL_INCLUDE_DIR OpenCL/cl.h DOC "Include for OpenCL on OSX")
ENDIF (APPLE)

# The executable only exists when GLUT is available, so every command that
# names the target lives inside this block; referring to a target that was
# never created is a configure-time error.
IF (USE_GLUT)
	LINK_LIBRARIES(
		OpenGLSupport
		BulletSoftBodySolvers_OpenCL_Apple
		BulletSoftBodySolvers_CPU
		BulletMultiThreaded
		BulletSoftBody
		BulletDynamics
		BulletCollision
		LinearMath
		${OPENCL_LIBRARY}
		${GLUT_glut_LIBRARY}
		${OPENGL_gl_LIBRARY}
		${OPENGL_glu_LIBRARY}
		${CMAK_GLEW_LIBRARY}
	)

	ADD_EXECUTABLE(AppOpenCLClothDemo_Apple
		../cl_cloth_demo.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
		../gl_win.cpp
		../clstuff.cpp
		../bmpLoader.cpp
		../bmpLoader.h
		../clstuff.h
		../gl_win.h
	)

	# Copy the flag bitmaps into the build directory after each build.
	ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Apple POST_BUILD
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
	)

	IF (UNIX)
		TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Apple pthread)
	ENDIF(UNIX)
ENDIF (USE_GLUT)

View File

@@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLClothDemo", "CLClothDemo.vcproj", "{A61906AF-B5DE-454E-99F6-B653C250D221}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.ActiveCfg = Debug|Win32
{A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.Build.0 = Debug|Win32
{A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.ActiveCfg = Release|Win32
{A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,233 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="CLClothDemo"
ProjectGUID="{A61906AF-B5DE-454E-99F6-B653C250D221}"
RootNamespace="CLClothDemo"
Keyword="Win32Proj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="S:\SVN\GpuClothAMD\Bullet\BulletTrunk\Glut;&quot;C:\Program Files (x86)\ATI Stream\include&quot;;..\..\..\projects\physics\Bullet\BulletTrunk\src;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkLibraryDependencies="false"
AdditionalDependencies="glew32.lib OpenCL.lib ..\..\lib\Debug\BulletDynamics.lib ..\..\lib\Debug\BulletCollision.lib ..\..\lib\Debug\LinearMath.lib ..\..\lib\Debug\BulletSoftBody.lib ..\..\lib\Debug\BulletSoftBodySolvers_CPU.lib ..\..\lib\Debug\BulletSoftBodySolvers_OpenCL.lib"
LinkIncremental="1"
AdditionalLibraryDirectories="&quot;C:\Program Files (x86)\ATI Stream\lib\x86&quot;;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\Glut;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\lib\Debug"
GenerateDebugInformation="true"
SubSystem="1"
ImportLibrary="S:\SVN\GpuClothAMD\Bullet\BulletTrunk\Demos\DX11ClothDemo\Debug\AppDX11ClothDemo.lib"
TargetMachine="0"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="S:\SVN\GpuClothAMD\Bullet\BulletTrunk\Glut;&quot;C:\Program Files (x86)\ATI Stream\include&quot;;..\..\..\projects\physics\Bullet\BulletTrunk\src;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="glew32.lib OpenCL.lib BulletSoftBody.lib BulletDynamics.lib BulletCollision.lib LinearMath.lib BulletSoftBodySolvers_CPU.lib BulletSoftBodySolvers_OpenCL.lib"
LinkIncremental="1"
AdditionalLibraryDirectories="..\Bullet\BulletTrunk\lib\Release\;&quot;C:\Program Files (x86)\ATI Stream\lib\x86&quot;;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\Glut;S:\SVN\GpuClothAMD\Bullet\BulletTrunk\lib\Release"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\bmpLoader.cpp"
>
</File>
<File
RelativePath=".\cl_cloth_demo.cpp"
>
</File>
<File
RelativePath=".\clstuff.cpp"
>
</File>
<File
RelativePath=".\gl_win.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\bmpLoader.h"
>
</File>
<File
RelativePath=".\btOpenCLSupport.h"
>
</File>
<File
RelativePath=".\cloth.h"
>
</File>
<File
RelativePath=".\clstuff.h"
>
</File>
<File
RelativePath=".\gl_win.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,15 @@
# Select which back-end builds of the OpenCL cloth demo are configured.
# Each back-end has its own sub-directory with a dedicated CMakeLists.txt.

# MiniCL back-end
IF (BUILD_MINICL_OPENCL_DEMOS)
	SUBDIRS(MiniCL)
ENDIF (BUILD_MINICL_OPENCL_DEMOS)

# AMD back-end
IF (BUILD_AMD_OPENCL_DEMOS)
	SUBDIRS(AMD)
ENDIF (BUILD_AMD_OPENCL_DEMOS)

# NVidia back-end
IF (BUILD_NVIDIA_OPENCL_DEMOS)
	SUBDIRS(NVidia)
ENDIF (BUILD_NVIDIA_OPENCL_DEMOS)

# Apple back-end: added on every Apple platform, with no extra option.
IF (APPLE)
	SUBDIRS(Apple)
ENDIF (APPLE)

View File

@@ -0,0 +1,86 @@
# Build script for the MiniCL flavour of the OpenCL cloth demo.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL
)

ADD_DEFINITIONS(-DUSE_MINICL)

# Pick the GLEW import library that matches the compiler's bitness (Windows only).
IF (WIN32)
	IF (CMAKE_CL_64)
		SET(CMAK_GLEW_LIBRARY
			${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib )
	ELSE(CMAKE_CL_64)
		SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib )
	ENDIF(CMAKE_CL_64)
ENDIF()

# The executable only exists when GLUT is available, so every command that
# names the target lives inside this block; referring to a target that was
# never created is a configure-time error.
IF (USE_GLUT)
	LINK_LIBRARIES(
		OpenGLSupport
		BulletSoftBodySolvers_OpenCL_Mini
		BulletSoftBodySolvers_CPU
		MiniCL
		BulletMultiThreaded
		BulletSoftBody
		BulletDynamics
		BulletCollision
		LinearMath
		${GLUT_glut_LIBRARY}
		${OPENGL_gl_LIBRARY}
		${OPENGL_glu_LIBRARY}
		${CMAK_GLEW_LIBRARY}
	)

	ADD_EXECUTABLE(AppOpenCLClothDemo_Mini
		../cl_cloth_demo.cpp
		../gl_win.cpp
		../clstuff.cpp
		../bmpLoader.cpp
		../bmpLoader.h
		../clstuff.h
		../gl_win.h
		${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
	)

	# On Windows, copy the GLUT/GLEW runtime DLLs next to the executable.
	IF(WIN32)
		IF (CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ELSE(CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ENDIF(CMAKE_CL_64)
	ENDIF(WIN32)

	# Copy the flag bitmaps into the build directory after each build.
	ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
	)

	IF (UNIX)
		TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Mini pthread)
	ENDIF(UNIX)
ENDIF (USE_GLUT)

View File

@@ -0,0 +1,102 @@
# Build script for the NVidia (GPU Computing SDK) flavour of the OpenCL cloth demo.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL
	${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL
)

# Locate the NVidia SDK headers and import libraries.
# NOTE(review): the "==NVSDKCOMPUTE_ROOT=" spelling is presumably a placeholder
# rewritten when distributable MSVC project files are generated - confirm.
IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
	INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc )
	IF (CMAKE_CL_64)
		SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/x64 )
	ELSE(CMAKE_CL_64)
		SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/Win32 )
	ENDIF(CMAKE_CL_64)
ELSE()
	INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc )
	IF (CMAKE_CL_64)
		SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/x64 )
	ELSE(CMAKE_CL_64)
		SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/Win32 )
	ENDIF(CMAKE_CL_64)
ENDIF()

# Pick the GLEW import library that matches the compiler's bitness.
IF (CMAKE_CL_64)
	SET(CMAK_GLEW_LIBRARY
		${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib )
ELSE(CMAKE_CL_64)
	SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib )
ENDIF(CMAKE_CL_64)

# The executable only exists when GLUT is available, so every command that
# names the target lives inside this block; referring to a target that was
# never created is a configure-time error.
IF (USE_GLUT)
	LINK_LIBRARIES(
		OpenGLSupport
		BulletSoftBodySolvers_OpenCL_NVidia
		BulletSoftBodySolvers_CPU
		BulletMultiThreaded
		BulletSoftBody
		BulletDynamics
		BulletCollision
		LinearMath
		${GLUT_glut_LIBRARY}
		${OPENGL_gl_LIBRARY}
		${OPENGL_glu_LIBRARY}
		${CMAK_GLEW_LIBRARY}
		${CMAK_NVSDKCOMPUTE_LIBPATH}/OpenCL.lib
	)

	ADD_EXECUTABLE(AppOpenCLClothDemo_NVidia
		../cl_cloth_demo.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp
		${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
		../gl_win.cpp
		../clstuff.cpp
		../bmpLoader.cpp
		../bmpLoader.h
		../clstuff.h
		../gl_win.h
	)

	# On Windows, copy the GLUT/GLEW runtime DLLs next to the executable.
	IF(WIN32)
		IF (CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ELSE(CMAKE_CL_64)
			IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}
				)
				ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD
					COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR})
			ENDIF()
		ENDIF(CMAKE_CL_64)
	ENDIF(WIN32)

	# Copy the flag bitmaps into the build directory after each build.
	ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
		COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR}
	)

	IF (UNIX)
		TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_NVidia pthread)
	ENDIF(UNIX)
ENDIF (USE_GLUT)

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

View File

@@ -0,0 +1,325 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "bmpLoader.h"
#include <new>
#include <cstring>
#include <cstdio>
namespace amd
{
static const short bitMapID = 19778;
// Frees the pixel and palette arrays owned by this bitmap (if any), resets
// both pointers to NULL and marks the bitmap as not loaded.
void BitMap::releaseResources(void)
{
    // delete[] on a null pointer is a no-op, so no explicit guards are needed.
    delete[] pixels_;
    pixels_ = NULL;

    delete[] colors_;
    colors_ = NULL;

    isLoaded_ = false;
}
// Copy-assignment: makes this bitmap a deep copy of rhs.
//
// The arrays this object owned before the assignment are released first, so
// assigning over an already-loaded bitmap no longer leaks them. Header fields
// are copied member by member; the palette (when rhs has one) and the pixel
// array are duplicated. If an allocation fails, the object is left with no
// image data and isLoaded_ == false.
//
// Returns *this.
BitMap& BitMap::operator=(const BitMap& rhs)
{
    if (this == &rhs) {
        return *this;
    }

    // Drop whatever we currently own before the pointers are overwritten.
    releaseResources();

    // Copy header
    id = rhs.id;
    size = rhs.size;
    reserved1 = rhs.reserved1;
    reserved2 = rhs.reserved2;
    offset = rhs.offset;

    // Copy header info
    sizeInfo = rhs.sizeInfo;
    width = rhs.width;
    height = rhs.height;
    planes = rhs.planes;
    bitsPerPixel = rhs.bitsPerPixel;
    compression = rhs.compression;
    imageSize = rhs.imageSize;
    xPelsPerMeter = rhs.xPelsPerMeter;
    yPelsPerMeter = rhs.yPelsPerMeter;
    clrUsed = rhs.clrUsed;
    clrImportant = rhs.clrImportant;

    numColors_ = rhs.numColors_;
    isLoaded_ = rhs.isLoaded_;

    if (isLoaded_) {
        if (rhs.colors_ != NULL) {
            // nothrow form: plain new[] throws instead of returning NULL,
            // which would make the failure check below unreachable.
            colors_ = new (std::nothrow) ColorPalette[numColors_];
            if (colors_ == NULL) {
                isLoaded_ = false;
                return *this;
            }
            memcpy(colors_, rhs.colors_, numColors_ * sizeof(ColorPalette));
        }

        const size_t pixelCount = (size_t)width * (size_t)height;
        pixels_ = new (std::nothrow) uchar4[pixelCount];
        if (pixels_ == NULL) {
            delete[] colors_;
            colors_ = NULL;
            isLoaded_ = false;
            return *this;
        }
        memcpy(pixels_, rhs.pixels_, pixelCount * sizeof(uchar4));
    }

    return *this;
}
void
BitMap::load(const char * filename)
{
// Release any existing resources
releaseResources();
// Open BMP file
FILE * fd = fopen(filename, "rb");
// Opened OK
if (fd != NULL) {
// Read header
fread((BitMapHeader *)this, sizeof(BitMapHeader), 1, fd);
// Failed to read header
if (ferror(fd)) {
fclose(fd);
return;
}
// Confirm that we have a bitmap file
if (id != bitMapID) {
fclose(fd);
return;
}
// Read map info header
fread((BitMapInfoHeader *)this, sizeof(BitMapInfoHeader), 1, fd);
// Failed to read map info header
if (ferror(fd)) {
fclose(fd);
return;
}
// No support for compressed images
if (compression) {
fclose(fd);
return;
}
// Support only 8 or 24 bits images
if (bitsPerPixel < 8) {
fclose(fd);
return;
}
// Store number of colors
numColors_ = 1 << bitsPerPixel;
//load the palate for 8 bits per pixel
if(bitsPerPixel == 8) {
colors_ = new ColorPalette[numColors_];
if (colors_ == NULL) {
fclose(fd);
return;
}
fread(
(char *)colors_,
numColors_ * sizeof(ColorPalette),
1,
fd);
// Failed to read colors
if (ferror(fd)) {
fclose(fd);
return;
}
}
// Allocate buffer to hold all pixels
unsigned int sizeBuffer = size - offset;
unsigned char * tmpPixels = new unsigned char[sizeBuffer];
if (tmpPixels == NULL) {
delete colors_;
colors_ = NULL;
fclose(fd);
return;
}
// Read pixels from file, including any padding
fread(tmpPixels, sizeBuffer * sizeof(unsigned char), 1, fd);
// Failed to read pixel data
if (ferror(fd)) {
delete colors_;
colors_ = NULL;
delete tmpPixels;
fclose(fd);
return;
}
// Allocate image
pixels_ = new uchar4[width * height];
if (pixels_ == NULL) {
delete colors_;
colors_ = NULL;
delete tmpPixels;
fclose(fd);
return;
}
// Set image, including w component (white)
memset(pixels_, 0xff, width * height * sizeof(uchar4));
unsigned int index = 0;
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
// Read RGB values
if (bitsPerPixel == 8) {
pixels_[(y * width + x)] = colors_[tmpPixels[index++]];
}
else { // 24 bit
pixels_[(y * width + x)].z = tmpPixels[index++];
pixels_[(y * width + x)].y = tmpPixels[index++];
pixels_[(y * width + x)].x = tmpPixels[index++];
}
}
// Handle padding
for(int x = 0; x < (4 - (3 * width) % 4) % 4; x++) {
index++;
}
}
// Loaded file so we can close the file.
fclose(fd);
delete[] tmpPixels;
// Loaded file so record this fact
isLoaded_ = true;
}
}
// Returns the index of the first palette entry whose four components all
// equal those of color. Returns 0 when no entry matches (indistinguishable
// from a match at index 0).
int BitMap::colorIndex(uchar4 color)
{
    int slot = 0;
    while (slot < numColors_) {
        const ColorPalette & entry = colors_[slot];
        const bool differs = entry.x != color.x
                          || entry.y != color.y
                          || entry.z != color.z
                          || entry.w != color.w;
        if (!differs) {
            return slot;
        }
        ++slot;
    }
    return 0;
}
bool
BitMap::write(const char * filename)
{
if (!isLoaded_) {
return false;
}
// Open BMP file
FILE * fd = fopen(filename, "wb");
// Opened OK
if (fd != NULL) {
// Write header
fwrite((BitMapHeader *)this, sizeof(BitMapHeader), 1, fd);
// Failed to write header
if (ferror(fd)) {
fclose(fd);
return false;
}
// Write map info header
fwrite((BitMapInfoHeader *)this, sizeof(BitMapInfoHeader), 1, fd);
// Failed to write map info header
if (ferror(fd)) {
fclose(fd);
return false;
}
// Write palate for 8 bits per pixel
if(bitsPerPixel == 8) {
fwrite(
(char *)colors_,
numColors_ * sizeof(ColorPalette),
1,
fd);
// Failed to write colors
if (ferror(fd)) {
fclose(fd);
return false;
}
}
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
// Read RGB values
if (bitsPerPixel == 8) {
fputc(
colorIndex(
pixels_[(y * width + x)]),
fd);
}
else { // 24 bit
fputc(pixels_[(y * width + x)].z, fd);
fputc(pixels_[(y * width + x)].y, fd);
fputc(pixels_[(y * width + x)].x, fd);
if (ferror(fd)) {
fclose(fd);
return false;
}
}
}
// Add padding
for(int x = 0; x < (4 - (3 * width) % 4) % 4; x++) {
fputc(0, fd);
}
}
return true;
}
return false;
}
} // amd

View File

@@ -0,0 +1,201 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2010 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BMPLOADER_H_
#define BMPLOADER_H_
#include <cstdlib>
#include <iostream>
namespace amd
{
//! @fixme this needs to be moved to common types header?
#pragma pack(1)
//! \struct uchar4 Four unsigned 8-bit components, used as one pixel / colour.
//! Declared under #pragma pack(1), so the struct is exactly four bytes with
//! no padding between members.
typedef struct
{
unsigned char x;
unsigned char y;
unsigned char z;
unsigned char w;
} uchar4;
//! A palette entry has the same layout as a pixel.
typedef uchar4 ColorPalette;
//! \struct BitMapHeader Bitmap file header.
//! BitMap writes this struct to disk as one raw block of
//! sizeof(BitMapHeader) bytes, so the member order, the member types and the
//! enclosing #pragma pack(1) together define the on-disk layout. Do not
//! reorder or resize members.
// NOTE(review): the member meanings below follow the usual BMP file header
// (signature, file size, reserved words, offset to pixel data) - confirm
// against the loader before relying on them.
typedef struct {
short id; // presumably the file signature
int size; // presumably the total file size in bytes
short reserved1;
short reserved2;
int offset; // presumably the byte offset of the pixel data
} BitMapHeader;
//! \struct BitMapInfoHeader Bitmap info header.
//! Like BitMapHeader this is written to disk as one raw block, so the member
//! order and types (under #pragma pack(1)) define the on-disk layout.
// NOTE(review): members without a comment are not interpreted by the code
// visible alongside this header; their meaning is presumably that of the
// standard BMP info header - confirm before use.
typedef struct {
int sizeInfo;
int width; // image width in pixels (row length of the pixel array)
int height; // image height in pixels (number of rows)
short planes;
short bitsPerPixel; // the reader/writer distinguish 8 (palette) from 24-bit
unsigned compression;
unsigned imageSize;
int xPelsPerMeter;
int yPelsPerMeter;
int clrUsed;
int clrImportant;
} BitMapInfoHeader;
//! \class BitMap
//! \brief Loads a bitmap (BMP) image from a file and can write it back out.
//!
//! Both file headers are inherited, which makes their fields (width, height,
//! bitsPerPixel, ...) directly accessible and lets each header be written
//! as one raw block.
class BitMap : public BitMapHeader, public BitMapInfoHeader
{
private:
    uchar4 * pixels_;        //!< Pixel data, indexed as y * width + x
    int numColors_;          //!< Number of entries in colors_
    ColorPalette * colors_;  //!< Palette used by 8 bits-per-pixel images
    bool isLoaded_;          //!< True once an image has been loaded

    //! \brief Release storage owned by this object (see implementation file).
    void releaseResources(void);

    //! \brief Palette index of \p color, or 0 when it is not in the palette.
    int colorIndex(uchar4 color);

public:
    //! \brief Default constructor: creates an empty, not-loaded bitmap.
    BitMap()
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {}

    /*!\brief Constructor
     *
     * Tries to load bitmap image from filename provided.
     *
     * \param filename pointer to null terminated string that is the path and
     * filename to the bitmap image to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    BitMap(const char * filename)
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {
        load(filename);
    }

    /*! \brief Copy constructor
     *
     * \param rhs is the bitmap to be copied (cloned).
     *
     * The members are put into the empty state first: the assignment
     * operator runs on this object and must never see uninitialised
     * pointers (it may release whatever this object currently owns).
     */
    BitMap(const BitMap& rhs)
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {
        *this = rhs;
    }

    //! \brief Destructor
    ~BitMap()
    {
        releaseResources();
    }

    /*! \brief Assignment
     * \param rhs is the bitmap to be assigned (cloned).
     */
    BitMap& operator=(const BitMap& rhs);

    /*! \brief Load Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename of the bitmap file to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    void
    load(const char * filename);

    /*! \brief Write Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename of the bitmap file to be written.
     *
     * \return In the case that the bitmap is written true is returned. In
     * the case that a bitmap image is not already loaded or the write fails
     * for some reason false is returned.
     */
    bool
    write(const char * filename);

    /*! \brief Get image width
     *
     * \return If a bitmap image has been successfully loaded, then the width
     * of the image is returned, otherwise -1.
     */
    int
    getWidth(void) const
    {
        return isLoaded_ ? width : -1;
    }

    /*! \brief Get image height
     *
     * \return If a bitmap image has been successfully loaded, then the
     * height of the image is returned, otherwise -1.
     */
    int
    getHeight(void) const
    {
        return isLoaded_ ? height : -1;
    }

    /*! \brief Get image pixels
     *
     * \return Pointer to the image's pixels. It is NULL on an object that
     * has never loaded an image; check isLoaded() before using it.
     */
    const uchar4 *
    getPixels(void) const { return pixels_; }

    /*! \brief Is an image currently loaded
     *
     * \return If a bitmap image has been successfully loaded, then returns
     * true, otherwise if an image could not be loaded or an image has yet
     * to be loaded false is returned.
     */
    bool
    isLoaded(void) const { return isLoaded_; }
};
#pragma pack()
}
#endif // BMPLOADER_H_

View File

@@ -0,0 +1,189 @@
//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef BMPLOADER_H_
#define BMPLOADER_H_
#include <cstdlib>
#include <iostream>
namespace amd
{
//! @fixme this needs to be moved to common types header?
#pragma pack(1)
//! \struct uchar4 Four unsigned 8-bit components, used as one pixel / colour.
//! Declared under #pragma pack(1), so the struct is exactly four bytes with
//! no padding between members.
typedef struct
{
unsigned char x;
unsigned char y;
unsigned char z;
unsigned char w;
} uchar4;
//! A palette entry has the same layout as a pixel.
typedef uchar4 ColorPalette;
//! \struct BitMapHeader Bitmap file header.
//! BitMap writes this struct to disk as one raw block of
//! sizeof(BitMapHeader) bytes, so the member order, the member types and the
//! enclosing #pragma pack(1) together define the on-disk layout. Do not
//! reorder or resize members.
// NOTE(review): the member meanings below follow the usual BMP file header
// (signature, file size, reserved words, offset to pixel data) - confirm
// against the loader before relying on them.
typedef struct {
short id; // presumably the file signature
int size; // presumably the total file size in bytes
short reserved1;
short reserved2;
int offset; // presumably the byte offset of the pixel data
} BitMapHeader;
//! \struct BitMapInfoHeader Bitmap info header.
//! Like BitMapHeader this is written to disk as one raw block, so the member
//! order and types (under #pragma pack(1)) define the on-disk layout.
// NOTE(review): members without a comment are not interpreted by the code
// visible alongside this header; their meaning is presumably that of the
// standard BMP info header - confirm before use.
typedef struct {
int sizeInfo;
int width; // image width in pixels (row length of the pixel array)
int height; // image height in pixels (number of rows)
short planes;
short bitsPerPixel; // the reader/writer distinguish 8 (palette) from 24-bit
unsigned compression;
unsigned imageSize;
int xPelsPerMeter;
int yPelsPerMeter;
int clrUsed;
int clrImportant;
} BitMapInfoHeader;
//! \class BitMap
//! \brief Loads a bitmap (BMP) image from a file and can write it back out.
//!
//! Both file headers are inherited, which makes their fields (width, height,
//! bitsPerPixel, ...) directly accessible and lets each header be written
//! as one raw block.
class BitMap : public BitMapHeader, public BitMapInfoHeader
{
private:
    uchar4 * pixels_;        //!< Pixel data, indexed as y * width + x
    int numColors_;          //!< Number of entries in colors_
    ColorPalette * colors_;  //!< Palette used by 8 bits-per-pixel images
    bool isLoaded_;          //!< True once an image has been loaded

    //! \brief Release storage owned by this object (see implementation file).
    void releaseResources(void);

    //! \brief Palette index of \p color, or 0 when it is not in the palette.
    int colorIndex(uchar4 color);

public:
    //! \brief Default constructor: creates an empty, not-loaded bitmap.
    BitMap()
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {}

    /*!\brief Constructor
     *
     * Tries to load bitmap image from filename provided.
     *
     * \param filename pointer to null terminated string that is the path and
     * filename to the bitmap image to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    BitMap(const char * filename)
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {
        load(filename);
    }

    /*! \brief Copy constructor
     *
     * \param rhs is the bitmap to be copied (cloned).
     *
     * The members are put into the empty state first: the assignment
     * operator runs on this object and must never see uninitialised
     * pointers (it may release whatever this object currently owns).
     */
    BitMap(const BitMap& rhs)
        : pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {
        *this = rhs;
    }

    //! \brief Destructor
    ~BitMap()
    {
        releaseResources();
    }

    /*! \brief Assignment
     * \param rhs is the bitmap to be assigned (cloned).
     */
    BitMap& operator=(const BitMap& rhs);

    /*! \brief Load Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename of the bitmap file to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    void
    load(const char * filename);

    /*! \brief Write Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename of the bitmap file to be written.
     *
     * \return In the case that the bitmap is written true is returned. In
     * the case that a bitmap image is not already loaded or the write fails
     * for some reason false is returned.
     */
    bool
    write(const char * filename);

    /*! \brief Get image width
     *
     * \return If a bitmap image has been successfully loaded, then the width
     * of the image is returned, otherwise -1.
     */
    int
    getWidth(void) const
    {
        return isLoaded_ ? width : -1;
    }

    /*! \brief Get image height
     *
     * \return If a bitmap image has been successfully loaded, then the
     * height of the image is returned, otherwise -1.
     */
    int
    getHeight(void) const
    {
        return isLoaded_ ? height : -1;
    }

    /*! \brief Get image pixels
     *
     * \return Pointer to the image's pixels. It is NULL on an object that
     * has never loaded an image; check isLoaded() before using it.
     */
    const uchar4 *
    getPixels(void) const { return pixels_; }

    /*! \brief Is an image currently loaded
     *
     * \return If a bitmap image has been successfully loaded, then returns
     * true, otherwise if an image could not be loaded or an image has yet
     * to be loaded false is returned.
     */
    bool
    isLoaded(void) const { return isLoaded_; }
};
#pragma pack()
}
#endif // BMPLOADER_H_

View File

@@ -0,0 +1,84 @@
#ifndef BT_OPENCL_SUPPORT_HPP
#define BT_OPENCL_SUPPORT_HPP
// OpenCL support
#include <CL/cl.hpp>
namespace BTAcceleratedSoftBody
{
class OpenCLSupportHelper
{
private:
cl::Context m_context;
std::vector<cl::Device> m_devices;
cl::CommandQueue m_queue;
public:
OpenCLSupportHelper()
{
}
virtual ~OpenCLSupportHelper()
{
}
cl::Device getDevice()
{
return m_devices[0];
}
cl::CommandQueue getCommandQueue()
{
return m_queue;
}
cl::Context getContext()
{
return m_context;
}
bool InitOpenCLDevice()
{
cl_int err;
std::vector<cl::Platform> platforms;
err = cl::Platform::get(&platforms);
checkErr(platforms.size() != 0 ? CL_SUCCESS : -1, "Platform::get()");
std::string platformVendor;
platforms[0].getInfo(CL_PLATFORM_VENDOR, &platformVendor);
//std::cout << "Platform is by: " << platformVendor << "\n";
intptr_t properties[] = {
CL_CONTEXT_PLATFORM, (intptr_t)platforms[0](),
0, 0
};
m_context = cl::Context(
CL_DEVICE_TYPE_GPU,
properties,
NULL,
NULL,
&err);
if (err != CL_SUCCESS)
{
btAssert( "Context::Context()" );
}
m_devices = m_context.getInfo<CL_CONTEXT_DEVICES>();
if( m_devices.size() <= 0 )
{
btAssert( "devices.size() > 0" );
}
m_queue = cl::CommandQueue(m_context, m_devices[0], 0, &err);
if (err != CL_SUCCESS)
{
btAssert( "CommandQueue::CommandQueue()");
}
}
};
} // namespace BTAcceleratedSoftBody
#endif // #ifndef BT_OPENCL_SUPPORT_HPP

View File

@@ -0,0 +1,470 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2008 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifdef _WIN32
#include <GL/glew.h>
#endif
#include "clstuff.h"
#include "gl_win.h"
#include "cloth.h"
// When defined, initBullet() creates the OpenCL soft body solver; otherwise
// it creates the CPU solver.
#define USE_GPU_SOLVER
// Number of flags (soft bodies) created, and of render-side cloth objects.
const int numFlags = 5;
// Flag resolution in vertices (columns x rows).
const int clothWidth = 40;
const int clothHeight = 60;//60;
// Current wind direction as an angle; updatePhysicsWorld() feeds it to
// cos/sin and wraps it back to 0 once it exceeds 2*3.141.
float _windAngle = 1.0;//0.4;
// Scale applied to the wind direction to get the wind velocity.
float _windStrength = 15;
#include <iostream>
using namespace std;
#include "btBulletDynamicsCommon.h"
#include "LinearMath/btHashMap.h"
#include "BulletSoftBody/btSoftRigidDynamicsWorld.h"
#include "vectormath/vmInclude.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h"
#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h"
using Vectormath::Aos::Vector3;
class piece_of_cloth;
class btBroadphaseInterface;
class btCollisionShape;
class btOverlappingPairCache;
class btCollisionDispatcher;
class btConstraintSolver;
struct btCollisionAlgorithmCreateFunc;
class btDefaultCollisionConfiguration;
namespace Vectormath
{
namespace Aos
{
class Transform3;
}
}
// Bullet world and its supporting objects; all created in initBullet().
btAlignedObjectArray<btCollisionShape*> m_collisionShapes;
btBroadphaseInterface* m_broadphase;
btCollisionDispatcher* m_dispatcher;
btConstraintSolver* m_solver;
btDefaultCollisionConfiguration* m_collisionConfiguration;
// Soft body solvers: exactly one of the two concrete solvers is created
// (selected by USE_GPU_SOLVER) and g_solver points at it.
btCPUSoftBodySolver *g_cpuSolver = NULL;
btOpenCLSoftBodySolver *g_openCLSolver = NULL;
btSoftBodySolver *g_solver = NULL;
// Simulated flags, filled by createFlag().
btAlignedObjectArray<btSoftBody *> m_flags;
btSoftRigidDynamicsWorld* m_dynamicsWorld;
// Render-side objects; cloths[i] displays m_flags[i].
btAlignedObjectArray<piece_of_cloth> cloths;
// OpenCL handles defined elsewhere and set up by initCL().
extern cl_context g_cxMainContext;
extern cl_device_id g_cdDevice;
extern cl_command_queue g_cqCommandQue;
// Distance between neighbouring flags along the z axis.
const float flagSpacing = 30.f;
// Helper to test and add links correctly.
// Records links that have already been generated.
//
// trianglesForLinks is a numVertices x numVertices table holding, for every
// vertex pair, the triangle that first created the link between them (or a
// negative value when no link exists yet).
//
// - Edge (vertex0, vertex1) not linked yet: append a structural link and
//   record `triangle` as its owner (in both directions).
// - Edge already linked and createBendLinks is set: append a bend link
//   between the vertex of the owning triangle that is not on the edge and
//   nonLinkVertex (the vertex of `triangle` that is not on the edge).
// - Edge already linked and createBendLinks is clear: nothing is added.
//   (The original appended a second, duplicate structural link here.)
//
// Always returns true.
static bool testAndAddLink( btAlignedObjectArray<int> &trianglesForLinks, btSoftBody *softBody, int triangle, int *triangleVertexIndexArray, int numVertices, int vertex0, int vertex1, int nonLinkVertex, btSoftBody::Material *structuralMaterial, bool createBendLinks, btSoftBody::Material *bendMaterial )
{
    const int otherTriangle = trianglesForLinks[numVertices * vertex0 + vertex1];

    if( otherTriangle < 0 )
    {
        // Don't yet have link so create it and remember which triangle owns it
        softBody->appendLink( vertex0, vertex1, structuralMaterial );
        trianglesForLinks[numVertices * vertex0 + vertex1] = triangle;
        trianglesForLinks[numVertices * vertex1 + vertex0] = triangle;
        return true;
    }

    if( !createBendLinks )
    {
        // The structural link already exists and no cross link is wanted.
        return true;
    }

    // Already have link so find the vertex of the other triangle that is not
    // part of the shared edge. Starts invalid so that a degenerate triangle
    // (no such vertex) cannot leave it uninitialised.
    int nodeA = -1;
    for( int corner = 0; corner < 3; ++corner )
    {
        const int candidate = triangleVertexIndexArray[otherTriangle * 3 + corner];
        if( candidate != vertex0 && candidate != vertex1 )
            nodeA = candidate;
    }

    // Skip degenerate cases: no opposite vertex, or a link from a node to itself.
    if( nodeA >= 0 && nodeA != nonLinkVertex )
    {
        softBody->appendLink( nodeA, nonLinkVertex, bendMaterial );
    }
    return true;
}
// Build a soft body from an indexed triangle mesh.
//
// vertexArray / numVertices describe the nodes; triangleVertexIndexArray
// holds three vertex indices per triangle for numTriangles triangles. Every
// triangle becomes a face and its edges become structural links; when
// createBendLinks is set, edges shared by two triangles also get a softer
// cross link. The body is created against m_dynamicsWorld's world info and
// is returned to the caller, who adds it to the world.
btSoftBody *createFromIndexedMesh( btVector3 *vertexArray, int numVertices, int *triangleVertexIndexArray, int numTriangles, bool createBendLinks )
{
    btSoftBody *body = new btSoftBody(&(m_dynamicsWorld->getWorldInfo()), numVertices, vertexArray, 0);

    // Material order matters: structural first, optional bend material second.
    btSoftBody::Material *structural = body->appendMaterial();
    btSoftBody::Material *bend = NULL;
    if( createBendLinks )
    {
        bend = body->appendMaterial();
        bend->m_kLST = 0.7;
    }
    structural->m_kLST = 1.0;

    // One slot per ordered vertex pair, holding the triangle that created the
    // link between the pair; -1 means "no link yet".
    btAlignedObjectArray<int> edgeOwner;
    edgeOwner.resize( numVertices * numVertices, -1 );

    for( int tri = 0; tri < numTriangles; ++tri )
    {
        const int a = triangleVertexIndexArray[tri * 3];
        const int b = triangleVertexIndexArray[tri * 3 + 1];
        const int c = triangleVertexIndexArray[tri * 3 + 2];

        body->appendFace( a, b, c );

        // One call per edge; the third index is the vertex opposite the edge.
        testAndAddLink( edgeOwner, body, tri, triangleVertexIndexArray, numVertices, a, b, c, structural, createBendLinks, bend );
        testAndAddLink( edgeOwner, body, tri, triangleVertexIndexArray, numVertices, b, c, a, structural, createBendLinks, bend );
        testAndAddLink( edgeOwner, body, tri, triangleVertexIndexArray, numVertices, c, a, b, structural, createBendLinks, bend );
    }

    return body;
}
/**
 * Create a sequence of flag objects and add them to the world.
 *
 * One width x height grid mesh is generated and numFlags soft bodies are
 * built from it, spaced flagSpacing apart along z. Each body is appended to
 * `flags` and added to m_dynamicsWorld.
 *
 * @param solver  not used by this function (kept for interface stability)
 * @param width   number of vertices per row of the grid
 * @param height  number of rows of the grid
 * @param flags   receives the created soft bodies
 */
void createFlag( btSoftBodySolver &solver, int width, int height, btAlignedObjectArray<btSoftBody *> &flags )
{
    (void)solver;

    // First create a triangle mesh to represent a flag
    using Vectormath::Aos::Matrix3;
    using Vectormath::Aos::Vector3;

    // Allocate a simple mesh consisting of a vertex array and a triangle index array
    btIndexedMesh mesh;
    mesh.m_numVertices = width*height;
    mesh.m_numTriangles = 2*(width-1)*(height-1);

    btVector3 *vertexArray = new btVector3[mesh.m_numVertices];
    mesh.m_vertexBase = reinterpret_cast<const unsigned char*>(vertexArray);
    int *triangleVertexIndexArray = new int[3*mesh.m_numTriangles];
    mesh.m_triangleIndexBase = reinterpret_cast<const unsigned char*>(triangleVertexIndexArray);
    mesh.m_triangleIndexStride = sizeof(int)*3;
    // The vertex array holds btVector3 elements, so the stride is theirs.
    mesh.m_vertexStride = sizeof(btVector3);

    // Generate normalised object space vertex coordinates for a rectangular flag
    float zCoordinate = 0.0f;
    Matrix3 defaultScale(Vector3(5.f, 0.f, 0.f), Vector3(0.f, 20.f, 0.f), Vector3(0.f, 0.f, 1.f));
    for( int y = 0; y < height; ++y )
    {
        float yCoordinate = y*2.0f/float(height) - 1.0f;
        for( int x = 0; x < width; ++x )
        {
            float xCoordinate = x*2.0f/float(width) - 1.0f;

            Vector3 vertex(xCoordinate, yCoordinate, zCoordinate);
            Vector3 transformedVertex = defaultScale*vertex;

            vertexArray[y*width + x] = btVector3(transformedVertex.getX(), transformedVertex.getY(), transformedVertex.getZ() );
        }
    }

    // Generate vertex indices for triangles: two per grid cell, three
    // consecutive ints per triangle. (The original reached the same slots
    // through byte-stride arithmetic that relied on integer truncation.)
    for( int y = 0; y < (height-1); ++y )
    {
        for( int x = 0; x < (width-1); ++x )
        {
            const int corner00 = y*width + x;      // (x,   y)
            const int corner10 = corner00 + 1;     // (x+1, y)
            const int corner01 = corner00 + width; // (x,   y+1)
            const int corner11 = corner01 + 1;     // (x+1, y+1)

            int *cell = triangleVertexIndexArray + 3*(2*y*(width-1) + 2*x);

            // Triangle 0
            cell[0] = corner00;
            cell[1] = corner10;
            cell[2] = corner01;

            // Triangle 1
            cell[3] = corner10;
            cell[4] = corner11;
            cell[5] = corner01;
        }
    }

    float rotateAngleRoundZ = 0.5;
    float rotateAngleRoundX = 0.5;
    btMatrix3x3 defaultRotate;
    defaultRotate[0] = btVector3(cos(rotateAngleRoundZ), sin(rotateAngleRoundZ), 0.f);
    defaultRotate[1] = btVector3(-sin(rotateAngleRoundZ), cos(rotateAngleRoundZ), 0.f);
    defaultRotate[2] = btVector3(0.f, 0.f, 1.f);
    btMatrix3x3 defaultRotateX;
    defaultRotateX[0] = btVector3(1.f, 0.f, 0.f);
    defaultRotateX[1] = btVector3( 0.f, cos(rotateAngleRoundX), sin(rotateAngleRoundX));
    defaultRotateX[2] = btVector3(0.f, -sin(rotateAngleRoundX), cos(rotateAngleRoundX));

    btMatrix3x3 defaultRotateAndScale( (defaultRotateX*defaultRotate) );

    // Construct the sequence flags applying a slightly different translation to each one to arrange them
    // appropriately in the scene.
    for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex )
    {
        float zTranslate = flagSpacing * (flagIndex-numFlags/2);

        btVector3 defaultTranslate(0.f, 20.f, zTranslate);
        btTransform transform( defaultRotateAndScale, defaultTranslate );

        btSoftBody *softBody = createFromIndexedMesh( vertexArray, mesh.m_numVertices, triangleVertexIndexArray, mesh.m_numTriangles, true );

        // Spread a total mass of 10 evenly over the nodes. The loop variable
        // no longer shadows the flag index of the enclosing loop.
        for( int vertexIndex = 0; vertexIndex < mesh.m_numVertices; ++vertexIndex )
        {
            softBody->setMass(vertexIndex, 10.f/mesh.m_numVertices);
        }

        // Zero mass on both ends and the middle of the last row; presumably
        // this is what keeps the flag attached (zero mass = fixed node).
        softBody->setMass((height-1)*(width), 0.f);
        softBody->setMass((height-1)*(width) + width - 1, 0.f);
        softBody->setMass((height-1)*width + width/2, 0.f);

        softBody->m_cfg.collisions = btSoftBody::fCollision::CL_SS+btSoftBody::fCollision::CL_RS;

        flags.push_back( softBody );

        softBody->transform( transform );

        m_dynamicsWorld->addSoftBody( softBody );
    }

    // The soft bodies were built from these arrays inside the loop above;
    // the temporary mesh is no longer needed here.
    delete [] vertexArray;
    delete [] triangleVertexIndexArray;
}
void updatePhysicsWorld()
{
static int counter = 0;
// Change wind velocity a bit based on a frame counter
if( (counter % 400) == 0 )
{
_windAngle = (_windAngle + 0.05f);
if( _windAngle > (2*3.141) )
_windAngle = 0;
for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex )
{
btSoftBody *cloth = 0;
cloth = m_flags[flagIndex];
float localWind = _windAngle + 0.5*(((float(rand())/RAND_MAX))-0.1);
float xCoordinate = cos(localWind)*_windStrength;
float zCoordinate = sin(localWind)*_windStrength;
cloth->setWindVelocity( btVector3(xCoordinate, 0, zCoordinate) );
}
}
//btVector3 origin( capCollider->getWorldTransform().getOrigin() );
//origin.setX( origin.getX() + 0.05 );
//capCollider->getWorldTransform().setOrigin( origin );
counter++;
}
// Create the soft body solver, the Bullet dynamics world and the flags, then
// hook every flag up to the vertex buffer of its render-side cloth object.
// Expects initCL() to have run (the OpenCL solver uses the global queue and
// context) and `cloths` to hold one object with allocated buffers per flag.
void initBullet(void)
{
// Pick the soft body solver implementation at compile time.
#ifdef USE_GPU_SOLVER
g_openCLSolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext);
g_solver = g_openCLSolver;
#else
g_cpuSolver = new btCPUSoftBodySolver;
g_solver = g_cpuSolver;
#endif
m_collisionConfiguration = new btDefaultCollisionConfiguration();
m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration);
m_broadphase = new btDbvtBroadphase();
btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver;
m_solver = sol;
m_dynamicsWorld = new btSoftRigidDynamicsWorld(m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration, g_solver);
m_dynamicsWorld->setGravity(btVector3(0,-10,0));
// The ground shape is created and tracked, but the rigid body that would use
// it is only built inside the disabled "#if 0" section below.
btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
m_collisionShapes.push_back(groundShape);
btTransform groundTransform;
groundTransform.setIdentity();
groundTransform.setOrigin(btVector3(0,-50,0));
// Soft body world parameters: air only, no water.
m_dynamicsWorld->getWorldInfo().air_density = (btScalar)1.2;
m_dynamicsWorld->getWorldInfo().water_density = 0;
m_dynamicsWorld->getWorldInfo().water_offset = 0;
m_dynamicsWorld->getWorldInfo().water_normal = btVector3(0,0,0);
m_dynamicsWorld->getWorldInfo().m_gravity.setValue(0,-10,0);
#if 0
{
btScalar mass(0.);
//rigidbody is dynamic if and only if mass is non zero, otherwise static
bool isDynamic = (mass != 0.f);
btVector3 localInertia(0,0,0);
if (isDynamic)
groundShape->calculateLocalInertia(mass,localInertia);
//using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects
btDefaultMotionState* myMotionState = new btDefaultMotionState(groundTransform);
btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,groundShape,localInertia);
btRigidBody* body = new btRigidBody(rbInfo);
//add the body to the dynamics world
m_dynamicsWorld->addRigidBody(body);
}
#endif
#ifdef USE_GPU_SOLVER
createFlag( *g_openCLSolver, clothWidth, clothHeight, m_flags );
#else
createFlag( *g_cpuSolver, clothWidth, clothHeight, m_flags );
#endif
// Create output buffer descriptions for each flag
// These describe where the simulation should send output data to
for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex )
{
// m_flags[flagIndex]->setWindVelocity( Vectormath::Aos::Vector3( 0.f, 0.f, 15.f ) );
// The output is the cloth's CPU-side vertex array viewed as floats: each
// vertex_struct is 8 floats (3 position, 3 normal, 2 texcoord), so a vertex
// starts at float index 0, 8, 16 and so on and a normal at 3, 11, 19 etc.
// NOTE(review): the argument order (vertex offset, vertex stride, normal
// offset, normal stride) is assumed from these values - confirm against
// btCPUVertexBufferDescriptor.
btCPUVertexBufferDescriptor *vertexBufferDescriptor = new btCPUVertexBufferDescriptor(reinterpret_cast< float* >(cloths[flagIndex].cpu_buffer), 0, 8, 3, 8);
cloths[flagIndex].m_vertexBufferDescriptor = vertexBufferDescriptor;
}
// Let the solver pre-process all soft bodies now that the world is populated.
g_solver->optimize( m_dynamicsWorld->getSoftBodyArray() );
}
btClock m_clock;
void doFlags()
{
//float ms = getDeltaTimeMicroseconds();
btScalar dt = (btScalar)m_clock.getTimeMicroseconds();
m_clock.reset();
///step the simulation
if( m_dynamicsWorld )
{
m_dynamicsWorld->stepSimulation(dt/1000000.);
static int frameCount = 0;
frameCount++;
if (frameCount==100)
{
m_dynamicsWorld->stepSimulation(1./60.,0);
CProfileManager::dumpAll();
}
updatePhysicsWorld();
}
for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex )
{
g_solver->copySoftBodyToVertexBuffer( m_flags[flagIndex], cloths[flagIndex].m_vertexBufferDescriptor );
cloths[flagIndex].draw();
}
}
int main(int argc, char *argv[])
{
initCL();
cloths.resize(numFlags);
for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex )
{
cloths[flagIndex].create_buffers(clothWidth, clothHeight);
}
initBullet();
m_dynamicsWorld->stepSimulation(1./60.,0);
preInitGL(argc, argv);
std::string flagTexs[] = {
"atiFlag.bmp",
"amdFlag.bmp",
};
int numFlagTexs = 2;
for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex )
{
cloths[flagIndex].create_texture(flagTexs[flagIndex % numFlagTexs]);
cloths[flagIndex].x_offset = 0;
cloths[flagIndex].y_offset = 0;
cloths[flagIndex].z_offset = 0;
}
goGL();
return 0;
}

View File

@@ -0,0 +1,183 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2008 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "gl_win.h" //for OpenGL stuff
#include "bmpLoader.h"
#include <string>
#include "LinearMath/btScalar.h"
// One renderable vertex: 8 consecutive floats. piece_of_cloth::draw() hands
// the pos and texcoord members to OpenGL with a stride of
// sizeof(vertex_struct), so the member order and sizes must not change.
struct vertex_struct
{
float pos[3]; // position (x, y, z)
float normal[3]; // normal; filled in but not submitted to GL by draw()
float texcoord[2]; // texture coordinate
};
class btVertexBufferDescriptor;
class piece_of_cloth
{
public:
void destroy(void)
{
if(created)
{
if(cpu_buffer) delete [] cpu_buffer;
}
}
piece_of_cloth()
{
created = false;
cpu_buffer = NULL;
m_vertexBufferDescriptor = NULL;
}
bool created;
vertex_struct* cpu_buffer;
unsigned int* indices;
btVertexBufferDescriptor *m_vertexBufferDescriptor;
double x_offset, y_offset, z_offset;
int width;
int height;
GLuint texture;
void draw(void)
{
glEnable(GL_TEXTURE_2D);
glBindTexture (GL_TEXTURE_2D, texture);
glEnable(GL_DEPTH_TEST);
glColor3f(0.0f, 1.0f, 1.0f);
glEnableClientState(GL_VERTEX_ARRAY);
//glEnableClientState(GL_NORMAL_ARRAY);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
glBindTexture(GL_TEXTURE_2D, texture);
glVertexPointer( 3, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].pos[0])) );
//glNormalPointer( 3, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].normal[0])) );
glTexCoordPointer( 2, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].texcoord[0])) );
glDrawElements(GL_TRIANGLES, (height-1 )*(width-1)*3*2, GL_UNSIGNED_INT, indices);
// glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glBindTexture(GL_TEXTURE_2D, 0);
}
void create_texture(std::string filename)
{
amd::BitMap texBMP(filename.c_str());
if ( texBMP.isLoaded() ) {
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &texture);
glBindTexture(GL_TEXTURE_2D, texture);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL);
glTexImage2D(
GL_TEXTURE_2D,
0,
GL_RGBA8,
texBMP.getWidth(),
texBMP.getHeight(),
0,
GL_RGBA,
GL_UNSIGNED_BYTE,
texBMP.getPixels());
glBindTexture(GL_TEXTURE_2D, 0);
}
else {
std::cout << "ERROR: could not load bitmap " << "texture.bmp" << std::endl;
exit(1);
}
}
void create_buffers(int width_, int height_)
{
width = width_;
height = height_;
created = true;
cpu_buffer = new vertex_struct[width*height];
memset(cpu_buffer, 0, width*height*sizeof(vertex_struct));
// Initial test data for rendering
for(int y = 0; y < height; y++)
{
for(int x = 0; x < width; x++)
{
double coord = btSin(x/5.0)*0.01;
//coord = sin(y/);
cpu_buffer[y*width+x].pos[0] = (x/((float)(width-1)))*1;
cpu_buffer[y*width+x].pos[1] = coord;
cpu_buffer[y*width+x].pos[2] = (y/((float)(height-1)))*1;
cpu_buffer[y*width+x].normal[0] = 1;
cpu_buffer[y*width+x].normal[1] = 0;
cpu_buffer[y*width+x].normal[2] = 0;
cpu_buffer[y*width+x].texcoord[0] = x/((float)(width-1));
cpu_buffer[y*width+x].texcoord[1] = y/((float)(height-1));
}
}
// Generate and fill index array for rendering
indices = new unsigned int[width*3*2+2 + height*width*3*2];
for(int y = 0; y < height-1; y++)
{
for(int x = 0; x < width-1; x++)
{
// *3 indices/triangle, *2 triangles/quad
int baseIndex = (x + y*(width-1))*3*2;
indices[baseIndex] = x + y*width;
indices[baseIndex+1] = x+1 + y*width;
indices[baseIndex+2] = x+width + y*width;
indices[baseIndex+3] = x + 1 + y*width;
indices[baseIndex+4] = x+(width+1) + y*width;
indices[baseIndex+5] = x+width + y*width;
}
}
}
};

View File

@@ -0,0 +1,53 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2008 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "clstuff.h"
#include "gl_win.h"
#include "btOclCommon.h"
#include "btOclUtils.h"
#include "LinearMath/btScalar.h"
// Process-wide OpenCL handles, filled in by initCL() and referenced from
// other translation units through extern declarations.
cl_context g_cxMainContext;
cl_device_id g_cdDevice;
cl_command_queue g_cqCommandQue;
// Initialise the global OpenCL state: create a context over all device
// types, select a device from it via btOclGetMaxFlopsDev, print that
// device's info and create a command queue on it. Both creation calls are
// checked with oclCHECKERROR.
// NOTE(review): what oclCHECKERROR does on failure (assert, abort, log) is
// defined in the btOcl headers, not here - confirm before relying on it.
void initCL(void)
{
int ciErrNum = 0;
//g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
//g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum);
//g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum);
//try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel
//#ifdef USE_MINICL
// g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum);
//#else
g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
//#endif
oclCHECKERROR(ciErrNum, CL_SUCCESS);
// Choose the device the demo will run on.
g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext);
btOclPrintDevInfo(g_cdDevice);
// create a command-queue
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
}

View File

@@ -0,0 +1,10 @@
// Declares the OpenCL start-up routine of the cloth demo.
#ifndef __CLSTUFF_HDR__
#define __CLSTUFF_HDR__
// Create the global OpenCL context, device and command queue. Call once at
// start-up, before anything that uses those globals.
void initCL(void);
#endif //__CLSTUFF_HDR__

View File

@@ -0,0 +1,10 @@
// Declares the OpenCL start-up routine of the cloth demo.
#ifndef __CLSTUFF_HDR__
#define __CLSTUFF_HDR__
// Create the global OpenCL context, device and command queue. Call once at
// start-up, before anything that uses those globals.
void initCL(void);
#endif //__CLSTUFF_HDR__

View File

@@ -0,0 +1,7 @@
// Texture sampled for every fragment.
uniform sampler2D tex;

// Fragment shader: output the texel found at the first texture coordinate
// set, unmodified.
void main()
{
    gl_FragColor = texture2D(tex, gl_TexCoord[0].st);
}

View File

@@ -0,0 +1,272 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2008 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "clstuff.h"
#include "gl_win.h"
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <iterator>
#include <math.h>
#include <cmath>
#include <cstring>
//#ifndef _WIN32 && !defined(__APPLE__)
//#include <GL/glx.h>
//#endif //!_WIN32
static GLuint vbo = 0;
#ifdef _WIN32
#include <windows.h>
#endif
static unsigned int windowWidth = 1280;
static unsigned int windowHeight = 1024;
// mouse controls
int mouseOldX;
int mouseOldY;
int mouseButtons = 0;
float rotateX;
float rotateY;
float translateZ;
float translateX;
float translateY;
static GLuint glProgram;
void doFlags();
// Per-frame draw: clears the colour and depth buffers, rebuilds the model-view
// matrix from the camera globals (translateX/Y/Z, rotateX/Y) and calls doFlags().
void render( void)
{
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
// glDisable ( GL_CULL_FACE );
glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
// Camera transform: the translation is specified first, then the X and Y rotations.
glTranslatef( translateX, translateY, translateZ );
glRotatef( rotateX, 0.5f , 0.0f, 0.0f );
glRotatef( rotateY, 0.0f, 0.5f, 0.0f );
// glDisable (GL_BLEND);
// doFlags() is only declared in this file.
// NOTE(review): presumably it issues the actual scene drawing -- confirm.
doFlags();
// TODO: the disabled code below is an unfinished point rendering path through the VBO.
//glBindBuffer(GL_ARRAY_BUFFER, vbo);
//glVertexPointer(4, GL_FLOAT, 0, NULL);
//glEnableClientState(GL_VERTEX_ARRAY);
//glDrawArrays(GL_POINTS, 0, 4*4);
// glDisableClientState(GL_VERTEX_ARRAY);
// glBindBuffer(GL_ARRAY_BUFFER, 0);
// glUseProgram(0);
}
// One-time GL setup: selects the background colour and puts the camera
// (rotation angles and translation) into its default pose.
// Both lighting/material blocks below are compiled out with '#if 0'.
static void initGL(void)
{
    // Previous background choice, kept for reference:
    //glClearColor( 0.05f, 0.0f, 0.1f, 0.1f );
    glClearColor( 0.0f, 0.45f, 0.45f, 1.f);

#if 0
    GLfloat mat_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
    GLfloat mat_shininess[] = { 50.0f };
    GLfloat light_position[] = {
        -10.f,
        5.f,
        -1.f,
        1.0f };
    glEnable ( GL_COLOR_MATERIAL );
    glShadeModel( GL_SMOOTH );
    glEnable( GL_LINE_SMOOTH );
    glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular );
    glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess );
    glLightfv( GL_LIGHT0, GL_POSITION, light_position );
    //glEnable( GL_LIGHTING );
    //glEnable( GL_LIGHT0 ); // Switch on and crashes!
    glEnable( GL_DEPTH_TEST );
#endif

    // This variant refers to the arrays declared inside the disabled block above,
    // so it cannot be enabled on its own.
#if 0
    glEnable ( GL_COLOR_MATERIAL );
    glShadeModel( GL_SMOOTH );
    glEnable( GL_LINE_SMOOTH );
    glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular );
    glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess );
    glLightfv( GL_LIGHT0, GL_POSITION, light_position );
    glEnable( GL_LIGHTING );
    glEnable( GL_LIGHT0 );
    glEnable( GL_DEPTH_TEST );
#endif

    // Default camera pose: pulled back and down, turned 30 degrees about Y.
    translateX = 0.0f;
    translateY = -30.0f;
    translateZ = -120.0;
    rotateX = 0;
    rotateY = 30;
}
// GLUT display callback: draws one frame, presents the back buffer and
// immediately requests another redisplay, so the scene is redrawn continuously.
void display(void)
{
render();
glutSwapBuffers();
glutPostRedisplay();
}
// GLUT keyboard callback.
//   q / ESC : quit the demo
//   a / z   : move the camera translation up / down   (translateY)
//   d / s   : move the camera translation right / left (translateX)
//   f / g   : move the camera translation along Z      (translateZ)
// The cursor position arguments are unused.
void keyboard( unsigned char key, int /*x*/, int /*y*/)
{
    // Distance the camera moves per key press.
    const float step = 0.1f;

    switch( key) {
    case 'q':
    case 27: // ASCII ESC: GLUT reports it on every platform (same value as Win32 VK_ESCAPE)
        exit(0);
        break;
    case 'a':
        translateY += step;
        break;
    case 'z':
        translateY -= step;
        break;
    case 'd':
        translateX += step;
        break;
    case 's':
        translateX -= step;
        break;
    case 'f':
        translateZ += step;
        break;
    case 'g':
        translateZ -= step;
        break;
    default:
        // Unbound key: ignore.
        break;
    }
}
// GLUT mouse-button callback: maintains the bit mask of currently held buttons
// (bit N set <=> GLUT button N is down) and records the cursor position so that
// motion() can compute drag deltas.
void mouse(int button, int state, int x, int y)
{
    const int buttonBit = 1 << button;

    if (state == GLUT_DOWN) {
        mouseButtons |= buttonBit;
    } else if (state == GLUT_UP) {
        // Release only the button that went up; other buttons may still be held.
        mouseButtons &= ~buttonBit;
    }

    mouseOldX = x;
    mouseOldY = y;
    glutPostRedisplay();
}
// GLUT drag callback: turns cursor movement into camera changes depending on
// which buttons are held (bit 0 = left, bit 2 = right in mouseButtons):
//   left + right : pan in X/Y
//   left only    : rotate about X/Y
//   right only   : zoom along Z
// The combined test has to come first; testing a single bit first would make
// the branches behind it unreachable.
void motion(int x, int y)
{
    const float dx = x - mouseOldX;
    const float dy = y - mouseOldY;

    const int leftBit = 1;
    const int rightBit = 4;
    const int panMask = leftBit | rightBit;

    if ((mouseButtons & panMask) == panMask) {
        translateY -= dy * 0.01;
        translateX -= dx * 0.01;
    }
    else if (mouseButtons & leftBit) {
        rotateX += dy * 0.2;
        rotateY += dx * 0.2;
    }
    else if (mouseButtons & rightBit) {
        translateZ += dy * 0.01;
    }

    // Remember the cursor so the next event yields a relative delta.
    mouseOldX = x;
    mouseOldY = y;
}
// GLUT reshape callback: stores the new window size, resizes the viewport to
// cover the whole window and rebuilds the perspective projection
// (60 degree vertical field of view, near plane 0.1, far plane 600).
void reshape (int w, int h)
{
    windowWidth = w;
    // A zero height would make the aspect ratio below a division by zero;
    // treat it as one pixel high.
    windowHeight = (h > 0) ? h : 1;

    glViewport(0, 0, windowWidth, windowHeight);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(
        60.0,
        (GLfloat)windowWidth / (GLfloat) windowHeight,
        0.1,
        600.0f );
}
// Hands control to the GLUT event loop; the callbacks registered in preInitGL()
// drive the demo from here on.
void goGL(void)
{
glutMainLoop();
}
// Initialises GLUT, creates the double-buffered RGBA window with a depth buffer,
// applies the initial GL state and registers the display, reshape, keyboard,
// mouse and motion callbacks. Call this before goGL().
void preInitGL(int argc, char ** argv)
{
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA | GLUT_DEPTH );
glutInitWindowSize( windowWidth, windowHeight );
glutCreateWindow ("OpenCL Renderer");
initGL();
// reshape() sets the viewport itself, so the explicit call here is redundant.
glViewport( 0, 0, windowWidth, windowHeight);
// Set up the projection once for the initial window size.
reshape( windowWidth, windowHeight );
glutDisplayFunc(display);
glutReshapeFunc(reshape);
glutKeyboardFunc(keyboard);
glutMouseFunc(mouse);
glutMotionFunc(motion);
}
/*
int getVBO( std::string, int s)
{
GLuint size = (GLuint)s;
if (vbo == 0) {
// Create VBO
// create buffer object
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
return vbo;
}
*/

View File

@@ -0,0 +1,49 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2008 Advanced Micro Devices
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// Platform-dependent OpenGL/GLUT includes plus the window entry points of the demo.
#ifndef __GL_WIN_HDR__
#define __GL_WIN_HDR__
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
// Apple ships the GL headers under framework-style paths (unless building against Mesa via VMDMESA).
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
// NOTE(review): the _WINDOWS branch does not include a GLUT header although the
// implementation calls glut* functions -- presumably GLUT is provided elsewhere; confirm.
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
#include <string>
// Enters the GLUT main loop.
void goGL(void);
// Creates the window and registers the GLUT callbacks; argc/argv are forwarded to glutInit.
void preInitGL(int argc, char ** argv);
//int getVBO( std::string, int size );
#endif //__GL_WIN_HDR__

View File

@@ -0,0 +1,34 @@
// Platform-dependent OpenGL/GLUT includes plus the window entry points of the demo.
#ifndef __GL_WIN_HDR__
#define __GL_WIN_HDR__
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
// Apple ships the GL headers under framework-style paths (unless building against Mesa via VMDMESA).
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
// NOTE(review): the _WINDOWS branch does not include a GLUT header -- presumably
// GLUT is provided elsewhere; confirm.
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
#include <string>
// Enters the GLUT main loop.
void goGL(void);
// Creates the window and registers the GLUT callbacks; argc/argv are forwarded to glutInit.
void preInitGL(int argc, char ** argv);
// NOTE(review): no definition of getVBO is visible alongside this header --
// confirm one exists before calling it.
int getVBO( std::string, int size );
#endif //__GL_WIN_HDR__

View File

@@ -0,0 +1,535 @@
#pragma OPENCL EXTENSION cl_amd_printf : enable
// 3-component vector types are aliased to their 4-component counterparts in this file.
#define float3 float4
#define uint3 uint4
// Scale applied to the unit quad corners of a particle sprite.
// Written without a trailing ';' so it can appear inside an expression, and as
// an explicit single-precision literal.
#define PARTICLE_RADIUS 0.05f
// Hard-coded render target size in pixels.
#define width 1280
#define height 1024
// Screen rectangle edges (bottom, top, left, right) used by the NDC-to-screen mapping.
#define B 0
#define T height
#define L 0
#define R width
// Sub-pixel fixed-point format of the rasterizer: 4 fractional bits (scale 16, mask 0xF).
#define shiftNumber 4
#define shiftMask 0xF
#define shiftValue 16.0f
#define screenWidth1 width
#define screenHeight1 height
// Derived sizes are parenthesized so they expand safely next to other operators.
#define halfScreenWidth1 (screenWidth1/2)
#define halfScreenHeight1 (screenHeight1/2)
#define screenWidth1SubOne (screenWidth1-1)
#define screenHeight1SubOne (screenHeight1-1)
// Row pitch of the screen in pixels. An earlier conflicting '#define stride 4'
// was removed; this later definition is the one that took effect.
#define stride screenWidth1
#define screenPixelNumber (screenWidth1*screenHeight1)
// NOTE(review): depthComplexity is not defined in the visible part of this file;
// this macro only compiles where that name is available.
#define depthBufferSize (screenPixelNumber*depthComplexity)
#define WGS 1
//---------------------------------------------------------------
// Per-particle output of the vertex stage.
struct __VSSpriteOut
{
    float4 position;          // particle position as read from the input buffer
    float4 particlePosition;  // the same position after the projection * modelview transform
};
// The typedef must name the struct tag with its exact spelling; a differently
// cased tag would silently declare an unrelated incomplete type.
typedef struct __VSSpriteOut VSSpriteOut;

// Per-corner output of the geometry stage (four entries per particle quad).
struct __GSSpriteOut
{
    float4 position;   // corner position after projection and perspective division
    float2 textureUV;  // texture coordinate assigned to the corner
    // float4 viewSpacePosition;
    // float4 particlePosition;
};
typedef struct __GSSpriteOut GSSpriteOut;
//------------------------------------------------------------------------------
// Corner offsets of a unit quad in the z = 0 plane, ordered top-left, top-right,
// bottom-left, bottom-right. geometryShader scales them by PARTICLE_RADIUS.
__constant float4 g_positions[4] =
{
(float4)(-1.0f, 1.0f, 0.0f, 0.0f),
(float4)( 1.0f, 1.0f, 0.0f, 0.0f),
(float4)( -1.0f, -1.0f, 0.0f, 0.0f),
(float4)( 1.0f, -1.0f, 0.0f, 0.0f)
};
// Texture coordinates matching the corner order of g_positions.
// The only reference to this table in geometryShader is commented out.
__constant float2 g_texcoords[4] =
{
(float2)(0.0f,0.0f),
(float2)(1.0f,0.0f),
(float2)(0.0f,1.0f),
(float2)(1.0f,1.0f)
};
//------------------------------------------------------------------------------
// Copies the 16 floats of a 4x4 matrix from __constant memory into the
// caller's (unqualified) array.
void copyMatrix(
    float matrix[16],
    __constant float matrix0[16])
{
    for (uint element = 0; element != 16; ++element)
        matrix[element] = matrix0[element];
}
// Computes element i of the 4x4 product matrix0 * matrix1 and stores it in matrix[i].
// With i = column*4 + row, the element is the dot product of row (i % 4) of
// matrix0 and column (i / 4) of matrix1, accumulated in the order k = 0..3.
void matrixMulLoopBody(
    uint i,
    float matrix[16],
    __constant float matrix0[16],
    __constant float matrix1[16])
{
    const uint row = i % 4;
    const uint columnStart = (i / 4) * 4;

    float sum = 0.0f;
    for (uint k = 0; k < 4; ++k)
        sum += matrix0[row + (k * 4)] * matrix1[k + columnStart];

    matrix[i] = sum;
}
// Writes the 4x4 product matrix0 * matrix1 into matrix, one element at a time
// in ascending element order.
void matrixMul(
    float matrix[16],
    __constant float matrix0[16],
    __constant float matrix1[16])
{
    for (uint element = 0; element < 16; ++element)
        matrixMulLoopBody(element, matrix, matrix0, matrix1);
}
// Returns matrix * vector for a 4x4 matrix stored column by column
// (elements 0..3 form the first column, 4..7 the second, and so on).
float4 matrixVectorMul(float matrix[16], float4 vector)
{
    const float4 column0 = (float4)(matrix[0],  matrix[1],  matrix[2],  matrix[3]);
    const float4 column1 = (float4)(matrix[4],  matrix[5],  matrix[6],  matrix[7]);
    const float4 column2 = (float4)(matrix[8],  matrix[9],  matrix[10], matrix[11]);
    const float4 column3 = (float4)(matrix[12], matrix[13], matrix[14], matrix[15]);

    // Same left-to-right summation as the component-wise formulation.
    return column0*vector.x + column1*vector.y + column2*vector.z + column3*vector.w;
}
// Returns matrix * vector for a 3x3 matrix stored column by column
// (elements 0..2 form the first column). Only x, y and z of the input are used.
float3 matrixVector3Mul(__constant float matrix[9], float3 vector)
{
    // float3 is aliased to a 4-wide vector in this file, so start from zero to
    // give the remaining lane a defined value instead of returning it uninitialized.
    float3 result = (float3)(0.0f);
    result.x = matrix[0]*vector.x + matrix[3+0]*vector.y + matrix[6+0]*vector.z;
    result.y = matrix[1]*vector.x + matrix[3+1]*vector.y + matrix[6+1]*vector.z;
    result.z = matrix[2]*vector.x + matrix[3+2]*vector.y + matrix[6+2]*vector.z;
    return result;
}
//------------------------------------------------------------------------------
//#define DEVICE_CPU 1
#if defined(DEVICE_CPU)
// Debug helper (compiled only when DEVICE_CPU is defined): prints the 16 floats
// of a matrix as four lines of four values, each line prefixed with name and a
// line index. Every printed line shows four consecutive array elements.
void printMatrix(char * name, __constant float matrix[16])
{
printf("%s[0] = %f, %f, %f, %f\n", name, matrix[0], matrix[1], matrix[2], matrix[3]);
printf("%s[1] = %f, %f, %f, %f\n", name, matrix[4], matrix[5], matrix[6], matrix[7]);
printf("%s[2] = %f, %f, %f, %f\n", name, matrix[8], matrix[9], matrix[10], matrix[11]);
printf("%s[3] = %f, %f, %f, %f\n", name, matrix[12], matrix[13], matrix[14], matrix[15]);
}
#endif
#if 1
// Vertex stage: each work-item transforms one input vertex by
// projection * modelview and stores the result at the same index.
__kernel void vertexShader(
    __constant float modelview[16],
    __constant float projection[16],
    __global float4 * inputPrimitives,
    __global float4 * outputPrimitives)
{
    const uint vertexIndex = get_global_id(0);

    // gl_ProjectionMatrix * gl_ModelViewMatrix
    float modelViewProjection[16];
    matrixMul(modelViewProjection, projection, modelview);

    outputPrimitives[vertexIndex] =
        matrixVectorMul(modelViewProjection, inputPrimitives[vertexIndex]);
}
#else
// Disabled alternative: copies every vertex through untransformed.
__kernel void vertexShader(
    __constant float modelview[16],
    __constant float projection[16],
    __global float4 * inputPrimitives,
    __global float4 * outputPrimitives)
{
    const uint vertexIndex = get_global_id(0);
    outputPrimitives[vertexIndex] = inputPrimitives[vertexIndex];
}
#endif
//-----------------------------------------------------------------------------------
// Fills an image with one colour: every work-item of the 2D range writes the
// pixel whose coordinates equal its global id.
__kernel void
clearImage(
    __write_only image2d_t image,
    float4 color)
{
    const int pixelX = get_global_id(0);
    const int pixelY = get_global_id(1);
    write_imagef(image, (int2)(pixelX, pixelY), color);
}
// OpenGL viewport transformation
// The site http://research.cs.queensu.ca/~jstewart/454/notes/pipeline/
// contains a description of this process
// Maps the x/y of a normalized device coordinate (range -1..1) to window
// coordinates and stores the result in *output.
// NOTE(review): each extent term is written as (s2 - s0) + s0, which reduces to
// s2 (and s3 for y), so the viewport origin never offsets the result. The
// intended formula may have been 0.5*(v+1)*(s2-s0) + s0 -- confirm the layout
// of 'viewport' with the host code before changing it.
void
viewportTransform(float4 v, __constant int4 viewport[1], float2 * output)
{
    const int4 vp = viewport[0];

    const float2 shifted = (float2)(v.x+1, v.y+1);
    const float2 extent = (float2)((vp.s2-vp.s0) + vp.s0,
                                   (vp.s3-vp.s1) + vp.s1);

    *output = 0.5f * shifted * extent;
}
#define PARTICLE_WIDTH 32.0f
#define PARTICLE_HEIGHT 32.0f
// Unoptimized triangle rasterizer function
// Details of the algorithm can be found here:
// http://www.devmaster.net/forums/showthread.php?t=1884
//
// Rasterizes one triangle of the quad owned by this work-item.
//   outputPrimitives : four projected corners per work-item (index = global id * 4 + corner)
//   viewport         : viewport rectangle forwarded to viewportTransform
//   screen           : target image; covered pixels are written as opaque white
//   particle         : sprite texture, not sampled yet (texture mapping is not implemented)
//   v1/v2/v3Offset   : which three of the four corners form the triangle
//   debugOut1        : unused
void
rasterizerUnOpt(
    __global struct __GSSpriteOut * outputPrimitives,
    // __global float4 * outputPrimitives,
    __constant int4 viewport[1],
    __write_only image2d_t screen,
    __read_only image2d_t particle,
    uint v1Offset,
    uint v2Offset,
    uint v3Offset,
    __global float4 * debugOut1)
{
    const uint firstCorner = get_global_id(0) * 4;

    // Window-space positions of the three triangle corners.
    float2 v1, v2, v3;
    viewportTransform(outputPrimitives[firstCorner + v1Offset].position, viewport, &v1);
    viewportTransform(outputPrimitives[firstCorner + v2Offset].position, viewport, &v2);
    viewportTransform(outputPrimitives[firstCorner + v3Offset].position, viewport, &v3);

    // Bounding rectangle, clamped to the image: write_imagef has undefined
    // behaviour for coordinates outside the image.
    const int2 screenSize = get_image_dim(screen);
    const int2 lower = max(convert_int2(min(v1, min(v2, v3))), (int2)(0, 0));
    const int2 upper = min(convert_int2(max(v1, max(v2, v3))), screenSize);

    // Scan through the bounding rectangle.
    for (int y = lower.y; y < upper.y; y++) {
        for (int x = lower.x; x < upper.x; x++) {
            // When all half-space functions are positive, the pixel is inside the triangle.
            if ((v1.x - v2.x) * (y - v1.y) - (v1.y - v2.y) * (x - v1.x) > 0 &&
                (v2.x - v3.x) * (y - v2.y) - (v2.y - v3.y) * (x - v2.x) > 0 &&
                (v3.x - v1.x) * (y - v3.y) - (v3.y - v1.y) * (x - v3.x) > 0) {
                write_imagef(
                    screen,
                    (int2)(x, y),
                    (float4)(1.0f, 1.0f, 1.0f, 1.0f));
            }
        }
    }
}
// Optimized rasterizer function
// Details of the algorithm can be found here:
// http://www.devmaster.net/forums/showthread.php?t=1884
//
// Still a work in progress and not validated end to end. Each work-item
// rasterizes the triangle formed by the first three of its four vertices
// (index = global id * 4 + n) using incremental half-edge functions in
// fixed point, writing covered pixels as opaque white.
// debugOut1 / debugOut2 receive intermediate values for inspection.
__kernel void
rasterizerXX(
    __global float4 * outputPrimitives,
    __write_only image2d_t screen,
    __global float4 * debugOut1,
    __global int2 * debugOut2)
{
    uint id = get_global_id(0);
    // printf("ras\n");
    float4 v1 = outputPrimitives[id*4+0];
    float4 v2 = outputPrimitives[id*4+1];
    float4 v3 = outputPrimitives[id*4+2];

    // Normalized device coordinates (-1..1) to screen coordinates.
    float y1 = 0.5f* (v1.y+1) * (T - B) + B;
    float y2 = 0.5f* (v2.y+1) * (T - B) + B;
    float y3 = 0.5f* (v3.y+1) * (T - B) + B;
    float x1 = 0.5f * (v1.x+1) * (R - L) + L;
    float x2 = 0.5f * (v2.x+1) * (R - L) + L;
    float x3 = 0.5f * (v3.x+1) * (R - L) + L;

    // Fixed-point coordinates with shiftNumber fractional bits.
    const int Y1 = convert_int(shiftValue * y1);
    const int Y2 = convert_int(shiftValue * y2);
    const int Y3 = convert_int(shiftValue * y3);
    const int X1 = convert_int(shiftValue * x1);
    const int X2 = convert_int(shiftValue * x2);
    const int X3 = convert_int(shiftValue * x3);

    debugOut1[id*4+0] = v1;
    debugOut1[id*4+1] = v2;
    debugOut1[id*4+2] = v3;
    debugOut2[id*3+0] = (int2)(X1, Y1);
    debugOut2[id*3+1] = (int2)(X2, Y2);
    debugOut2[id*3+2] = (int2)(X3, Y3);

    // Deltas
    const int DX12 = X1 - X2;
    const int DX23 = X2 - X3;
    const int DX31 = X3 - X1;
    const int DY12 = Y1 - Y2;
    const int DY23 = Y2 - Y3;
    const int DY31 = Y3 - Y1;

    // Fixed-point deltas
    const int FDX12 = DX12 << shiftNumber;
    const int FDX23 = DX23 << shiftNumber;
    const int FDX31 = DX31 << shiftNumber;
    const int FDY12 = DY12 << shiftNumber;
    const int FDY23 = DY23 << shiftNumber;
    const int FDY31 = DY31 << shiftNumber;

    // Bounding rectangle in whole pixels. The upper bounds must use the maximum
    // of all three vertices; taking min(X2, X3) inside max() truncated the
    // rectangle whenever vertex 2 or 3 was the extreme one.
    int minx = (min(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
    int maxx = (max(X1, max(X2, X3)) + shiftMask) >> shiftNumber;
    int miny = (min(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
    int maxy = (max(Y1, max(Y2, Y3)) + shiftMask) >> shiftNumber;

    // Clamp to the image: write_imagef has undefined behaviour for
    // coordinates outside the image.
    const int2 screenSize = get_image_dim(screen);
    minx = max(minx, 0);
    miny = max(miny, 0);
    maxx = min(maxx, screenSize.x);
    maxy = min(maxy, screenSize.y);

    // Half-edge constants
    int C1 = DY12 * X1 - DX12 * Y1;
    int C2 = DY23 * X2 - DX23 * Y2;
    int C3 = DY31 * X3 - DX31 * Y3;

    // Correct for fill convention
    if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
    if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
    if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;

    // Half-edge function values at the first pixel of the (clamped) rectangle.
    int CY1 = C1 + DX12 * (miny << shiftNumber) - DY12 * (minx << shiftNumber);
    int CY2 = C2 + DX23 * (miny << shiftNumber) - DY23 * (minx << shiftNumber);
    int CY3 = C3 + DX31 * (miny << shiftNumber) - DY31 * (minx << shiftNumber);

    for(int y = miny; y < maxy; y++) {
        int CX1 = CY1;
        int CX2 = CY2;
        int CX3 = CY3;
        debugOut2[id*3+0] = (int2)(minx, maxx);
        for(int x = minx; x < maxx; x++) {
            debugOut2[id*3+0] = (int2)(CX1, CX2);
            // Inside the triangle when all three half-edge functions are positive.
            if(CX1 > 0 && CX2 > 0 && CX3 > 0) {
                debugOut2[id*3+0] = (int2)(1, 1);
                write_imagef(
                    screen,
                    (int2)(x,y),
                    (float4)(1.0f,1.0f,1.0f,1.0f));
            }
            // Step one pixel in x.
            CX1 -= FDY12;
            CX2 -= FDY23;
            CX3 -= FDY31;
        }
        // Step one pixel in y.
        CY1 += FDX12;
        CY2 += FDX23;
        CY3 += FDX31;
    }
}
//------------------------------------------------------------------------------
// Geometry stage for one particle: expands the particle position stored in
// vsOutputPrimitives[local id] into a quad of four projected corners
// (written to outputPrimitives[global id * 4 + corner]) and rasterizes the
// quad as two triangles.
//   modelview, projection : 4x4 matrices, combined as projection * modelview
//   inverseView           : 3x3 matrix applied to the corner offsets
//   viewport, screen, particle, debugOut1 : forwarded to rasterizerUnOpt
//   debugOut2             : unused
void geometryShader(
    __constant float modelview[16],
    __constant float projection[16],
    __constant float inverseView[9],
    __constant int4 viewport[1],
    __local struct __VSSpriteOut * vsOutputPrimitives,
    __global struct __GSSpriteOut * outputPrimitives,
    // __global float4 * outputPrimitives,
    __write_only image2d_t screen,
    __read_only image2d_t particle,
    __global float4 * debugOut1,
    __global int * debugOut2)
{
    // Per-corner texture coordinates, in the same order as g_positions.
    float2 texcoords[4] =
    {
        (float2)(0.0f,0.0f),
        (float2)(1.0f,0.0f),
        (float2)(0.0f,1.0f),
        (float2)(1.0f,1.0f)
    };

    const uint id = get_global_id(0);
    const uint lid = get_local_id(0);
    const float4 vsPosition = vsOutputPrimitives[lid].position;

    float matrix[16];
    matrixMul(matrix, projection, modelview);

    //
    // Emit the four quad corners (two triangles).
    // An unused, loop-invariant "particlePosition" value that used to be
    // recomputed for every corner has been removed.
    //
    for (uint i = 0; i < 4; i++) {
        // Corner offset, rotated by inverseView and moved to the particle.
        float3 position = g_positions[i] * PARTICLE_RADIUS;
        position = matrixVector3Mul(inverseView, position) + vsPosition;

        // Project, then apply the perspective division.
        position.w = 1.0f;
        position = matrixVectorMul(matrix, position);
        position /= position.w;

        struct __GSSpriteOut output;
        output.position = position;
        //output.textureUV = g_texcoords[i];
        output.textureUV = texcoords[i];
        outputPrimitives[id*4+i] = output;
    }

    // Render QUAD - Triangle 1
    rasterizerUnOpt(
        outputPrimitives,
        viewport,
        screen,
        particle,
        0,
        1,
        2,
        debugOut1);

    // Render QUAD - Triangle 2
    rasterizerUnOpt(
        outputPrimitives,
        viewport,
        screen,
        particle,
        2,
        1,
        3,
        debugOut1);
}
// Sprite pipeline entry point, one work-item per particle: stores the particle
// position and its projection * modelview transform in the work-item's slot of
// vsOutputPrimitives (indexed by local id), then runs geometryShader, which
// builds and rasterizes the quad for that particle.
__kernel void vertexShaderSprite(
    __constant float modelview[16],
    __constant float projection[16],
    __constant float inverseView[9],
    __constant int4 viewport[1],
    __local struct __VSSpriteOut * vsOutputPrimitives,
    __global float4 * inputPrimitives,
    __global struct __GSSpriteOut * outputPrimitives,
    // __global float4 * outputPrimitives,
    __write_only image2d_t screen,
    __read_only image2d_t particle,
    __global float4 * debugOut1,
    __global int * debugOut2)
{
    const uint globalIndex = get_global_id(0);
    const uint localIndex = get_local_id(0);

    // gl_ProjectionMatrix * gl_ModelViewMatrix
    float modelViewProjection[16];
    matrixMul(modelViewProjection, projection, modelview);

    const float4 particle0 = inputPrimitives[globalIndex];
    __local struct __VSSpriteOut * slot = vsOutputPrimitives + localIndex;
    slot->position = particle0;
    slot->particlePosition = matrixVectorMul(modelViewProjection, particle0);

    geometryShader(
        modelview,
        projection,
        inverseView,
        viewport,
        vsOutputPrimitives,
        outputPrimitives,
        screen,
        particle,
        debugOut1,
        debugOut2);
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 768 KiB

View File

@@ -0,0 +1,7 @@
// Pass-through vertex stage: the incoming vertex is used as the clip-space
// position unchanged (no modelview/projection transform), and the first
// texture coordinate set is forwarded to the fragment stage.
void main()
{
    gl_Position = gl_Vertex;
    //gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;
    gl_TexCoord[0] = gl_MultiTexCoord0;
}

View File

@@ -7,6 +7,8 @@ ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL
) )
ADD_DEFINITIONS(-DUSE_AMD_OPENCL) ADD_DEFINITIONS(-DUSE_AMD_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_AMD)
IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include )
@@ -53,15 +55,17 @@ IF (USE_GLUT)
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl
) )
ELSE (USE_GLUT) ELSE (USE_GLUT)

View File

@@ -329,7 +329,9 @@ void btParticlesDynamicsWorld::initCLKernels(int argc, char** argv)
if (!m_cxMainContext) if (!m_cxMainContext)
{ {
// m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); // m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum);
m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum);
//m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS); oclCHECKERROR(ciErrNum, CL_SUCCESS);
m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext); m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext);

View File

@@ -85,7 +85,7 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int*
/* Use NULL for backward compatibility */ /* Use NULL for backward compatibility */
cl_context_properties* cprops = (NULL == platform) ? NULL : cps; cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
cl_context retContext = clCreateContextFromType(cprops, cl_context retContext = clCreateContextFromType(cprops,
CL_DEVICE_TYPE_ALL, deviceType,
NULL, NULL,
NULL, NULL,
&ciErrNum); &ciErrNum);

View File

@@ -1,3 +1,18 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>

View File

@@ -1,3 +1,17 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_OCL_UTILS_H #ifndef BT_OCL_UTILS_H
#define BT_OCL_UTILS_H #define BT_OCL_UTILS_H

View File

@@ -1,13 +1,4 @@
#ifndef GUID_ARG
#define GUID_ARG
#endif
#ifndef MSTRINGIFY
#define MSTRINGIFY(A) A
#endif
MSTRINGIFY( MSTRINGIFY(

View File

@@ -67,10 +67,8 @@ ADD_LIBRARY(BulletMultiThreaded
) )
#for now, only Direct 11 (Direct Compute)
IF(USE_DX11) SUBDIRS(GpuSoftBodySolvers)
SUBDIRS(GpuSoftBodySolvers)
ENDIF(USE_DX11)
IF (BUILD_SHARED_LIBS) IF (BUILD_SHARED_LIBS)

View File

@@ -3,20 +3,12 @@ INCLUDE_DIRECTORIES(
${BULLET_PHYSICS_SOURCE_DIR}/src ${BULLET_PHYSICS_SOURCE_DIR}/src
) )
LIST(APPEND SubDirList "CPU")
SUBDIRS (
OpenCL
CPU
)
# Configure use of OpenCL and DX11 IF( USE_DX11 )
# Generates the settings file and defines libraries and include paths SUBDIRS( DX11 )
OPTION(USE_OPENCL "Use OpenCL" OFF) ENDIF( USE_DX11 )
if( USE_OPENCL )
LIST(APPEND SubDirList "OpenCL")
endif( USE_OPENCL )
if( USE_DX11 )
LIST(APPEND SubDirList "DX11")
endif( USE_DX11 )
SUBDIRS( ${SubDirList} )

View File

@@ -14,14 +14,17 @@ ${VECTOR_MATH_INCLUDE}
SET(BulletSoftBodyDX11Solvers_SRCS SET(BulletSoftBodyDX11Solvers_SRCS
btSoftBodySolver_DX11.cpp btSoftBodySolver_DX11.cpp
btSoftBodySolver_DX11SIMDAware.cpp
) )
SET(BulletSoftBodyDX11Solvers_HDRS SET(BulletSoftBodyDX11Solvers_HDRS
btSoftBodySolver_DX11.h btSoftBodySolver_DX11.h
btSoftBodySolver_DX11SIMDAware.h
../cpu/btSoftBodySolverData.h ../cpu/btSoftBodySolverData.h
btSoftBodySolverVertexData_DX11.h btSoftBodySolverVertexData_DX11.h
btSoftBodySolverTriangleData_DX11.h btSoftBodySolverTriangleData_DX11.h
btSoftBodySolverLinkData_DX11.h btSoftBodySolverLinkData_DX11.h
btSoftBodySolverLinkData_DX11SIMDAware.h
btSoftBodySolverBuffer_DX11.h btSoftBodySolverBuffer_DX11.h
btSoftBodySolverVertexBuffer_DX11.h btSoftBodySolverVertexBuffer_DX11.h
@@ -37,6 +40,7 @@ SET(BulletSoftBodyDX11Solvers_Shaders
UpdatePositions UpdatePositions
UpdateNodes UpdateNodes
SolvePositions SolvePositions
SolvePositionsSIMDBatched
UpdatePositionsFromVelocities UpdatePositionsFromVelocities
ApplyForces ApplyForces
PrepareLinks PrepareLinks

View File

@@ -0,0 +1,128 @@
MSTRINGIFY(
cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
{
int startWaveInBatch;
int numWaves;
float kst;
float ti;
};
// Number of batches per wavefront stored one element per logical wavefront
StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
// Set of up to maxNumVertices vertex addresses per wavefront
StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
StructuredBuffer<float> g_verticesInverseMass : register( t2 );
// Per-link data laid out in a structured way in terms of sub-batches within wavefronts
StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
StructuredBuffer<float> g_linksMassLSC : register( t4 );
StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
// Data loaded on a per-wave basis
groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Storing the vertex addresses actually slowed things down a little
//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
// Link position solver, one thread per lane of a logical wavefront.
// Each wavefront (1) copies the vertices it touches from global memory into
// groupshared memory, (2) walks its batches of links, correcting the two end
// points of the link assigned to each lane, and (3) writes the vertices back.
// Of the constant buffer values only startWaveInBatch, numWaves and kst are read here.
// NOTE(review): there is no group barrier between the groupshared writes and the
// reads that follow; this presumably relies on the lanes of a wavefront running
// in lockstep and on links within a batch not sharing vertices -- confirm.
[numthreads(BLOCK_SIZE, 1, 1)]
void
SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
// The mask form of the modulo assumes WAVEFRONT_SIZE is a power of two.
const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));
const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
// Index of this wavefront within its thread group; selects its slice of the groupshared arrays.
const int localWavefront = wavefront - firstWavefrontInBlock;
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
if( wavefront < (startWaveInBatch + numWaves) )
{
// Load the batch counts for the wavefronts
// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
// Only lane 0 performs the global read; the other lanes pick the value up from groupshared memory below.
if( laneInWavefront == 0 )
{
int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[firstWavefrontInBlock + localWavefront];
wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront;
}
int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront];
int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
// Load the vertices for the wavefronts
// Lanes stride through the wavefront's vertex list WAVEFRONT_SIZE entries at a time.
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
}
// Loop through the batches performing the solve on each in LDS
int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
// The do/while form runs the body at least once, even when batchesWithinWavefront is 0.
int batch = 0;
do
{
int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
int locationOfValue = baseDataLocation + laneInWavefront;
// These loads should all be perfectly linear across the WF
int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
float massLSC = g_linksMassLSC[locationOfValue];
float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
// LDS vertex addresses based on logical wavefront number in block and loaded index
int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
float3 del = position1 - position0;
// Despite its name, len holds the squared distance between the two end points.
float len = dot(del, del);
// Correction factor; stays 0 (no correction) for links whose massLSC is not positive.
float k = 0;
if( massLSC > 0.0f )
{
k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
}
// Move both end points along the link direction, each scaled by its inverse mass.
position0 = position0 - del*(k*inverseMass0);
position1 = position1 + del*(k*inverseMass1);
// The w component is written back as 0; whatever it held before is not preserved.
vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
++batch;
} while( batch < batchesWithinWavefront );
// Update the global memory vertices for the wavefronts
for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
{
int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
}
}
}
);

View File

@@ -0,0 +1,173 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
#include "btSoftBodySolverBuffer_DX11.h"
#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
struct ID3D11Device;
struct ID3D11DeviceContext;
/**
 * Link data container for the SIMD-aware DX11 soft body path.
 * Extends btSoftBodyLinkData with per-wavefront batching tables and the
 * btDX11Buffer objects that pair with the host-side arrays.
 */
class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData
{
public:
	bool m_onGPU;

	ID3D11Device *m_d3dDevice;
	ID3D11DeviceContext *m_d3dDeviceContext;

	// The three const members can only be set by the constructor (defined elsewhere).
	const int m_wavefrontSize;
	const int m_linksPerWorkItem;
	const int m_maxLinksPerWavefront;
	int m_maxBatchesWithinWave;
	int m_maxVerticesWithinWave;
	int m_numWavefronts;

	int m_maxVertex;

	/** Batch count and vertex count stored per wavefront. */
	struct NumBatchesVerticesPair
	{
		int numBatches;
		int numVertices;
	};

	// Number of links in each wavefront
	btAlignedObjectArray<int> m_linksPerWavefront;
	btAlignedObjectArray<NumBatchesVerticesPair> m_numBatchesAndVerticesWithinWaves;
	btDX11Buffer< NumBatchesVerticesPair > m_dx11NumBatchesAndVerticesWithinWaves;

	// Every array below holds batches of m_maxLinksPerWavefront links,
	// ordered by wavefront, using either global or local vertex pairs.
	btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // global vertices, listed per wavefront
	btDX11Buffer<int> m_dx11WavefrontVerticesGlobalAddresses;
	btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // vertex pair of each link
	btDX11Buffer<LinkNodePair> m_dx11LinkVerticesLocalAddresses;
	btDX11Buffer<float> m_dx11LinkStrength;
	btDX11Buffer<float> m_dx11LinksMassLSC;
	btDX11Buffer<float> m_dx11LinksRestLengthSquared;
	btDX11Buffer<float> m_dx11LinksRestLength;
	btDX11Buffer<float> m_dx11LinksMaterialLinearStiffnessCoefficient;

	/** A (start, length) range; both fields default to zero. */
	struct BatchPair
	{
		int start;
		int length;

		BatchPair() : start(0), length(0) {}
		BatchPair( int s, int l ) : start( s ), length( l ) {}
	};

	/**
	 * Per-cloth link addressing information, so that link locations can be
	 * computed independently of how the data is batched.
	 */
	btAlignedObjectArray< int > m_linkAddresses;

	/**
	 * Start/length pairs describing the computation batches over the link data.
	 */
	btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths;

	//ID3D11Buffer* readBackBuffer;

	btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
	virtual ~btSoftBodyLinkDataDX11SIMDAware();

	/** Reserve room for numLinks links in all link-related arrays. */
	virtual void createLinks( int numLinks );

	/** Store the described link at linkIndex; space must already have been allocated by createLinks. */
	virtual void setLinkAt( const LinkDescription &link, int linkIndex );

	virtual bool onAccelerator();
	virtual bool moveToAccelerator();
	virtual bool moveFromAccelerator();

	/**
	 * Build (and later refresh) the batching for the complete link set.
	 * The whole set is re-batched each time a cloth is inserted, which repeats
	 * a lot of work. It could in theory be deferred until just before the cloth
	 * is needed, but as a one-off cost that is left as a later optimisation.
	 */
	void generateBatches();

	/** @return m_maxVerticesWithinWave */
	int getMaxVerticesPerWavefront() { return m_maxVerticesWithinWave; }

	/** @return m_wavefrontSize */
	int getWavefrontSize() { return m_wavefrontSize; }

	/** @return m_linksPerWorkItem */
	int getLinksPerWorkItem() { return m_linksPerWorkItem; }

	/** @return m_maxLinksPerWavefront */
	int getMaxLinksPerWavefront() { return m_maxLinksPerWavefront; }

	/** @return m_maxBatchesWithinWave */
	int getMaxBatchesPerWavefront() { return m_maxBatchesWithinWave; }

	/** @return m_numWavefronts */
	int getNumWavefronts() { return m_numWavefronts; }

	/** @return a copy of the batch/vertex count pair stored for the given wavefront index. */
	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
	{
		return m_numBatchesAndVerticesWithinWaves[wavefront];
	}

	/** @return the entry of m_wavefrontVerticesGlobalAddresses at vertexIndex. */
	int getVertexGlobalAddresses( int vertexIndex )
	{
		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
	}

	/**
	 * Post-batching local addresses of the vertex pair of a link, assuming all
	 * vertices used by a wavefront are loaded locally.
	 */
	LinkNodePair getVertexPairLocalAddresses( int linkIndex )
	{
		return m_linkVerticesLocalAddresses[linkIndex];
	}
};
#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H

View File

@@ -622,7 +622,7 @@ void btDX11SoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softB
using Vectormath::Aos::Point3; using Vectormath::Aos::Point3;
// Create SoftBody that will store the information within the solver // Create SoftBody that will store the information within the solver
btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); btDX11AcceleratedSoftBodyInterface *newSoftBody = new btDX11AcceleratedSoftBodyInterface( softBody );
m_softBodySet.push_back( newSoftBody ); m_softBodySet.push_back( newSoftBody );
m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
@@ -1451,11 +1451,11 @@ void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float
btDX11SoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) btDX11AcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
{ {
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{ {
btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; btDX11AcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex];
if( softBodyInterface->getSoftBody() == softBody ) if( softBodyInterface->getSoftBody() == softBody )
return softBodyInterface; return softBodyInterface;
} }
@@ -1466,7 +1466,7 @@ void btDX11SoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const
{ {
checkInitialized(); checkInitialized();
btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); btDX11AcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody );
const int firstVertex = currentCloth->getFirstVertex(); const int firstVertex = currentCloth->getFirstVertex();

View File

@@ -13,6 +13,9 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#include "vectormath/vmInclude.h" #include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btSoftBodySolvers.h"
@@ -22,185 +25,184 @@ subject to the following restrictions:
#include "btSoftBodySolverTriangleData_DX11.h" #include "btSoftBodySolverTriangleData_DX11.h"
#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H /**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btDX11AcceleratedSoftBodyInterface
{
	// Per-cloth bookkeeping record: remembers which soft body it belongs to and
	// the first index / current count / maximum count of that cloth's vertices,
	// triangles and links. The class only stores these values; every accessor
	// below is a plain read or write of one member.
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;

	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/**
	 * Create a record for softBody with every count and index set to zero.
	 * The pointer is stored as given; this class never dereferences it.
	 * Members are listed in declaration order.
	 */
	btDX11AcceleratedSoftBodyInterface( btSoftBody *softBody ) :
		m_numVertices( 0 ),
		m_maxVertices( 0 ),
		m_numTriangles( 0 ),
		m_maxTriangles( 0 ),
		m_firstVertex( 0 ),
		m_firstTriangle( 0 ),
		m_firstLink( 0 ),
		m_maxLinks( 0 ),
		m_numLinks( 0 ),
		m_softBody( softBody )
	{
	}

	// Read accessors are const so the record can be inspected through a
	// const reference or pointer; they return the stored value unchanged.
	int getNumVertices() const { return m_numVertices; }
	int getNumTriangles() const { return m_numTriangles; }
	int getMaxVertices() const { return m_maxVertices; }
	int getMaxTriangles() const { return m_maxTriangles; }
	int getFirstVertex() const { return m_firstVertex; }
	int getFirstTriangle() const { return m_firstTriangle; }

	// TODO: All of these set functions will have to do checks and
	// update the world because restructuring of the arrays will be necessary
	// Reasonable use of "friend"?
	void setNumVertices( int numVertices ) { m_numVertices = numVertices; }
	void setNumTriangles( int numTriangles ) { m_numTriangles = numTriangles; }
	void setMaxVertices( int maxVertices ) { m_maxVertices = maxVertices; }
	void setMaxTriangles( int maxTriangles ) { m_maxTriangles = maxTriangles; }
	void setFirstVertex( int firstVertex ) { m_firstVertex = firstVertex; }
	void setFirstTriangle( int firstTriangle ) { m_firstTriangle = firstTriangle; }
	void setMaxLinks( int maxLinks ) { m_maxLinks = maxLinks; }
	void setNumLinks( int numLinks ) { m_numLinks = numLinks; }
	void setFirstLink( int firstLink ) { m_firstLink = firstLink; }

	int getMaxLinks() const { return m_maxLinks; }
	int getNumLinks() const { return m_numLinks; }
	int getFirstLink() const { return m_firstLink; }

	/** @return the soft body pointer passed to the constructor. */
	btSoftBody* getSoftBody() const { return m_softBody; }

	// Disabled code: it refers to m_currentSolver and m_clothIdentifier,
	// neither of which is declared by this class.
#if 0
	void setAcceleration( Vectormath::Aos::Vector3 acceleration )
	{
		m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration );
	}

	void setWindVelocity( Vectormath::Aos::Vector3 windVelocity )
	{
		m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity );
	}

	/**
	 * Set the density of the air in which the cloth is situated.
	 */
	void setAirDensity( btScalar density )
	{
		m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast<float>(density) );
	}

	/**
	 * Add a collision object to this soft body.
	 */
	void addCollisionObject( btCollisionObject *collisionObject )
	{
		m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject );
	}
#endif
};
class btDX11SoftBodySolver : public btSoftBodySolver class btDX11SoftBodySolver : public btSoftBodySolver
{ {
public: public:
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btAcceleratedSoftBodyInterface
{
	// Stores, for one soft body, the first index, the current count and the
	// maximum count of its vertices, triangles and links. Pure data holder:
	// each accessor reads or writes exactly one member.
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;

	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/**
	 * Start a record for softBody with all counts and indices at zero.
	 * The initializer list follows member declaration order.
	 */
	btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
		m_numVertices( 0 ),
		m_maxVertices( 0 ),
		m_numTriangles( 0 ),
		m_maxTriangles( 0 ),
		m_firstVertex( 0 ),
		m_firstTriangle( 0 ),
		m_firstLink( 0 ),
		m_maxLinks( 0 ),
		m_numLinks( 0 ),
		m_softBody( softBody )
	{
	}

	// Getters are const: they only return the stored value, so they are
	// callable on const objects as well.
	int getNumVertices() const { return m_numVertices; }
	int getNumTriangles() const { return m_numTriangles; }
	int getMaxVertices() const { return m_maxVertices; }
	int getMaxTriangles() const { return m_maxTriangles; }
	int getFirstVertex() const { return m_firstVertex; }
	int getFirstTriangle() const { return m_firstTriangle; }

	// TODO: All of these set functions will have to do checks and
	// update the world because restructuring of the arrays will be necessary
	// Reasonable use of "friend"?
	void setNumVertices( int numVertices ) { m_numVertices = numVertices; }
	void setNumTriangles( int numTriangles ) { m_numTriangles = numTriangles; }
	void setMaxVertices( int maxVertices ) { m_maxVertices = maxVertices; }
	void setMaxTriangles( int maxTriangles ) { m_maxTriangles = maxTriangles; }
	void setFirstVertex( int firstVertex ) { m_firstVertex = firstVertex; }
	void setFirstTriangle( int firstTriangle ) { m_firstTriangle = firstTriangle; }
	void setMaxLinks( int maxLinks ) { m_maxLinks = maxLinks; }
	void setNumLinks( int numLinks ) { m_numLinks = numLinks; }
	void setFirstLink( int firstLink ) { m_firstLink = firstLink; }

	int getMaxLinks() const { return m_maxLinks; }
	int getNumLinks() const { return m_numLinks; }
	int getFirstLink() const { return m_firstLink; }

	/** @return the soft body pointer given at construction. */
	btSoftBody* getSoftBody() const { return m_softBody; }

	// Compiled out: uses m_currentSolver and m_clothIdentifier, which this
	// class does not declare.
#if 0
	void setAcceleration( Vectormath::Aos::Vector3 acceleration )
	{
		m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration );
	}

	void setWindVelocity( Vectormath::Aos::Vector3 windVelocity )
	{
		m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity );
	}

	/**
	 * Set the density of the air in which the cloth is situated.
	 */
	void setAirDensity( btScalar density )
	{
		m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast<float>(density) );
	}

	/**
	 * Add a collision object to this soft body.
	 */
	void addCollisionObject( btCollisionObject *collisionObject )
	{
		m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject );
	}
#endif
};
class KernelDesc class KernelDesc
{ {
@@ -344,7 +346,7 @@ private:
* Cloths owned by this solver. * Cloths owned by this solver.
* Only our cloths are in this array. * Only our cloths are in this array.
*/ */
btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; btAlignedObjectArray< btDX11AcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver. /** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver.
@@ -429,7 +431,7 @@ private:
void updateConstants( float timeStep ); void updateConstants( float timeStep );
btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); btDX11AcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
////////////////////////////////////// //////////////////////////////////////
// Kernel dispatches // Kernel dispatches

View File

@@ -0,0 +1,432 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "btSoftBodySolverVertexBuffer_DX11.h"
#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
#include "btSoftBodySolverVertexData_DX11.h"
#include "btSoftBodySolverTriangleData_DX11.h"
#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
class btDX11SIMDAwareSoftBodySolver : public btSoftBodySolver
{
public:
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btAcceleratedSoftBodyInterface
{
	// Data holder for one soft body inside the solver: first index, current
	// count and maximum count for its vertices, triangles and links, plus the
	// soft body pointer. No accessor does more than read or write one member.
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;

	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/**
	 * Create the record for softBody; all counts and indices begin at zero.
	 * Initializers appear in member declaration order.
	 */
	btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
		m_numVertices( 0 ),
		m_maxVertices( 0 ),
		m_numTriangles( 0 ),
		m_maxTriangles( 0 ),
		m_firstVertex( 0 ),
		m_firstTriangle( 0 ),
		m_firstLink( 0 ),
		m_maxLinks( 0 ),
		m_numLinks( 0 ),
		m_softBody( softBody )
	{
	}

	// Const read accessors: usable through const references and pointers.
	int getNumVertices() const { return m_numVertices; }
	int getNumTriangles() const { return m_numTriangles; }
	int getMaxVertices() const { return m_maxVertices; }
	int getMaxTriangles() const { return m_maxTriangles; }
	int getFirstVertex() const { return m_firstVertex; }
	int getFirstTriangle() const { return m_firstTriangle; }

	// Write accessors: each one overwrites the matching member, no validation.
	void setNumVertices( int numVertices ) { m_numVertices = numVertices; }
	void setNumTriangles( int numTriangles ) { m_numTriangles = numTriangles; }
	void setMaxVertices( int maxVertices ) { m_maxVertices = maxVertices; }
	void setMaxTriangles( int maxTriangles ) { m_maxTriangles = maxTriangles; }
	void setFirstVertex( int firstVertex ) { m_firstVertex = firstVertex; }
	void setFirstTriangle( int firstTriangle ) { m_firstTriangle = firstTriangle; }
	void setMaxLinks( int maxLinks ) { m_maxLinks = maxLinks; }
	void setNumLinks( int numLinks ) { m_numLinks = numLinks; }
	void setFirstLink( int firstLink ) { m_firstLink = firstLink; }

	int getMaxLinks() const { return m_maxLinks; }
	int getNumLinks() const { return m_numLinks; }
	int getFirstLink() const { return m_firstLink; }

	/** @return the soft body pointer supplied to the constructor. */
	btSoftBody* getSoftBody() const { return m_softBody; }
};
/**
 * Pairs a compute shader pointer with a constant buffer pointer.
 * Both pointers start out null and are publicly writable.
 */
class KernelDesc
{
public:
	ID3D11ComputeShader* kernel;
	ID3D11Buffer* constBuffer;

	KernelDesc() : kernel( 0 ), constBuffer( 0 ) {}

	// Intentionally releases nothing.
	// TODO: this should probably destroy its kernel but we need to be careful
	// in case KernelDescs are copied
	virtual ~KernelDesc() {}
};
// Parameter block for the solve-positions-from-links kernel: a wave range
// (startWave, numWaves) plus the kst and ti scalars.
// NOTE(review): the CB suffix suggests this mirrors a shader constant buffer;
// if so, field order and sizes must stay in sync with the HLSL side - confirm.
struct SolvePositionsFromLinksKernelCB
{
int startWave;
int numWaves;
float kst;
float ti;
};
// Parameter block for the integrate kernel: node count and solver time step.
// NOTE(review): padding1/padding2 presumably round the struct up to the
// 16-byte multiple D3D11 requires for constant buffers - confirm before removing.
struct IntegrateCB
{
int numNodes;
float solverdt;
int padding1;
int padding2;
};
// Parameter block for the update-positions-from-velocities kernel: node count
// and the solverSDT scalar.
// NOTE(review): padding1/padding2 presumably exist only to reach a 16-byte
// multiple for the constant buffer - confirm.
struct UpdatePositionsFromVelocitiesCB
{
int numNodes;
float solverSDT;
int padding1;
int padding2;
};
// Parameter block for the update-velocities-from-positions (without
// velocities) kernel: node count and the isolverdt scalar.
// NOTE(review): isolverdt is presumably the inverse of the solver time step,
// and the padding fields presumably fill out a 16-byte multiple - confirm both.
struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
// Parameter block for the update-velocities-from-positions (with velocities)
// kernel. Same fields as UpdateVelocitiesFromPositionsWithoutVelocitiesCB.
// NOTE(review): padding fields presumably fill out a 16-byte multiple - confirm.
struct UpdateVelocitiesFromPositionsWithVelocitiesCB
{
int numNodes;
float isolverdt;
int padding1;
int padding2;
};
// Parameter block for the update-soft-bodies kernel: node count, a face range
// (startFace, numFaces) and an epsilon value.
// Four 4-byte fields, so no explicit padding members are present.
// NOTE(review): keep field order in sync with the shader-side declaration - confirm.
struct UpdateSoftBodiesCB
{
int numNodes;
int startFace;
int numFaces;
float epsilon;
};
// Parameter block for the output-to-vertex-array kernels: a node range
// (startNode, numNodes) and offset/stride pairs for positions and normals.
// NOTE(review): units of the offsets/strides (bytes vs. floats) are not
// visible here - confirm against the shader. padding1/padding2 presumably
// round the eight-field struct up to a 16-byte multiple.
struct OutputToVertexArrayCB
{
int startNode;
int numNodes;
int positionOffset;
int positionStride;
int normalOffset;
int normalStride;
int padding1;
int padding2;
};
// Parameter block for the apply-forces kernel: node count (unsigned here,
// unlike the other blocks), solver time step and an epsilon value.
// NOTE(review): padding3 presumably fills the struct out to 16 bytes - confirm.
struct ApplyForcesCB
{
unsigned int numNodes;
float solverdt;
float epsilon;
int padding3;
};
// Parameter block for the add-velocity kernel: a node range given as
// startNode/lastNode and the velocity as three separate float components.
// NOTE(review): whether lastNode is inclusive is not visible here - confirm
// against the shader. padding1..3 presumably round the struct up to a
// 16-byte multiple.
struct AddVelocityCB
{
int startNode;
int lastNode;
float velocityX;
float velocityY;
float velocityZ;
int padding1;
int padding2;
int padding3;
};
private:
ID3D11Device * m_dx11Device;
ID3D11DeviceContext* m_dx11Context;
/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
btSoftBodyLinkDataDX11SIMDAware m_linkData;
btSoftBodyVertexDataDX11 m_vertexData;
btSoftBodyTriangleDataDX11 m_triangleData;
/** Variable to define whether we need to update solver constants on the next iteration */
bool m_updateSolverConstants;
bool m_shadersInitialized;
/**
* Cloths owned by this solver.
* Only our cloths are in this array.
*/
btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothAcceleration;
/** Wind velocity to be applied normal to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver.
*/
btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity;
btDX11Buffer<Vectormath::Aos::Vector3> m_dx11PerClothWindVelocity;
/** Velocity damping factor */
btAlignedObjectArray< float > m_perClothDampingFactor;
btDX11Buffer<float> m_dx11PerClothDampingFactor;
/** Velocity correction coefficient */
btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient;
btDX11Buffer<float> m_dx11PerClothVelocityCorrectionCoefficient;
/** Lift parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothLiftFactor;
btDX11Buffer<float> m_dx11PerClothLiftFactor;
/** Drag parameter for wind effect on cloth. */
btAlignedObjectArray< float > m_perClothDragFactor;
btDX11Buffer<float> m_dx11PerClothDragFactor;
/** Density of the medium in which each cloth sits */
btAlignedObjectArray< float > m_perClothMediumDensity;
btDX11Buffer<float> m_dx11PerClothMediumDensity;
KernelDesc solvePositionsFromLinksKernel;
KernelDesc integrateKernel;
KernelDesc addVelocityKernel;
KernelDesc updatePositionsFromVelocitiesKernel;
KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel;
KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel;
KernelDesc resetNormalsAndAreasKernel;
KernelDesc normalizeNormalsAndAreasKernel;
KernelDesc updateSoftBodiesKernel;
KernelDesc outputToVertexArrayWithNormalsKernel;
KernelDesc outputToVertexArrayWithoutNormalsKernel;
KernelDesc outputToVertexArrayKernel;
KernelDesc applyForcesKernel;
KernelDesc collideSphereKernel;
KernelDesc collideCylinderKernel;
/**
* Integrate motion on the solver.
*/
virtual void integrate( float solverdt );
float computeTriangleArea(
const Vectormath::Aos::Point3 &vertex0,
const Vectormath::Aos::Point3 &vertex1,
const Vectormath::Aos::Point3 &vertex2 );
/**
* Compile a compute shader kernel from a string and return the appropriate KernelDesc object.
*/
KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
bool buildShaders();
void resetNormalsAndAreas( int numVertices );
void normalizeNormalsAndAreas( int numVertices );
void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
virtual void applyForces( float solverdt );
void updateConstants( float timeStep );
btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
//////////////////////////////////////
// Kernel dispatches
void prepareLinks();
void updatePositionsFromVelocities( float solverdt );
void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
void solveLinksForVelocity( int startLink, int numLinks, float kst );
void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
// End kernel dispatches
/////////////////////////////////////
void releaseKernels();
public:
btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context);
virtual ~btDX11SIMDAwareSoftBodySolver();
virtual btSoftBodyLinkData &getLinkData();
virtual btSoftBodyVertexData &getVertexData();
virtual btSoftBodyTriangleData &getTriangleData();
virtual bool checkInitialized();
virtual void updateSoftBodies( );
virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies );
virtual void solveConstraints( float solverdt );
virtual void predictMotion( float solverdt );
virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer );
};
#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H

View File

@@ -0,0 +1,82 @@
# Builds BulletSoftBodySolvers_OpenCL_AMD: the OpenCL soft body solver sources
# from the parent directory, compiled with the AMD OpenCL definitions.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
)

ADD_DEFINITIONS(-DUSE_AMD_OPENCL)
ADD_DEFINITIONS(-DCL_PLATFORM_AMD)

# NOTE(review): the ==ATISTREAMSDKROOT= spelling looks like a placeholder that
# is post-processed when distributable MSVC project files are generated -
# confirm; it is kept exactly as it was.
IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
	INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include )
ELSE()
	INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include )
ENDIF()

SET(BulletSoftBodyOpenCLSolvers_SRCS
	../btSoftBodySolver_OpenCL.cpp
)

SET(BulletSoftBodyOpenCLSolvers_HDRS
	../btSoftBodySolver_OpenCL.h
	../../CPU/btSoftBodySolverData.h
	../btSoftBodySolverVertexData_OpenCL.h
	../btSoftBodySolverTriangleData_OpenCL.h
	../btSoftBodySolverLinkData_OpenCL.h
	../btSoftBodySolverBuffer_OpenCL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
#	OutputToVertexArray
	UpdateNormals
	Integrate
	UpdatePositions
	UpdateNodes
	SolvePositions
	UpdatePositionsFromVelocities
	ApplyForces
	PrepareLinks
	VSolveLinks
)

# Turn each shader name into the path of its .cl file under ../OpenCLC.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl")
endforeach(f)

ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD
	${BulletSoftBodyOpenCLSolvers_SRCS}
	${BulletSoftBodyOpenCLSolvers_HDRS}
	${BulletSoftBodyOpenCLSolvers_OpenCLC}
)

SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION})

# Link the library created above against its Bullet dependencies. The first
# argument must be this file's own target: BulletSoftBody is not created here,
# so it can only appear as a dependency.
IF (BUILD_SHARED_LIBS)
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)

IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX})
				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)

		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View File

@@ -0,0 +1,73 @@
# Builds BulletSoftBodySolvers_OpenCL_Apple: the OpenCL soft body solver
# sources from the parent directory together with the ../OpenCLC10 kernels.

INCLUDE_DIRECTORIES(
	${BULLET_PHYSICS_SOURCE_DIR}/src
)

SET(BulletSoftBodyOpenCLSolvers_SRCS
	../btSoftBodySolver_OpenCL.cpp
)

SET(BulletSoftBodyOpenCLSolvers_HDRS
	../btSoftBodySolver_OpenCL.h
	../../CPU/btSoftBodySolverData.h
	../btSoftBodySolverVertexData_OpenCL.h
	../btSoftBodySolverTriangleData_OpenCL.h
	../btSoftBodySolverLinkData_OpenCL.h
	../btSoftBodySolverBuffer_OpenCL.h
)

# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
#	OutputToVertexArray
	UpdateNormals
	Integrate
	UpdatePositions
	UpdateNodes
	SolvePositions
	UpdatePositionsFromVelocities
	ApplyForces
	PrepareLinks
	VSolveLinks
)

# Turn each shader name into the path of its .cl file under ../OpenCLC10.
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
endforeach(f)

ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple
	${BulletSoftBodyOpenCLSolvers_SRCS}
	${BulletSoftBodyOpenCLSolvers_HDRS}
	${BulletSoftBodyOpenCLSolvers_OpenCLC}
)

SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})

# Link the library created above against its Bullet dependencies. The first
# argument must be this file's own target: BulletSoftBody is not created here,
# so it can only appear as a dependency.
IF (BUILD_SHARED_LIBS)
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)

IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION .)
			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION lib${LIB_SUFFIX})
				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)

		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View File

@@ -1,71 +1,16 @@
INCLUDE_DIRECTORIES( IF(BUILD_MINICL_OPENCL_DEMOS)
${BULLET_PHYSICS_SOURCE_DIR}/src SUBDIRS( MiniCL )
) ENDIF()
IF(BUILD_AMD_OPENCL_DEMOS)
SUBDIRS(AMD)
ENDIF()
SET(OPENCL_DIR $ENV{ATISTREAMSDKROOT}) IF(BUILD_NVIDIA_OPENCL_DEMOS)
SET(OPENCL_INCLUDE_PATH "${ATISTREAMSDKROOT}/include" CACHE DOCSTRING "OpenCL SDK include path") SUBDIRS(NVidia)
ENDIF()
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_PATH} "../cpu/") IF(APPLE)
SUBDIRS(Apple)
SET(BulletSoftBodyOpenCLSolvers_SRCS ENDIF()
btSoftBodySolver_OpenCL.cpp
)
SET(BulletSoftBodyOpenCLSolvers_HDRS
btSoftBodySolver_OpenCL.h
../cpu/btSoftBodySolverData.h
btSoftBodySolverVertexData_OpenCL.h
btSoftBodySolverTriangleData_OpenCL.h
btSoftBodySolverLinkData_OpenCL.h
btSoftBodySolverBuffer_OpenCL.h
)
# OpenCL and HLSL Shaders.
# Build rules generated to stringify these into headers
# which are needed by some of the sources
SET(BulletSoftBodyOpenCLSolvers_Shaders
# OutputToVertexArray
UpdateNormals
Integrate
UpdatePositions
UpdateNodes
SolvePositions
UpdatePositionsFromVelocities
ApplyForces
PrepareLinks
VSolveLinks
)
foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "OpenCLC/${f}.cl")
endforeach(f)
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL ${BulletSoftBodyOpenCLSolvers_SRCS} ${BulletSoftBodyOpenCLSolvers_HDRS} ${BulletSoftBodyOpenCLSolvers_OpenCLC})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES SOVERSION ${BULLET_VERSION})
# Link the library this script creates (BulletSoftBodySolvers_OpenCL) against
# its Bullet dependencies; BulletSoftBody is not created by this script, so it
# belongs on the dependency side of the call.
IF (BUILD_SHARED_LIBS)
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)
IF (INSTALL_LIBS)
IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION .)
ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION lib${LIB_SUFFIX})
INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES FRAMEWORK true)
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View File

@@ -0,0 +1,75 @@
# Bullet headers are resolved relative to the top-level src directory.
include_directories(${BULLET_PHYSICS_SOURCE_DIR}/src)

# Build the solver against MiniCL: USE_MINICL selects the MiniCL/cl.h include
# in btSoftBodySolverBuffer_OpenCL.h.
add_definitions(-DUSE_MINICL)

# Solver implementation shared with the other OpenCL back ends (one directory up).
set(BulletSoftBodyOpenCLSolvers_SRCS
	../btSoftBodySolver_OpenCL.cpp
)

# Headers listed with the target; they are also used as the framework public headers.
set(BulletSoftBodyOpenCLSolvers_HDRS
	../btSoftBodySolver_OpenCL.h
	../../CPU/btSoftBodySolverData.h
	../btSoftBodySolverVertexData_OpenCL.h
	../btSoftBodySolverTriangleData_OpenCL.h
	../btSoftBodySolverLinkData_OpenCL.h
	../btSoftBodySolverBuffer_OpenCL.h
)
# Base names (no extension) of the OpenCL kernel source files used by the
# soft body solver. OutputToVertexArray is deliberately left commented out.
set(BulletSoftBodyOpenCLSolvers_Shaders
#	OutputToVertexArray
	UpdateNormals
	Integrate
	UpdatePositions
	UpdateNodes
	SolvePositions
	UpdatePositionsFromVelocities
	ApplyForces
	PrepareLinks
	VSolveLinks
)

# Turn every base name into its ../OpenCLC10/<name>.cl path and collect the paths in
# BulletSoftBodyOpenCLSolvers_OpenCLC, which is passed to ADD_LIBRARY for this target.
foreach(shader ${BulletSoftBodyOpenCLSolvers_Shaders})
	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${shader}.cl")
endforeach(shader)
# MiniCL flavour of the OpenCL soft body solver library: solver sources, headers
# and the OpenCL 1.0 kernel files collected above.
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Mini
	${BulletSoftBodyOpenCLSolvers_SRCS}
	${BulletSoftBodyOpenCLSolvers_HDRS}
	${BulletSoftBodyOpenCLSolvers_OpenCLC}
)

SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION})

IF (BUILD_SHARED_LIBS)
	# The link rule must name the target defined in this file. It previously named
	# BulletSoftBody as the target, which left this library without link dependencies.
	# NOTE(review): the MiniCL library itself may also need to be listed - confirm.
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Mini BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)

# Install rules are emitted only when INSTALL_LIBS is set, distributable MSVC
# project files are not being generated, and CMake is newer than 2.5.
IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX})
				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)

		# Framework builds on Apple expose the solver headers as public headers.
		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View File

@@ -0,0 +1,40 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// MiniCL build of the soft body kernels: the .cl sources are compiled directly
// into this translation unit and each kernel entry point is then registered by name.
#include <MiniCL/cl_MiniCL_Defs.h>
// Here MSTRINGIFY expands to its argument unchanged, so the kernel files below
// (whose whole content is wrapped in MSTRINGIFY( ... );) are pasted in as code
// rather than turned into string literals.
#define MSTRINGIFY(A) A
#include "../OpenCLC10/ApplyForces.cl"
#include "../OpenCLC10/Integrate.cl"
#include "../OpenCLC10/PrepareLinks.cl"
#include "../OpenCLC10/SolvePositions.cl"
#include "../OpenCLC10/UpdateNodes.cl"
#include "../OpenCLC10/UpdateNormals.cl"
#include "../OpenCLC10/UpdatePositions.cl"
#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
// VSolveLinks.cl is intentionally not compiled in, and UpdateConstants.cl is not
// included either, so neither VSolveLinksKernel nor UpdateConstantsKernel is
// registered below.
//#include "../OpenCLC10/VSolveLinks.cl"
// One registration per kernel entry point defined by the files included above.
// NOTE(review): MINICL_REGISTER comes from the MiniCL header; presumably it makes
// the kernel discoverable by name at runtime - confirm against cl_MiniCL_Defs.h.
MINICL_REGISTER(PrepareLinksKernel)
MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
MINICL_REGISTER(SolvePositionsFromLinksKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
MINICL_REGISTER(IntegrateKernel)
MINICL_REGISTER(ApplyForcesKernel)
MINICL_REGISTER(ResetNormalsAndAreasKernel)
MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
MINICL_REGISTER(UpdateSoftBodiesKernel)

View File

@@ -0,0 +1,79 @@
# Bullet headers are resolved relative to the top-level src directory.
include_directories(${BULLET_PHYSICS_SOURCE_DIR}/src)

# OpenCL headers come from the NVIDIA compute SDK named by NVSDKCOMPUTE_ROOT.
# NOTE(review): the ==NVSDKCOMPUTE_ROOT= spelling used for distributable MSVC
# project files looks like a placeholder substituted later - confirm.
if(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
	include_directories( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc )
else()
	include_directories( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc )
endif()

# Solver implementation shared with the other OpenCL back ends (one directory up).
set(BulletSoftBodyOpenCLSolvers_SRCS
	../btSoftBodySolver_OpenCL.cpp
)

# Headers listed with the target; they are also used as the framework public headers.
set(BulletSoftBodyOpenCLSolvers_HDRS
	../btSoftBodySolver_OpenCL.h
	../../CPU/btSoftBodySolverData.h
	../btSoftBodySolverVertexData_OpenCL.h
	../btSoftBodySolverTriangleData_OpenCL.h
	../btSoftBodySolverLinkData_OpenCL.h
	../btSoftBodySolverBuffer_OpenCL.h
)
# Base names (no extension) of the OpenCL kernel source files used by the
# soft body solver. OutputToVertexArray is deliberately left commented out.
set(BulletSoftBodyOpenCLSolvers_Shaders
#	OutputToVertexArray
	UpdateNormals
	Integrate
	UpdatePositions
	UpdateNodes
	SolvePositions
	UpdatePositionsFromVelocities
	ApplyForces
	PrepareLinks
	VSolveLinks
)

# Turn every base name into its ../OpenCLC/<name>.cl path and collect the paths in
# BulletSoftBodyOpenCLSolvers_OpenCLC, which is passed to ADD_LIBRARY for this target.
foreach(shader ${BulletSoftBodyOpenCLSolvers_Shaders})
	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${shader}.cl")
endforeach(shader)
# NVIDIA flavour of the OpenCL soft body solver library: solver sources, headers
# and the kernel files collected above.
ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia
	${BulletSoftBodyOpenCLSolvers_SRCS}
	${BulletSoftBodyOpenCLSolvers_HDRS}
	${BulletSoftBodyOpenCLSolvers_OpenCLC}
)

SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})

IF (BUILD_SHARED_LIBS)
	# The link rule must name the target defined in this file. It previously named
	# BulletSoftBody as the target, which left this library without link dependencies.
	# NOTE(review): the vendor OpenCL library may also need to be listed - confirm.
	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
ENDIF (BUILD_SHARED_LIBS)

# Install rules are emitted only when INSTALL_LIBS is set, distributable MSVC
# project files are not being generated, and CMake is newer than 2.5.
IF (INSTALL_LIBS)
	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)

		# Framework builds on Apple expose the solver headers as public headers.
		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
ENDIF (INSTALL_LIBS)

View File

@@ -0,0 +1,91 @@
MSTRINGIFY(
// Dot product over the x y z lanes only; the w lane of both inputs is never read.
float adot3(float4 a, float4 b)
{
	float xyzDot = a.x*b.x + a.y*b.y + a.z*b.z;
	return xyzDot;
}
// Scales axis a by the three-component dot product of v and a.
// This is the projection of v onto a when a has unit length.
float4 projectOnAxis( float4 v, float4 a )
{
	float alongAxis = adot3(v, a);
	return (a*alongAxis);
}
// One work item per vertex. For every vertex with a positive inverse mass:
//  1. adds clothAcceleration*solverdt to the vertex velocity and stores it;
//  2. when the squared speed relative to the cloth wind exceeds epsilon and the
//     wind-facing normal component is positive it builds a lift and drag force
//     and folds it into the vertex force accumulator.
// Vertices at or beyond numNodes and vertices with non-positive inverse mass are untouched.
__kernel void 
ApplyForcesKernel(  
	const uint numNodes,
	const float solverdt,
	const float epsilon,
	__global int * g_vertexClothIdentifier,
	__global float4 * g_vertexNormal,
	__global float * g_vertexArea,
	__global float * g_vertexInverseMass,
	__global float * g_clothLiftFactor,
	__global float * g_clothDragFactor,
	__global float4 * g_clothWindVelocity,
	__global float4 * g_clothAcceleration,
	__global float * g_clothMediumDensity,
	__global float4 * g_vertexForceAccumulator,
	__global float4 * g_vertexVelocity GUID_ARG)
{
	unsigned int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
		return;

	int clothId = g_vertexClothIdentifier[nodeID];
	float inverseMass = g_vertexInverseMass[nodeID];
	if( !(inverseMass > 0.0f) )
		return;

	// Per-vertex state
	float4 velocity = g_vertexVelocity[nodeID];
	float4 normal = g_vertexNormal[nodeID];
	float area = g_vertexArea[nodeID];
	float4 accumulatedForce = g_vertexForceAccumulator[nodeID];

	// Per-cloth parameters
	float4 clothAcceleration = g_clothAcceleration[clothId];
	float4 clothWindVelocity = g_clothWindVelocity[clothId];
	float liftFactor = g_clothLiftFactor[clothId];
	float dragFactor = g_clothDragFactor[clothId];
	float mediumDensity = g_clothMediumDensity[clothId];

	// The cloth acceleration is applied straight to the velocity instead of going through a force
	velocity += (clothAcceleration*solverdt);
	g_vertexVelocity[nodeID] = velocity;

	float4 relativeWindVelocity = velocity - clothWindVelocity;
	float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
	if( !(relativeSpeedSquared > epsilon) )
		return;

	// Flip the normal so that it does not oppose the relative wind direction
	normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
	float dvNormal = dot(normal, relativeWindVelocity);
	if( !(dvNormal > 0) )
		return;

	float4 force = (float4)(0.f, 0.f, 0.f, 0.f);
	float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
	float c1 = c0 * mediumDensity;
	force += normal * (-c1 * liftFactor);
	force += normalize(relativeWindVelocity)*(-c1 * dragFactor);

	float dtim = solverdt * inverseMass;
	float4 forceDTIM = force * dtim;

	// Both candidate results are evaluated before choosing between them
	float4 forceAdded = accumulatedForce + force;
	float4 velocityCancelled = accumulatedForce - (projectOnAxis(velocity, normalize(force))/dtim);

	// When the force impulse would exceed the current velocity the velocity
	// component along the force is removed instead of adding the full force
	if( dot(forceDTIM, forceDTIM) > dot(velocity, velocity) )
		g_vertexForceAccumulator[nodeID] = velocityCancelled;
	else
		g_vertexForceAccumulator[nodeID] = forceAdded;
}
);

View File

@@ -0,0 +1,35 @@
MSTRINGIFY(
// One work item per vertex. Saves the current position as the previous position and
// advances velocity by force*inverseMass*solverdt and position by velocity*solverdt.
// It then clears the force accumulator of the vertex.
__kernel void 
IntegrateKernel( 
	const int numNodes,
	const float solverdt,
	__global float * g_vertexInverseMasses,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexVelocity,
	__global float4 * g_vertexPreviousPositions,
	__global float4 * g_vertexForceAccumulator GUID_ARG)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
		return;

	float4 oldPosition = g_vertexPositions[nodeID];
	float4 newVelocity = g_vertexVelocity[nodeID];
	float4 accumulatedForce = g_vertexForceAccumulator[nodeID];
	float invMass = g_vertexInverseMasses[nodeID];

	// Remember where the vertex was before this step
	g_vertexPreviousPositions[nodeID] = oldPosition;

	newVelocity += accumulatedForce * invMass * solverdt;

	float4 newPosition = oldPosition;
	newPosition += newVelocity * solverdt;

	// Forces have been consumed by this step
	g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
	g_vertexPositions[nodeID] = newPosition;
	g_vertexVelocity[nodeID] = newVelocity;
}
);

View File

@@ -0,0 +1,41 @@
MSTRINGIFY(
// Dot product over the x y z lanes only; the w lane of both inputs is never read.
float dot3(float4 a, float4 b)
{
	float xyzDot = a.x*b.x + a.y*b.y + a.z*b.z;
	return xyzDot;
}
// One work item per link. Stores the vector between the previous positions of the two
// link end vertices (second minus first) and the value 1/(|vector|^2 * massLSC).
__kernel void 
PrepareLinksKernel( 
	const int numLinks,
	__global int2 * g_linksVertexIndices,
	__global float * g_linksMassLSC,
	__global float4 * g_nodesPreviousPosition,
	__global float * g_linksLengthRatio,
	__global float4 * g_linksCurrentLength GUID_ARG)
{
	int linkID = get_global_id(0);
	if( linkID >= numLinks )
		return;

	int2 endpoints = g_linksVertexIndices[linkID];
	int firstNode = endpoints.x;
	int secondNode = endpoints.y;

	float4 firstPrevious = g_nodesPreviousPosition[firstNode];
	float4 secondPrevious = g_nodesPreviousPosition[secondNode];

	float massLSC = g_linksMassLSC[linkID];

	float4 linkVector = secondPrevious - firstPrevious;

	// Squared link length scaled by the mass term and then inverted
	float scaledLengthSquared = dot3(linkVector, linkVector)*massLSC;
	float lengthRatio = 1.0f/scaledLengthSquared;

	g_linksCurrentLength[linkID] = linkVector;
	g_linksLengthRatio[linkID] = lengthRatio;
}
);

View File

@@ -0,0 +1,57 @@
MSTRINGIFY(
// Dot product over the x y z lanes only; the w lane of both inputs is never read.
float mydot3(float4 a, float4 b)
{
	float xyzDot = a.x*b.x + a.y*b.y + a.z*b.z;
	return xyzDot;
}
// One work item per link of the batch [startLink .. startLink+numLinks).
// For links with a positive massLSC the two end vertices are moved along the link
// vector so that the squared link length approaches the squared rest length.
// Each vertex moves in proportion to its inverse mass and to kst.
// The ti parameter is accepted but not used by this kernel.
// NOTE(review): two links of one batch sharing a vertex would race on
// g_vertexPositions; presumably batches are built to avoid that - confirm.
__kernel void 
SolvePositionsFromLinksKernel( 
	const int startLink,
	const int numLinks,
	const float kst,
	const float ti,
	__global int2 * g_linksVertexIndices,
	__global float * g_linksMassLSC,
	__global float * g_linksRestLengthSquared,
	__global float * g_verticesInverseMass,
	__global float4 * g_vertexPositions GUID_ARG)
{
	int linkID = get_global_id(0) + startLink;
	if( !(get_global_id(0) < numLinks) )
		return;

	float massLSC = g_linksMassLSC[linkID];
	float restLengthSquared = g_linksRestLengthSquared[linkID];
	if( !(massLSC > 0.0f) )
		return;

	int2 endpoints = g_linksVertexIndices[linkID];
	int firstNode = endpoints.x;
	int secondNode = endpoints.y;

	float4 firstPosition = g_vertexPositions[firstNode];
	float4 secondPosition = g_vertexPositions[secondNode];

	float firstInverseMass = g_verticesInverseMass[firstNode];
	float secondInverseMass = g_verticesInverseMass[secondNode];

	float4 del = secondPosition - firstPosition;
	// Squared length of the current link vector
	float len = mydot3(del, del);
	float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;

	firstPosition = firstPosition - del*(k*firstInverseMass);
	secondPosition = secondPosition + del*(k*secondInverseMass);

	g_vertexPositions[firstNode] = firstPosition;
	g_vertexPositions[secondNode] = secondPosition;
}
);

View File

@@ -0,0 +1,44 @@
MSTRINGIFY(
/*#define float3 float4
float dot3(float3 a, float3 b)
{
return a.x*b.x + a.y*b.y + a.z*b.z;
}*/
// One work item per link. Recomputes the per-link constants from the current vertex data:
//   rest length         = distance between the two end vertices
//   rest length squared = rest length * rest length
//   massLSC             = (sum of the end vertex inverse masses) / material stiffness coefficient
__kernel void 
UpdateConstantsKernel( 
	const int numLinks,
	__global int2 * g_linksVertexIndices,
	__global float4 * g_vertexPositions,
	__global float * g_vertexInverseMasses,
	__global float * g_linksMaterialLSC,
	__global float * g_linksMassLSC,
	__global float * g_linksRestLengthSquared,
	__global float * g_linksRestLengths)
{
	int linkID = get_global_id(0);
	if( linkID >= numLinks )
		return;

	int2 endpoints = g_linksVertexIndices[linkID];
	int firstNode = endpoints.x;
	int secondNode = endpoints.y;
	float materialLSC = g_linksMaterialLSC[ linkID ];

	float3 firstPosition = g_vertexPositions[firstNode].xyz;
	float3 secondPosition = g_vertexPositions[secondNode].xyz;
	float firstInverseMass = g_vertexInverseMasses[firstNode];
	float secondInverseMass = g_vertexInverseMasses[secondNode];

	float3 separation = firstPosition - secondPosition;
	float separationSquared = dot(separation, separation);
	float restLength = sqrt(separationSquared);

	g_linksRestLengths[linkID] = restLength;
	g_linksMassLSC[linkID] = (firstInverseMass + secondInverseMass)/materialLSC;
	// The square is recomputed from the rounded length exactly as before
	g_linksRestLengthSquared[linkID] = restLength*restLength;
}
);

View File

@@ -0,0 +1,39 @@
MSTRINGIFY(
// One work item per vertex. Adds the position change of this step to the existing velocity,
// scaled by the cloth velocity correction coefficient and by isolverdt.
// The result is then damped by (1 - cloth damping factor) and the vertex force is cleared.
__kernel void 
updateVelocitiesFromPositionsWithVelocitiesKernel( 
	int numNodes,
	float isolverdt,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexPreviousPositions,
	__global int * g_vertexClothIndices,
	__global float *g_clothVelocityCorrectionCoefficients,
	__global float * g_clothDampingFactor,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexForces GUID_ARG)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
		return;

	float4 currentPosition = g_vertexPositions[nodeID];
	float4 lastPosition = g_vertexPreviousPositions[nodeID];
	float4 newVelocity = g_vertexVelocities[nodeID];

	// Per-cloth parameters are looked up through the cloth index of the vertex
	int clothIndex = g_vertexClothIndices[nodeID];
	float correctionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
	float dampingFactor = g_clothDampingFactor[clothIndex];
	float keptFraction = (1.f - dampingFactor);

	float4 displacement = currentPosition - lastPosition;

	newVelocity += displacement*correctionCoefficient*isolverdt;

	// Apply damping
	newVelocity *= keptFraction;

	g_vertexVelocities[nodeID] = newVelocity;
	g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
}
);

View File

@@ -0,0 +1,102 @@
MSTRINGIFY(
// Length of the x y z part of a: the w lane is zeroed on a local copy before
// the builtin four-component length is taken.
float length3(float4 a)
{
	float4 xyzOnly = a;
	xyzOnly.w = 0;
	return length(xyzOnly);
}
// Normalizes the x y z part of a: the w lane is zeroed on a local copy before
// the builtin four-component normalize is applied, so the result has w equal to zero.
float4 normalize3(float4 a)
{
	float4 xyzOnly = a;
	xyzOnly.w = 0;
	return normalize(xyzOnly);
}
// One work item per vertex: zeroes the normal and the area of the vertex so that
// they can be accumulated again from the faces.
__kernel void 
ResetNormalsAndAreasKernel(
	const unsigned int numNodes,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea GUID_ARG)
{
	if( !(get_global_id(0) < numNodes) )
		return;

	g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
	g_vertexArea[get_global_id(0)] = 0.0f;
}
// One work item per face of the batch [startFace .. startFace+numFaces).
// Computes the cross product of the two face edges that leave the first vertex.
// It stores the normalized result and its length as the face normal and face area
// and adds the unnormalized vector and that length to the normal and area of each
// of the three face vertices.
// NOTE(review): the per-vertex accumulation is a plain read-modify-write; presumably
// faces of one batch never share a vertex - confirm against the batching code.
__kernel void 
UpdateSoftBodiesKernel(
	const unsigned int startFace,
	const unsigned int numFaces,
	__global int4 * g_triangleVertexIndexSet,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea,
	__global float4 * g_triangleNormals,
	__global float * g_triangleArea GUID_ARG)
{
	int faceID = get_global_id(0) + startFace;
	if( !(get_global_id(0) < numFaces) )
		return;

	int4 corners = g_triangleVertexIndexSet[ faceID ];
	int nodeIndex0 = corners.x;
	int nodeIndex1 = corners.y;
	int nodeIndex2 = corners.z;

	float4 position0 = g_vertexPositions[nodeIndex0];
	float4 position1 = g_vertexPositions[nodeIndex1];
	float4 position2 = g_vertexPositions[nodeIndex2];

	// All accumulators are read before any of them is written back
	float4 accumulatedNormal0 = g_vertexNormals[nodeIndex0];
	float4 accumulatedNormal1 = g_vertexNormals[nodeIndex1];
	float4 accumulatedNormal2 = g_vertexNormals[nodeIndex2];
	float accumulatedArea0 = g_vertexArea[nodeIndex0];
	float accumulatedArea1 = g_vertexArea[nodeIndex1];
	float accumulatedArea2 = g_vertexArea[nodeIndex2];

	float4 edge01 = position1 - position0;
	float4 edge02 = position2 - position0;

	float4 faceNormal = cross(edge01, edge02);
	float triangleArea = length(faceNormal);

	g_triangleNormals[faceID] = normalize3(faceNormal);
	g_vertexNormals[nodeIndex0] = accumulatedNormal0 + faceNormal;
	g_vertexNormals[nodeIndex1] = accumulatedNormal1 + faceNormal;
	g_vertexNormals[nodeIndex2] = accumulatedNormal2 + faceNormal;
	g_triangleArea[faceID] = triangleArea;
	g_vertexArea[nodeIndex0] = accumulatedArea0 + triangleArea;
	g_vertexArea[nodeIndex1] = accumulatedArea1 + triangleArea;
	g_vertexArea[nodeIndex2] = accumulatedArea2 + triangleArea;
}
// One work item per vertex. Replaces the accumulated vertex normal with its normalized
// x y z direction (normalize3 forces w to zero) and divides the accumulated vertex area
// by the number of triangles that use the vertex.
// NOTE(review): a vertex with a triangle count of zero or with a zero accumulated normal
// is not special-cased here; confirm that callers never produce one.
__kernel void 
NormalizeNormalsAndAreasKernel( 
	const unsigned int numNodes,
	__global int * g_vertexTriangleCount,
	__global float4 * g_vertexNormals,
	__global float * g_vertexArea GUID_ARG)
{
	if( get_global_id(0) < numNodes )
	{
		float4 normal = g_vertexNormals[get_global_id(0)];
		float area = g_vertexArea[get_global_id(0)];
		int numTriangles = g_vertexTriangleCount[get_global_id(0)];

		// The separately computed vector length was never used and has been dropped
		g_vertexNormals[get_global_id(0)] = normalize3(normal);
		g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
	}
}
);

View File

@@ -0,0 +1,34 @@
MSTRINGIFY(
// One work item per vertex. Replaces the vertex velocity with the position change of this
// step multiplied by (1 - cloth damping factor) and by isolverdt, then clears the vertex force.
// The previous velocity does not contribute to the result, so it is not read.
__kernel void 
updateVelocitiesFromPositionsWithoutVelocitiesKernel( 
	const int numNodes,
	const float isolverdt,
	__global float4 * g_vertexPositions,
	__global float4 * g_vertexPreviousPositions,
	__global int * g_vertexClothIndices,
	__global float * g_clothDampingFactor,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexForces GUID_ARG)
{
	int nodeID = get_global_id(0);
	if( nodeID < numNodes )
	{
		float4 position = g_vertexPositions[nodeID];
		float4 previousPosition = g_vertexPreviousPositions[nodeID];

		// Damping is a per-cloth parameter looked up through the cloth index of the vertex
		int clothIndex = g_vertexClothIndices[nodeID];
		float dampingFactor = g_clothDampingFactor[clothIndex];
		float velocityCoefficient = (1.f - dampingFactor);

		float4 difference = position - previousPosition;

		g_vertexVelocities[nodeID] = difference*velocityCoefficient*isolverdt;
		g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
	}
}
);

View File

@@ -0,0 +1,28 @@
MSTRINGIFY(
// One work item per vertex. Moves the vertex from its previous position by
// velocity*solverSDT and writes the result to both the current and the previous position.
__kernel void 
UpdatePositionsFromVelocitiesKernel( 
	const int numNodes,
	const float solverSDT,
	__global float4 * g_vertexVelocities,
	__global float4 * g_vertexPreviousPositions,
	__global float4 * g_vertexCurrentPosition GUID_ARG)
{
	int vertexID = get_global_id(0);
	if( vertexID >= numNodes )
		return;

	float4 startPosition = g_vertexPreviousPositions[vertexID];
	float4 vertexVelocity = g_vertexVelocities[vertexID];

	float4 movedPosition = startPosition + vertexVelocity*solverSDT;

	g_vertexCurrentPosition[vertexID] = movedPosition;
	g_vertexPreviousPositions[vertexID] = movedPosition;
}
);

View File

@@ -0,0 +1,45 @@
MSTRINGIFY(
// One work item per link of the batch [startLink .. startLink+numLinks).
// Computes an impulse factor j from the relative velocity of the two end vertices
// along the stored link vector and from the stored length ratio and kst.
// The first vertex gains linkVector*(j*inverseMass) and the second loses the
// corresponding amount; both velocities are written back with w set to zero.
// NOTE(review): two links of one batch sharing a vertex would race on
// g_vertexVelocity; presumably batches are built to avoid that - confirm.
__kernel void 
VSolveLinksKernel( 
	int startLink,
	int numLinks,
	float kst,
	__global int2 * g_linksVertexIndices,
	__global float * g_linksLengthRatio,
	__global float4 * g_linksCurrentLength,
	__global float * g_vertexInverseMass,
	__global float4 * g_vertexVelocity GUID_ARG)
{
	int linkID = get_global_id(0) + startLink;
	if( !(get_global_id(0) < numLinks) )
		return;

	int2 endpoints = g_linksVertexIndices[linkID];
	int firstNode = endpoints.x;
	int secondNode = endpoints.y;

	float lengthRatio = g_linksLengthRatio[linkID];
	float3 linkVector = g_linksCurrentLength[linkID].xyz;

	float3 firstVelocity = g_vertexVelocity[firstNode].xyz;
	float3 secondVelocity = g_vertexVelocity[secondNode].xyz;

	float firstInverseMass = g_vertexInverseMass[firstNode];
	float secondInverseMass = g_vertexInverseMass[secondNode];

	float3 relativeVelocity = firstVelocity - secondVelocity;
	float alongLink = dot(linkVector, relativeVelocity);
	float j = -alongLink*lengthRatio*kst;

	firstVelocity += linkVector*(j*firstInverseMass);
	secondVelocity -= linkVector*(j*secondInverseMass);

	g_vertexVelocity[firstNode] = (float4)(firstVelocity, 0.f);
	g_vertexVelocity[secondNode] = (float4)(secondVelocity, 0.f);
}
);

View File

@@ -17,7 +17,16 @@ subject to the following restrictions:
#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H #define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
// OpenCL support // OpenCL support
#include <CL/cl.hpp>
#ifdef USE_MINICL
#include "MiniCL/cl.h"
#else //USE_MINICL
#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif //__APPLE__
#endif//USE_MINICL
#ifndef SAFE_RELEASE #ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } #define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
@@ -25,22 +34,25 @@ subject to the following restrictions:
template <typename ElementType> class btOpenCLBuffer template <typename ElementType> class btOpenCLBuffer
{ {
protected: public:
cl::CommandQueue m_queue;
btAlignedObjectArray< ElementType > * m_CPUBuffer;
cl::Buffer m_buffer;
cl_command_queue m_cqCommandQue;
cl_context m_clContext;
cl_mem m_buffer;
btAlignedObjectArray< ElementType > * m_CPUBuffer;
int m_gpuSize; int m_gpuSize;
bool m_onGPU; bool m_onGPU;
bool m_readOnlyOnGPU; bool m_readOnlyOnGPU;
bool m_allocated; bool m_allocated;
// TODO: Remove this once C++ bindings are fixed
cl::Context context;
bool createBuffer( cl::Buffer *preexistingBuffer = 0)
bool createBuffer( cl_mem* preexistingBuffer = 0)
{ {
cl_int err; cl_int err;
@@ -49,12 +61,11 @@ protected:
m_buffer = *preexistingBuffer; m_buffer = *preexistingBuffer;
} }
else { else {
m_buffer = cl::Buffer(
context, cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE,
m_CPUBuffer->size() * sizeof(ElementType), size_t size = m_CPUBuffer->size() * sizeof(ElementType);
0, m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
&err);
if( err != CL_SUCCESS ) if( err != CL_SUCCESS )
{ {
btAssert( "Buffer::Buffer(m_buffer)"); btAssert( "Buffer::Buffer(m_buffer)");
@@ -62,35 +73,31 @@ protected:
} }
m_gpuSize = m_CPUBuffer->size(); m_gpuSize = m_CPUBuffer->size();
return true; return true;
} }
public: public:
btOpenCLBuffer( btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
cl::CommandQueue queue, :m_cqCommandQue(commandQue),
btAlignedObjectArray< ElementType > *CPUBuffer, m_clContext(ctx),
bool readOnly) :
m_queue(queue),
m_CPUBuffer(CPUBuffer), m_CPUBuffer(CPUBuffer),
m_gpuSize(0), m_gpuSize(0),
m_onGPU(false), m_onGPU(false),
m_readOnlyOnGPU(readOnly), m_readOnlyOnGPU(readOnly),
m_allocated(false) m_allocated(false)
{ {
context = m_queue.getInfo<CL_QUEUE_CONTEXT>();
} }
~btOpenCLBuffer() ~btOpenCLBuffer()
{ {
} }
cl::Buffer getBuffer()
{
return m_buffer;
}
bool moveToGPU() bool moveToGPU()
{ {
cl_int err; cl_int err;
if( (m_CPUBuffer->size() != m_gpuSize) ) if( (m_CPUBuffer->size() != m_gpuSize) )
@@ -107,12 +114,12 @@ public:
m_allocated = true; m_allocated = true;
} }
err = m_queue.enqueueWriteBuffer( size_t size = m_CPUBuffer->size() * sizeof(ElementType);
m_buffer, err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
CL_FALSE, CL_FALSE,
0, 0,
m_CPUBuffer->size() * sizeof(ElementType), size,
&((*m_CPUBuffer)[0])); &((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS ) if( err != CL_SUCCESS )
{ {
btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" ); btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" );
@@ -122,20 +129,23 @@ public:
} }
return true; return true;
} }
bool moveFromGPU() bool moveFromGPU()
{ {
cl_int err; cl_int err;
if (m_CPUBuffer->size() > 0) { if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) { if (m_onGPU && !m_readOnlyOnGPU) {
err = m_queue.enqueueReadBuffer( size_t size = m_CPUBuffer->size() * sizeof(ElementType);
err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer, m_buffer,
CL_TRUE, CL_TRUE,
0, 0,
m_CPUBuffer->size() * sizeof(ElementType), size,
&((*m_CPUBuffer)[0])); &((*m_CPUBuffer)[0]),0,0,0);
if( err != CL_SUCCESS ) if( err != CL_SUCCESS )
{ {
@@ -151,16 +161,17 @@ public:
bool copyFromGPU() bool copyFromGPU()
{ {
cl_int err; cl_int err;
size_t size = m_CPUBuffer->size() * sizeof(ElementType);
if (m_CPUBuffer->size() > 0) { if (m_CPUBuffer->size() > 0) {
if (m_onGPU && !m_readOnlyOnGPU) { if (m_onGPU && !m_readOnlyOnGPU) {
err = m_queue.enqueueReadBuffer( err = clEnqueueReadBuffer(m_cqCommandQue,
m_buffer, m_buffer,
CL_TRUE, CL_TRUE,
0, 0,size,
m_CPUBuffer->size() * sizeof(ElementType), &((*m_CPUBuffer)[0]),0,0,0);
&((*m_CPUBuffer)[0]));
if( err != CL_SUCCESS ) if( err != CL_SUCCESS )
{ {

View File

@@ -13,8 +13,8 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
@@ -25,7 +25,9 @@ class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
{ {
public: public:
bool m_onGPU; bool m_onGPU;
cl::CommandQueue m_queue;
cl_command_queue m_cqCommandQue;
btOpenCLBuffer<LinkNodePair> m_clLinks; btOpenCLBuffer<LinkNodePair> m_clLinks;
btOpenCLBuffer<float> m_clLinkStrength; btOpenCLBuffer<float> m_clLinkStrength;
@@ -36,6 +38,24 @@ public:
btOpenCLBuffer<float> m_clLinksRestLength; btOpenCLBuffer<float> m_clLinksRestLength;
btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient; btOpenCLBuffer<float> m_clLinksMaterialLinearStiffnessCoefficient;
/**
 * Start index and element count of one computation batch.
 * A default-constructed pair is the empty batch (start 0, length 0).
 */
struct BatchPair
{
	int start;
	int length;

	/** Creates the empty batch. */
	BatchPair() : start(0), length(0) {}

	/** Creates a batch of l elements beginning at index s. */
	BatchPair( int s, int l ) : start( s ), length( l ) {}
};
/** /**
* Link addressing information for each cloth. * Link addressing information for each cloth.
* Allows link locations to be computed independently of data batching. * Allows link locations to be computed independently of data batching.
@@ -45,9 +65,9 @@ public:
/** /**
* Start and length values for computation batches over link data. * Start and length values for computation batches over link data.
*/ */
btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; btAlignedObjectArray< BatchPair > m_batchStartLengths;
btSoftBodyLinkDataOpenCL(cl::CommandQueue queue); btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyLinkDataOpenCL(); virtual ~btSoftBodyLinkDataOpenCL();

View File

@@ -14,8 +14,8 @@ subject to the following restrictions:
*/ */
#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
@@ -26,7 +26,7 @@ class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
{ {
public: public:
bool m_onGPU; bool m_onGPU;
cl::CommandQueue m_queue; cl_command_queue m_queue;
btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet> m_clVertexIndices; btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet> m_clVertexIndices;
btOpenCLBuffer<float> m_clArea; btOpenCLBuffer<float> m_clArea;
@@ -41,10 +41,20 @@ public:
/** /**
* Start and length values for computation batches over link data. * Start and length values for computation batches over link data.
*/ */
btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; struct btSomePair
{
btSomePair() {}
btSomePair(int f,int s)
:first(f),second(s)
{
}
int first;
int second;
};
btAlignedObjectArray< btSomePair > m_batchStartLengths;
public: public:
btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ); btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );
virtual ~btSoftBodyTriangleDataOpenCL(); virtual ~btSoftBodyTriangleDataOpenCL();

View File

@@ -13,8 +13,8 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution. 3. This notice may not be removed or altered from any source distribution.
*/ */
#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h"
#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverBuffer_OpenCL.h"
#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H #define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
@@ -24,7 +24,7 @@ class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
{ {
protected: protected:
bool m_onGPU; bool m_onGPU;
cl::CommandQueue m_queue; cl_command_queue m_queue;
public: public:
btOpenCLBuffer<int> m_clClothIdentifier; btOpenCLBuffer<int> m_clClothIdentifier;
@@ -37,7 +37,7 @@ public:
btOpenCLBuffer<float> m_clVertexArea; btOpenCLBuffer<float> m_clVertexArea;
btOpenCLBuffer<int> m_clVertexTriangleCount; btOpenCLBuffer<int> m_clVertexTriangleCount;
public: public:
btSoftBodyVertexDataOpenCL( cl::CommandQueue queue); btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx);
virtual ~btSoftBodyVertexDataOpenCL(); virtual ~btSoftBodyVertexDataOpenCL();

View File

@@ -16,10 +16,18 @@ subject to the following restrictions:
#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
#include "vectormath/vmInclude.h" #include "vectormath/vmInclude.h"
#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolver_OpenCL.h" #include <stdio.h> //@todo: remove the debugging printf at some stage
#include "BulletSoftBody/VertexBuffers/btSoftBodySolverVertexBuffer.h" #include "btSoftBodySolver_OpenCL.h"
#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
#include "BulletSoftBody/btSoftBody.h" #include "BulletSoftBody/btSoftBody.h"
static const size_t workGroupSize = 128;
//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
#if (0)//CL_VERSION_1_1 == 1)
//OpenCL 1.1 kernels use float3
#define MSTRINGIFY(A) #A #define MSTRINGIFY(A) #A
static char* PrepareLinksCLString = static char* PrepareLinksCLString =
#include "OpenCLC/PrepareLinks.cl" #include "OpenCLC/PrepareLinks.cl"
@@ -41,19 +49,43 @@ static char* UpdateNormalsCLString =
#include "OpenCLC/UpdateNormals.cl" #include "OpenCLC/UpdateNormals.cl"
static char* VSolveLinksCLString = static char* VSolveLinksCLString =
#include "OpenCLC/VSolveLinks.cl" #include "OpenCLC/VSolveLinks.cl"
#else
////OpenCL 1.0 kernels don't use float3
#define MSTRINGIFY(A) #A
static char* PrepareLinksCLString =
#include "OpenCLC10/PrepareLinks.cl"
static char* UpdatePositionsFromVelocitiesCLString =
#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
static char* SolvePositionsCLString =
#include "OpenCLC10/SolvePositions.cl"
static char* UpdateNodesCLString =
#include "OpenCLC10/UpdateNodes.cl"
static char* UpdatePositionsCLString =
#include "OpenCLC10/UpdatePositions.cl"
static char* UpdateConstantsCLString =
#include "OpenCLC10/UpdateConstants.cl"
static char* IntegrateCLString =
#include "OpenCLC10/Integrate.cl"
static char* ApplyForcesCLString =
#include "OpenCLC10/ApplyForces.cl"
static char* UpdateNormalsCLString =
#include "OpenCLC10/UpdateNormals.cl"
static char* VSolveLinksCLString =
#include "OpenCLC10/VSolveLinks.cl"
#endif //CL_VERSION_1_1
// Constructor: stores the command queue in m_queue and constructs one m_cl*
// member per per-vertex array, passing the address of the matching array and `false`.
// NOTE(review): this text is a side-by-side diff; each line shows the removed form
// (cl::CommandQueue only) followed by the added form (cl_command_queue plus cl_context).
// NOTE(review): the meaning of the trailing `false` is defined by the m_cl* wrapper
// type, which is not visible here - confirm before relying on it.
btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl::CommandQueue queue) : btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) :
m_queue(queue), m_queue(queue),
m_clClothIdentifier( queue, &m_clothIdentifier, false ), m_clClothIdentifier( queue, ctx, &m_clothIdentifier, false ),
m_clVertexPosition( queue, &m_vertexPosition, false ), m_clVertexPosition( queue, ctx, &m_vertexPosition, false ),
m_clVertexPreviousPosition( queue, &m_vertexPreviousPosition, false ), m_clVertexPreviousPosition( queue, ctx, &m_vertexPreviousPosition, false ),
m_clVertexVelocity( queue, &m_vertexVelocity, false ), m_clVertexVelocity( queue, ctx, &m_vertexVelocity, false ),
m_clVertexForceAccumulator( queue, &m_vertexForceAccumulator, false ), m_clVertexForceAccumulator( queue, ctx, &m_vertexForceAccumulator, false ),
m_clVertexNormal( queue, &m_vertexNormal, false ), m_clVertexNormal( queue, ctx, &m_vertexNormal, false ),
m_clVertexInverseMass( queue, &m_vertexInverseMass, false ), m_clVertexInverseMass( queue, ctx, &m_vertexInverseMass, false ),
m_clVertexArea( queue, &m_vertexArea, false ), m_clVertexArea( queue, ctx, &m_vertexArea, false ),
m_clVertexTriangleCount( queue, &m_vertexTriangleCount, false ) m_clVertexTriangleCount( queue, ctx, &m_vertexTriangleCount, false )
{ {
} }
@@ -108,16 +140,16 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator()
// Constructor: stores the command queue and constructs one m_cl* member per
// per-link array, passing the address of the matching array and `false`.
// NOTE(review): this text is a side-by-side diff; the removed form keeps the queue in
// m_queue, the added form keeps it in m_cqCommandQue and also forwards a cl_context.
// NOTE(review): the meaning of the trailing `false` is defined by the m_cl* wrapper
// type, which is not visible here - confirm before relying on it.
btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl::CommandQueue queue) : btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx)
m_queue(queue), :m_cqCommandQue(queue),
m_clLinks( queue, &m_links, false ), m_clLinks( queue, ctx, &m_links, false ),
m_clLinkStrength( queue, &m_linkStrength, false ), m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
m_clLinksMassLSC( queue, &m_linksMassLSC, false ), m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
m_clLinksRestLengthSquared( queue, &m_linksRestLengthSquared, false ), m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
m_clLinksCLength( queue, &m_linksCLength, false ), m_clLinksCLength( queue, ctx, &m_linksCLength, false ),
m_clLinksLengthRatio( queue, &m_linksLengthRatio, false ), m_clLinksLengthRatio( queue, ctx, &m_linksLengthRatio, false ),
m_clLinksRestLength( queue, &m_linksRestLength, false ), m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
m_clLinksMaterialLinearStiffnessCoefficient( queue, &m_linksMaterialLinearStiffnessCoefficient, false ) m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
{ {
} }
@@ -272,13 +304,13 @@ void btSoftBodyLinkDataOpenCL::generateBatches()
if( m_batchStartLengths.size() > 0 ) if( m_batchStartLengths.size() > 0 )
{ {
m_batchStartLengths.resize(batchCounts.size()); m_batchStartLengths.resize(batchCounts.size());
m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); m_batchStartLengths[0] = BatchPair(0, 0);
int sum = 0; int sum = 0;
for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex ) for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
{ {
m_batchStartLengths[batchIndex].first = sum; m_batchStartLengths[batchIndex].start = sum;
m_batchStartLengths[batchIndex].second = batchCounts[batchIndex]; m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
sum += batchCounts[batchIndex]; sum += batchCounts[batchIndex];
} }
} }
@@ -313,7 +345,7 @@ void btSoftBodyLinkDataOpenCL::generateBatches()
// next element in that batch, incrementing the batch counter // next element in that batch, incrementing the batch counter
// afterwards // afterwards
int batch = batchValues[linkIndex]; int batch = batchValues[linkIndex];
int newLocation = m_batchStartLengths[batch].first + batchCounts[batch]; int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
batchCounts[batch] = batchCounts[batch] + 1; batchCounts[batch] = batchCounts[batch] + 1;
m_links[newLocation] = m_links_Backup[linkLocation]; m_links[newLocation] = m_links_Backup[linkLocation];
@@ -336,11 +368,11 @@ void btSoftBodyLinkDataOpenCL::generateBatches()
// Constructor: stores the command queue in m_queue and constructs the m_clVertexIndices,
// m_clArea and m_clNormal members from the addresses of m_vertexIndices, m_area and m_normal.
// NOTE(review): this text is a side-by-side diff; each line shows the removed form
// (cl::CommandQueue only) followed by the added form (cl_command_queue plus cl_context).
btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ) : btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl_command_queue queue , cl_context ctx) :
m_queue( queue ), m_queue( queue ),
m_clVertexIndices( queue, &m_vertexIndices, false ), m_clVertexIndices( queue, ctx, &m_vertexIndices, false ),
m_clArea( queue, &m_area, false ), m_clArea( queue, ctx, &m_area, false ),
m_clNormal( queue, &m_normal, false ) m_clNormal( queue, ctx, &m_normal, false )
{ {
} }
@@ -493,7 +525,7 @@ void btSoftBodyTriangleDataOpenCL::generateBatches()
m_batchStartLengths.resize(batchCounts.size()); m_batchStartLengths.resize(batchCounts.size());
m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); m_batchStartLengths[0] = btSomePair(0,0);
int sum = 0; int sum = 0;
@@ -547,18 +579,19 @@ void btSoftBodyTriangleDataOpenCL::generateBatches()
btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(const cl::CommandQueue &queue) : btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) :
m_linkData(queue), m_linkData(queue, ctx),
m_vertexData(queue), m_vertexData(queue, ctx),
m_triangleData(queue), m_triangleData(queue, ctx),
m_clPerClothAcceleration(queue, &m_perClothAcceleration, true ), m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
m_clPerClothWindVelocity(queue, &m_perClothWindVelocity, true ), m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
m_clPerClothDampingFactor(queue, &m_perClothDampingFactor, true ), m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
m_clPerClothVelocityCorrectionCoefficient(queue, &m_perClothVelocityCorrectionCoefficient, true ), m_clPerClothVelocityCorrectionCoefficient(queue, ctx,&m_perClothVelocityCorrectionCoefficient, true ),
m_clPerClothLiftFactor(queue, &m_perClothLiftFactor, true ), m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ),
m_clPerClothDragFactor(queue, &m_perClothDragFactor, true ), m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ),
m_clPerClothMediumDensity(queue, &m_perClothMediumDensity, true ), m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true ),
m_queue( queue ) m_cqCommandQue( queue ),
m_cxMainContext(ctx)
{ {
// Initial we will clearly need to update solver constants // Initial we will clearly need to update solver constants
// For now this is global for the cloths linked with this solver - we should probably make this body specific // For now this is global for the cloths linked with this solver - we should probably make this body specific
@@ -590,7 +623,7 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof
using Vectormath::Aos::Point3; using Vectormath::Aos::Point3;
// Create SoftBody that will store the information within the solver // Create SoftBody that will store the information within the solver
btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); btOpenCLAcceleratedSoftBodyInterface *newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
m_softBodySet.push_back( newSoftBody ); m_softBodySet.push_back( newSoftBody );
m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
@@ -712,51 +745,58 @@ bool btOpenCLSoftBodySolver::checkInitialized()
// Launches resetNormalsAndAreasKernel with numVertices and the vertex normal and
// vertex area buffers as kernel arguments 0-2. The global size is numVertices rounded
// up to a multiple of workGroupSize; a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices ) void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
{ {
resetNormalsAndAreasKernel.kernel.setArg(0, numVertices); cl_int ciErrNum;
resetNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexNormal.getBuffer()); ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 0, sizeof(numVertices), (void*)&numVertices); //oclCHECKERROR(ciErrNum, CL_SUCCESS);
resetNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexArea.getBuffer()); ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 1, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexNormal.m_buffer);//oclCHECKERROR(ciErrNum, CL_SUCCESS);
ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 2, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexArea.m_buffer); //oclCHECKERROR(ciErrNum, CL_SUCCESS);
size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, resetNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 );
int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); if( ciErrNum != CL_SUCCESS )
cl_int err = m_queue.enqueueNDRangeKernel(resetNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize));
if( err != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" ); btAssert( 0 && "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" );
} }
} }
// Launches normalizeNormalsAndAreasKernel with numVertices and the vertex triangle-count,
// normal and area buffers as kernel arguments 0-3. The global size is numVertices rounded
// up to a multiple of workGroupSize; a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
{ {
normalizeNormalsAndAreasKernel.kernel.setArg(0, numVertices);
normalizeNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexTriangleCount.getBuffer());
normalizeNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexNormal.getBuffer());
normalizeNormalsAndAreasKernel.kernel.setArg(3, m_vertexData.m_clVertexArea.getBuffer());
int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(normalizeNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize));
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices);
ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexTriangleCount.m_buffer);
ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, normalizeNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)");
} }
} }
// Launches updateSoftBodiesKernel for numTriangles triangles starting at firstTriangle.
// Kernel arguments 0-7 are: firstTriangle, numTriangles, triangle vertex indices, vertex
// positions, vertex normals, vertex areas, triangle normals, triangle areas. The global
// size is numTriangles rounded up to a multiple of workGroupSize.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): the assert message names normalizeNormalsAndAreasKernel although the
// kernel enqueued here is updateSoftBodiesKernel - looks like a copy/paste slip.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles ) void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
{ {
updateSoftBodiesKernel.kernel.setArg(0, firstTriangle);
updateSoftBodiesKernel.kernel.setArg(1, numTriangles);
updateSoftBodiesKernel.kernel.setArg(2, m_triangleData.m_clVertexIndices.getBuffer());
updateSoftBodiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer());
updateSoftBodiesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer());
updateSoftBodiesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer());
updateSoftBodiesKernel.kernel.setArg(6, m_triangleData.m_clNormal.getBuffer());
updateSoftBodiesKernel.kernel.setArg(7, m_triangleData.m_clArea.getBuffer());
cl_int ciErrNum;
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 1, sizeof(int), &numTriangles);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 2, sizeof(cl_mem), &m_triangleData.m_clVertexIndices.m_buffer);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 6, sizeof(cl_mem), &m_triangleData.m_clNormal.m_buffer);
ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 7, sizeof(cl_mem), &m_triangleData.m_clArea.m_buffer);
int numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize); size_t numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize);
cl_int err = m_queue.enqueueNDRangeKernel(updateSoftBodiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, updateSoftBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
if( err != CL_SUCCESS ) if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)");
} }
} }
void btOpenCLSoftBodySolver::updateSoftBodies() void btOpenCLSoftBodySolver::updateSoftBodies()
@@ -807,6 +847,7 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath
void btOpenCLSoftBodySolver::applyForces( float solverdt ) void btOpenCLSoftBodySolver::applyForces( float solverdt )
{ {
// Ensure data is on accelerator // Ensure data is on accelerator
m_vertexData.moveToAccelerator(); m_vertexData.moveToAccelerator();
m_clPerClothAcceleration.moveToGPU(); m_clPerClothAcceleration.moveToGPU();
@@ -815,85 +856,30 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
m_clPerClothMediumDensity.moveToGPU(); m_clPerClothMediumDensity.moveToGPU();
m_clPerClothWindVelocity.moveToGPU(); m_clPerClothWindVelocity.moveToGPU();
cl_int err; cl_int ciErrNum ;
err = applyForcesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); int numVerts = m_vertexData.getNumVertices();
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(applyForcesKernel, 0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(applyForcesKernel, 1, sizeof(float), &solverdt);
float fl = FLT_EPSILON;
ciErrNum = clSetKernelArg(applyForcesKernel, 2, sizeof(float), &fl);
ciErrNum = clSetKernelArg(applyForcesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 7, sizeof(cl_mem), &m_clPerClothLiftFactor.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 8 ,sizeof(cl_mem), &m_clPerClothDragFactor.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel, 9, sizeof(cl_mem), &m_clPerClothWindVelocity.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel,10, sizeof(cl_mem), &m_clPerClothAcceleration.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel,11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
ciErrNum = clSetKernelArg(applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,applyForcesKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); btAssert( 0 && "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(1, solverdt);
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(2, FLT_EPSILON);
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(3, m_vertexData.m_clClothIdentifier.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(6, m_vertexData.m_clVertexInverseMass.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(7, m_clPerClothLiftFactor.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(8, m_clPerClothDragFactor.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(9, m_clPerClothWindVelocity.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(10, m_clPerClothAcceleration.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(11, m_clPerClothMediumDensity.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(12, m_vertexData.m_clVertexForceAccumulator.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
err = applyForcesKernel.kernel.setArg(13, m_vertexData.m_clVertexVelocity.getBuffer());
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
} }
int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
err = m_queue.enqueueNDRangeKernel(applyForcesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize));
if( err != CL_SUCCESS )
{
btAssert( "enqueueNDRangeKernel(applyForcesKernel)");
}
} }
/** /**
@@ -901,22 +887,26 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt )
*/ */
// Calls m_vertexData.moveToAccelerator(), then launches integrateKernel. Kernel arguments
// 0-6 are: vertex count, solverdt, and the vertex inverse-mass, position, velocity,
// previous-position and force-accumulator buffers. The global size is the vertex count
// rounded up to a multiple of workGroupSize; a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::integrate( float solverdt ) void btOpenCLSoftBodySolver::integrate( float solverdt )
{ {
// Ensure data is on accelerator // Ensure data is on accelerator
m_vertexData.moveToAccelerator(); m_vertexData.moveToAccelerator();
integrateKernel.kernel.setArg(0, m_vertexData.getNumVertices()); cl_int ciErrNum;
integrateKernel.kernel.setArg(1, solverdt); int numVerts = m_vertexData.getNumVertices();
integrateKernel.kernel.setArg(2, m_vertexData.m_clVertexInverseMass.getBuffer()); ciErrNum = clSetKernelArg(integrateKernel, 0, sizeof(int), &numVerts);
integrateKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer()); ciErrNum = clSetKernelArg(integrateKernel, 1, sizeof(float), &solverdt);
integrateKernel.kernel.setArg(4, m_vertexData.m_clVertexVelocity.getBuffer()); ciErrNum = clSetKernelArg(integrateKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
integrateKernel.kernel.setArg(5, m_vertexData.m_clVertexPreviousPosition.getBuffer()); ciErrNum = clSetKernelArg(integrateKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
integrateKernel.kernel.setArg(6, m_vertexData.m_clVertexForceAccumulator.getBuffer()); ciErrNum = clSetKernelArg(integrateKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
ciErrNum = clSetKernelArg(integrateKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
cl_int err = m_queue.enqueueNDRangeKernel(integrateKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,integrateKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
if( err != CL_SUCCESS ) if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(integrateKernel)"); btAssert( 0 && "enqueueNDRangeKernel(integrateKernel)");
} }
} }
@@ -935,6 +925,7 @@ float btOpenCLSoftBodySolver::computeTriangleArea(
void btOpenCLSoftBodySolver::updateConstants( float timeStep ) void btOpenCLSoftBodySolver::updateConstants( float timeStep )
{ {
using namespace Vectormath::Aos; using namespace Vectormath::Aos;
if( m_updateSolverConstants ) if( m_updateSolverConstants )
@@ -959,10 +950,12 @@ void btOpenCLSoftBodySolver::updateConstants( float timeStep )
m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
} }
} }
} }
void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
{ {
using Vectormath::Aos::Vector3; using Vectormath::Aos::Vector3;
using Vectormath::Aos::Point3; using Vectormath::Aos::Point3;
using Vectormath::Aos::lengthSqr; using Vectormath::Aos::lengthSqr;
@@ -988,33 +981,34 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
// Prepare anchors for( int iteration = 0; iteration < m_numberOfVelocityIterations ; ++iteration )
/*for(i=0,ni=m_anchors.size();i<ni;++i)
{ {
Anchor& a=m_anchors[i]; for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
const btVector3 ra=a.m_body->getWorldTransform().getBasis()*a.m_local; {
a.m_c0 = ImpulseMatrix( m_sst.sdt, int startLink = m_linkData.m_batchStartLengths[i].start;
a.m_node->m_im, int numLinks = m_linkData.m_batchStartLengths[i].length;
a.m_body->getInvMass(),
a.m_body->getInvInertiaTensorWorld(),
ra);
a.m_c1 = ra;
a.m_c2 = m_sst.sdt*a.m_node->m_im;
a.m_body->activate();
}*/
// Really want to combine these into a single loop, don't we? No update in the middle? solveLinksForVelocity( startLink, numLinks, kst );
}
// TODO: Double check what kst is meant to mean - passed in as 1 in the bullet code }
// Compute new positions from velocity
// Also update the previous position so that our position computation is now based on the new position from the velocity solution
// rather than based directly on the original positions
if( m_numberOfVelocityIterations > 0 )
{
updateVelocitiesFromPositionsWithVelocities( 1.f/solverdt );
} else {
updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt );
}
// Solve drift // Solve drift
for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
{ {
for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
{ {
int startLink = m_linkData.m_batchStartLengths[i].first; int startLink = m_linkData.m_batchStartLengths[i].start;
int numLinks = m_linkData.m_batchStartLengths[i].second; int numLinks = m_linkData.m_batchStartLengths[i].length;
solveLinksForPosition( startLink, numLinks, kst, ti ); solveLinksForPosition( startLink, numLinks, kst, ti );
} }
@@ -1023,6 +1017,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt );
} }
@@ -1030,96 +1025,136 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
// Kernel dispatches // Kernel dispatches
// Launches prepareLinksKernel over all links. Kernel arguments 0-5 are: link count, the
// links buffer, the links mass-LSC buffer, the vertex previous-position buffer, the links
// length-ratio buffer and the links C-length buffer. The global size is the link count
// rounded up to a multiple of workGroupSize; a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::prepareLinks() void btOpenCLSoftBodySolver::prepareLinks()
{ {
prepareLinksKernel.kernel.setArg(0, m_linkData.getNumLinks());
prepareLinksKernel.kernel.setArg(1, m_linkData.m_clLinks.getBuffer());
prepareLinksKernel.kernel.setArg(2, m_linkData.m_clLinksMassLSC.getBuffer());
prepareLinksKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer());
prepareLinksKernel.kernel.setArg(4, m_linkData.m_clLinksLengthRatio.getBuffer());
prepareLinksKernel.kernel.setArg(5, m_linkData.m_clLinksCLength.getBuffer());
int numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(prepareLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); int numLinks = m_linkData.getNumLinks();
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(prepareLinksKernel,0, sizeof(int), &numLinks);
ciErrNum = clSetKernelArg(prepareLinksKernel,1, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
ciErrNum = clSetKernelArg(prepareLinksKernel,2, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
ciErrNum = clSetKernelArg(prepareLinksKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(prepareLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
ciErrNum = clSetKernelArg(prepareLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
size_t numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,prepareLinksKernel, 1 , NULL, &numWorkItems, &workGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(prepareLinksKernel)"); btAssert( 0 && "enqueueNDRangeKernel(prepareLinksKernel)");
} }
} }
// Launches updatePositionsFromVelocitiesKernel over all vertices. Kernel arguments 0-4
// are: vertex count, solverdt, and the vertex velocity, previous-position and position
// buffers. The global size is the vertex count rounded up to a multiple of workGroupSize;
// a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
{ {
updatePositionsFromVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices());
updatePositionsFromVelocitiesKernel.kernel.setArg(1, solverdt);
updatePositionsFromVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexVelocity.getBuffer());
updatePositionsFromVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer());
updatePositionsFromVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clVertexPosition.getBuffer());
int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); int numVerts = m_vertexData.getNumVertices();
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,1, sizeof(float), &solverdt);
ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,4, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updatePositionsFromVelocitiesKernel, 1, NULL, &numWorkItems,&workGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)"); btAssert( 0 && "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)");
} }
} }
// Launches solvePositionsFromLinksKernel for numLinks links starting at startLink.
// Kernel arguments 0-8 are: startLink, numLinks, kst, ti, the links buffer, the links
// mass-LSC buffer, the links rest-length-squared buffer, the vertex inverse-mass buffer
// and the vertex position buffer. The global size is numLinks rounded up to a multiple
// of workGroupSize; a failed enqueue trips btAssert.
// NOTE(review): this text is a side-by-side diff; lines pair the removed cl:: wrapper
// statements with the added OpenCL C-API statements.
// NOTE(review): in the added code each clSetKernelArg result is overwritten by the next
// assignment to ciErrNum, so only the enqueue result reaches the check.
void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti ) void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
{ {
solvePositionsFromLinksKernel.kernel.setArg(0, startLink);
solvePositionsFromLinksKernel.kernel.setArg(1, numLinks);
solvePositionsFromLinksKernel.kernel.setArg(2, kst);
solvePositionsFromLinksKernel.kernel.setArg(3, ti);
solvePositionsFromLinksKernel.kernel.setArg(4, m_linkData.m_clLinks.getBuffer());
solvePositionsFromLinksKernel.kernel.setArg(5, m_linkData.m_clLinksMassLSC.getBuffer());
solvePositionsFromLinksKernel.kernel.setArg(6, m_linkData.m_clLinksRestLengthSquared.getBuffer());
solvePositionsFromLinksKernel.kernel.setArg(7, m_vertexData.m_clVertexInverseMass.getBuffer());
solvePositionsFromLinksKernel.kernel.setArg(8, m_vertexData.m_clVertexPosition.getBuffer());
int numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(solvePositionsFromLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,0, sizeof(int), &startLink);
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,1, sizeof(int), &numLinks);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,2, sizeof(float), &kst);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,3, sizeof(float), &ti);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
size_t numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
if( ciErrNum!= CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(solvePositionsFromLinksKernel)"); btAssert( 0 && "enqueueNDRangeKernel(solvePositionsFromLinksKernel)");
} }
} // solveLinksForPosition } // solveLinksForPosition
/**
 * Launch vSolveLinksKernel over one batch of links.
 *
 * Kernel arguments 0-6 are: startLink, numLinks, the links buffer, the links
 * length-ratio buffer, the links C-length buffer, the vertex inverse-mass
 * buffer and the vertex velocity buffer.
 *
 * @param startLink  Index of the first link in the batch.
 * @param numLinks   Number of links in the batch; nothing is enqueued when
 *                   this is not positive.
 * @param kst        Not forwarded to the kernel; kept so the signature stays
 *                   unchanged for existing callers.
 */
void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
{
	(void)kst; // intentionally unused, see above

	// An empty batch has no work. A zero global work size is rejected by
	// clEnqueueNDRangeKernel (before OpenCL 2.1), and a negative count would
	// wrap when mixed with the unsigned workGroupSize below.
	if( numLinks <= 0 )
	{
		return;
	}

	// CL_SUCCESS is 0 and every OpenCL error code is non-zero, so OR-ing the
	// results keeps the accumulator non-zero as soon as any call fails.
	cl_int ciErrNum = CL_SUCCESS;
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 0, sizeof(int), &startLink);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 1, sizeof(int), &numLinks);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 2, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 3, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 4, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
	ciErrNum |= clSetKernelArg(vSolveLinksKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
	if( ciErrNum != CL_SUCCESS )
	{
		// Do not launch with stale or partially updated arguments.
		btAssert( 0 && "clSetKernelArg(vSolveLinksKernel)");
		return;
	}

	// Round the global size up to a whole number of work groups.
	size_t numWorkItems = workGroupSize*(((size_t)numLinks + (workGroupSize-1)) / workGroupSize);
	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,vSolveLinksKernel,1,NULL,&numWorkItems, &workGroupSize,0,0,0);
	if( ciErrNum != CL_SUCCESS )
	{
		btAssert( 0 && "enqueueNDRangeKernel(vSolveLinksKernel)");
	}
}
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
{ {
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(1, isolverdt);
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(5, m_clPerClothVelocityCorrectionCoefficient.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(6, m_clPerClothDampingFactor.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexVelocity.getBuffer());
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(8, m_vertexData.m_clVertexForceAccumulator.getBuffer());
int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); int numVerts = m_vertexData.getNumVertices();
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 1, sizeof(float), &isolverdt);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 5, sizeof(cl_mem), &m_clPerClothVelocityCorrectionCoefficient.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 6, sizeof(cl_mem), &m_clPerClothDampingFactor.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)"); btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)");
} }
} // updateVelocitiesFromPositionsWithVelocities } // updateVelocitiesFromPositionsWithVelocities
void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
{ {
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(1, isolverdt);
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(5, m_clPerClothDampingFactor.getBuffer());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(6, m_vertexData.m_clVertexVelocity.getBuffer());
updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexForceAccumulator.getBuffer());
int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); cl_int ciErrNum;
cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); int numVerts = m_vertexData.getNumVertices();
if( err != CL_SUCCESS ) ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, sizeof(float), &isolverdt);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 4, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 6, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 7, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
if( ciErrNum != CL_SUCCESS )
{ {
btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)"); btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)");
} }
} // updateVelocitiesFromPositionsWithoutVelocities } // updateVelocitiesFromPositionsWithoutVelocities
// End kernel dispatches // End kernel dispatches
@@ -1133,15 +1168,20 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
// and use them together on a single kernel call if possible by setting up a // and use them together on a single kernel call if possible by setting up a
// per-cloth target buffer array for the copy kernel. // per-cloth target buffer array for the copy kernel.
btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody );
btOpenCLAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody );
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER )
{ {
const int firstVertex = currentCloth->getFirstVertex();
const int lastVertex = firstVertex + currentCloth->getNumVertices();
const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
float *basePointer = cpuVertexBuffer->getBasePointer(); float *basePointer = cpuVertexBuffer->getBasePointer();
m_vertexData.m_clVertexPosition.copyFromGPU();
m_vertexData.m_clVertexNormal.copyFromGPU();
if( vertexBuffer->hasVertexPositions() ) if( vertexBuffer->hasVertexPositions() )
{ {
const int vertexOffset = cpuVertexBuffer->getVertexOffset(); const int vertexOffset = cpuVertexBuffer->getVertexOffset();
@@ -1173,43 +1213,46 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons
} }
} }
} }
} // btCPUSoftBodySolver::outputToVertexBuffers } // btCPUSoftBodySolver::outputToVertexBuffers
btOpenCLSoftBodySolver::KernelDesc btOpenCLSoftBodySolver::compileCLKernelFromString( const char *shaderString, const char *shaderName ) cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelSource, const char* kernelName )
{ {
cl_int err; printf("compiling kernalName: %s ",kernelName);
cl_kernel kernel;
cl_int ciErrNum;
size_t program_length = strlen(kernelSource);
context = m_queue.getInfo<CL_QUEUE_CONTEXT>(); cl_program m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&kernelSource, &program_length, &ciErrNum);
device = m_queue.getInfo<CL_QUEUE_DEVICE>(); // oclCHECKERROR(ciErrNum, CL_SUCCESS);
std::vector< cl::Device > devices;
devices.push_back( device ); // Build the program with 'mad' Optimization option
#ifdef MAC
char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
#else
const char* flags = "-DGUID_ARG=";
#endif
ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, flags, NULL, NULL);
if (ciErrNum != CL_SUCCESS)
{
printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
btAssert(0);
exit(0);
}
// Create the kernel
kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum);
if (ciErrNum != CL_SUCCESS)
{
printf("Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
btAssert(0);
exit(0);
}
cl::Program::Sources source(1, std::make_pair(shaderString, strlen(shaderString) + 1)); printf("ready. \n");
cl::Program program(context, source, &err); return kernel;
if( err != CL_SUCCESS )
{
btAssert( "program" );
}
err = program.build(devices);
if (err != CL_SUCCESS) {
//std::string str;
//str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
//std::cout << "Program Info: " << str;
if( err != CL_SUCCESS )
{
btAssert( "Program::build()" );
}
}
cl::Kernel kernel(program, shaderName, &err);
if( err != CL_SUCCESS )
{
btAssert( "kernel" );
}
KernelDesc descriptor;
descriptor.kernel = kernel;
return descriptor;
} }
void btOpenCLSoftBodySolver::predictMotion( float timeStep ) void btOpenCLSoftBodySolver::predictMotion( float timeStep )
@@ -1234,11 +1277,11 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep )
btOpenCLSoftBodySolver::btAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) btOpenCLAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
{ {
for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
{ {
btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; btOpenCLAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex];
if( softBodyInterface->getSoftBody() == softBody ) if( softBodyInterface->getSoftBody() == softBody )
return softBodyInterface; return softBodyInterface;
} }
@@ -1273,4 +1316,4 @@ bool btOpenCLSoftBodySolver::buildShaders()
m_shadersInitialized = true; m_shadersInitialized = true;
return returnVal; return returnVal;
} }

View File

@@ -16,204 +16,165 @@ subject to the following restrictions:
#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
#define BT_SOFT_BODY_SOLVER_OPENCL_H #define BT_SOFT_BODY_SOLVER_OPENCL_H
#include "stddef.h" //for size_t
#include "vectormath/vmInclude.h" #include "vectormath/vmInclude.h"
#include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btSoftBodySolvers.h"
#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" #include "btSoftBodySolverBuffer_OpenCL.h"
#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h" #include "btSoftBodySolverLinkData_OpenCL.h"
#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h" #include "btSoftBodySolverVertexData_OpenCL.h"
#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h" #include "btSoftBodySolverTriangleData_OpenCL.h"
/**
 * SoftBody class to maintain information about a soft body instance
 * within a solver.
 * This data addresses the main solver arrays: it records the first index,
 * current count and maximum count of the vertices, triangles and links
 * allocated to one cloth, plus the btSoftBody it represents.
 * All counters and indices start at zero; the class performs no validation
 * of the values stored through its setters.
 */
class btOpenCLAcceleratedSoftBodyInterface
{
protected:
	/** Current number of vertices that are part of this cloth */
	int m_numVertices;
	/** Maximum number of vertices allocated to be part of this cloth */
	int m_maxVertices;
	/** Current number of triangles that are part of this cloth */
	int m_numTriangles;
	/** Maximum number of triangles allocated to be part of this cloth */
	int m_maxTriangles;
	/** Index of first vertex in the world allocated to this cloth */
	int m_firstVertex;
	/** Index of first triangle in the world allocated to this cloth */
	int m_firstTriangle;
	/** Index of first link in the world allocated to this cloth */
	int m_firstLink;
	/** Maximum number of links allocated to this cloth */
	int m_maxLinks;
	/** Current number of links allocated to this cloth */
	int m_numLinks;

	/** The actual soft body this data represents */
	btSoftBody *m_softBody;

public:
	/**
	 * Create an interface for the given soft body with every count and
	 * index zeroed. The pointer is stored as-is.
	 */
	btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
		m_numVertices( 0 ),
		m_maxVertices( 0 ),
		m_numTriangles( 0 ),
		m_maxTriangles( 0 ),
		m_firstVertex( 0 ),
		m_firstTriangle( 0 ),
		m_firstLink( 0 ),
		m_maxLinks( 0 ),
		m_numLinks( 0 ),
		m_softBody( softBody )
	{
	}

	// Read-only accessors; const-qualified so they can be used through
	// const references and pointers.
	int getNumVertices() const
	{
		return m_numVertices;
	}

	int getNumTriangles() const
	{
		return m_numTriangles;
	}

	int getMaxVertices() const
	{
		return m_maxVertices;
	}

	int getMaxTriangles() const
	{
		return m_maxTriangles;
	}

	int getFirstVertex() const
	{
		return m_firstVertex;
	}

	int getFirstTriangle() const
	{
		return m_firstTriangle;
	}

	// TODO: All of these set functions will have to do checks and
	// update the world because restructuring of the arrays will be necessary
	// Reasonable use of "friend"?
	void setNumVertices( int numVertices )
	{
		m_numVertices = numVertices;
	}

	void setNumTriangles( int numTriangles )
	{
		m_numTriangles = numTriangles;
	}

	void setMaxVertices( int maxVertices )
	{
		m_maxVertices = maxVertices;
	}

	void setMaxTriangles( int maxTriangles )
	{
		m_maxTriangles = maxTriangles;
	}

	void setFirstVertex( int firstVertex )
	{
		m_firstVertex = firstVertex;
	}

	void setFirstTriangle( int firstTriangle )
	{
		m_firstTriangle = firstTriangle;
	}

	void setMaxLinks( int maxLinks )
	{
		m_maxLinks = maxLinks;
	}

	void setNumLinks( int numLinks )
	{
		m_numLinks = numLinks;
	}

	void setFirstLink( int firstLink )
	{
		m_firstLink = firstLink;
	}

	int getMaxLinks() const
	{
		return m_maxLinks;
	}

	int getNumLinks() const
	{
		return m_numLinks;
	}

	int getFirstLink() const
	{
		return m_firstLink;
	}

	/** Returns the soft body pointer supplied at construction. */
	btSoftBody* getSoftBody() const
	{
		return m_softBody;
	}
};
class btOpenCLSoftBodySolver : public btSoftBodySolver class btOpenCLSoftBodySolver : public btSoftBodySolver
{ {
private: private:
/**
* SoftBody class to maintain information about a soft body instance
* within a solver.
* This data addresses the main solver arrays.
*/
class btAcceleratedSoftBodyInterface
{
protected:
/** Current number of vertices that are part of this cloth */
int m_numVertices;
/** Maximum number of vertices allocated to be part of this cloth */
int m_maxVertices;
/** Current number of triangles that are part of this cloth */
int m_numTriangles;
/** Maximum number of triangles allocated to be part of this cloth */
int m_maxTriangles;
/** Index of first vertex in the world allocated to this cloth */
int m_firstVertex;
/** Index of first triangle in the world allocated to this cloth */
int m_firstTriangle;
/** Index of first link in the world allocated to this cloth */
int m_firstLink;
/** Maximum number of links allocated to this cloth */
int m_maxLinks;
/** Current number of links allocated to this cloth */
int m_numLinks;
/** The actual soft body this data represents */
btSoftBody *m_softBody;
public:
btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
m_softBody( softBody )
{
m_numVertices = 0;
m_maxVertices = 0;
m_numTriangles = 0;
m_maxTriangles = 0;
m_firstVertex = 0;
m_firstTriangle = 0;
m_firstLink = 0;
m_maxLinks = 0;
m_numLinks = 0;
}
int getNumVertices()
{
return m_numVertices;
}
int getNumTriangles()
{
return m_numTriangles;
}
int getMaxVertices()
{
return m_maxVertices;
}
int getMaxTriangles()
{
return m_maxTriangles;
}
int getFirstVertex()
{
return m_firstVertex;
}
int getFirstTriangle()
{
return m_firstTriangle;
}
// TODO: All of these set functions will have to do checks and
// update the world because restructuring of the arrays will be necessary
// Reasonable use of "friend"?
void setNumVertices( int numVertices )
{
m_numVertices = numVertices;
}
void setNumTriangles( int numTriangles )
{
m_numTriangles = numTriangles;
}
void setMaxVertices( int maxVertices )
{
m_maxVertices = maxVertices;
}
void setMaxTriangles( int maxTriangles )
{
m_maxTriangles = maxTriangles;
}
void setFirstVertex( int firstVertex )
{
m_firstVertex = firstVertex;
}
void setFirstTriangle( int firstTriangle )
{
m_firstTriangle = firstTriangle;
}
void setMaxLinks( int maxLinks )
{
m_maxLinks = maxLinks;
}
void setNumLinks( int numLinks )
{
m_numLinks = numLinks;
}
void setFirstLink( int firstLink )
{
m_firstLink = firstLink;
}
int getMaxLinks()
{
return m_maxLinks;
}
int getNumLinks()
{
return m_numLinks;
}
int getFirstLink()
{
return m_firstLink;
}
btSoftBody* getSoftBody()
{
return m_softBody;
}
#if 0
void setAcceleration( Vectormath::Aos::Vector3 acceleration )
{
m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration );
}
void setWindVelocity( Vectormath::Aos::Vector3 windVelocity )
{
m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity );
}
/**
* Set the density of the air in which the cloth is situated.
*/
void setAirDensity( btScalar density )
{
m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast<float>(density) );
}
/**
* Add a collision object to this soft body.
*/
void addCollisionObject( btCollisionObject *collisionObject )
{
m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject );
}
#endif
};
class KernelDesc
{
protected:
public:
cl::Kernel kernel;
KernelDesc()
{
}
virtual ~KernelDesc()
{
}
};
btSoftBodyLinkDataOpenCL m_linkData; btSoftBodyLinkDataOpenCL m_linkData;
btSoftBodyVertexDataOpenCL m_vertexData; btSoftBodyVertexDataOpenCL m_vertexData;
@@ -228,7 +189,7 @@ private:
* Cloths owned by this solver. * Cloths owned by this solver.
* Only our cloths are in this array. * Only our cloths are in this array.
*/ */
btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;
/** Acceleration value to be applied to all non-static vertices in the solver. /** Acceleration value to be applied to all non-static vertices in the solver.
* Index n is cloth n, array sized by number of cloths in the world not the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver.
@@ -262,37 +223,34 @@ private:
btAlignedObjectArray< float > m_perClothMediumDensity; btAlignedObjectArray< float > m_perClothMediumDensity;
btOpenCLBuffer<float> m_clPerClothMediumDensity; btOpenCLBuffer<float> m_clPerClothMediumDensity;
KernelDesc prepareLinksKernel; cl_kernel prepareLinksKernel;
KernelDesc solvePositionsFromLinksKernel; cl_kernel solvePositionsFromLinksKernel;
KernelDesc updateConstantsKernel; cl_kernel updateConstantsKernel;
KernelDesc integrateKernel; cl_kernel integrateKernel;
KernelDesc addVelocityKernel; cl_kernel addVelocityKernel;
KernelDesc updatePositionsFromVelocitiesKernel; cl_kernel updatePositionsFromVelocitiesKernel;
KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel; cl_kernel updateVelocitiesFromPositionsWithoutVelocitiesKernel;
KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel; cl_kernel updateVelocitiesFromPositionsWithVelocitiesKernel;
KernelDesc vSolveLinksKernel; cl_kernel vSolveLinksKernel;
KernelDesc resetNormalsAndAreasKernel; cl_kernel resetNormalsAndAreasKernel;
KernelDesc normalizeNormalsAndAreasKernel; cl_kernel normalizeNormalsAndAreasKernel;
KernelDesc updateSoftBodiesKernel; cl_kernel updateSoftBodiesKernel;
KernelDesc outputToVertexArrayWithNormalsKernel; cl_kernel outputToVertexArrayWithNormalsKernel;
KernelDesc outputToVertexArrayWithoutNormalsKernel; cl_kernel outputToVertexArrayWithoutNormalsKernel;
KernelDesc outputToVertexArrayKernel; cl_kernel outputToVertexArrayKernel;
KernelDesc applyForcesKernel; cl_kernel applyForcesKernel;
KernelDesc collideSphereKernel; cl_kernel collideSphereKernel;
KernelDesc collideCylinderKernel; cl_kernel collideCylinderKernel;
static const int workGroupSize = 128; cl_command_queue m_cqCommandQue;
cl_context m_cxMainContext;
cl::CommandQueue m_queue;
cl::Context context;
cl::Device device;
/** /**
* Compile a compute shader kernel from a string and return the appropriate KernelDesc object. * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
*/ */
KernelDesc compileCLKernelFromString( const char *shaderString, const char *shaderName ); cl_kernel compileCLKernelFromString( const char *shaderString, const char *shaderName );
bool buildShaders(); bool buildShaders();
@@ -306,7 +264,7 @@ private:
void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
virtual void applyForces( float solverdt ); virtual void applyForces( float solverdt );
@@ -342,7 +300,7 @@ private:
public: public:
btOpenCLSoftBodySolver(const cl::CommandQueue &queue); btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx);
virtual ~btOpenCLSoftBodySolver(); virtual ~btOpenCLSoftBodySolver();
@@ -371,4 +329,4 @@ public:
virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer ); virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer );
}; // btOpenCLSoftBodySolver }; // btOpenCLSoftBodySolver
#endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H #endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H

View File

@@ -23,7 +23,6 @@ class btSoftBodyTriangleData;
class btSoftBodyLinkData; class btSoftBodyLinkData;
class btSoftBodyVertexData; class btSoftBodyVertexData;
class btVertexBufferDescriptor; class btVertexBufferDescriptor;
class btAcceleratedSoftBodyInterface;
class btCollisionObject; class btCollisionObject;
class btSoftBody; class btSoftBody;

View File

@@ -30,6 +30,7 @@ subject to the following restrictions:
//#define DEBUG_MINICL_KERNELS 1 //#define DEBUG_MINICL_KERNELS 1
static char* spPlatformID = "MiniCL, SCEA"; static char* spPlatformID = "MiniCL, SCEA";
// Driver version string reported for the CL_DRIVER_VERSION device query.
// Declared const because it points at a string literal, which must not be modified.
static const char* spDriverVersion = "1.0";
CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs( CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(
cl_uint num_entries, cl_uint num_entries,
@@ -91,23 +92,24 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
cl_device_info param_name , cl_device_info param_name ,
size_t param_value_size , size_t param_value_size ,
void * param_value , void * param_value ,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
{ {
switch (param_name) switch (param_name)
{ {
case CL_DEVICE_NAME: case CL_DEVICE_NAME:
{ {
char deviceName[] = "CPU"; char deviceName[] = "MiniCL CPU";
unsigned int nameLen = strlen(deviceName)+1; unsigned int nameLen = strlen(deviceName)+1;
btAssert(param_value_size>strlen(deviceName)); btAssert(param_value_size>strlen(deviceName));
if (nameLen < param_value_size) if (nameLen < param_value_size)
{ {
const char* cpuName = "CPU"; const char* cpuName = "MiniCL CPU";
sprintf((char*)param_value,"%s",cpuName); sprintf((char*)param_value,"%s",cpuName);
} else } else
{ {
printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size);
return CL_INVALID_VALUE;
} }
break; break;
} }
@@ -120,6 +122,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
} else } else
{ {
printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type));
return CL_INVALID_VALUE;
} }
break; break;
} }
@@ -132,6 +135,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
} else } else
{ {
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
return CL_INVALID_VALUE;
} }
break; break;
@@ -149,6 +153,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
} else } else
{ {
printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); printf("error: param_value_size should be at least %d\n",sizeof(cl_uint));
return CL_INVALID_VALUE;
} }
break; break;
} }
@@ -158,6 +163,142 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
*clock_frequency = 3*1024; *clock_frequency = 3*1024;
break; break;
} }
case CL_DEVICE_VENDOR :
{
if(param_value_size < (strlen(spPlatformID) + 1))
{
return CL_INVALID_VALUE;
}
strcpy((char*)param_value, spPlatformID);
if(param_value_size_ret != NULL)
{
*param_value_size_ret = strlen(spPlatformID) + 1;
}
break;
}
case CL_DRIVER_VERSION:
{
if(param_value_size < (strlen(spDriverVersion) + 1))
{
return CL_INVALID_VALUE;
}
strcpy((char*)param_value, spDriverVersion);
if(param_value_size_ret != NULL)
{
*param_value_size_ret = strlen(spDriverVersion) + 1;
}
break;
}
case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
{
cl_uint* maxDimensions = (cl_uint*)param_value;
*maxDimensions = 1;
break;
}
case CL_DEVICE_MAX_WORK_GROUP_SIZE:
	{
		// The OpenCL API defines the result of this query as size_t, not
		// cl_uint. Writing only a cl_uint leaves the upper bytes of the
		// caller's size_t uninitialized on platforms where size_t is wider.
		if (param_value_size < sizeof(size_t))
		{
			// Same convention as the other size-checked queries in this
			// function: reject a buffer that is too small.
			printf("error: param_value_size should be at least %d\n",(int)sizeof(size_t));
			return CL_INVALID_VALUE;
		}
		size_t* maxWorkGroupSize = (size_t*)param_value;
		*maxWorkGroupSize = 128;//1;
		break;
	}
case CL_DEVICE_ADDRESS_BITS:
{
cl_uint* addressBits = (cl_uint*)param_value;
*addressBits= 32; //@todo: should this be 64 for 64bit builds?
break;
}
case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
{
cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
*maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ?
break;
}
case CL_DEVICE_GLOBAL_MEM_SIZE:
{
cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
*maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ?
break;
}
case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
{
cl_bool* error_correction_support = (cl_bool*)param_value;
*error_correction_support = CL_FALSE;
break;
}
case CL_DEVICE_LOCAL_MEM_TYPE:
{
cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value;
*local_mem_type = CL_GLOBAL;
break;
}
case CL_DEVICE_LOCAL_MEM_SIZE:
{
cl_ulong* localmem = (cl_ulong*) param_value;
*localmem = 32*1024;
break;
}
case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
{
cl_ulong* localmem = (cl_ulong*) param_value;
*localmem = 64*1024;
break;
}
case CL_DEVICE_QUEUE_PROPERTIES:
{
cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value;
memset(queueProp,0,param_value_size);
break;
}
case CL_DEVICE_IMAGE_SUPPORT:
{
cl_bool* imageSupport = (cl_bool*) param_value;
*imageSupport = CL_FALSE;
break;
}
case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
case CL_DEVICE_MAX_READ_IMAGE_ARGS:
{
cl_uint* imageArgs = (cl_uint*) param_value;
*imageArgs = 0;
break;
}
case CL_DEVICE_IMAGE3D_MAX_DEPTH:
case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
case CL_DEVICE_IMAGE3D_MAX_WIDTH:
case CL_DEVICE_IMAGE2D_MAX_WIDTH:
{
size_t* maxSize = (size_t*) param_value;
*maxSize = 0;
break;
}
case CL_DEVICE_EXTENSIONS:
{
char* extensions = (char*) param_value;
*extensions = 0;
break;
}
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
{
cl_uint* width = (cl_uint*) param_value;
*width = 1;
break;
}
default: default:
{ {
printf("error: unsupported param_name:%d\n",param_name); printf("error: unsupported param_name:%d\n",param_name);
@@ -486,7 +627,7 @@ extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* co
} }
CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */, CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_properties * /* properties */,
cl_device_type /* device_type */, cl_device_type device_type ,
void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
void * /* user_data */, void * /* user_data */,
cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
@@ -502,14 +643,18 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie
"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7"
}; };
#ifdef DEBUG_MINICL_KERNELS btThreadSupportInterface* threadSupport = 0;
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); if (device_type==CL_DEVICE_TYPE_DEBUG)
#else {
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
threadSupport = new SequentialThreadSupport(stc);
} else
{
#if _WIN32 #if _WIN32
btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports); btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
// "MiniCL", // "MiniCL",
sUniqueThreadSupportName[sUniqueThreadSupportIndex++], sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
processMiniCLTask, //processCollisionTask, processMiniCLTask, //processCollisionTask,
@@ -518,10 +663,10 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie
#else #else
///todo: add posix thread support for other platforms ///todo: add posix thread support for other platforms
SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); threadSupport = new SequentialThreadSupport(stc);
#endif #endif
#endif //DEBUG_MINICL_KERNELS }
MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);

View File

@@ -155,8 +155,10 @@ typedef struct _cl_image_format {
#define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#define CL_DEVICE_TYPE_DEBUG (1 << 4)
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
// cl_device_info // cl_device_info
#define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001 #define CL_DEVICE_VENDOR_ID 0x1001

View File

@@ -140,6 +140,8 @@ static float4 operator+(const float4& a,const float4& b)
return tmp; return tmp;
} }
static float4 operator-(const float4& a,const float4& b) static float4 operator-(const float4& a,const float4& b)
{ {
float4 tmp; float4 tmp;
@@ -159,6 +161,17 @@ static float4 operator*(float a,const float4& b)
return tmp; return tmp;
} }
/** Divide each of the four components (x, y, z, w) of b by the scalar a. */
static float4 operator/(const float4& b,float a)
{
	float4 quotient;
	quotient.w = b.w/a;
	quotient.z = b.z/a;
	quotient.y = b.y/a;
	quotient.x = b.x/a;
	return quotient;
}
static float dot(const float4&a ,const float4& b) static float dot(const float4&a ,const float4& b)
{ {
@@ -170,6 +183,22 @@ static float dot(const float4&a ,const float4& b)
return tmp.x+tmp.y+tmp.z+tmp.w; return tmp.x+tmp.y+tmp.z+tmp.w;
} }
/**
 * Euclidean length computed from the x, y and z components of a.
 * The w component does not contribute to the result.
 */
static float length(const float4&a)
{
	return sqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
}
/**
 * Scale a by the reciprocal of length(a) using the scalar * float4 operator.
 * There is no guard for a zero-length input: the reciprocal is then a
 * division by zero.
 */
static float4 normalize(const float4&a)
{
	const float invLength = 1.f/length(a);
	float4 unit;
	unit = invLength*a;
	return unit;
}
static float4 cross(const float4&a ,const float4& b) static float4 cross(const float4&a ,const float4& b)
{ {
float4 tmp; float4 tmp;