diff --git a/Demos/CMakeLists.txt b/Demos/CMakeLists.txt index 7d70282ca..35d90a508 100644 --- a/Demos/CMakeLists.txt +++ b/Demos/CMakeLists.txt @@ -14,7 +14,7 @@ IF(BUILD_CPU_DEMOS) CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo DoublePrecisionDemo ConcaveDemo CollisionDemo ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo - MultiMaterialDemo SerializeDemo InternalEdgeDemo + MultiMaterialDemo SerializeDemo InternalEdgeDemo ) ELSE() SET(SharedDemoSubdirs @@ -28,6 +28,7 @@ ENDIF() MultiThreadedDemo VectorAdd_OpenCL ParticlesOpenCL + OpenCLClothDemo ) ELSE (USE_GLUT) diff --git a/Demos/DX11ClothDemo/btDirectComputeSupport.h b/Demos/DX11ClothDemo/btDirectComputeSupport.h index 52843484a..69360865a 100644 --- a/Demos/DX11ClothDemo/btDirectComputeSupport.h +++ b/Demos/DX11ClothDemo/btDirectComputeSupport.h @@ -1,6 +1,6 @@ /* Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ +Copyright (c) 2010 Advanced Micro Devices This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. @@ -13,6 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ + + #ifndef BT_DIRECT_COMPUTE_SUPPORT_HPP #define BT_DIRECT_COMPUTE_SUPPORT_HPP diff --git a/Demos/DX11ClothDemo/cap.h b/Demos/DX11ClothDemo/cap.h index 38cfae21b..e2d3d8e81 100644 --- a/Demos/DX11ClothDemo/cap.h +++ b/Demos/DX11ClothDemo/cap.h @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + class cap { diff --git a/Demos/DX11ClothDemo/cloth.h b/Demos/DX11ClothDemo/cloth.h index fd1983811..c130548c1 100644 --- a/Demos/DX11ClothDemo/cloth.h +++ b/Demos/DX11ClothDemo/cloth.h @@ -1,4 +1,22 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#include +#include +#include class piece_of_cloth { @@ -171,7 +189,8 @@ public: pd3dImmediateContext->PSSetShaderResources(0,1,&texture2D_view); - pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart ); + //pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart ); + pd3dImmediateContext->DrawIndexed( ((height-1)*(width-1)*3*2), 0, ( UINT )pSubset->VertexStart ); } SAFE_RELEASE(pd3dImmediateContext); @@ -246,7 +265,7 @@ public: //unsigned int indices[] = {0,1,2, 1,3,2}; - unsigned int* indices = new unsigned int[width*3*2+2 + height*width*3*2]; + unsigned int* indices = new unsigned int[(height-1)*(width-1)*3*2]; for(int y = 0; y < height-1; y++) { @@ -265,7 +284,8 @@ public: } } - bufferDesc.ByteWidth = sizeof(unsigned int)*(width*3*2+2 + height*width*3*2); + + bufferDesc.ByteWidth = sizeof(unsigned int)*((height-1)*(width-1)*3*2); bufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; InitData.pSysMem = indices; diff --git a/Demos/DX11ClothDemo/cloth_renderer.cpp b/Demos/DX11ClothDemo/cloth_renderer.cpp index 9d65257cb..a5c95e51c 100644 --- a/Demos/DX11ClothDemo/cloth_renderer.cpp +++ b/Demos/DX11ClothDemo/cloth_renderer.cpp @@ -32,18 +32,15 @@ class btDX11SIMDAwareSoftBodySolver; #include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btDefaultSoftBodySolver.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" -//#include "BulletSoftBody/Solvers/CPU/btAcceleratedSoftBody_CPUVertexSolver.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h" -//#include "BulletSoftBody/Solvers/DX11/btAcceleratedSoftBody_DX11SIMDAwareSolver.h" -//#include "BulletSoftBody/btAcceleratedSoftBody_DXVertexBuffers.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h" #include "BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h" -//#define USE_SIMDAWARE_SOLVER -#define 
USE_GPU_SOLVER -//#define USE_VERTEX_SOLVER +#define USE_SIMDAWARE_SOLVER +//#define USE_GPU_SOLVER #define USE_GPU_COPY -const int numFlags = 2; +const int numFlags = 5; const int clothWidth = 40; const int clothHeight = 60;//60; float _windAngle = 1.0;//0.4; @@ -206,6 +203,7 @@ btSoftRigidDynamicsWorld* m_dynamicsWorld; btDefaultSoftBodySolver *g_defaultSolver = NULL; btCPUSoftBodySolver *g_cpuSolver = NULL; btDX11SoftBodySolver *g_dx11Solver = NULL; +btDX11SIMDAwareSoftBodySolver *g_dx11SIMDSolver = NULL; btSoftBodySolver *g_solver = NULL; @@ -454,12 +452,17 @@ void initBullet(void) #ifdef USE_GPU_SOLVER g_dx11Solver = new btDX11SoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() ); g_solver = g_dx11Solver; +#else +#ifdef USE_SIMDAWARE_SOLVER + g_dx11SIMDSolver = new btDX11SIMDAwareSoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() ); + g_solver = g_dx11SIMDSolver; #else g_cpuSolver = new btCPUSoftBodySolver; g_solver = g_cpuSolver; //g_defaultSolver = new btDefaultSoftBodySolver; //g_solver = g_defaultSolver; #endif +#endif @@ -1260,6 +1263,9 @@ void CALLBACK OnD3D11DestroyDevice( void* pUserContext ) delete g_cpuSolver; if( g_dx11Solver ) delete g_dx11Solver; + if( g_dx11SIMDSolver ) + delete g_dx11SIMDSolver; + for(int i=0; i< m_collisionShapes.size(); i++) delete m_collisionShapes[i]; diff --git a/Demos/DX11ClothDemo/cylinder.h b/Demos/DX11ClothDemo/cylinder.h index a9c6edb7e..517fcdf51 100644 --- a/Demos/DX11ClothDemo/cylinder.h +++ b/Demos/DX11ClothDemo/cylinder.h @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + class cylinder { diff --git a/Demos/OpenCLClothDemo/AMD/CMakeLists.txt b/Demos/OpenCLClothDemo/AMD/CMakeLists.txt new file mode 100644 index 000000000..4140d59f4 --- /dev/null +++ b/Demos/OpenCLClothDemo/AMD/CMakeLists.txt @@ -0,0 +1,102 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + + +IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ENDIF() + + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_AMD + 
BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_ATISTREAMSDK_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_AMD + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different 
${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_AMD pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/Apple/CMakeLists.txt b/Demos/OpenCLClothDemo/Apple/CMakeLists.txt new file mode 100644 index 000000000..e89513c18 --- /dev/null +++ b/Demos/OpenCLClothDemo/Apple/CMakeLists.txt @@ -0,0 +1,60 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + + +IF (APPLE) + FIND_LIBRARY(OPENCL_LIBRARY OpenCL DOC "OpenCL lib for OSX") + FIND_PATH(OPENCL_INCLUDE_DIR OpenCL/cl.h DOC "Include for OpenCL on OSX") +ENDIF (APPLE) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_Apple + BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${OPENCL_LIBRARY} + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_Apple + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Apple pthread) +ENDIF(UNIX) + diff --git 
a/Demos/OpenCLClothDemo/CLClothDemo.sln b/Demos/OpenCLClothDemo/CLClothDemo.sln new file mode 100644 index 000000000..48af26cde --- /dev/null +++ b/Demos/OpenCLClothDemo/CLClothDemo.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLClothDemo", "CLClothDemo.vcproj", "{A61906AF-B5DE-454E-99F6-B653C250D221}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.ActiveCfg = Debug|Win32 + {A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.Build.0 = Debug|Win32 + {A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.ActiveCfg = Release|Win32 + {A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Demos/OpenCLClothDemo/CLClothDemo.vcproj b/Demos/OpenCLClothDemo/CLClothDemo.vcproj new file mode 100644 index 000000000..1023daf69 --- /dev/null +++ b/Demos/OpenCLClothDemo/CLClothDemo.vcproj @@ -0,0 +1,233 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Demos/OpenCLClothDemo/CMakeLists.txt b/Demos/OpenCLClothDemo/CMakeLists.txt new file mode 100644 index 000000000..1f378a3e1 --- /dev/null +++ b/Demos/OpenCLClothDemo/CMakeLists.txt @@ -0,0 +1,15 @@ +IF(BUILD_MINICL_OPENCL_DEMOS) + SUBDIRS( MiniCL ) +ENDIF() + +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() + +IF(BUILD_NVIDIA_OPENCL_DEMOS) + SUBDIRS(NVidia) +ENDIF() + +IF(APPLE) + SUBDIRS(Apple) +ENDIF() diff --git a/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt new file 
mode 100644 index 000000000..e6e216763 --- /dev/null +++ b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt @@ -0,0 +1,86 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +ADD_DEFINITIONS(-DUSE_MINICL) + +IF (WIN32) + IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) + ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) + ENDIF(CMAKE_CL_64) +ENDIF() + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_Mini + BulletSoftBodySolvers_CPU + MiniCL + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_Mini + ../cl_cloth_demo.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + ${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET 
AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Mini pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt b/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt new file mode 100644 index 000000000..d41b8f377 --- /dev/null +++ b/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt @@ -0,0 +1,102 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + + + + +IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/x64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/Win32 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/x64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/Win32 ) + ENDIF(CMAKE_CL_64) +ENDIF() + + + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) 
+ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_NVidia + BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_NVSDKCOMPUTE_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_NVidia + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} 
ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_NVidia pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/amdFlag.bmp b/Demos/OpenCLClothDemo/amdFlag.bmp new file mode 100644 index 000000000..dd1d394ec Binary files /dev/null and b/Demos/OpenCLClothDemo/amdFlag.bmp differ diff --git a/Demos/OpenCLClothDemo/atiFlag.bmp b/Demos/OpenCLClothDemo/atiFlag.bmp new file mode 100644 index 000000000..2be4847dd Binary files /dev/null and b/Demos/OpenCLClothDemo/atiFlag.bmp differ diff --git a/Demos/OpenCLClothDemo/bmpLoader.cpp b/Demos/OpenCLClothDemo/bmpLoader.cpp new file mode 100644 index 000000000..75e89f433 --- /dev/null +++ b/Demos/OpenCLClothDemo/bmpLoader.cpp @@ -0,0 +1,325 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
// ---------------------------------------------------------------------------
// Declarations (bmpLoader.h)
// ---------------------------------------------------------------------------
#ifndef BMPLOADER_H_
#define BMPLOADER_H_

#include <cstddef>
#include <cstdio>

namespace amd
{

//! @fixme this needs to be moved to common types header?
#pragma pack(1)
//! \struct uchar4 Four unsigned 8-bit channels; used both for decoded pixels
//! and for palette entries.
typedef struct
{
    unsigned char x;
    unsigned char y;
    unsigned char z;
    unsigned char w;
} uchar4;

typedef uchar4 ColorPalette;

//! \struct BitMapHeader Bitmap file header, read from / written to disk as-is
//! (hence the 1-byte packing above).
typedef struct {
    short id;
    int size;
    short reserved1;
    short reserved2;
    int offset;
} BitMapHeader;

//! \struct BitMapInfoHeader Bitmap info header, read from / written to disk
//! as-is.
typedef struct {
    int sizeInfo;
    int width;
    int height;
    short planes;
    short bitsPerPixel;
    unsigned compression;
    unsigned imageSize;
    int xPelsPerMeter;
    int yPelsPerMeter;
    int clrUsed;
    int clrImportant;
} BitMapInfoHeader;

//! \class BitMap used to load a bitmap image from a file.
//!
//! Only uncompressed, bottom-up images with 8 or 24 bits per pixel are
//! supported. Pixels are stored row by row in the order they appear in the
//! file.
class BitMap : public BitMapHeader, public BitMapInfoHeader
{
private:
    uchar4 * pixels_;        //!< width * height decoded pixels, or NULL

    int numColors_;          //!< number of entries in colors_ (0 if none)

    ColorPalette * colors_;  //!< palette of an 8-bit image, or NULL

    bool isLoaded_;          //!< true only after a fully successful load

    //! Frees the pixel and palette buffers and marks the bitmap unloaded.
    void releaseResources(void);

    //! Returns the palette index whose entry equals \p color (0 if none).
    int colorIndex(uchar4 color);

    //! Parses headers, palette and pixels from \p fd into this object.
    //! Returns false on any malformed, unsupported or truncated input.
    bool readImage(std::FILE * fd);
public:

    //! \brief Default constructor
    BitMap()
        : BitMapHeader(),
          BitMapInfoHeader(),
          pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {}

    /*!\brief Constructor
     *
     * Tries to load bitmap image from filename provided.
     *
     * \param filename pointer to null terminated string that is the path and
     * filename to the bitmap image to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    BitMap(const char * filename)
        : BitMapHeader(),
          BitMapInfoHeader(),
          pixels_(NULL),
          numColors_(0),
          colors_(NULL),
          isLoaded_(false)
    {
        load(filename);
    }

    /*! \brief Copy constructor
     *
     * \param rhs is the bitmap to be copied (cloned).
     */
    BitMap(const BitMap& rhs)
        : BitMapHeader(),
          BitMapInfoHeader(),
          pixels_(NULL),       // must be valid before operator= runs, because
          numColors_(0),       // the assignment releases the buffers it finds
          colors_(NULL),
          isLoaded_(false)
    {
        *this = rhs;
    }

    //! \brief Destructor
    ~BitMap()
    {
        releaseResources();
    }

    /*! \brief Assignment
     * \param rhs is the bitmap to be assigned (cloned).
     *
     * Buffers previously owned by this object are released. If the clone
     * cannot be allocated the headers are still copied but a following call
     * to isLoaded will return false.
     */
    BitMap& operator=(const BitMap& rhs);

    /*! \brief Load Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename name to the bitmap file to be loaded.
     *
     * In the case of an error, e.g. the bitmap file could not be loaded for
     * some reason, then a following call to isLoaded will return false.
     */
    void
    load(const char * filename);

    /*! \brief Write Bitmap image
     *
     * \param filename is a pointer to a null terminated string that is the
     * path and filename name to the bitmap file to be written.
     *
     * \return In the case that the bitmap is written true is returned. In
     * the case that a bitmap image is not already loaded or the write fails
     * for some reason false is returned.
     */
    bool
    write(const char * filename);

    /*! \brief Get image width
     *
     * \return If a bitmap image has been successfully loaded, then the width
     * image is returned, otherwise -1;
     */
    int
    getWidth(void) const
    {
        return isLoaded_ ? width : -1;
    }

    /*! \brief Get image height
     *
     * \return If a bitmap image has been successfully loaded, then the height
     * image is returned, otherwise -1.
     */
    int
    getHeight(void) const
    {
        return isLoaded_ ? height : -1;
    }

    /*! \brief Get image pixels
     *
     * \return If a bitmap image has been successfully loaded, then returns
     * a pointer to image's pixels, otherwise NULL.
     */
    const uchar4 *
    getPixels(void) const { return pixels_; }

    /*! \brief Is an image currently loaded
     *
     * \return If a bitmap image has been successfully loaded, then returns
     * true, otherwise if an image could not be loaded or an image has yet
     * to be loaded false is returned.
     */
    bool
    isLoaded(void) const { return isLoaded_; }
};
#pragma pack()
}

#endif // BMPLOADER_H_

// ---------------------------------------------------------------------------
// Implementation (bmpLoader.cpp)
// ---------------------------------------------------------------------------
#include <climits>
#include <cstring>
#include <new>
#include <vector>

namespace amd
{

//! File signature: the bytes 'B','M' read as a little-endian short.
static const short bitMapID = 19778;

//! An 8-bit image indexes at most this many palette entries.
static const int maxPaletteEntries = 256;

namespace
{

//! Closes the wrapped stdio stream when it goes out of scope, so every early
//! return releases the file handle.
class ScopedFile
{
public:
    explicit ScopedFile(std::FILE * fd) : fd_(fd) {}
    ~ScopedFile() { if (fd_ != NULL) { std::fclose(fd_); } }

    std::FILE * get() const { return fd_; }

    //! Closes now and reports fclose's result (EOF if nothing was open).
    int close()
    {
        const int rc = (fd_ != NULL) ? std::fclose(fd_) : EOF;
        fd_ = NULL;
        return rc;
    }

private:
    ScopedFile(const ScopedFile&);             // non-copyable
    ScopedFile& operator=(const ScopedFile&);  // non-copyable

    std::FILE * fd_;
};

//! Computes the number of meaningful bytes in one stored row and the number
//! of padding bytes that round the row up to a multiple of four.
void rowLayout(int width, int bitsPerPixel,
               std::size_t & payload, std::size_t & padding)
{
    payload = static_cast<std::size_t>(width) *
              static_cast<std::size_t>(bitsPerPixel / 8);
    padding = (4 - payload % 4) % 4;
}

} // anonymous namespace

void
BitMap::releaseResources(void)
{
    // delete[] on NULL is a no-op, so no guards are needed.
    delete[] pixels_;
    delete[] colors_;

    pixels_ = NULL;
    colors_ = NULL;
    numColors_ = 0;
    isLoaded_ = false;
}

BitMap& BitMap::operator=(const BitMap& rhs)
{
    if (this == &rhs) {
        return *this;
    }

    // Clone rhs's buffers before touching *this.
    uchar4 * newPixels = NULL;
    ColorPalette * newColors = NULL;
    bool cloned = false;

    if (rhs.isLoaded_ && rhs.pixels_ != NULL) {
        const std::size_t pixelCount =
            static_cast<std::size_t>(rhs.width) *
            static_cast<std::size_t>(rhs.height);

        cloned = true;
        if (rhs.colors_ != NULL && rhs.numColors_ > 0) {
            newColors = new (std::nothrow) ColorPalette[rhs.numColors_];
            cloned = (newColors != NULL);
        }
        if (cloned) {
            newPixels = new (std::nothrow) uchar4[pixelCount];
            cloned = (newPixels != NULL);
        }

        if (cloned) {
            if (newColors != NULL) {
                std::memcpy(newColors, rhs.colors_,
                            rhs.numColors_ * sizeof(ColorPalette));
            }
            std::memcpy(newPixels, rhs.pixels_, pixelCount * sizeof(uchar4));
        }
        else {
            delete[] newColors;
            newColors = NULL;
        }
    }

    // Drop whatever this object owned so far (previously leaked).
    releaseResources();

    // Copy both on-disk headers in one go.
    static_cast<BitMapHeader&>(*this) = rhs;
    static_cast<BitMapInfoHeader&>(*this) = rhs;

    if (cloned) {
        pixels_ = newPixels;
        colors_ = newColors;
        numColors_ = (newColors != NULL) ? rhs.numColors_ : 0;
        isLoaded_ = true;
    }

    return *this;
}

void
BitMap::load(const char * filename)
{
    // Release any existing resources
    releaseResources();

    if (filename == NULL) {
        return;
    }

    ScopedFile file(std::fopen(filename, "rb"));
    if (file.get() == NULL) {
        return;
    }

    if (readImage(file.get())) {
        isLoaded_ = true;
    }
    else {
        // Do not keep a half-read palette or pixel buffer around.
        releaseResources();
    }
}

bool
BitMap::readImage(std::FILE * fd)
{
    // File header. A short read (truncated file) does not set ferror, so the
    // element count returned by fread is what has to be checked.
    if (std::fread(static_cast<BitMapHeader *>(this),
                   sizeof(BitMapHeader), 1, fd) != 1) {
        return false;
    }

    // Confirm that we have a bitmap file
    if (id != bitMapID) {
        return false;
    }

    if (std::fread(static_cast<BitMapInfoHeader *>(this),
                   sizeof(BitMapInfoHeader), 1, fd) != 1) {
        return false;
    }

    // No support for compressed images
    if (compression != 0) {
        return false;
    }

    // Support only 8 or 24 bits images
    if (bitsPerPixel != 8 && bitsPerPixel != 24) {
        return false;
    }

    // Top-down images (negative height) and empty images are not supported.
    if (width <= 0 || height <= 0) {
        return false;
    }

    // The pixel data cannot start inside the headers, and the info header
    // must be at least as large as the part of it that was just read.
    const int fileHeaderBytes = static_cast<int>(sizeof(BitMapHeader));
    const int infoHeaderBytes = static_cast<int>(sizeof(BitMapInfoHeader));
    if (offset < fileHeaderBytes + infoHeaderBytes ||
        sizeInfo < infoHeaderBytes ||
        sizeInfo > offset - fileHeaderBytes) {
        return false;
    }

    // Guard width * height * sizeof(uchar4) against overflow.
    const std::size_t maxPixels =
        static_cast<std::size_t>(-1) / sizeof(uchar4);
    if (static_cast<std::size_t>(width) >
        maxPixels / static_cast<std::size_t>(height)) {
        return false;
    }
    const std::size_t pixelCount =
        static_cast<std::size_t>(width) * static_cast<std::size_t>(height);

    // Load the palette for 8 bits per pixel
    if (bitsPerPixel == 8) {
        if (clrUsed < 0 || clrUsed > maxPaletteEntries) {
            return false;
        }
        // clrUsed == 0 means "the full 2^bitsPerPixel entries".
        const int stored = (clrUsed != 0) ? clrUsed : maxPaletteEntries;
        const int gap = offset - fileHeaderBytes - sizeInfo;
        if (stored * static_cast<int>(sizeof(ColorPalette)) > gap) {
            return false;
        }

        // Always allocate a full table so that any pixel byte is a valid
        // index; entries the file does not provide stay zero.
        colors_ = new (std::nothrow) ColorPalette[maxPaletteEntries];
        if (colors_ == NULL) {
            return false;
        }
        std::memset(colors_, 0, maxPaletteEntries * sizeof(ColorPalette));
        numColors_ = maxPaletteEntries;

        // The palette follows the info header, whose real size is sizeInfo.
        if (std::fseek(fd, static_cast<long>(fileHeaderBytes) + sizeInfo,
                       SEEK_SET) != 0) {
            return false;
        }
        if (std::fread(colors_, sizeof(ColorPalette),
                       static_cast<std::size_t>(stored), fd) !=
            static_cast<std::size_t>(stored)) {
            return false;
        }
    }
    else {
        numColors_ = 0;
    }

    // Pixel data starts where the file header says it does.
    if (std::fseek(fd, static_cast<long>(offset), SEEK_SET) != 0) {
        return false;
    }

    // Allocate image
    pixels_ = new (std::nothrow) uchar4[pixelCount];
    if (pixels_ == NULL) {
        return false;
    }
    // Set image, including w component (white)
    std::memset(pixels_, 0xff, pixelCount * sizeof(uchar4));

    std::size_t payload = 0;
    std::size_t padding = 0;
    rowLayout(width, bitsPerPixel, payload, padding);

    std::vector<unsigned char> row(payload);
    for (int y = 0; y < height; y++) {
        if (std::fread(&row[0], 1, payload, fd) != payload) {
            return false;   // truncated pixel data
        }

        uchar4 * dst = pixels_ + static_cast<std::size_t>(y) * width;
        if (bitsPerPixel == 8) {
            // NOTE(review): the palette entry is copied verbatim, so 8-bit
            // pixels keep the palette's byte order (and its 4th byte in w),
            // whereas 24-bit pixels below are reordered. Kept as-is because
            // callers may depend on it - confirm intended channel order.
            for (int x = 0; x < width; x++) {
                dst[x] = colors_[row[x]];
            }
        }
        else { // 24 bit: file stores three bytes per pixel, mapped to z,y,x
            const unsigned char * src = &row[0];
            for (int x = 0; x < width; x++, src += 3) {
                dst[x].z = src[0];
                dst[x].y = src[1];
                dst[x].x = src[2];
            }
        }

        // Skip the padding that rounds each row up to four bytes.
        if (padding != 0 &&
            std::fseek(fd, static_cast<long>(padding), SEEK_CUR) != 0) {
            return false;
        }
    }

    return true;
}

int
BitMap::colorIndex(uchar4 color)
{
    if (colors_ == NULL) {
        return 0;
    }

    for (int i = 0; i < numColors_; i++) {
        if (colors_[i].x == color.x &&
            colors_[i].y == color.y &&
            colors_[i].z == color.z &&
            colors_[i].w == color.w) {
            return i;
        }
    }

    return 0;
}

bool
BitMap::write(const char * filename)
{
    if (!isLoaded_ || filename == NULL || pixels_ == NULL) {
        return false;
    }

    const bool indexed = (bitsPerPixel == 8);
    if (indexed && colors_ == NULL) {
        return false;
    }

    std::size_t payload = 0;
    std::size_t padding = 0;
    rowLayout(width, bitsPerPixel, payload, padding);
    const std::size_t stride = payload + padding;

    const int paletteEntries = indexed ? numColors_ : 0;
    const std::size_t dataOffset =
        sizeof(BitMapHeader) + sizeof(BitMapInfoHeader) +
        static_cast<std::size_t>(paletteEntries) * sizeof(ColorPalette);

    // The header fields are 32-bit; refuse images that do not fit.
    if (stride > (static_cast<std::size_t>(INT_MAX) - dataOffset) /
                 static_cast<std::size_t>(height)) {
        return false;
    }
    const std::size_t dataBytes = stride * static_cast<std::size_t>(height);

    // Write headers that describe the file actually being produced (a plain
    // 40-byte info header directly followed by palette and pixels) instead
    // of echoing the sizes/offsets of whatever file was loaded.
    BitMapHeader fileHeader = static_cast<const BitMapHeader&>(*this);
    BitMapInfoHeader infoHeader = static_cast<const BitMapInfoHeader&>(*this);
    fileHeader.offset = static_cast<int>(dataOffset);
    fileHeader.size = static_cast<int>(dataOffset + dataBytes);
    infoHeader.sizeInfo = static_cast<int>(sizeof(BitMapInfoHeader));
    infoHeader.imageSize = static_cast<unsigned>(dataBytes);
    infoHeader.clrUsed = paletteEntries;

    ScopedFile file(std::fopen(filename, "wb"));
    std::FILE * fd = file.get();
    if (fd == NULL) {
        return false;
    }

    if (std::fwrite(&fileHeader, sizeof(BitMapHeader), 1, fd) != 1 ||
        std::fwrite(&infoHeader, sizeof(BitMapInfoHeader), 1, fd) != 1) {
        return false;
    }

    // Write palette for 8 bits per pixel
    if (indexed &&
        std::fwrite(colors_, sizeof(ColorPalette),
                    static_cast<std::size_t>(paletteEntries), fd) !=
            static_cast<std::size_t>(paletteEntries)) {
        return false;
    }

    std::vector<unsigned char> row(stride, 0);   // padding bytes stay zero
    for (int y = 0; y < height; y++) {
        const uchar4 * src = pixels_ + static_cast<std::size_t>(y) * width;
        if (indexed) {
            for (int x = 0; x < width; x++) {
                row[x] = static_cast<unsigned char>(colorIndex(src[x]));
            }
        }
        else { // 24 bit
            unsigned char * dst = &row[0];
            for (int x = 0; x < width; x++, dst += 3) {
                dst[0] = src[x].z;
                dst[1] = src[x].y;
                dst[2] = src[x].x;
            }
        }

        if (std::fwrite(&row[0], 1, stride, fd) != stride) {
            return false;
        }
    }

    // Closing flushes buffered data; a failure here means the file is bad.
    return file.close() == 0;
}

} // amd
+class BitMap : public BitMapHeader, public BitMapInfoHeader +{ +private: + uchar4 * pixels_; + + int numColors_; + + ColorPalette * colors_; + + bool isLoaded_; + + void releaseResources(void); + + int colorIndex(uchar4 color); +public: + + //! \brief Default constructor + BitMap() + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + {} + + /*!\brief Constructor + * + * Tries to load bitmap image from filename provided. + * + * \param filename pointer to null terminated string that is the path and + * filename to the bitmap image to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + BitMap(const char * filename) + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + { + load(filename); + } + + /*! \brief Copy constructor + * + * \param rhs is the bitmap to be copied (cloned). + */ + BitMap(const BitMap& rhs) + { + *this = rhs; + } + + //! \brief Destructor + ~BitMap() + { + releaseResources(); + } + + /*! \brief Assignment + * \param rhs is the bitmap to be assigned (cloned). + */ + BitMap& operator=(const BitMap& rhs); + + /*! \brief Load Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + void + load(const char * filename); + + /*! \brief Write Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be written. + * + * \return In the case that the bitmap is written true is returned. In + * the case that a bitmap image is not already loaded or the write fails + * for some reason false is returned. + */ + bool + write(const char * filename); + + /*! 
\brief Get image width + * + * \return If a bitmap image has been successfully loaded, then the width + * image is returned, otherwise -1; + */ + int + getWidth(void) const + { + if (isLoaded_) { + return width; + } + else { + return -1; + } + } + + /*! \brief Get image height + * + * \return If a bitmap image has been successfully loaded, then the height + * image is returned, otherwise -1. + */ + int + getHeight(void) const + { + if (isLoaded_) { + return height; + } + else { + return -1; + } + } + + /*! \brief Get image width + * + * \return If a bitmap image has been successfully loaded, then returns + * a pointer to image's pixels, otherwise NULL. + */ + const uchar4 * + getPixels(void) const { return pixels_; } + + /*! \brief Is an image currently loaded + * + * \return If a bitmap image has been successfully loaded, then returns + * true, otherwise if an image could not be loaded or an image has yet + * to be loaded false is returned. + */ + bool + isLoaded(void) const { return isLoaded_; } +}; +#pragma pack() +} + +#endif // BMPLOADER_H_ diff --git a/Demos/OpenCLClothDemo/btOpenCLSupport.h b/Demos/OpenCLClothDemo/btOpenCLSupport.h new file mode 100644 index 000000000..5b03e14c5 --- /dev/null +++ b/Demos/OpenCLClothDemo/btOpenCLSupport.h @@ -0,0 +1,84 @@ +#ifndef BT_OPENCL_SUPPORT_HPP +#define BT_OPENCL_SUPPORT_HPP + +// OpenCL support +#include + +namespace BTAcceleratedSoftBody +{ + class OpenCLSupportHelper + { + private: + cl::Context m_context; + std::vector m_devices; + cl::CommandQueue m_queue; + public: + OpenCLSupportHelper() + { + } + + virtual ~OpenCLSupportHelper() + { + } + + cl::Device getDevice() + { + return m_devices[0]; + } + + cl::CommandQueue getCommandQueue() + { + return m_queue; + } + + cl::Context getContext() + { + return m_context; + } + + bool InitOpenCLDevice() + { + cl_int err; + + std::vector platforms; + err = cl::Platform::get(&platforms); + checkErr(platforms.size() != 0 ? 
CL_SUCCESS : -1, "Platform::get()"); + + std::string platformVendor; + platforms[0].getInfo(CL_PLATFORM_VENDOR, &platformVendor); + //std::cout << "Platform is by: " << platformVendor << "\n"; + + intptr_t properties[] = { + CL_CONTEXT_PLATFORM, (intptr_t)platforms[0](), + 0, 0 + }; + m_context = cl::Context( + CL_DEVICE_TYPE_GPU, + properties, + NULL, + NULL, + &err); + + if (err != CL_SUCCESS) + { + btAssert( "Context::Context()" ); + } + + m_devices = m_context.getInfo(); + if( m_devices.size() <= 0 ) + { + btAssert( "devices.size() > 0" ); + } + + m_queue = cl::CommandQueue(m_context, m_devices[0], 0, &err); + if (err != CL_SUCCESS) + { + btAssert( "CommandQueue::CommandQueue()"); + } + } + }; + + +} // namespace BTAcceleratedSoftBody + +#endif // #ifndef BT_OPENCL_SUPPORT_HPP \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp new file mode 100644 index 000000000..b7e22c714 --- /dev/null +++ b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp @@ -0,0 +1,470 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifdef _WIN32 +#include +#endif + +#include "clstuff.h" +#include "gl_win.h" +#include "cloth.h" + +#define USE_GPU_SOLVER + + +const int numFlags = 5; +const int clothWidth = 40; +const int clothHeight = 60;//60; +float _windAngle = 1.0;//0.4; +float _windStrength = 15; + + + +#include +using namespace std; + + + + +#include "btBulletDynamicsCommon.h" +#include "LinearMath/btHashMap.h" +#include "BulletSoftBody/btSoftRigidDynamicsWorld.h" +#include "vectormath/vmInclude.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h" + +using Vectormath::Aos::Vector3; + +class piece_of_cloth; +class btBroadphaseInterface; +class btCollisionShape; +class btOverlappingPairCache; +class btCollisionDispatcher; +class btConstraintSolver; +struct btCollisionAlgorithmCreateFunc; +class btDefaultCollisionConfiguration; + +namespace Vectormath +{ + namespace Aos + { + class Transform3; + } +} + + +btAlignedObjectArray m_collisionShapes; +btBroadphaseInterface* m_broadphase; +btCollisionDispatcher* m_dispatcher; +btConstraintSolver* m_solver; +btDefaultCollisionConfiguration* m_collisionConfiguration; + +btCPUSoftBodySolver *g_cpuSolver = NULL; +btOpenCLSoftBodySolver *g_openCLSolver = NULL; + +btSoftBodySolver *g_solver = NULL; + +btAlignedObjectArray m_flags; +btSoftRigidDynamicsWorld* m_dynamicsWorld; +btAlignedObjectArray cloths; + +extern cl_context g_cxMainContext; +extern cl_device_id g_cdDevice; +extern cl_command_queue g_cqCommandQue; + + +const float flagSpacing = 30.f; + + +// Helper to test and add links correctly. 
+// Records links that have already been generated +static bool testAndAddLink( btAlignedObjectArray &trianglesForLinks, btSoftBody *softBody, int triangle, int *triangleVertexIndexArray, int numVertices, int vertex0, int vertex1, int nonLinkVertex, btSoftBody::Material *structuralMaterial, bool createBendLinks, btSoftBody::Material *bendMaterial ) +{ + if( trianglesForLinks[ numVertices * vertex0 + vertex1 ] >= 0 && createBendLinks) + { + // Already have link so find other triangle and generate cross link + + int otherTriangle = trianglesForLinks[numVertices * vertex0 + vertex1]; + int otherIndices[3] = {triangleVertexIndexArray[otherTriangle * 3], triangleVertexIndexArray[otherTriangle * 3 + 1], triangleVertexIndexArray[otherTriangle * 3 + 2]}; + + int nodeA; + // Test all links of the other triangle against this link. The one that's not part of it is what we want. + if( otherIndices[0] != vertex0 && otherIndices[0] != vertex1 ) + nodeA = otherIndices[0]; + if( otherIndices[1] != vertex0 && otherIndices[1] != vertex1 ) + nodeA = otherIndices[1]; + if( otherIndices[2] != vertex0 && otherIndices[2] != vertex1 ) + nodeA = otherIndices[2]; + + softBody->appendLink( nodeA, nonLinkVertex, bendMaterial ); + } else { + // Don't yet have link so create it + softBody->appendLink( vertex0, vertex1, structuralMaterial ); + + // If we added a new link, set the triangle array + trianglesForLinks[numVertices * vertex0 + vertex1] = triangle; + trianglesForLinks[numVertices * vertex1 + vertex0] = triangle; + + } + + return true; +} + +btSoftBody *createFromIndexedMesh( btVector3 *vertexArray, int numVertices, int *triangleVertexIndexArray, int numTriangles, bool createBendLinks ) +{ + btSoftBody* softBody = new btSoftBody(&(m_dynamicsWorld->getWorldInfo()), numVertices, vertexArray, 0); + btSoftBody::Material * structuralMaterial = softBody->appendMaterial(); + btSoftBody::Material * bendMaterial; + if( createBendLinks ) + { + bendMaterial = softBody->appendMaterial(); + 
bendMaterial->m_kLST = 0.7; + } else { + bendMaterial = NULL; + } + structuralMaterial->m_kLST = 1.0; + + + // List of values for each link saying which triangle is associated with that link + // -1 to start. Once a value is entered we know the "other" triangle + // and can add a link across the link + btAlignedObjectArray triangleForLinks; + triangleForLinks.resize( numVertices * numVertices, -1 ); + int numLinks = 0; + for( int triangle = 0; triangle < numTriangles; ++triangle ) + { + int index[3] = {triangleVertexIndexArray[triangle * 3], triangleVertexIndexArray[triangle * 3 + 1], triangleVertexIndexArray[triangle * 3 + 2]}; + softBody->appendFace( index[0], index[1], index[2] ); + + // Generate the structural links directly from the triangles + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[0], index[1], index[2], structuralMaterial, createBendLinks, bendMaterial ); + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[1], index[2], index[0], structuralMaterial, createBendLinks, bendMaterial ); + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[2], index[0], index[1], structuralMaterial, createBendLinks, bendMaterial); + } + + return softBody; +} + +/** + * Create a sequence of flag objects and add them to the world. 
+ */ +void createFlag( btSoftBodySolver &solver, int width, int height, btAlignedObjectArray &flags ) +{ + // First create a triangle mesh to represent a flag + + using Vectormath::Aos::Matrix3; + using Vectormath::Aos::Vector3; + + // Allocate a simple mesh consisting of a vertex array and a triangle index array + btIndexedMesh mesh; + mesh.m_numVertices = width*height; + mesh.m_numTriangles = 2*(width-1)*(height-1); + + btVector3 *vertexArray = new btVector3[mesh.m_numVertices]; + + mesh.m_vertexBase = reinterpret_cast(vertexArray); + int *triangleVertexIndexArray = new int[3*mesh.m_numTriangles]; + mesh.m_triangleIndexBase = reinterpret_cast(triangleVertexIndexArray); + mesh.m_triangleIndexStride = sizeof(int)*3; + mesh.m_vertexStride = sizeof(Vector3); + + // Generate normalised object space vertex coordinates for a rectangular flag + float zCoordinate = 0.0f; + + Matrix3 defaultScale(Vector3(5.f, 0.f, 0.f), Vector3(0.f, 20.f, 0.f), Vector3(0.f, 0.f, 1.f)); + for( int y = 0; y < height; ++y ) + { + float yCoordinate = y*2.0f/float(height) - 1.0f; + for( int x = 0; x < width; ++x ) + { + float xCoordinate = x*2.0f/float(width) - 1.0f; + + Vector3 vertex(xCoordinate, yCoordinate, zCoordinate); + Vector3 transformedVertex = defaultScale*vertex; + + vertexArray[y*width + x] = btVector3(transformedVertex.getX(), transformedVertex.getY(), transformedVertex.getZ() ); + + } + } + + // Generate vertex indices for triangles + for( int y = 0; y < (height-1); ++y ) + { + for( int x = 0; x < (width-1); ++x ) + { + // Triangle 0 + // Top left of square on mesh + { + int vertex0 = y*width + x; + int vertex1 = vertex0 + 1; + int vertex2 = vertex0 + width; + int triangleIndex = 2*y*(width-1) + 2*x; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)] = vertex0; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex+1)/sizeof(int)+1] = vertex1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex+2)/sizeof(int)+2] = 
vertex2; + } + + // Triangle 1 + // Bottom right of square on mesh + { + int vertex0 = y*width + x + 1; + int vertex1 = vertex0 + width; + int vertex2 = vertex1 - 1; + int triangleIndex = 2*y*(width-1) + 2*x + 1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)] = vertex0; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)+1] = vertex1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)+2] = vertex2; + } + } + } + + + float rotateAngleRoundZ = 0.5; + float rotateAngleRoundX = 0.5; + btMatrix3x3 defaultRotate; + defaultRotate[0] = btVector3(cos(rotateAngleRoundZ), sin(rotateAngleRoundZ), 0.f); + defaultRotate[1] = btVector3(-sin(rotateAngleRoundZ), cos(rotateAngleRoundZ), 0.f); + defaultRotate[2] = btVector3(0.f, 0.f, 1.f); + btMatrix3x3 defaultRotateX; + defaultRotateX[0] = btVector3(1.f, 0.f, 0.f); + defaultRotateX[1] = btVector3( 0.f, cos(rotateAngleRoundX), sin(rotateAngleRoundX)); + defaultRotateX[2] = btVector3(0.f, -sin(rotateAngleRoundX), cos(rotateAngleRoundX)); + + btMatrix3x3 defaultRotateAndScale( (defaultRotateX*defaultRotate) ); + + + // Construct the sequence flags applying a slightly different translation to each one to arrange them + // appropriately in the scene. 
+ for( int i = 0; i < numFlags; ++i ) + { + float zTranslate = flagSpacing * (i-numFlags/2); + + btVector3 defaultTranslate(0.f, 20.f, zTranslate); + + btTransform transform( defaultRotateAndScale, defaultTranslate ); + + + btSoftBody *softBody = createFromIndexedMesh( vertexArray, mesh.m_numVertices, triangleVertexIndexArray, mesh.m_numTriangles, true ); + + + for( int i = 0; i < mesh.m_numVertices; ++i ) + { + softBody->setMass(i, 10.f/mesh.m_numVertices); + } + softBody->setMass((height-1)*(width), 0.f); + softBody->setMass((height-1)*(width) + width - 1, 0.f); + softBody->setMass((height-1)*width + width/2, 0.f); + softBody->m_cfg.collisions = btSoftBody::fCollision::CL_SS+btSoftBody::fCollision::CL_RS; + + + flags.push_back( softBody ); + + softBody->transform( transform ); + + m_dynamicsWorld->addSoftBody( softBody ); + } + + delete [] vertexArray; + delete [] triangleVertexIndexArray; +} + + +void updatePhysicsWorld() +{ + static int counter = 0; + + // Change wind velocity a bit based on a frame counter + if( (counter % 400) == 0 ) + { + _windAngle = (_windAngle + 0.05f); + if( _windAngle > (2*3.141) ) + _windAngle = 0; + + for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { + btSoftBody *cloth = 0; + + cloth = m_flags[flagIndex]; + + float localWind = _windAngle + 0.5*(((float(rand())/RAND_MAX))-0.1); + float xCoordinate = cos(localWind)*_windStrength; + float zCoordinate = sin(localWind)*_windStrength; + + cloth->setWindVelocity( btVector3(xCoordinate, 0, zCoordinate) ); + } + } + + //btVector3 origin( capCollider->getWorldTransform().getOrigin() ); + //origin.setX( origin.getX() + 0.05 ); + //capCollider->getWorldTransform().setOrigin( origin ); + + counter++; +} + +void initBullet(void) +{ + +#ifdef USE_GPU_SOLVER + g_openCLSolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext); + g_solver = g_openCLSolver; +#else + g_cpuSolver = new btCPUSoftBodySolver; + g_solver = g_cpuSolver; +#endif + + m_collisionConfiguration = 
new btDefaultCollisionConfiguration(); + m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration); + m_broadphase = new btDbvtBroadphase(); + btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver; + m_solver = sol; + + m_dynamicsWorld = new btSoftRigidDynamicsWorld(m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration, g_solver); + + m_dynamicsWorld->setGravity(btVector3(0,-10,0)); + btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.))); + m_collisionShapes.push_back(groundShape); + btTransform groundTransform; + groundTransform.setIdentity(); + groundTransform.setOrigin(btVector3(0,-50,0)); + + + + + + + m_dynamicsWorld->getWorldInfo().air_density = (btScalar)1.2; + m_dynamicsWorld->getWorldInfo().water_density = 0; + m_dynamicsWorld->getWorldInfo().water_offset = 0; + m_dynamicsWorld->getWorldInfo().water_normal = btVector3(0,0,0); + m_dynamicsWorld->getWorldInfo().m_gravity.setValue(0,-10,0); + + + +#if 0 + { + btScalar mass(0.); + + //rigidbody is dynamic if and only if mass is non zero, otherwise static + bool isDynamic = (mass != 0.f); + + btVector3 localInertia(0,0,0); + if (isDynamic) + groundShape->calculateLocalInertia(mass,localInertia); + + //using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects + btDefaultMotionState* myMotionState = new btDefaultMotionState(groundTransform); + btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,groundShape,localInertia); + btRigidBody* body = new btRigidBody(rbInfo); + + //add the body to the dynamics world + m_dynamicsWorld->addRigidBody(body); + } + +#endif + +#ifdef USE_GPU_SOLVER + createFlag( *g_openCLSolver, clothWidth, clothHeight, m_flags ); +#else + createFlag( *g_cpuSolver, clothWidth, clothHeight, m_flags ); +#endif + + // Create output buffer descriptions for ecah flag + // These describe where the simulation should send output data to + 
for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { +// m_flags[flagIndex]->setWindVelocity( Vectormath::Aos::Vector3( 0.f, 0.f, 15.f ) ); + + // In this case we have a DX11 output buffer with a vertex at index 0, 8, 16 and so on as well as a normal at 3, 11, 19 etc. + // Copies will be performed GPU-side directly into the output buffer + + btCPUVertexBufferDescriptor *vertexBufferDescriptor = new btCPUVertexBufferDescriptor(reinterpret_cast< float* >(cloths[flagIndex].cpu_buffer), 0, 8, 3, 8); + cloths[flagIndex].m_vertexBufferDescriptor = vertexBufferDescriptor; + } + + + g_solver->optimize( m_dynamicsWorld->getSoftBodyArray() ); + +} + + + + +btClock m_clock; + +void doFlags() +{ + //float ms = getDeltaTimeMicroseconds(); + btScalar dt = (btScalar)m_clock.getTimeMicroseconds(); + m_clock.reset(); + + ///step the simulation + if( m_dynamicsWorld ) + { + m_dynamicsWorld->stepSimulation(dt/1000000.); + static int frameCount = 0; + frameCount++; + if (frameCount==100) + { + m_dynamicsWorld->stepSimulation(1./60.,0); + CProfileManager::dumpAll(); + } + updatePhysicsWorld(); + } + + + for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { + g_solver->copySoftBodyToVertexBuffer( m_flags[flagIndex], cloths[flagIndex].m_vertexBufferDescriptor ); + cloths[flagIndex].draw(); + } +} + + +int main(int argc, char *argv[]) +{ + + + initCL(); + + cloths.resize(numFlags); + + for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex ) + { + cloths[flagIndex].create_buffers(clothWidth, clothHeight); + } + + initBullet(); + m_dynamicsWorld->stepSimulation(1./60.,0); + + preInitGL(argc, argv); + + std::string flagTexs[] = { + "atiFlag.bmp", + "amdFlag.bmp", + }; + int numFlagTexs = 2; + + for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex ) + { + cloths[flagIndex].create_texture(flagTexs[flagIndex % numFlagTexs]); + cloths[flagIndex].x_offset = 0; + cloths[flagIndex].y_offset = 0; + cloths[flagIndex].z_offset = 0; + } + + goGL(); + return 0; 
+} + diff --git a/Demos/OpenCLClothDemo/cloth.h b/Demos/OpenCLClothDemo/cloth.h new file mode 100644 index 000000000..cce971b22 --- /dev/null +++ b/Demos/OpenCLClothDemo/cloth.h @@ -0,0 +1,183 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#include "gl_win.h" //for OpenGL stuff + +#include "bmpLoader.h" +#include +#include "LinearMath/btScalar.h" + + +struct vertex_struct +{ + float pos[3]; + float normal[3]; + float texcoord[2]; + +}; + +class btVertexBufferDescriptor; + +class piece_of_cloth +{ + public: + + void destroy(void) + { + if(created) + { + if(cpu_buffer) delete [] cpu_buffer; + } + } + + piece_of_cloth() + { + created = false; + cpu_buffer = NULL; + m_vertexBufferDescriptor = NULL; + } + + bool created; + + vertex_struct* cpu_buffer; + unsigned int* indices; + btVertexBufferDescriptor *m_vertexBufferDescriptor; + + double x_offset, y_offset, z_offset; + + int width; + int height; + + GLuint texture; + + void draw(void) + { + glEnable(GL_TEXTURE_2D); + glBindTexture (GL_TEXTURE_2D, texture); + + glEnable(GL_DEPTH_TEST); + + glColor3f(0.0f, 1.0f, 1.0f); + + glEnableClientState(GL_VERTEX_ARRAY); + //glEnableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glBindTexture(GL_TEXTURE_2D, texture); + + glVertexPointer( 3, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].pos[0])) ); + //glNormalPointer( 3, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].normal[0])) ); + glTexCoordPointer( 2, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].texcoord[0])) ); + + glDrawElements(GL_TRIANGLES, (height-1 )*(width-1)*3*2, GL_UNSIGNED_INT, indices); +// glDisableClientState(GL_NORMAL_ARRAY); + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + + glBindTexture(GL_TEXTURE_2D, 0); + } + + void create_texture(std::string filename) + { + amd::BitMap texBMP(filename.c_str()); + if ( texBMP.isLoaded() ) { + glEnable(GL_TEXTURE_2D); + glGenTextures(1, &texture); + + glBindTexture(GL_TEXTURE_2D, texture); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL); + + glTexImage2D( + GL_TEXTURE_2D, + 0, + GL_RGBA8, + texBMP.getWidth(), + texBMP.getHeight(), + 0, + GL_RGBA, + GL_UNSIGNED_BYTE, + texBMP.getPixels()); + + glBindTexture(GL_TEXTURE_2D, 0); + } + else { + std::cout << "ERROR: could not load bitmap " << "texture.bmp" << std::endl; + exit(1); + } + } + + void create_buffers(int width_, int height_) + { + width = width_; + height = height_; + + created = true; + + cpu_buffer = new vertex_struct[width*height]; + memset(cpu_buffer, 0, width*height*sizeof(vertex_struct)); + + + // Initial test data for rendering + for(int y = 0; y < height; y++) + { + for(int x = 0; x < width; x++) + { + double coord = btSin(x/5.0)*0.01; + //coord = sin(y/); + + cpu_buffer[y*width+x].pos[0] = (x/((float)(width-1)))*1; + cpu_buffer[y*width+x].pos[1] = coord; + cpu_buffer[y*width+x].pos[2] = (y/((float)(height-1)))*1; + cpu_buffer[y*width+x].normal[0] = 1; + cpu_buffer[y*width+x].normal[1] = 0; + cpu_buffer[y*width+x].normal[2] = 0; + cpu_buffer[y*width+x].texcoord[0] = x/((float)(width-1)); + cpu_buffer[y*width+x].texcoord[1] = y/((float)(height-1)); + } + } + + + // Generate and fill index array for rendering + indices = new unsigned int[width*3*2+2 + height*width*3*2]; + + for(int y = 0; y < height-1; y++) + { + for(int x = 0; x < width-1; x++) + { + // *3 indices/triangle, *2 triangles/quad + int baseIndex = (x + y*(width-1))*3*2; + indices[baseIndex] = x + y*width; + indices[baseIndex+1] = x+1 + y*width; + indices[baseIndex+2] = x+width + y*width; + + + indices[baseIndex+3] = x + 1 + y*width; + indices[baseIndex+4] = x+(width+1) + y*width; 
+ indices[baseIndex+5] = x+width + y*width; + } + } + } +}; diff --git a/Demos/OpenCLClothDemo/clstuff.cpp b/Demos/OpenCLClothDemo/clstuff.cpp new file mode 100644 index 000000000..0ee8b1ad4 --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.cpp @@ -0,0 +1,53 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + + +#include "clstuff.h" +#include "gl_win.h" + + +#include "btOclCommon.h" +#include "btOclUtils.h" +#include "LinearMath/btScalar.h" + +cl_context g_cxMainContext; +cl_device_id g_cdDevice; +cl_command_queue g_cqCommandQue; + +void initCL(void) +{ + int ciErrNum = 0; + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum); + //try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel +//#ifdef USE_MINICL +// g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum); +//#else + g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); +//#endif + + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext); + + btOclPrintDevInfo(g_cdDevice); + + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} diff --git a/Demos/OpenCLClothDemo/clstuff.h b/Demos/OpenCLClothDemo/clstuff.h new file mode 100644 index 000000000..09f6313eb --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.h @@ -0,0 +1,10 @@ +#ifndef __CLSTUFF_HDR__ +#define __CLSTUFF_HDR__ + + + + + +void initCL(void); + +#endif //__CLSTUFF_HDR__ \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/clstuff.hpp b/Demos/OpenCLClothDemo/clstuff.hpp new file mode 100644 index 000000000..09f6313eb --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.hpp @@ -0,0 +1,10 @@ +#ifndef __CLSTUFF_HDR__ +#define __CLSTUFF_HDR__ + + + + + +void initCL(void); + +#endif //__CLSTUFF_HDR__ \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/fragment.glsl b/Demos/OpenCLClothDemo/fragment.glsl new file mode 100644 index 
000000000..6a265d348 --- /dev/null +++ b/Demos/OpenCLClothDemo/fragment.glsl @@ -0,0 +1,7 @@ +uniform sampler2D tex; + +void main() +{ + vec4 color = texture2D(tex,gl_TexCoord[0].st); + gl_FragColor = color; +} \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/gl_win.cpp b/Demos/OpenCLClothDemo/gl_win.cpp new file mode 100644 index 000000000..1826330a3 --- /dev/null +++ b/Demos/OpenCLClothDemo/gl_win.cpp @@ -0,0 +1,272 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#include "clstuff.h" +#include "gl_win.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +//#ifndef _WIN32 && !defined(__APPLE__) +//#include +//#endif //!_WIN32 + + + +static GLuint vbo = 0; + +#ifdef _WIN32 +#include +#endif + + +static unsigned int windowWidth = 1280; +static unsigned int windowHeight = 1024; + +// mouse controls +int mouseOldX; +int mouseOldY; +int mouseButtons = 0; + +float rotateX; +float rotateY; + +float translateZ; +float translateX; +float translateY; + +static GLuint glProgram; + + +void doFlags(); + + +void render( void) +{ + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); +// glDisable ( GL_CULL_FACE ); + + glMatrixMode( GL_MODELVIEW ); + glLoadIdentity(); + + glTranslatef( translateX, translateY, translateZ ); + glRotatef( rotateX, 0.5f , 0.0f, 0.0f ); + glRotatef( rotateY, 0.0f, 0.5f, 0.0f ); + +// glDisable (GL_BLEND); + + doFlags(); + // TODO: + //glBindBuffer(GL_ARRAY_BUFFER, vbo); + //glVertexPointer(4, GL_FLOAT, 0, NULL); + //glEnableClientState(GL_VERTEX_ARRAY); + + //glDrawArrays(GL_POINTS, 0, 4*4); + +// glDisableClientState(GL_VERTEX_ARRAY); + // glBindBuffer(GL_ARRAY_BUFFER, 0); + + +// glUseProgram(0); +} + +static void initGL(void) +{ + //glClearColor( 0.05f, 0.0f, 0.1f, 0.1f ); + glClearColor( 0.0f, 0.45f, 0.45f, 1.f); + +#if 0 + GLfloat mat_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + GLfloat mat_shininess[] = { 50.0f }; + GLfloat light_position[] = { + -10.f, + 5.f, + -1.f, + 1.0f }; + + glEnable ( GL_COLOR_MATERIAL ); + glShadeModel( GL_SMOOTH ); + glEnable( GL_LINE_SMOOTH ); + + + glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular ); + glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess ); + glLightfv( GL_LIGHT0, GL_POSITION, light_position ); + + //glEnable( GL_LIGHTING ); + //glEnable( GL_LIGHT0 ); // Switch on and crashes! 
+ glEnable( GL_DEPTH_TEST ); +#endif +#if 0 + + + glEnable ( GL_COLOR_MATERIAL ); + glShadeModel( GL_SMOOTH ); + glEnable( GL_LINE_SMOOTH ); + + glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular ); + glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess ); + glLightfv( GL_LIGHT0, GL_POSITION, light_position ); + + glEnable( GL_LIGHTING ); + glEnable( GL_LIGHT0 ); + glEnable( GL_DEPTH_TEST ); +#endif + rotateX = 0; + rotateY = 30; + translateX = 0.0f; + translateY = -30.0f; + translateZ = -120.0; +} + +void display(void) +{ + render(); + + glutSwapBuffers(); + glutPostRedisplay(); +} + +void keyboard( unsigned char key, int /*x*/, int /*y*/) +{ + switch( key) { + case('q') : +#ifdef _WIN32 + case VK_ESCAPE: +#endif //_WIN32 + exit(0); + break; + case('a'): + translateY += 0.1f; + break; + case('z'): + translateY -= 0.1f; + break; + case('d'): + translateX += 0.1f; + break; + case('s'): + translateX -= 0.1f; + break; + case('f'): + translateZ += 0.1f; + break; + case('g'): + translateZ -= 0.1f; + break; + } +} + +void mouse(int button, int state, int x, int y) +{ + if (state == GLUT_DOWN) { + mouseButtons |= 1< +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#endif //_WINDOWS +#endif //APPLE + + +#include + +void goGL(void); +void preInitGL(int argc, char ** argv); + +//int getVBO( std::string, int size ); + +#endif //__GL_WIN_HDR__ diff --git a/Demos/OpenCLClothDemo/gl_win.hpp b/Demos/OpenCLClothDemo/gl_win.hpp new file mode 100644 index 000000000..e7d3f9388 --- /dev/null +++ b/Demos/OpenCLClothDemo/gl_win.hpp @@ -0,0 +1,34 @@ +#ifndef __GL_WIN_HDR__ +#define __GL_WIN_HDR__ + +#ifdef _WIN32//for glut.h +#include +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#endif //_WINDOWS +#endif 
//APPLE + + +#include + +void goGL(void); +void preInitGL(int argc, char ** argv); + +int getVBO( std::string, int size ); + +#endif //__GL_WIN_HDR__ diff --git a/Demos/OpenCLClothDemo/shaders.cl b/Demos/OpenCLClothDemo/shaders.cl new file mode 100644 index 000000000..27e2d219f --- /dev/null +++ b/Demos/OpenCLClothDemo/shaders.cl @@ -0,0 +1,535 @@ +#pragma OPENCL EXTENSION cl_amd_printf : enable + +#define float3 float4 +#define uint3 uint4 + +#define PARTICLE_RADIUS 0.05; + +#define width 1280 +#define height 1024 + +#define B 0 +#define T height +#define L 0 +#define R width + +#define shiftNumber 4 +#define shiftMask 0xF +#define shiftValue 16.0f +#define stride 4 + +#define screenWidth1 width +#define screenHeight1 height +#define halfScreenWidth1 screenWidth1/2 +#define halfScreenHeight1 screenHeight1/2 +#define screenWidth1SubOne (screenWidth1-1) +#define screenHeight1SubOne (screenHeight1-1) +#define stride screenWidth1 +#define screenPixelNumber screenWidth1*screenHeight1 +#define depthBufferSize screenPixelNumber*depthComplexity + +#define WGS 1 + +//--------------------------------------------------------------- + +struct __VSSpriteOut +{ + float4 position; + float4 particlePosition; +}; + +typedef struct __VSSpriteout VSSpriteOut; + +struct __GSSpriteOut +{ + float4 position; + float2 textureUV; +// float4 viewSpacePosition; +// float4 particlePosition; +}; + +typedef struct __GSSpriteout GSSpriteOut; + +//------------------------------------------------------------------------------ + +__constant float4 g_positions[4] = +{ + (float4)(-1.0f, 1.0f, 0.0f, 0.0f), + (float4)( 1.0f, 1.0f, 0.0f, 0.0f), + (float4)( -1.0f, -1.0f, 0.0f, 0.0f), + (float4)( 1.0f, -1.0f, 0.0f, 0.0f) +}; + +__constant float2 g_texcoords[4] = +{ + (float2)(0.0f,0.0f), + (float2)(1.0f,0.0f), + (float2)(0.0f,1.0f), + (float2)(1.0f,1.0f) +}; + +//------------------------------------------------------------------------------ + +void copyMatrix( + float matrix[16], + __constant float 
matrix0[16]) +{ + uint i; + + for (i = 0; i < 16; i++) { + matrix[i] = matrix0[i]; + } +} + +void matrixMulLoopBody( + uint i, + float matrix[16], + __constant float matrix0[16], + __constant float matrix1[16]) +{ + matrix[i] = 0.0f; + matrix[i] += matrix0[(i%4) + (0*4)] * matrix1[(0) + ((i/4)*4)]; + matrix[i] += matrix0[(i%4) + (1*4)] * matrix1[(1) + ((i/4)*4)]; + matrix[i] += matrix0[(i%4) + (2*4)] * matrix1[(2) + ((i/4)*4)]; + matrix[i] += matrix0[(i%4) + (3*4)] * matrix1[(3) + ((i/4)*4)]; +} + +void matrixMul( + float matrix[16], + __constant float matrix0[16], + __constant float matrix1[16]) +{ + matrixMulLoopBody(0, matrix, matrix0, matrix1); + matrixMulLoopBody(1, matrix, matrix0, matrix1); + matrixMulLoopBody(2, matrix, matrix0, matrix1); + matrixMulLoopBody(3, matrix, matrix0, matrix1); + matrixMulLoopBody(4, matrix, matrix0, matrix1); + matrixMulLoopBody(5, matrix, matrix0, matrix1); + matrixMulLoopBody(6, matrix, matrix0, matrix1); + matrixMulLoopBody(7, matrix, matrix0, matrix1); + matrixMulLoopBody(8, matrix, matrix0, matrix1); + matrixMulLoopBody(9, matrix, matrix0, matrix1); + matrixMulLoopBody(10, matrix, matrix0, matrix1); + matrixMulLoopBody(11, matrix, matrix0, matrix1); + matrixMulLoopBody(12, matrix, matrix0, matrix1); + matrixMulLoopBody(13, matrix, matrix0, matrix1); + matrixMulLoopBody(14, matrix, matrix0, matrix1); + matrixMulLoopBody(15, matrix, matrix0, matrix1); +} + +float4 matrixVectorMul(float matrix[16], float4 vector) +{ + float4 result; + + result.x = matrix[0]*vector.x + matrix[4+0]*vector.y + matrix[8+0]*vector.z + matrix[12+0]*vector.w; + result.y = matrix[1]*vector.x + matrix[4+1]*vector.y + matrix[8+1]*vector.z + matrix[12+1]*vector.w; + result.z = matrix[2]*vector.x + matrix[4+2]*vector.y + matrix[8+2]*vector.z + matrix[12+2]*vector.w; + result.w = matrix[3]*vector.x + matrix[4+3]*vector.y + matrix[8+3]*vector.z + matrix[12+3]*vector.w; + + return result; +} + +float3 matrixVector3Mul(__constant float matrix[9], float3 
vector) +{ + float3 result; + + result.x = matrix[0]*vector.x + matrix[3+0]*vector.y + matrix[6+0]*vector.z; + result.y = matrix[1]*vector.x + matrix[3+1]*vector.y + matrix[6+1]*vector.z; + result.z = matrix[2]*vector.x + matrix[3+2]*vector.y + matrix[6+2]*vector.z; + + return result; +} + +//------------------------------------------------------------------------------ + +//#define DEVICE_CPU 1 +#if defined(DEVICE_CPU) +void printMatrix(char * name, __constant float matrix[16]) +{ + printf("%s[0] = %f, %f, %f, %f\n", name, matrix[0], matrix[1], matrix[2], matrix[3]); + printf("%s[1] = %f, %f, %f, %f\n", name, matrix[4], matrix[5], matrix[6], matrix[7]); + printf("%s[2] = %f, %f, %f, %f\n", name, matrix[8], matrix[9], matrix[10], matrix[11]); + printf("%s[3] = %f, %f, %f, %f\n", name, matrix[12], matrix[13], matrix[14], matrix[15]); +} +#endif + +#if 1 +__kernel void vertexShader( + __constant float modelview[16], + __constant float projection[16], + __global float4 * inputPrimitives, + __global float4 * outputPrimitives) +{ + float matrix[16]; + float4 gl_Vertex; + float4 gl_Position; + + uint id = get_global_id(0); + + gl_Vertex = inputPrimitives[id]; + + // gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex + matrixMul(matrix, projection, modelview); + + gl_Position = matrixVectorMul(matrix, gl_Vertex); + + outputPrimitives[id] = gl_Position; +} + +#else + +__kernel void vertexShader( + __constant float modelview[16], + __constant float projection[16], + __global float4 * inputPrimitives, + __global float4 * outputPrimitives) +{ + uint id = get_global_id(0); + + outputPrimitives[id] = inputPrimitives[id]; +} + +#endif + +//----------------------------------------------------------------------------------- + +__kernel void +clearImage( + __write_only image2d_t image, + float4 color) +{ + + int2 coords = (int2)(get_global_id(0), get_global_id(1)); + write_imagef(image, coords, color); +} + +// OpenGL viewport transformation +// The site 
http://research.cs.queensu.ca/~jstewart/454/notes/pipeline/ +// contains a description of this process +void +viewportTransform(float4 v, __constant int4 viewport[1], float2 * output) +{ + int4 vp = viewport[0]; + *output + = 0.5f * + (float2)(v.x+1,v.y+1) * + (float2)((vp.s2-vp.s0) + vp.s0, + (vp.s3-vp.s1) + vp.s1); +} + +#define PARTICLE_WIDTH 32.0f +#define PARTICLE_HEIGHT 32.0f + +// Unoptimized triangle rasterizer function +// Details of the algorithm can be found here: +// http://www.devmaster.net/forums/showthread.php?t=1884 +// +void +rasterizerUnOpt( + __global struct __GSSpriteOut * outputPrimitives, +// __global float4 * outputPrimitives, + __constant int4 viewport[1], + __write_only image2d_t screen, + __read_only image2d_t particle, + uint v1Offset, + uint v2Offset, + uint v3Offset, + __global float4 * debugOut1) +{ + sampler_t sampler = + CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + + uint id = get_global_id(0); + + struct __GSSpriteOut output; + float2 v1, v2, v3; + float2 uv1, uv2, uv3; + + output = outputPrimitives[id*4+v1Offset]; + uv1 = output.textureUV; + viewportTransform(output.position, viewport, &v1); + + output = outputPrimitives[id*4+v2Offset]; + uv2 = output.textureUV; + viewportTransform(output.position, viewport, &v2); + + output = outputPrimitives[id*4+v3Offset]; + uv3 = output.textureUV; + viewportTransform(output.position, viewport, &v3); + + // Bounding rectangle + int2 min_ = convert_int2(min(v1, min(v2, v3))); + int2 max_ = convert_int2(max(v1, max(v2, v3))); + + // naive bi-linear interploation for texture coords, note this is + // broken with respect to OpenGL and needs to be fixed for the + // general case. 
+ float p1x = v2.x - v1.x; + float p1y = v2.y - v1.y; + + float p2x = v3.x - v1.x; + float p2y = v3.y - v1.y; + + // Scan through bounding rectangle + for(int y = min_.y; y < max_.y; y++) { + for(int x = min_.x; x < max_.x; x++) { + // When all half-space functions positive, pixel is in triangle + if((v1.x - v2.x) * (y - v1.y) - (v1.y - v2.y) * (x - v1.x) > 0 && + (v2.x - v3.x) * (y - v2.y) - (v2.y - v3.y) * (x - v2.x) > 0 && + (v3.x - v1.x) * (y - v3.y) - (v3.y - v1.y) * (x - v3.x) > 0) { + + float px = x - v1.x; + float py = y - v1.y; + + write_imagef( + screen, + (int2)(x,y), + // texel); + (float4)(1.0f,1.0f,1.0f,1.0f)); + } + } + } +} + +// Optimized rasterizer function +// Details of the algorithm can be found here: +// http://www.devmaster.net/forums/showthread.php?t=1884 +// +// Currently has a bug, still work in progess +__kernel void +rasterizerXX( + __global float4 * outputPrimitives, + __write_only image2d_t screen, + __global float4 * debugOut1, + __global int2 * debugOut2) +{ + uint id = get_global_id(0); + +// printf("ras\n"); + + float4 v1 = outputPrimitives[id*4+0]; + float4 v2 = outputPrimitives[id*4+1]; + float4 v3 = outputPrimitives[id*4+2]; + + float y1 = 0.5f* (v1.y+1) * (T - B) + B; + float y2 = 0.5f* (v2.y+1) * (T - B) + B; + float y3 = 0.5f* (v3.y+1) * (T - B) + B; + + float x1 = 0.5f * (v1.x+1) * (R - L) + L; + float x2 = 0.5f * (v2.x+1) * (R - L) + L; + float x3 = 0.5f * (v3.x+1) * (R - L) + L; + + const int Y1 = convert_int(shiftValue * y1); + const int Y2 = convert_int(shiftValue * y2); + const int Y3 = convert_int(shiftValue * y3); + + const int X1 = convert_int(shiftValue * x1); + const int X2 = convert_int(shiftValue * x2); + const int X3 = convert_int(shiftValue * x3); + + debugOut1[id*4+0] = v1; + debugOut1[id*4+1] = v2; + debugOut1[id*4+2] = v3; + + debugOut2[id*3+0] = (int2)(X1, Y1); + debugOut2[id*3+1] = (int2)(X2, Y2); + debugOut2[id*3+2] = (int2)(X3, Y3); + + // Deltas + const int DX12 = X1 - X2; + const int DX23 = X2 - X3; + 
const int DX31 = X3 - X1; + + const int DY12 = Y1 - Y2; + const int DY23 = Y2 - Y3; + const int DY31 = Y3 - Y1; + + // Fixed-point deltas + const int FDX12 = DX12 << shiftNumber; + const int FDX23 = DX23 << shiftNumber; + const int FDX31 = DX31 << shiftNumber; + + const int FDY12 = DY12 << shiftNumber; + const int FDY23 = DY23 << shiftNumber; + const int FDY31 = DY31 << shiftNumber; + + // Bounding rectangle + int minx = (min(X1, min(X2, X3)) + shiftMask) >> shiftNumber; + //minx = max(0,minx); + + int maxx = (max(X1, min(X2, X3)) + shiftMask) >> shiftNumber; + //min(maxx , screenWidth1SubOne); + + int miny = (min(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber; + //max(0,miny); + + int maxy = (max(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber; + //min(maxy , screenHeight1SubOne); + + //(char*&)colorBuffer += miny * stride; + int offset = miny * stride; + + // Half-edge constants + int C1 = DY12 * X1 - DX12 * Y1; + int C2 = DY23 * X2 - DX23 * Y2; + int C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + + int CY1 = C1 + DX12 * (miny << shiftNumber) - DY12 * (minx << shiftNumber); + int CY2 = C2 + DX23 * (miny << shiftNumber) - DY23 * (minx << shiftNumber); + int CY3 = C3 + DX31 * (miny << shiftNumber) - DY31 * (minx << shiftNumber); + + for(int y = miny; y < maxy; y++) { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + debugOut2[id*3+0] = (int2)(minx, maxx); + + for(int x = minx; x < maxx; x++) { + debugOut2[id*3+0] = (int2)(CX1, CX2); + + if(CX1 > 0 && CX2 > 0 && CX3 > 0) { + debugOut2[id*3+0] = (int2)(1, 1); + write_imagef( + screen, + (int2)(x,y), + (float4)(1.0f,1.0f,1.0f,1.0f)); + } + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + + //(char*&)colorBuffer += stride; + offset += stride; + } +} + 
+//------------------------------------------------------------------------------ + +void geometryShader( + __constant float modelview[16], + __constant float projection[16], + __constant float inverseView[9], + __constant int4 viewport[1], + __local struct __VSSpriteOut * vsOutputPrimitives, + __global struct __GSSpriteOut * outputPrimitives, +// __global float4 * outputPrimitives, + __write_only image2d_t screen, + __read_only image2d_t particle, + __global float4 * debugOut1, + __global int * debugOut2) +{ + float2 texcoords[4] = + { + (float2)(0.0f,0.0f), + (float2)(1.0f,0.0f), + (float2)(0.0f,1.0f), + (float2)(1.0f,1.0f) + }; + + float matrix[16]; + + uint id = get_global_id(0); + uint lid = get_local_id(0); + + float4 vsPosition = vsOutputPrimitives[lid].position; + + matrixMul(matrix, projection, modelview); + // + // Emit two new triangles + // + for (uint i = 0; i<4; i++) { + float3 position = g_positions[i] * PARTICLE_RADIUS; + position = matrixVector3Mul(inverseView, position) + vsPosition; + float3 particlePosition = + matrixVector3Mul( + inverseView, + (float4)(0.0f,0.0f,0.0f,0.0f)) + vsPosition; // world space + + // Compute view space position + position.w = 1.0f; + position = matrixVectorMul(matrix, position); + + //perspective division + position /= position.w; + + struct __GSSpriteOut output; + output.position = position; + //output.textureUV = g_texcoords[i]; + output.textureUV = texcoords[i]; + outputPrimitives[id*4+i] = output; + } + + // Render QUAD - Triangle 1 + rasterizerUnOpt( + outputPrimitives, + viewport, + screen, + particle, + 0, + 1, + 2, + debugOut1); + + // Render QUAD - Triangle 2 + rasterizerUnOpt( + outputPrimitives, + viewport, + screen, + particle, + 2, + 1, + 3, + debugOut1); +} + +__kernel void vertexShaderSprite( + __constant float modelview[16], + __constant float projection[16], + __constant float inverseView[9], + __constant int4 viewport[1], + __local struct __VSSpriteOut * vsOutputPrimitives, + __global float4 * 
inputPrimitives, + __global struct __GSSpriteOut * outputPrimitives, +// __global float4 * outputPrimitives, + __write_only image2d_t screen, + __read_only image2d_t particle, + __global float4 * debugOut1, + __global int * debugOut2) +{ + float matrix[16]; + + uint id = get_global_id(0); + uint lid = get_local_id(0); + + // gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex + matrixMul(matrix, projection, modelview); + + float4 position = inputPrimitives[id]; + vsOutputPrimitives[lid].position = position; + vsOutputPrimitives[lid].particlePosition = + matrixVectorMul(matrix, position); + + geometryShader( + modelview, + projection, + inverseView, + viewport, + vsOutputPrimitives, + outputPrimitives, + screen, + particle, + debugOut1, + debugOut2); +} \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/texture1.bmp b/Demos/OpenCLClothDemo/texture1.bmp new file mode 100644 index 000000000..1d3da81c1 Binary files /dev/null and b/Demos/OpenCLClothDemo/texture1.bmp differ diff --git a/Demos/OpenCLClothDemo/vertex.glsl b/Demos/OpenCLClothDemo/vertex.glsl new file mode 100644 index 000000000..516983023 --- /dev/null +++ b/Demos/OpenCLClothDemo/vertex.glsl @@ -0,0 +1,7 @@ +void main() +{ + //gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex; + + gl_TexCoord[0] = gl_MultiTexCoord0; + gl_Position = gl_Vertex; +} \ No newline at end of file diff --git a/Demos/ParticlesOpenCL/AMD/CMakeLists.txt b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt index 727006017..f4c4422b6 100644 --- a/Demos/ParticlesOpenCL/AMD/CMakeLists.txt +++ b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt @@ -7,6 +7,8 @@ ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL ) ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) @@ -53,15 +55,17 @@ IF (USE_GLUT) ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h 
${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h - ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp - ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ) ELSE (USE_GLUT) diff --git a/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp index 4da7f523c..f35e44d37 100644 --- a/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp +++ b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp @@ -329,7 +329,9 @@ void btParticlesDynamicsWorld::initCLKernels(int argc, char** argv) if (!m_cxMainContext) { // m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); - m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + + m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); + //m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext); diff --git a/Demos/SharedOpenCL/btOclCommon.cpp b/Demos/SharedOpenCL/btOclCommon.cpp index d412ef3c6..48fe105d7 100644 --- a/Demos/SharedOpenCL/btOclCommon.cpp +++ b/Demos/SharedOpenCL/btOclCommon.cpp @@ -85,7 
+85,7 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* /* Use NULL for backward compatibility */ cl_context_properties* cprops = (NULL == platform) ? NULL : cps; cl_context retContext = clCreateContextFromType(cprops, - CL_DEVICE_TYPE_ALL, + deviceType, NULL, NULL, &ciErrNum); diff --git a/Demos/SharedOpenCL/btOclUtils.cpp b/Demos/SharedOpenCL/btOclUtils.cpp index 7af73b92a..6e0823227 100644 --- a/Demos/SharedOpenCL/btOclUtils.cpp +++ b/Demos/SharedOpenCL/btOclUtils.cpp @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + #include #include #include diff --git a/Demos/SharedOpenCL/btOclUtils.h b/Demos/SharedOpenCL/btOclUtils.h index fba65d8c6..309deca50 100644 --- a/Demos/SharedOpenCL/btOclUtils.h +++ b/Demos/SharedOpenCL/btOclUtils.h @@ -1,3 +1,17 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ #ifndef BT_OCL_UTILS_H #define BT_OCL_UTILS_H diff --git a/Demos/VectorAdd_OpenCL/VectorAddKernels.cl b/Demos/VectorAdd_OpenCL/VectorAddKernels.cl index e224eb6ff..f3d5b3486 100644 --- a/Demos/VectorAdd_OpenCL/VectorAddKernels.cl +++ b/Demos/VectorAdd_OpenCL/VectorAddKernels.cl @@ -1,13 +1,4 @@ -#ifndef GUID_ARG -#define GUID_ARG -#endif - - -#ifndef MSTRINGIFY -#define MSTRINGIFY(A) A -#endif - MSTRINGIFY( diff --git a/src/BulletMultiThreaded/CMakeLists.txt b/src/BulletMultiThreaded/CMakeLists.txt index b4207a72b..6267a5307 100644 --- a/src/BulletMultiThreaded/CMakeLists.txt +++ b/src/BulletMultiThreaded/CMakeLists.txt @@ -67,10 +67,8 @@ ADD_LIBRARY(BulletMultiThreaded ) -#for now, only Direct 11 (Direct Compute) -IF(USE_DX11) - SUBDIRS(GpuSoftBodySolvers) -ENDIF(USE_DX11) + +SUBDIRS(GpuSoftBodySolvers) IF (BUILD_SHARED_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt index aaf2e4bef..63cc88b7a 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt @@ -3,20 +3,12 @@ INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src ) -LIST(APPEND SubDirList "CPU") +SUBDIRS ( + OpenCL + 
CPU +) -# Configure use of OpenCL and DX11 -# Generates the settings file and defines libraries and include paths -OPTION(USE_OPENCL "Use OpenCL" OFF) - - - -if( USE_OPENCL ) - LIST(APPEND SubDirList "OpenCL") -endif( USE_OPENCL ) -if( USE_DX11 ) - LIST(APPEND SubDirList "DX11") -endif( USE_DX11 ) - -SUBDIRS( ${SubDirList} ) +IF( USE_DX11 ) + SUBDIRS( DX11 ) +ENDIF( USE_DX11 ) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt index 5fea665b0..3bfffcdcb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt @@ -14,14 +14,17 @@ ${VECTOR_MATH_INCLUDE} SET(BulletSoftBodyDX11Solvers_SRCS btSoftBodySolver_DX11.cpp + btSoftBodySolver_DX11SIMDAware.cpp ) SET(BulletSoftBodyDX11Solvers_HDRS btSoftBodySolver_DX11.h + btSoftBodySolver_DX11SIMDAware.h ../cpu/btSoftBodySolverData.h btSoftBodySolverVertexData_DX11.h btSoftBodySolverTriangleData_DX11.h btSoftBodySolverLinkData_DX11.h + btSoftBodySolverLinkData_DX11SIMDAware.h btSoftBodySolverBuffer_DX11.h btSoftBodySolverVertexBuffer_DX11.h @@ -37,6 +40,7 @@ SET(BulletSoftBodyDX11Solvers_Shaders UpdatePositions UpdateNodes SolvePositions + SolvePositionsSIMDBatched UpdatePositionsFromVelocities ApplyForces PrepareLinks diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl new file mode 100644 index 000000000..5106f612d --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl @@ -0,0 +1,128 @@ +MSTRINGIFY( + +cbuffer SolvePositionsFromLinksKernelCB : register( b0 ) +{ + int startWaveInBatch; + int numWaves; + float kst; + float ti; +}; + + +// Number of batches per wavefront stored one element per logical wavefront +StructuredBuffer g_wavefrontBatchCountsVertexCounts : register( t0 
); +// Set of up to maxNumVertices vertex addresses per wavefront +StructuredBuffer g_vertexAddressesPerWavefront : register( t1 ); + +StructuredBuffer g_verticesInverseMass : register( t2 ); + +// Per-link data layed out structured in terms of sub batches within wavefronts +StructuredBuffer g_linksVertexIndices : register( t3 ); +StructuredBuffer g_linksMassLSC : register( t4 ); +StructuredBuffer g_linksRestLengthSquared : register( t5 ); + +RWStructuredBuffer g_vertexPositions : register( u0 ); + +// Data loaded on a per-wave basis +groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER]; +groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; +groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; + +// Storing the vertex addresses actually slowed things down a little +//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; + + +[numthreads(BLOCK_SIZE, 1, 1)] +void +SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1)); + const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE); + const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER; + const int localWavefront = wavefront - firstWavefrontInBlock; + + // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier + if( wavefront < (startWaveInBatch + numWaves) ) + { + + // Load the batch counts for the wavefronts + // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier + if( laneInWavefront == 0 ) + { + int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[firstWavefrontInBlock + localWavefront]; + wavefrontBatchCountsVertexCounts[localWavefront] = 
batchesAndVertexCountsWithinWavefront; + } + + + int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront]; + int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; + int verticesUsedByWave = batchesAndVerticesWithinWavefront.y; + + // Load the vertices for the wavefronts + for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) + { + int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + + //vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress; + vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress]; + vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress]; + } + + // Loop through the batches performing the solve on each in LDS + int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE; + + //for( int batch = 0; batch < batchesWithinWavefront; ++batch ) + + int batch = 0; + do + { + int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch; + int locationOfValue = baseDataLocation + laneInWavefront; + + + // These loads should all be perfectly linear across the WF + int2 localVertexIndices = g_linksVertexIndices[locationOfValue]; + float massLSC = g_linksMassLSC[locationOfValue]; + float restLengthSquared = g_linksRestLengthSquared[locationOfValue]; + + + // LDS vertex addresses based on logical wavefront number in block and loaded index + int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x; + int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y; + + float3 position0 = vertexPositionSharedData[vertexAddress0].xyz; + float3 position1 = vertexPositionSharedData[vertexAddress1].xyz; + + float inverseMass0 = vertexInverseMassSharedData[vertexAddress0]; + float inverseMass1 = 
vertexInverseMassSharedData[vertexAddress1]; + + float3 del = position1 - position0; + float len = dot(del, del); + + float k = 0; + if( massLSC > 0.0f ) + { + k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst; + } + + position0 = position0 - del*(k*inverseMass0); + position1 = position1 + del*(k*inverseMass1); + + vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f); + vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f); + + ++batch; + } while( batch < batchesWithinWavefront ); + + // Update the global memory vertices for the wavefronts + for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) + { + int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + + g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + } + } + +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h new file mode 100644 index 000000000..92864a159 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h @@ -0,0 +1,173 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_DX11.h" + +#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H +#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H + +struct ID3D11Device; +struct ID3D11DeviceContext; + + +class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData +{ +public: + bool m_onGPU; + ID3D11Device *m_d3dDevice; + ID3D11DeviceContext *m_d3dDeviceContext; + + const int m_wavefrontSize; + const int m_linksPerWorkItem; + const int m_maxLinksPerWavefront; + int m_maxBatchesWithinWave; + int m_maxVerticesWithinWave; + int m_numWavefronts; + + int m_maxVertex; + + struct NumBatchesVerticesPair + { + int numBatches; + int numVertices; + }; + + // Array storing number of links in each wavefront + btAlignedObjectArray m_linksPerWavefront; + btAlignedObjectArray m_numBatchesAndVerticesWithinWaves; + btDX11Buffer< NumBatchesVerticesPair > m_dx11NumBatchesAndVerticesWithinWaves; + + // All arrays here will contain batches of m_maxLinksPerWavefront links + // ordered by wavefront. 
+ // with either global vertex pairs or local vertex pairs + btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront + btDX11Buffer m_dx11WavefrontVerticesGlobalAddresses; + btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link + btDX11Buffer m_dx11LinkVerticesLocalAddresses; + btDX11Buffer m_dx11LinkStrength; + btDX11Buffer m_dx11LinksMassLSC; + btDX11Buffer m_dx11LinksRestLengthSquared; + btDX11Buffer m_dx11LinksRestLength; + btDX11Buffer m_dx11LinksMaterialLinearStiffnessCoefficient; + + struct BatchPair + { + int start; + int length; + + BatchPair() : + start(0), + length(0) + { + } + + BatchPair( int s, int l ) : + start( s ), + length( l ) + { + } + }; + + /** + * Link addressing information for each cloth. + * Allows link locations to be computed independently of data batching. + */ + btAlignedObjectArray< int > m_linkAddresses; + + /** + * Start and length values for computation batches over link data. + */ + btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths; + + + //ID3D11Buffer* readBackBuffer; + + btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ); + + virtual ~btSoftBodyLinkDataDX11SIMDAware(); + + /** Allocate enough space in all link-related arrays to fit numLinks links */ + virtual void createLinks( int numLinks ); + + /** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */ + virtual void setLinkAt( const LinkDescription &link, int linkIndex ); + + virtual bool onAccelerator(); + + virtual bool moveToAccelerator(); + + virtual bool moveFromAccelerator(); + + /** + * Generate (and later update) the batching for the entire link set. + * This redoes a lot of work because it batches the entire set when each cloth is inserted. + * In theory we could delay it until just before we need the cloth. 
+ * It's a one-off overhead, though, so that is a later optimisation. + */ + void generateBatches(); + + int getMaxVerticesPerWavefront() + { + return m_maxVerticesWithinWave; + } + + int getWavefrontSize() + { + return m_wavefrontSize; + } + + int getLinksPerWorkItem() + { + return m_linksPerWorkItem; + } + + int getMaxLinksPerWavefront() + { + return m_maxLinksPerWavefront; + } + + int getMaxBatchesPerWavefront() + { + return m_maxBatchesWithinWave; + } + + int getNumWavefronts() + { + return m_numWavefronts; + } + + NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) + { + return m_numBatchesAndVerticesWithinWaves[wavefront]; + } + + int getVertexGlobalAddresses( int vertexIndex ) + { + return m_wavefrontVerticesGlobalAddresses[vertexIndex]; + } + + /** + * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally. + */ + LinkNodePair getVertexPairLocalAddresses( int linkIndex ) + { + return m_linkVerticesLocalAddresses[linkIndex]; + } + +}; + + +#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp index 9c9b325a8..7877aa6a0 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp @@ -622,7 +622,7 @@ void btDX11SoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softB using Vectormath::Aos::Point3; // Create SoftBody that will store the information within the solver - btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + btDX11AcceleratedSoftBodyInterface *newSoftBody = new btDX11AcceleratedSoftBodyInterface( softBody ); m_softBodySet.push_back( newSoftBody ); m_perClothAcceleration.push_back( 
toVector3(softBody->getWorldInfo()->m_gravity) ); @@ -1451,11 +1451,11 @@ void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float -btDX11SoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +btDX11AcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) { for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + btDX11AcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; if( softBodyInterface->getSoftBody() == softBody ) return softBodyInterface; } @@ -1466,7 +1466,7 @@ void btDX11SoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const { checkInitialized(); - btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + btDX11AcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); const int firstVertex = currentCloth->getFirstVertex(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h index a61e5166c..ea5b3d462 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h @@ -13,6 +13,9 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. 
*/ +#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H +#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H + #include "vectormath/vmInclude.h" #include "BulletSoftBody/btSoftBodySolvers.h" @@ -22,185 +25,184 @@ subject to the following restrictions: #include "btSoftBodySolverTriangleData_DX11.h" -#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H -#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H + +/** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. + */ +class btDX11AcceleratedSoftBodyInterface +{ +protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + +public: + btDX11AcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { 
+ return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + // TODO: All of these set functions will have to do checks and + // update the world because restructuring of the arrays will be necessary + // Reasonable use of "friend"? + void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + +#if 0 + void setAcceleration( Vectormath::Aos::Vector3 acceleration ) + { + m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); + } + + void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) + { + m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); + } + + /** + * Set the density of the air in which the cloth is situated. + */ + void setAirDensity( btScalar density ) + { + m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); + } + + /** + * Add a collision object to this soft body. 
+ */ + void addCollisionObject( btCollisionObject *collisionObject ) + { + m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); + } +#endif +}; + class btDX11SoftBodySolver : public btSoftBodySolver { public: - /** - * SoftBody class to maintain information about a soft body instance - * within a solver. - * This data addresses the main solver arrays. - */ - class btAcceleratedSoftBodyInterface - { - protected: - /** Current number of vertices that are part of this cloth */ - int m_numVertices; - /** Maximum number of vertices allocated to be part of this cloth */ - int m_maxVertices; - /** Current number of triangles that are part of this cloth */ - int m_numTriangles; - /** Maximum number of triangles allocated to be part of this cloth */ - int m_maxTriangles; - /** Index of first vertex in the world allocated to this cloth */ - int m_firstVertex; - /** Index of first triangle in the world allocated to this cloth */ - int m_firstTriangle; - /** Index of first link in the world allocated to this cloth */ - int m_firstLink; - /** Maximum number of links allocated to this cloth */ - int m_maxLinks; - /** Current number of links allocated to this cloth */ - int m_numLinks; - - /** The actual soft body this data represents */ - btSoftBody *m_softBody; - - - public: - btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : - m_softBody( softBody ) - { - m_numVertices = 0; - m_maxVertices = 0; - m_numTriangles = 0; - m_maxTriangles = 0; - m_firstVertex = 0; - m_firstTriangle = 0; - m_firstLink = 0; - m_maxLinks = 0; - m_numLinks = 0; - } - int getNumVertices() - { - return m_numVertices; - } - - int getNumTriangles() - { - return m_numTriangles; - } - - int getMaxVertices() - { - return m_maxVertices; - } - - int getMaxTriangles() - { - return m_maxTriangles; - } - - int getFirstVertex() - { - return m_firstVertex; - } - - int getFirstTriangle() - { - return m_firstTriangle; - } - - // TODO: All of these set functions will have to do 
checks and - // update the world because restructuring of the arrays will be necessary - // Reasonable use of "friend"? - void setNumVertices( int numVertices ) - { - m_numVertices = numVertices; - } - - void setNumTriangles( int numTriangles ) - { - m_numTriangles = numTriangles; - } - - void setMaxVertices( int maxVertices ) - { - m_maxVertices = maxVertices; - } - - void setMaxTriangles( int maxTriangles ) - { - m_maxTriangles = maxTriangles; - } - - void setFirstVertex( int firstVertex ) - { - m_firstVertex = firstVertex; - } - - void setFirstTriangle( int firstTriangle ) - { - m_firstTriangle = firstTriangle; - } - - void setMaxLinks( int maxLinks ) - { - m_maxLinks = maxLinks; - } - - void setNumLinks( int numLinks ) - { - m_numLinks = numLinks; - } - - void setFirstLink( int firstLink ) - { - m_firstLink = firstLink; - } - - int getMaxLinks() - { - return m_maxLinks; - } - - int getNumLinks() - { - return m_numLinks; - } - - int getFirstLink() - { - return m_firstLink; - } - - btSoftBody* getSoftBody() - { - return m_softBody; - } - - #if 0 - void setAcceleration( Vectormath::Aos::Vector3 acceleration ) - { - m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); - } - - void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) - { - m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); - } - - /** - * Set the density of the air in which the cloth is situated. - */ - void setAirDensity( btScalar density ) - { - m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); - } - - /** - * Add a collision object to this soft body. - */ - void addCollisionObject( btCollisionObject *collisionObject ) - { - m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); - } - #endif - }; - class KernelDesc { @@ -344,7 +346,7 @@ private: * Cloths owned by this solver. * Only our cloths are in this array. 
*/ - btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + btAlignedObjectArray< btDX11AcceleratedSoftBodyInterface * > m_softBodySet; /** Acceleration value to be applied to all non-static vertices in the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver. @@ -429,7 +431,7 @@ private: void updateConstants( float timeStep ); - btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + btDX11AcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); ////////////////////////////////////// // Kernel dispatches diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp new file mode 100644 index 000000000..c72dead3e --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp @@ -0,0 +1,1793 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include + + +#define WAVEFRONT_SIZE 32 +#define WAVEFRONT_BLOCK_MULTIPLIER 2 +#define LINKS_PER_SIMD_LANE 16 + +#define STRINGIFY( S ) STRINGIFY2( S ) +#define STRINGIFY2( S ) #S + +#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" +#include "vectormath/vmInclude.h" + +#include "btSoftBodySolverLinkData_DX11SIMDAware.h" +#include "btSoftBodySolver_DX11SIMDAware.h" +#include "btSoftBodySolverVertexBuffer_DX11.h" +#include "BulletSoftBody/btSoftBody.h" + +#define MSTRINGIFY(A) #A +static char* PrepareLinksHLSLString = +#include "HLSL/PrepareLinks.hlsl" +static char* UpdatePositionsFromVelocitiesHLSLString = +#include "HLSL/UpdatePositionsFromVelocities.hlsl" +static char* SolvePositionsSIMDBatchedHLSLString = +#include "HLSL/SolvePositionsSIMDBatched.hlsl" +static char* UpdateNodesHLSLString = +#include "HLSL/UpdateNodes.hlsl" +static char* UpdatePositionsHLSLString = +#include "HLSL/UpdatePositions.hlsl" +static char* UpdateConstantsHLSLString = +#include "HLSL/UpdateConstants.hlsl" +static char* IntegrateHLSLString = +#include "HLSL/Integrate.hlsl" +static char* ApplyForcesHLSLString = +#include "HLSL/ApplyForces.hlsl" +static char* UpdateNormalsHLSLString = +#include "HLSL/UpdateNormals.hlsl" +static char* OutputToVertexArrayHLSLString = +#include "HLSL/OutputToVertexArray.hlsl" +static char* VSolveLinksHLSLString = +#include "HLSL/VSolveLinks.hlsl" + + + +btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : + m_d3dDevice( d3dDevice ), + m_d3dDeviceContext( d3dDeviceContext ), + m_wavefrontSize( WAVEFRONT_SIZE ), + m_linksPerWorkItem( LINKS_PER_SIMD_LANE ), + m_maxBatchesWithinWave( 0 ), + m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ), + m_numWavefronts( 0 ), + m_maxVertex( 0 ), + m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ), + m_dx11WavefrontVerticesGlobalAddresses( 
d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ), + m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ), + m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ), + m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ), + m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ), + m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ), + m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true ) +{ + m_onGPU = false; // Nothing has been moved to the accelerator yet; onAccelerator() must not read an indeterminate value + m_maxVerticesWithinWave = 0; // Like m_maxBatchesWithinWave, start from zero rather than an indeterminate value +} + +btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware() +{ +} + +static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec ) +{ + Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() ); + return outVec; +} + +void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks ) +{ + int previousSize = m_links.size(); + int newSize = previousSize + numLinks; + + btSoftBodyLinkData::createLinks( numLinks ); + + // Resize the link addresses array as well + m_linkAddresses.resize( newSize ); +} + +void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex ) +{ + btSoftBodyLinkData::setLinkAt( link, linkIndex ); + + if( link.getVertex0() > m_maxVertex ) + m_maxVertex = link.getVertex0(); + if( link.getVertex1() > m_maxVertex ) + m_maxVertex = link.getVertex1(); + + // Set the link index correctly for initialisation + m_linkAddresses[linkIndex] = linkIndex; +} + +bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator() +{ + return m_onGPU; +} + +bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator() +{ + bool success = true; + + success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU(); + success = success && m_dx11WavefrontVerticesGlobalAddresses.moveToGPU(); + success = 
success && m_dx11LinkVerticesLocalAddresses.moveToGPU(); + success = success && m_dx11LinkStrength.moveToGPU(); + success = success && m_dx11LinksMassLSC.moveToGPU(); + success = success && m_dx11LinksRestLengthSquared.moveToGPU(); + success = success && m_dx11LinksRestLength.moveToGPU(); + success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU(); + + if( success ) + m_onGPU = true; + + return success; +} + +bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator() +{ + bool success = true; + success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU(); + success = success && m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU(); + success = success && m_dx11LinkVerticesLocalAddresses.moveFromGPU(); + success = success && m_dx11LinkStrength.moveFromGPU(); + success = success && m_dx11LinksMassLSC.moveFromGPU(); + success = success && m_dx11LinksRestLengthSquared.moveFromGPU(); + success = success && m_dx11LinksRestLength.moveFromGPU(); + success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU(); + + if( success ) + m_onGPU = false; + + return success; +} + + + + + + + + + + + + + + + +btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context) : + m_dx11Device( dx11Device ), + m_dx11Context( dx11Context ), + m_linkData(m_dx11Device, m_dx11Context), + m_vertexData(m_dx11Device, m_dx11Context), + m_triangleData(m_dx11Device, m_dx11Context), + m_dx11PerClothAcceleration( m_dx11Device, m_dx11Context, &m_perClothAcceleration, true ), + m_dx11PerClothWindVelocity( m_dx11Device, m_dx11Context, &m_perClothWindVelocity, true ), + m_dx11PerClothDampingFactor( m_dx11Device, m_dx11Context, &m_perClothDampingFactor, true ), + m_dx11PerClothVelocityCorrectionCoefficient( m_dx11Device, m_dx11Context, &m_perClothVelocityCorrectionCoefficient, true ), + m_dx11PerClothLiftFactor( m_dx11Device, m_dx11Context, &m_perClothLiftFactor, true ), + 
m_dx11PerClothDragFactor( m_dx11Device, m_dx11Context, &m_perClothDragFactor, true ), + m_dx11PerClothMediumDensity( m_dx11Device, m_dx11Context, &m_perClothMediumDensity, true ) +{ + // Initial we will clearly need to update solver constants + // For now this is global for the cloths linked with this solver - we should probably make this body specific + // for performance in future once we understand more clearly when constants need to be updated + m_updateSolverConstants = true; + + m_shadersInitialized = false; +} + +void btDX11SIMDAwareSoftBodySolver::releaseKernels() +{ + SAFE_RELEASE( integrateKernel.constBuffer ); + SAFE_RELEASE( integrateKernel.kernel ); + SAFE_RELEASE( solvePositionsFromLinksKernel.constBuffer ); + SAFE_RELEASE( solvePositionsFromLinksKernel.kernel ); + SAFE_RELEASE( updatePositionsFromVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updatePositionsFromVelocitiesKernel.kernel ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel ); + SAFE_RELEASE( resetNormalsAndAreasKernel.constBuffer ); + SAFE_RELEASE( resetNormalsAndAreasKernel.kernel ); + SAFE_RELEASE( normalizeNormalsAndAreasKernel.constBuffer ); + SAFE_RELEASE( normalizeNormalsAndAreasKernel.kernel ); + SAFE_RELEASE( updateSoftBodiesKernel.constBuffer ); + SAFE_RELEASE( updateSoftBodiesKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.kernel ); + + + SAFE_RELEASE( addVelocityKernel.constBuffer ); + SAFE_RELEASE( addVelocityKernel.kernel ); + SAFE_RELEASE( applyForcesKernel.constBuffer ); + SAFE_RELEASE( 
applyForcesKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayKernel.kernel ); + SAFE_RELEASE( collideCylinderKernel.constBuffer ); + SAFE_RELEASE( collideCylinderKernel.kernel ); + + m_shadersInitialized = false; +} + +btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver() +{ + releaseKernels(); +} + + +void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ) +{ + if( m_softBodySet.size() != softBodies.size() ) + { + // Have a change in the soft body set so update, reloading all the data + getVertexData().clear(); + getTriangleData().clear(); + getLinkData().clear(); + // The interfaces in m_softBodySet were allocated with new by an earlier call; free them before dropping the pointers + for( int oldIndex = 0; oldIndex < m_softBodySet.size(); ++oldIndex ) delete m_softBodySet[oldIndex]; + m_softBodySet.resize(0); + for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) + { + btSoftBody *softBody = softBodies[ softBodyIndex ]; + using Vectormath::Aos::Matrix3; + using Vectormath::Aos::Point3; + + // Create SoftBody that will store the information within the solver + btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + m_softBodySet.push_back( newSoftBody ); + + m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); + m_perClothDampingFactor.push_back(softBody->m_cfg.kDP); + m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF ); + m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); + m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); + m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); + + // Add space for new vertices and triangles in the default solver for now + // TODO: Include space here for tearing too later + int firstVertex = getVertexData().getNumVertices(); + int numVertices = softBody->m_nodes.size(); + int maxVertices = numVertices; + // Allocate space for new vertices in all the vertex arrays + getVertexData().createVertices( maxVertices, softBodyIndex ); + + int firstTriangle = getTriangleData().getNumTriangles(); + 
int numTriangles = softBody->m_faces.size(); + int maxTriangles = numTriangles; + getTriangleData().createTriangles( maxTriangles ); + + // Copy vertices from softbody into the solver + for( int vertex = 0; vertex < numVertices; ++vertex ) + { + Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ()); + btSoftBodyVertexData::VertexDescription desc; + + // TODO: Position in the softbody might be pre-transformed + // or we may need to adapt for the pose. + //desc.setPosition( cloth.getMeshTransform()*multPoint ); + desc.setPosition( multPoint ); + + float vertexInverseMass = softBody->m_nodes[vertex].m_im; + desc.setInverseMass(vertexInverseMass); + getVertexData().setVertexAt( desc, firstVertex + vertex ); + } + + // Copy triangles similarly + // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene + for( int triangle = 0; triangle < numTriangles; ++triangle ) + { + // Note that large array storage is relative to the array not to the cloth + // So we need to add firstVertex to each value + int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0])); + int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0])); + int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0])); + btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex); + getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle ); + + // Increase vertex triangle counts for this triangle + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++; + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++; + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++; + } + + int firstLink = getLinkData().getNumLinks(); + int numLinks = softBody->m_links.size(); + int maxLinks = numLinks; + 
+ // Allocate space for the links + getLinkData().createLinks( numLinks ); + + // Add the links + for( int link = 0; link < numLinks; ++link ) + { + int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]); + int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]); + + btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST); + newLink.setLinkStrength(1.f); + getLinkData().setLinkAt(newLink, firstLink + link); + } + + newSoftBody->setFirstVertex( firstVertex ); + newSoftBody->setFirstTriangle( firstTriangle ); + newSoftBody->setNumVertices( numVertices ); + newSoftBody->setMaxVertices( maxVertices ); + newSoftBody->setNumTriangles( numTriangles ); + newSoftBody->setMaxTriangles( maxTriangles ); + newSoftBody->setFirstLink( firstLink ); + newSoftBody->setNumLinks( numLinks ); + } + + + + updateConstants(0.f); + + + m_linkData.generateBatches(); + m_triangleData.generateBatches(); + + + // Build the shaders to match the batching parameters + buildShaders(); + } + +} + + +btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData() +{ + // TODO: Consider setting link data to "changed" here + return m_linkData; +} + +btSoftBodyVertexData &btDX11SIMDAwareSoftBodySolver::getVertexData() +{ + // TODO: Consider setting vertex data to "changed" here + return m_vertexData; +} + +btSoftBodyTriangleData &btDX11SIMDAwareSoftBodySolver::getTriangleData() +{ + // TODO: Consider setting triangle data to "changed" here + return m_triangleData; +} + + +bool btDX11SIMDAwareSoftBodySolver::checkInitialized() +{ + if( !m_shadersInitialized ) + if( buildShaders() ) + m_shadersInitialized = true; + + return m_shadersInitialized; +} + +void btDX11SIMDAwareSoftBodySolver::resetNormalsAndAreas( int numVertices ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + 
constBuffer.numNodes = numVertices; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( resetNormalsAndAreasKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::resetNormalsAndAreas + +void btDX11SIMDAwareSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + constBuffer.numNodes = numVertices; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexTriangleCount.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( normalizeNormalsAndAreasKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::normalizeNormalsAndAreas + +void btDX11SIMDAwareSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + constBuffer.startFace = firstTriangle; + constBuffer.numFaces = numTriangles; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateSoftBodiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( updateSoftBodiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateSoftBodiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_triangleData.m_dx11VertexIndices.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_triangleData.m_dx11Normal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_triangleData.m_dx11Area.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( updateSoftBodiesKernel.kernel, NULL, 0 ); + + int numBlocks = (numTriangles + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up: unbind exactly the slots bound above (shader resources 0-1, unordered access views 0-3) + ID3D11ShaderResourceView* pViewNULL[2] = { NULL, NULL }; + m_dx11Context->CSSetShaderResources( 0, 2, pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL[2] = { NULL, NULL }; + m_dx11Context->CSSetUnorderedAccessViews( 0, 2, pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 2, pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::executeUpdateSoftBodies + +void btDX11SIMDAwareSoftBodySolver::updateSoftBodies() +{ + using namespace Vectormath::Aos; + + + int numVertices = m_vertexData.getNumVertices(); + int numTriangles = m_triangleData.getNumTriangles(); + + // Ensure data is 
on accelerator + m_vertexData.moveToAccelerator(); + m_triangleData.moveToAccelerator(); + + resetNormalsAndAreas( numVertices ); + + + // Go through triangle batches so updates occur correctly + for( int batchIndex = 0; batchIndex < m_triangleData.m_batchStartLengths.size(); ++batchIndex ) + { + + int startTriangle = m_triangleData.m_batchStartLengths[batchIndex].start; + int numTriangles = m_triangleData.m_batchStartLengths[batchIndex].length; + + executeUpdateSoftBodies( startTriangle, numTriangles ); + } + + + normalizeNormalsAndAreas( numVertices ); + +} // btDX11SIMDAwareSoftBodySolver::updateSoftBodies + + +Vectormath::Aos::Vector3 btDX11SIMDAwareSoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ) +{ + return a*Vectormath::Aos::dot(v, a); +} + +void btDX11SIMDAwareSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ) +{ + float dtInverseMass = solverdt*inverseMass; + if( Vectormath::Aos::lengthSqr(force * dtInverseMass) > Vectormath::Aos::lengthSqr(vertexVelocity) ) + { + vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/dtInverseMass; + } else { + vertexForce += force; + } +} + +void btDX11SIMDAwareSoftBodySolver::applyForces( float solverdt ) +{ + using namespace Vectormath::Aos; + + // Ensure data is on accelerator + m_vertexData.moveToAccelerator(); + m_dx11PerClothAcceleration.moveToGPU(); + m_dx11PerClothLiftFactor.moveToGPU(); + m_dx11PerClothDragFactor.moveToGPU(); + m_dx11PerClothMediumDensity.moveToGPU(); + m_dx11PerClothWindVelocity.moveToGPU(); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + ApplyForcesCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverdt = solverdt; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(ApplyForcesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexArea.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothLiftFactor.getSRV()) ); + m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11PerClothDragFactor.getSRV()) ); + m_dx11Context->CSSetShaderResources( 6, 1, &(m_dx11PerClothWindVelocity.getSRV()) ); + m_dx11Context->CSSetShaderResources( 7, 1, &(m_dx11PerClothAcceleration.getSRV()) ); + m_dx11Context->CSSetShaderResources( 8, 1, &(m_dx11PerClothMediumDensity.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( applyForcesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL 
); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 6, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 7, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 8, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + + +} // btDX11SIMDAwareSoftBodySolver::applyForces + +/** + * Integrate motion on the solver. + */ +void btDX11SIMDAwareSoftBodySolver::integrate( float solverdt ) +{ + // TEMPORARY COPIES + m_vertexData.moveToAccelerator(); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + IntegrateCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverdt = solverdt; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(IntegrateCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( integrateKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::integrate + +float btDX11SIMDAwareSoftBodySolver::computeTriangleArea( + const Vectormath::Aos::Point3 &vertex0, + const Vectormath::Aos::Point3 &vertex1, + const Vectormath::Aos::Point3 
&vertex2 ) +{ + Vectormath::Aos::Vector3 a = vertex1 - vertex0; + Vectormath::Aos::Vector3 b = vertex2 - vertex0; + Vectormath::Aos::Vector3 crossProduct = cross(a, b); + float area = length( crossProduct ); + return area; +} // btDX11SIMDAwareSoftBodySolver::computeTriangleArea + +// Update constants here is a simple CPU version that is run on optimize +void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep ) +{ + using namespace Vectormath::Aos; + + if( m_updateSolverConstants ) + { + m_updateSolverConstants = false; + + // Will have to redo this if we change the structure (tear, maybe) or various other possible changes + + // Initialise link constants + const int numLinks = m_linkData.getNumLinks(); + for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) ); + m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 ))); + float invMass0 = m_vertexData.getInverseMass(vertices.vertex0); + float invMass1 = m_vertexData.getInverseMass(vertices.vertex1); + float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex); + float massLSC = (invMass0 + invMass1)/linearStiffness; + m_linkData.getMassLSC(linkIndex) = massLSC; + float restLength = m_linkData.getRestLength(linkIndex); + float restLengthSquared = restLength*restLength; + m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; + } + } +} // btDX11SIMDAwareSoftBodySolver::updateConstants + + + +void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt ) +{ + + //std::cerr << "'GPU' solve constraints\n"; + using Vectormath::Aos::Vector3; + using Vectormath::Aos::Point3; + using Vectormath::Aos::lengthSqr; + using Vectormath::Aos::dot; + + // Prepare links + int numLinks = m_linkData.getNumLinks(); + int numVertices = m_vertexData.getNumVertices(); + + float kst = 1.f; + float ti = 0.f; + + + 
m_dx11PerClothDampingFactor.moveToGPU(); + m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU(); + + + + // Ensure data is on accelerator + m_linkData.moveToAccelerator(); + m_vertexData.moveToAccelerator(); + + // Solve drift + for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) + { + int it = iteration; + + for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i ) + { + int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start; + int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length; + + solveLinksForPosition( startWave, numWaves, kst, ti ); + } + + } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) + + + + + updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + +} // btDX11SIMDAwareSoftBodySolver::solveConstraints + + + + +////////////////////////////////////// +// Kernel dispatches + + +void btDX11SIMDAwareSoftBodySolver::updatePositionsFromVelocities( float solverdt ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdatePositionsFromVelocitiesCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverSDT = solverdt; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updatePositionsFromVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdatePositionsFromVelocitiesCB) ); + m_dx11Context->Unmap( updatePositionsFromVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updatePositionsFromVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( updatePositionsFromVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::updatePositionsFromVelocities + + +void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti ) +{ + + + m_vertexData.moveToAccelerator(); + m_linkData.moveToAccelerator(); + + // Copy kernel parameters to GPU + SolvePositionsFromLinksKernelCB constBuffer; + + // Set the first wave of the batch and the number of waves + constBuffer.startWave = startWave; + constBuffer.numWaves = numWaves; + + 
constBuffer.kst = kst; + constBuffer.ti = ti; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) ); + m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) ); + m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 ); + + int numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER ); + m_dx11Context->Dispatch(numBlocks , 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, 
&pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition + + +void btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) +{ + // Copy kernel parameters to GPU + UpdateVelocitiesFromPositionsWithVelocitiesCB constBuffer; + + // Set the first link of the batch + // and the batch size + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.isolverdt = isolverdt; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) ); + m_dx11Context->Unmap( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothVelocityCorrectionCoefficient.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothDampingFactor.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + + // Execute the kernel + m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks , 1, 
1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + +} // btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithVelocities + +void btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) +{ + // Copy kernel parameters to GPU + UpdateVelocitiesFromPositionsWithoutVelocitiesCB constBuffer; + + // Set the first link of the batch + // and the batch size + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.isolverdt = isolverdt; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) ); + m_dx11Context->Unmap( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, 
&(m_dx11PerClothDampingFactor.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + + // Execute the kernel + m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks , 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + +} // btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities + +// End kernel dispatches +///////////////////////////////////// + + + + + + + + + +btDX11SIMDAwareSoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SIMDAwareSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +{ + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + if( softBodyInterface->getSoftBody() == softBody ) + return softBodyInterface; + } + return 0; +} + +void btDX11SIMDAwareSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ) +{ + checkInitialized(); + + btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + const int firstVertex = 
currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); + + if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) + { + // If we're doing a CPU-buffer copy must copy the data back to the host first + m_vertexData.m_dx11VertexPosition.copyFromGPU(); + m_vertexData.m_dx11VertexNormal.copyFromGPU(); + + const int firstVertex = currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); + const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); + float *basePointer = cpuVertexBuffer->getBasePointer(); + + if( vertexBuffer->hasVertexPositions() ) + { + const int vertexOffset = cpuVertexBuffer->getVertexOffset(); + const int vertexStride = cpuVertexBuffer->getVertexStride(); + float *vertexPointer = basePointer + vertexOffset; + + for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) + { + Vectormath::Aos::Point3 position = m_vertexData.getPosition(vertexIndex); + *(vertexPointer + 0) = position.getX(); + *(vertexPointer + 1) = position.getY(); + *(vertexPointer + 2) = position.getZ(); + vertexPointer += vertexStride; + } + } + if( vertexBuffer->hasNormals() ) + { + const int normalOffset = cpuVertexBuffer->getNormalOffset(); + const int normalStride = cpuVertexBuffer->getNormalStride(); + float *normalPointer = basePointer + normalOffset; + + for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) + { + Vectormath::Aos::Vector3 normal = m_vertexData.getNormal(vertexIndex); + *(normalPointer + 0) = normal.getX(); + *(normalPointer + 1) = normal.getY(); + *(normalPointer + 2) = normal.getZ(); + normalPointer += normalStride; + } + } + } else if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::DX11_BUFFER ) + { + // Do a DX11 copy shader DX to DX copy + + const btDX11VertexBufferDescriptor *dx11VertexBuffer = static_cast< btDX11VertexBufferDescriptor* 
>(vertexBuffer); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + OutputToVertexArrayCB constBuffer; + ID3D11ComputeShader* outputToVertexArrayShader = outputToVertexArrayWithoutNormalsKernel.kernel; + ID3D11Buffer* outputToVertexArrayConstBuffer = outputToVertexArrayWithoutNormalsKernel.constBuffer; + + constBuffer.startNode = firstVertex; + constBuffer.numNodes = currentCloth->getNumVertices(); + constBuffer.positionOffset = vertexBuffer->getVertexOffset(); + constBuffer.positionStride = vertexBuffer->getVertexStride(); + if( vertexBuffer->hasNormals() ) + { + constBuffer.normalOffset = vertexBuffer->getNormalOffset(); + constBuffer.normalStride = vertexBuffer->getNormalStride(); + outputToVertexArrayShader = outputToVertexArrayWithNormalsKernel.kernel; + outputToVertexArrayConstBuffer = outputToVertexArrayWithNormalsKernel.constBuffer; + } + + // TODO: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( outputToVertexArrayConstBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(OutputToVertexArrayCB) ); + m_dx11Context->Unmap( outputToVertexArrayConstBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &outputToVertexArrayConstBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) ); + + ID3D11UnorderedAccessView* dx11UAV = dx11VertexBuffer->getDX11UAV(); + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(dx11UAV), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( outputToVertexArrayShader, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + 
ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + } + +} // btDX11SoftBodySolver::outputToVertexBuffers + + + + + +btDX11SIMDAwareSoftBodySolver::KernelDesc btDX11SIMDAwareSoftBodySolver::compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros ) +{ + const char *cs5String = "cs_5_0"; + + HRESULT hr = S_OK; + ID3DBlob* pErrorBlob = NULL; + ID3DBlob* pBlob = NULL; + ID3D11ComputeShader* kernelPointer = 0; + + hr = D3DX11CompileFromMemory( + shaderString, + strlen(shaderString), + shaderName, + compileMacros, + NULL, + shaderName, + cs5String, + D3D10_SHADER_ENABLE_STRICTNESS, + NULL, + NULL, + &pBlob, + &pErrorBlob, + NULL + ); + + if( FAILED(hr) ) + { + if( pErrorBlob ) { + btAssert( "Compilation of compute shader failed\n" ); + char *debugString = (char*)pErrorBlob->GetBufferPointer(); + OutputDebugStringA( debugString ); + } + + SAFE_RELEASE( pErrorBlob ); + SAFE_RELEASE( pBlob ); + + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + + // Create the Compute Shader + hr = m_dx11Device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, &kernelPointer ); + if( FAILED( hr ) ) + { + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + + ID3D11Buffer* constBuffer = 0; + if( constBufferSize > 0 ) + { + // Create the constant buffer + D3D11_BUFFER_DESC constant_buffer_desc; + ZeroMemory(&constant_buffer_desc, sizeof(constant_buffer_desc)); + 
constant_buffer_desc.ByteWidth = constBufferSize; + constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + m_dx11Device->CreateBuffer(&constant_buffer_desc, NULL, &constBuffer); + if( FAILED( hr ) ) + { + KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + } + + SAFE_RELEASE( pErrorBlob ); + SAFE_RELEASE( pBlob ); + + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = kernelPointer; + descriptor.constBuffer = constBuffer; + return descriptor; +} // compileComputeShader + + +bool btDX11SIMDAwareSoftBodySolver::buildShaders() +{ + // Ensure current kernels are released first + releaseKernels(); + + bool returnVal = true; + + + if( m_shadersInitialized ) + return true; + + + updatePositionsFromVelocitiesKernel = compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) ); + if( !updatePositionsFromVelocitiesKernel.constBuffer ) + returnVal = false; + + char maxVerticesPerWavefront[20]; + char maxBatchesPerWavefront[20]; + char waveFrontSize[20]; + char waveFrontBlockMultiplier[20]; + char blockSize[20]; + + sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront()); + sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront()); + sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize()); + sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER); + sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize()); + + D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 }; + + 
solvePositionsFromLinksKernel = compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros ); + if( !solvePositionsFromLinksKernel.constBuffer ) + returnVal = false; + + updateVelocitiesFromPositionsWithVelocitiesKernel = compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) ); + if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ) + returnVal = false; + updateVelocitiesFromPositionsWithoutVelocitiesKernel = compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB)); + if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ) + returnVal = false; + integrateKernel = compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) ); + if( !integrateKernel.constBuffer ) + returnVal = false; + applyForcesKernel = compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) ); + if( !applyForcesKernel.constBuffer ) + returnVal = false; + + // TODO: Rename to UpdateSoftBodies + resetNormalsAndAreasKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) ); + if( !resetNormalsAndAreasKernel.constBuffer ) + returnVal = false; + normalizeNormalsAndAreasKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) ); + if( !normalizeNormalsAndAreasKernel.constBuffer ) + returnVal = false; + updateSoftBodiesKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) ); + if( !updateSoftBodiesKernel.constBuffer ) + returnVal = false; + outputToVertexArrayWithNormalsKernel = 
compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithNormalsKernel", sizeof(OutputToVertexArrayCB) ); + if( !outputToVertexArrayWithNormalsKernel.constBuffer ) + returnVal = false; + outputToVertexArrayWithoutNormalsKernel = compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithoutNormalsKernel", sizeof(OutputToVertexArrayCB) ); + if( !outputToVertexArrayWithoutNormalsKernel.constBuffer ) + returnVal = false; + + + if( returnVal ) + m_shadersInitialized = true; + + return returnVal; +} + + + +void btDX11SIMDAwareSoftBodySolver::predictMotion( float timeStep ) +{ + // Fill the force arrays with current acceleration data etc + m_perClothWindVelocity.resize( m_softBodySet.size() ); + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); + + m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); + } + m_dx11PerClothWindVelocity.changedOnCPU(); + + // Apply forces that we know about to the cloths + applyForces( timeStep * getTimeScale() ); + + // Itegrate motion for all soft bodies dealt with by the solver + integrate( timeStep * getTimeScale() ); + // End prediction work for solvers +} + + + + + + + + + + + + + + + + + + + +static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray > &wavefrontBatches ) +{ + // A per-batch map of truth values stating whether a given vertex is in that batch + // This allows us to significantly optimize the batching + btAlignedObjectArray > mapOfVerticesInBatches; + + for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex ) + { + btAlignedObjectArray &wavefront( linksForWavefronts[waveIndex] ); + + int batch = 0; + bool placed = false; + while( batch < wavefrontBatches.size() && !placed ) 
+ { + // Test the current batch, see if this wave shares any vertex with the waves in the batch + bool foundSharedVertex = false; + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] ) + { + foundSharedVertex = true; + } + } + + if( !foundSharedVertex ) + { + wavefrontBatches[batch].push_back( waveIndex ); + // Insert vertices into this batch too + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + (mapOfVerticesInBatches[batch])[vertices.vertex0] = true; + (mapOfVerticesInBatches[batch])[vertices.vertex1] = true; + } + placed = true; + } + batch++; + } + if( batch == wavefrontBatches.size() && !placed ) + { + wavefrontBatches.resize( batch + 1 ); + wavefrontBatches[batch].push_back( waveIndex ); + + // And resize map as well + mapOfVerticesInBatches.resize( batch + 1 ); + + // Resize maps with total number of vertices + mapOfVerticesInBatches[batch].resize( numVertices, false ); + + // Insert vertices into this batch too + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + (mapOfVerticesInBatches[batch])[vertices.vertex0] = true; + (mapOfVerticesInBatches[batch])[vertices.vertex1] = true; + } + } + } + mapOfVerticesInBatches.clear(); +} + +// Function to remove an object from a vector maintaining correct ordering of the vector +template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove ) +{ + int currentSize = vectorToUpdate.size(); + for( int i = indexToRemove; i < (currentSize-1); ++i ) + { + vectorToUpdate[i] = vectorToUpdate[i+1]; + } + if( currentSize > 0 ) + vectorToUpdate.resize( currentSize - 1 ); +} + +/** + * 
Insert element into vectorToUpdate at index index. + */ +template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element ) +{ + vectorToUpdate.resize( vectorToUpdate.size() + 1 ); + for( int i = (vectorToUpdate.size() - 1); i > index; --i ) + { + vectorToUpdate[i] = vectorToUpdate[i-1]; + } + vectorToUpdate[index] = element; +} + +/** + * Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness. + * ie it treats vectorToUpdate as an ordered set. + */ +template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray &vectorToUpdate, T element ) +{ + int index = 0; + while( index < vectorToUpdate.size() && vectorToUpdate[index] < element ) + { + index++; + } + if( index == vectorToUpdate.size() || vectorToUpdate[index] != element ) + insertAtIndex( vectorToUpdate, index, element ); +} + +// Experimental batch generation that we could use in the simulations +// Attempts to generate larger batches that work on a per-wavefront basis +void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray &numLinksPerVertex, int &maxLinks ) +{ + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + numLinksPerVertex[nodes.vertex0]++; + numLinksPerVertex[nodes.vertex1]++; + } + int maxLinksPerVertex = 0; + for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex ) + { + maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex); + } + maxLinks = maxLinksPerVertex; + + btAlignedObjectArray< int > linksFoundPerVertex; + linksFoundPerVertex.resize( numVertices, 0 ); + + listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices ); + + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + 
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + { + // Do vertex 0 + int vertexIndex = nodes.vertex0; + int linkForVertex = linksFoundPerVertex[nodes.vertex0]; + int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex; + + listOfLinksPerVertex[linkAddress] = linkIndex; + + linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1; + } + { + // Do vertex 1 + int vertexIndex = nodes.vertex1; + int linkForVertex = linksFoundPerVertex[nodes.vertex1]; + int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex; + + listOfLinksPerVertex[linkAddress] = linkIndex; + + linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1; + } + } +} + +static void computeBatchingIntoWavefronts( + btSoftBodyLinkData &linkData, + int wavefrontSize, + int linksPerWorkItem, + int maxLinksPerWavefront, + btAlignedObjectArray < btAlignedObjectArray > &linksForWavefronts, + btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray > > &batchesWithinWaves, /* wave, batch, links in batch */ + btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */ + ) +{ + + + // Attempt generation of larger batches of links. 
+ btAlignedObjectArray< bool > processedLink; + processedLink.resize( linkData.getNumLinks() ); + btAlignedObjectArray< int > listOfLinksPerVertex; + int maxLinksPerVertex = 0; + + // Count num vertices + int numVertices = 0; + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + numVertices = btMax( numVertices, nodes.vertex0 + 1 ); + numVertices = btMax( numVertices, nodes.vertex1 + 1 ); + } + + // Need list of links per vertex + // Compute valence of each vertex + btAlignedObjectArray numLinksPerVertex; + numLinksPerVertex.resize(0); + numLinksPerVertex.resize( numVertices, 0 ); + + generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex ); + + for( int vertex = 0; vertex < 10; ++vertex ) + { + for( int link = 0; link < numLinksPerVertex[vertex]; ++link ) + { + int linkAddress = vertex * maxLinksPerVertex + link; + } + } + + + // At this point we know what links we have for each vertex so we can start batching + + // We want a vertex to start with, let's go with 0 + int currentVertex = 0; + int linksProcessed = 0; + + btAlignedObjectArray verticesToProcess; + + while( linksProcessed < linkData.getNumLinks() ) + { + // Next wavefront + int nextWavefront = linksForWavefronts.size(); + linksForWavefronts.resize( nextWavefront + 1 ); + btAlignedObjectArray &linksForWavefront(linksForWavefronts[nextWavefront]); + verticesForWavefronts.resize( nextWavefront + 1 ); + btAlignedObjectArray &vertexSet( verticesForWavefronts[nextWavefront] ); + + linksForWavefront.resize(0); + + // Loop to find enough links to fill the wavefront + // Stopping if we either run out of links, or fill it + while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront ) + { + // Go through the links for the current vertex + for( int link = 0; link < numLinksPerVertex[currentVertex] && 
linksForWavefront.size() < maxLinksPerWavefront; ++link ) + { + int linkAddress = currentVertex * maxLinksPerVertex + link; + int linkIndex = listOfLinksPerVertex[linkAddress]; + + // If we have not already processed this link, add it to the wavefront + // Claim it as another processed link + // Add the vertex at the far end to the list of vertices to process. + if( !processedLink[linkIndex] ) + { + linksForWavefront.push_back( linkIndex ); + linksProcessed++; + processedLink[linkIndex] = true; + int v0 = linkData.getVertexPair(linkIndex).vertex0; + int v1 = linkData.getVertexPair(linkIndex).vertex1; + if( v0 == currentVertex ) + verticesToProcess.push_back( v1 ); + else + verticesToProcess.push_back( v0 ); + } + } + if( verticesToProcess.size() > 0 ) + { + // Get the element on the front of the queue and remove it + currentVertex = verticesToProcess[0]; + removeFromVector( verticesToProcess, 0 ); + } else { + // If we've not yet processed all the links, find the first unprocessed one + // and select one of its vertices as the current vertex + if( linksProcessed < linkData.getNumLinks() ) + { + int searchLink = 0; + while( processedLink[searchLink] ) + searchLink++; + currentVertex = linkData.getVertexPair(searchLink).vertex0; + } + } + } + + // We have either finished or filled a wavefront + for( int link = 0; link < linksForWavefront.size(); ++link ) + { + int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0; + int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1; + insertUniqueAndOrderedIntoVector( vertexSet, v0 ); + insertUniqueAndOrderedIntoVector( vertexSet, v1 ); + } + // Iterate over links mapped to the wave and batch those + // We can run a batch on each cycle trivially + + batchesWithinWaves.resize( batchesWithinWaves.size() + 1 ); + btAlignedObjectArray < btAlignedObjectArray > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] ); + + + for( int link = 0; link < linksForWavefront.size(); ++link ) + { + int 
linkIndex = linksForWavefront[link]; + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex ); + + int batch = 0; + bool placed = false; + while( batch < batchesWithinWave.size() && !placed ) + { + bool foundSharedVertex = false; + if( batchesWithinWave[batch].size() >= wavefrontSize ) + { + // If we have already filled this batch, move on to another + foundSharedVertex = true; + } else { + for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 ) + { + btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] ); + + if( vertices.vertex0 == vertices2.vertex0 || + vertices.vertex1 == vertices2.vertex0 || + vertices.vertex0 == vertices2.vertex1 || + vertices.vertex1 == vertices2.vertex1 ) + { + foundSharedVertex = true; + break; + } + } + } + if( !foundSharedVertex ) + { + batchesWithinWave[batch].push_back( linkIndex ); + placed = true; + } else { + ++batch; + } + } + if( batch == batchesWithinWave.size() && !placed ) + { + batchesWithinWave.resize( batch + 1 ); + batchesWithinWave[batch].push_back( linkIndex ); + } + } + + } + +} + +void btSoftBodyLinkDataDX11SIMDAware::generateBatches() +{ + btAlignedObjectArray < btAlignedObjectArray > linksForWavefronts; + btAlignedObjectArray < btAlignedObjectArray > wavefrontBatches; + btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray > > batchesWithinWaves; + btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set + + // Group the links into wavefronts + computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts ); + + + // Batch the wavefronts + generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches ); + + m_numWavefronts = linksForWavefronts.size(); + + // At this point we have a description of which links we need to 
process in each wavefront + + // First correctly fill the batch ranges vector + int numBatches = wavefrontBatches.size(); + m_wavefrontBatchStartLengths.resize(0); + int prefixSum = 0; + for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex ) + { + int wavesInBatch = wavefrontBatches[batchIndex].size(); + int nextPrefixSum = prefixSum + wavesInBatch; + m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) ); + + prefixSum += wavesInBatch; + } + + // Also find max number of batches within a wave + m_maxBatchesWithinWave = 0; + m_maxVerticesWithinWave = 0; + m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts ); + for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex ) + { + // See if the number of batches in this wave is greater than the current maxium + int batchesInCurrentWave = batchesWithinWaves[waveIndex].size(); + int verticesInCurrentWave = verticesForWavefronts[waveIndex].size(); + m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave ); + m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave ); + } + + // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around + m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2); + + // Now we know the maximum number of vertices per-wave we can resize the global vertices array + m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts ); + + // Grab backup copies of all the link data arrays for the sorting process + btAlignedObjectArray m_links_Backup(m_links); + btAlignedObjectArray m_linkStrength_Backup(m_linkStrength); + btAlignedObjectArray m_linksMassLSC_Backup(m_linksMassLSC); + btAlignedObjectArray m_linksRestLengthSquared_Backup(m_linksRestLengthSquared); + //btAlignedObjectArray m_linksCLength_Backup(m_linksCLength); + //btAlignedObjectArray m_linksLengthRatio_Backup(m_linksLengthRatio); + btAlignedObjectArray 
m_linksRestLength_Backup(m_linksRestLength); + btAlignedObjectArray m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient); + + // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses. + m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + + // Then re-order links into wavefront blocks + + // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts. 
+ int wavefrontCount = 0; + + // Iterate over batches of wavefronts, then wavefronts in the batch + for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex ) + { + btAlignedObjectArray &batch( wavefrontBatches[batchIndex] ); + int wavefrontsInBatch = batch.size(); + + + for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex ) + { + + int originalWavefrontIndex = batch[wavefrontIndex]; + btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] ); + int verticesUsedByWavefront = wavefrontVertices.size(); + + // Copy the set of vertices into the correctly structured array for use on the device + // Fill the non-vertices with -1s + // so we can mask out those reads + for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex ) + { + m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex]; + } + for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex ) + { + m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1; + } + + // Obtain the set of batches within the current wavefront + btAlignedObjectArray < btAlignedObjectArray > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] ); + // Set the size of the batches for use in the solver, correctly ordered + NumBatchesVerticesPair batchesAndVertices; + batchesAndVertices.numBatches = batchesWithinWavefront.size(); + batchesAndVertices.numVertices = verticesUsedByWavefront; + m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices; + + + // Now iterate over batches within the wavefront to structure the links correctly + for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch ) + { + btAlignedObjectArray &linksInBatch( batchesWithinWavefront[wavefrontBatch] ); + int wavefrontBatchSize = linksInBatch.size(); + + int batchAddressInTarget = m_maxBatchesWithinWave * 
m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch; + + for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex ) + { + int originalLinkAddress = linksInBatch[linkIndex]; + // Reorder simple arrays trivially + m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress]; + m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress]; + m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress]; + m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress]; + m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress]; + m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress]; + + // The local address is more complicated. We need to work out where a given vertex will end up + // by searching the set of vertices for this link and using the index as the local address + btSoftBodyLinkData::LinkNodePair localPair; + btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex]; + localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 ); + localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 ); + m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair; + } + for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex ) + { + // Put 0s into these arrays for padding for cleanliness + m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0); + m_linkStrength[batchAddressInTarget + linkIndex] = 0.f; + m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f; + m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f; + m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f; + m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget 
+ linkIndex] = 0.f; + + + // For local addresses of junk data choose a set of addresses just above the range of valid ones + // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses + // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work + // off the end of that range so we need no control + btSoftBodyLinkData::LinkNodePair localPair; + localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16); + localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16); + m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair; + } + + } + + + wavefrontCount++; + } + + + } + +} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches() diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h new file mode 100644 index 000000000..ceac535e2 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h @@ -0,0 +1,432 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#include "vectormath/vmInclude.h" +#include "BulletSoftBody/btSoftBodySolvers.h" +#include "btSoftBodySolverVertexBuffer_DX11.h" +#include "btSoftBodySolverLinkData_DX11SIMDAware.h" +#include "btSoftBodySolverVertexData_DX11.h" +#include "btSoftBodySolverTriangleData_DX11.h" + + +#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H +#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H + +class btDX11SIMDAwareSoftBodySolver : public btSoftBodySolver +{ +public: + + /** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. + */ + class btAcceleratedSoftBodyInterface + { + protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + + public: + btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + 
int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { + return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + + void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + + }; + + + class KernelDesc + { + protected: + + + public: + ID3D11ComputeShader* kernel; + ID3D11Buffer* constBuffer; + + KernelDesc() + { + kernel = 0; + constBuffer = 0; + } + + virtual ~KernelDesc() + { + // TODO: this should probably destroy its kernel but we need to be careful + // in case KernelDescs are copied + } + }; + + struct SolvePositionsFromLinksKernelCB + { + int startWave; + int numWaves; + float kst; + float ti; + }; + + struct IntegrateCB + { + int numNodes; + float solverdt; + int padding1; + int padding2; + }; + + struct UpdatePositionsFromVelocitiesCB + { + int numNodes; + float solverSDT; + int padding1; + int padding2; + }; + + struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB + { + int numNodes; + float isolverdt; + int padding1; + int padding2; + }; + + struct 
UpdateVelocitiesFromPositionsWithVelocitiesCB + { + int numNodes; + float isolverdt; + int padding1; + int padding2; + }; + + struct UpdateSoftBodiesCB + { + int numNodes; + int startFace; + int numFaces; + float epsilon; + }; + + + struct OutputToVertexArrayCB + { + int startNode; + int numNodes; + int positionOffset; + int positionStride; + + int normalOffset; + int normalStride; + int padding1; + int padding2; + }; + + + struct ApplyForcesCB + { + unsigned int numNodes; + float solverdt; + float epsilon; + int padding3; + }; + + struct AddVelocityCB + { + int startNode; + int lastNode; + float velocityX; + float velocityY; + float velocityZ; + int padding1; + int padding2; + int padding3; + }; + + +private: + ID3D11Device * m_dx11Device; + ID3D11DeviceContext* m_dx11Context; + + + /** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */ + btSoftBodyLinkDataDX11SIMDAware m_linkData; + btSoftBodyVertexDataDX11 m_vertexData; + btSoftBodyTriangleDataDX11 m_triangleData; + + /** Variable to define whether we need to update solver constants on the next iteration */ + bool m_updateSolverConstants; + + bool m_shadersInitialized; + + /** + * Cloths owned by this solver. + * Only our cloths are in this array. + */ + btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + + /** Acceleration value to be applied to all non-static vertices in the solver. + * Index n is cloth n, array sized by number of cloths in the world not the solver. + */ + btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration; + btDX11Buffer m_dx11PerClothAcceleration; + + /** Wind velocity to be applied normal to all non-static vertices in the solver. + * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+ */ + btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity; + btDX11Buffer m_dx11PerClothWindVelocity; + + /** Velocity damping factor */ + btAlignedObjectArray< float > m_perClothDampingFactor; + btDX11Buffer m_dx11PerClothDampingFactor; + + /** Velocity correction coefficient */ + btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient; + btDX11Buffer m_dx11PerClothVelocityCorrectionCoefficient; + + /** Lift parameter for wind effect on cloth. */ + btAlignedObjectArray< float > m_perClothLiftFactor; + btDX11Buffer m_dx11PerClothLiftFactor; + + /** Drag parameter for wind effect on cloth. */ + btAlignedObjectArray< float > m_perClothDragFactor; + btDX11Buffer m_dx11PerClothDragFactor; + + /** Density of the medium in which each cloth sits */ + btAlignedObjectArray< float > m_perClothMediumDensity; + btDX11Buffer m_dx11PerClothMediumDensity; + + KernelDesc solvePositionsFromLinksKernel; + KernelDesc integrateKernel; + KernelDesc addVelocityKernel; + KernelDesc updatePositionsFromVelocitiesKernel; + KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel; + KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel; + KernelDesc resetNormalsAndAreasKernel; + KernelDesc normalizeNormalsAndAreasKernel; + KernelDesc updateSoftBodiesKernel; + KernelDesc outputToVertexArrayWithNormalsKernel; + KernelDesc outputToVertexArrayWithoutNormalsKernel; + + KernelDesc outputToVertexArrayKernel; + KernelDesc applyForcesKernel; + KernelDesc collideSphereKernel; + KernelDesc collideCylinderKernel; + + + + /** + * Integrate motion on the solver. + */ + virtual void integrate( float solverdt ); + float computeTriangleArea( + const Vectormath::Aos::Point3 &vertex0, + const Vectormath::Aos::Point3 &vertex1, + const Vectormath::Aos::Point3 &vertex2 ); + + + /** + * Compile a compute shader kernel from a string and return the appropriate KernelDesc object. 
+ */ + KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 ); + + bool buildShaders(); + + void resetNormalsAndAreas( int numVertices ); + + void normalizeNormalsAndAreas( int numVertices ); + + void executeUpdateSoftBodies( int firstTriangle, int numTriangles ); + + Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ); + + void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); + + virtual void applyForces( float solverdt ); + + void updateConstants( float timeStep ); + + btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + + ////////////////////////////////////// + // Kernel dispatches + void prepareLinks(); + + void updatePositionsFromVelocities( float solverdt ); + void solveLinksForPosition( int startLink, int numLinks, float kst, float ti ); + void solveLinksForVelocity( int startLink, int numLinks, float kst ); + + void updateVelocitiesFromPositionsWithVelocities( float isolverdt ); + void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ); + + // End kernel dispatches + ///////////////////////////////////// + + void releaseKernels(); + + +public: + btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context); + + virtual ~btDX11SIMDAwareSoftBodySolver(); + + + + virtual btSoftBodyLinkData &getLinkData(); + + virtual btSoftBodyVertexData &getVertexData(); + + virtual btSoftBodyTriangleData &getTriangleData(); + + + + virtual bool checkInitialized(); + + virtual void updateSoftBodies( ); + + virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies ); + + virtual void solveConstraints( float solverdt ); + + virtual void predictMotion( float solverdt ); + + virtual void 
copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer ); +}; + +#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt new file mode 100644 index 000000000..d2ef78f69 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt @@ -0,0 +1,82 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + +ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + + +IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) +ELSE() + INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include ) +ENDIF() + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers +# which are needed by some of the sources +SET(BulletSoftBodyOpenCLSolvers_Shaders +# OutputToVertexArray + UpdateNormals + Integrate + UpdatePositions + UpdateNodes + SolvePositions + UpdatePositionsFromVelocities + ApplyForces + PrepareLinks + VSolveLinks +) + +foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") +endforeach(f) + + + +ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD + ${BulletSoftBodyOpenCLSolvers_SRCS} + ${BulletSoftBodyOpenCLSolvers_HDRS} + ${BulletSoftBodyOpenCLSolvers_OpenCLC} +) + +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION}) +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION}) +IF (BUILD_SHARED_LIBS) + TARGET_LINK_LIBRARIES(BulletSoftBody BulletDynamics) +ENDIF (BUILD_SHARED_LIBS) + + +IF (INSTALL_LIBS) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .) 
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt new file mode 100644 index 000000000..65bbea43a --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt @@ -0,0 +1,73 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers +# which are needed by some of the sources +SET(BulletSoftBodyOpenCLSolvers_Shaders +# OutputToVertexArray + UpdateNormals + Integrate + UpdatePositions + UpdateNodes + SolvePositions + UpdatePositionsFromVelocities + ApplyForces + PrepareLinks + VSolveLinks +) + +foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl") +endforeach(f) + + + +ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple + ${BulletSoftBodyOpenCLSolvers_SRCS} + ${BulletSoftBodyOpenCLSolvers_HDRS} + ${BulletSoftBodyOpenCLSolvers_OpenCLC} +) + +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION}) +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION}) +IF (BUILD_SHARED_LIBS) + TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody BulletDynamics) +ENDIF (BUILD_SHARED_LIBS) + + +IF (INSTALL_LIBS) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION .) 
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt index 0c63b945a..36b173cf8 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt @@ -1,71 +1,16 @@ -INCLUDE_DIRECTORIES( -${BULLET_PHYSICS_SOURCE_DIR}/src -) +IF(BUILD_MINICL_OPENCL_DEMOS) + SUBDIRS( MiniCL ) +ENDIF() +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() -SET(OPENCL_DIR $ENV{ATISTREAMSDKROOT}) -SET(OPENCL_INCLUDE_PATH "${ATISTREAMSDKROOT}/include" CACHE DOCSTRING "OpenCL SDK include path") +IF(BUILD_NVIDIA_OPENCL_DEMOS) + SUBDIRS(NVidia) +ENDIF() -INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_PATH} "../cpu/") - -SET(BulletSoftBodyOpenCLSolvers_SRCS - btSoftBodySolver_OpenCL.cpp -) - -SET(BulletSoftBodyOpenCLSolvers_HDRS - btSoftBodySolver_OpenCL.h - ../cpu/btSoftBodySolverData.h - btSoftBodySolverVertexData_OpenCL.h - btSoftBodySolverTriangleData_OpenCL.h - btSoftBodySolverLinkData_OpenCL.h - btSoftBodySolverBuffer_OpenCL.h -) - -# OpenCL and HLSL Shaders. 
-# Build rules generated to stringify these into headers -# which are needed by some of the sources -SET(BulletSoftBodyOpenCLSolvers_Shaders -# OutputToVertexArray - UpdateNormals - Integrate - UpdatePositions - UpdateNodes - SolvePositions - UpdatePositionsFromVelocities - ApplyForces - PrepareLinks - VSolveLinks -) - -foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) - LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "OpenCLC/${f}.cl") -endforeach(f) - - - -ADD_LIBRARY(BulletSoftBodySolvers_OpenCL ${BulletSoftBodyOpenCLSolvers_SRCS} ${BulletSoftBodyOpenCLSolvers_HDRS} ${BulletSoftBodyOpenCLSolvers_OpenCLC}) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES VERSION ${BULLET_VERSION}) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES SOVERSION ${BULLET_VERSION}) -IF (BUILD_SHARED_LIBS) - TARGET_LINK_LIBRARIES(BulletSoftBody BulletDynamics) -ENDIF (BUILD_SHARED_LIBS) - - -IF (INSTALL_LIBS) - IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) - IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION .) 
- ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION lib${LIB_SUFFIX}) - INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES FRAMEWORK true) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) -ENDIF (INSTALL_LIBS) +IF(APPLE) + SUBDIRS(Apple) +ENDIF() diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt new file mode 100644 index 000000000..e9f86c2c9 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt @@ -0,0 +1,75 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + +ADD_DEFINITIONS(-DUSE_MINICL) + + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers +# which are needed by some of the sources +SET(BulletSoftBodyOpenCLSolvers_Shaders +# OutputToVertexArray + UpdateNormals + Integrate + UpdatePositions + UpdateNodes + SolvePositions + UpdatePositionsFromVelocities + ApplyForces + PrepareLinks + VSolveLinks +) + +foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl") +endforeach(f) + + + +ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Mini + ${BulletSoftBodyOpenCLSolvers_SRCS} + ${BulletSoftBodyOpenCLSolvers_HDRS} + ${BulletSoftBodyOpenCLSolvers_OpenCLC} +) + +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION}) +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION}) +IF (BUILD_SHARED_LIBS) + TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Mini BulletSoftBody BulletDynamics) +ENDIF (BUILD_SHARED_LIBS) + + +IF (INSTALL_LIBS) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .) 
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp new file mode 100644 index 000000000..79b0ac234 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp @@ -0,0 +1,40 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#include <MiniCL/cl_MiniCL_Defs.h> + +#define MSTRINGIFY(A) A +#include "../OpenCLC10/ApplyForces.cl" +#include "../OpenCLC10/Integrate.cl" +#include "../OpenCLC10/PrepareLinks.cl" +#include "../OpenCLC10/SolvePositions.cl" +#include "../OpenCLC10/UpdateNodes.cl" +#include "../OpenCLC10/UpdateNormals.cl" +#include "../OpenCLC10/UpdatePositions.cl" +#include "../OpenCLC10/UpdatePositionsFromVelocities.cl" +//#include "../OpenCLC10/VSolveLinks.cl" + +MINICL_REGISTER(PrepareLinksKernel) +MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel) +MINICL_REGISTER(SolvePositionsFromLinksKernel) +MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel) +MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel) +MINICL_REGISTER(IntegrateKernel) +MINICL_REGISTER(ApplyForcesKernel) +MINICL_REGISTER(ResetNormalsAndAreasKernel) +MINICL_REGISTER(NormalizeNormalsAndAreasKernel) +MINICL_REGISTER(UpdateSoftBodiesKernel) + + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt new file mode 100644 index 000000000..7608492b7 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt @@ -0,0 +1,79 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + + + +IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc ) +ELSE() + INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc ) +ENDIF() + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers +# which are needed by some of the sources +SET(BulletSoftBodyOpenCLSolvers_Shaders +# OutputToVertexArray + UpdateNormals + Integrate + UpdatePositions + UpdateNodes + SolvePositions + UpdatePositionsFromVelocities + ApplyForces + PrepareLinks + VSolveLinks +) + +foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) + LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl") +endforeach(f) + + + +ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia + ${BulletSoftBodyOpenCLSolvers_SRCS} + ${BulletSoftBodyOpenCLSolvers_HDRS} + ${BulletSoftBodyOpenCLSolvers_OpenCLC} +) + +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION}) +SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION}) +IF (BUILD_SHARED_LIBS) + TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics) +ENDIF (BUILD_SHARED_LIBS) + + +IF (INSTALL_LIBS) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .) 
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl new file mode 100644 index 000000000..555d07a1d --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl @@ -0,0 +1,91 @@ +MSTRINGIFY( + + +float adot3(float4 a, float4 b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + +float4 projectOnAxis( float4 v, float4 a ) +{ + return (a*adot3(v, a)); +} + +__kernel void +ApplyForcesKernel( + const uint numNodes, + const float solverdt, + const float epsilon, + __global int * g_vertexClothIdentifier, + __global float4 * g_vertexNormal, + __global float * g_vertexArea, + __global float * g_vertexInverseMass, + __global float * g_clothLiftFactor, + __global float * g_clothDragFactor, + __global float4 * g_clothWindVelocity, + __global float4 * g_clothAcceleration, + __global float * g_clothMediumDensity, + __global float4 * g_vertexForceAccumulator, + __global float4 * g_vertexVelocity GUID_ARG) +{ + unsigned int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + int clothId = g_vertexClothIdentifier[nodeID]; + float nodeIM = g_vertexInverseMass[nodeID]; + + if( nodeIM > 0.0f ) + { + 
float4 nodeV = g_vertexVelocity[nodeID]; + float4 normal = g_vertexNormal[nodeID]; + float area = g_vertexArea[nodeID]; + float4 nodeF = g_vertexForceAccumulator[nodeID]; + + // Read per-cloth values + float4 clothAcceleration = g_clothAcceleration[clothId]; + float4 clothWindVelocity = g_clothWindVelocity[clothId]; + float liftFactor = g_clothLiftFactor[clothId]; + float dragFactor = g_clothDragFactor[clothId]; + float mediumDensity = g_clothMediumDensity[clothId]; + + // Apply the acceleration to the cloth rather than do this via a force + nodeV += (clothAcceleration*solverdt); + + g_vertexVelocity[nodeID] = nodeV; + + float4 relativeWindVelocity = nodeV - clothWindVelocity; + float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity); + + if( relativeSpeedSquared > epsilon ) + { + // Correct direction of normal relative to wind direction and get dot product + normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f); + float dvNormal = dot(normal, relativeWindVelocity); + if( dvNormal > 0 ) + { + float4 force = (float4)(0.f, 0.f, 0.f, 0.f); + float c0 = area * dvNormal * relativeSpeedSquared / 2.f; + float c1 = c0 * mediumDensity; + force += normal * (-c1 * liftFactor); + force += normalize(relativeWindVelocity)*(-c1 * dragFactor); + + float dtim = solverdt * nodeIM; + float4 forceDTIM = force * dtim; + + float4 nodeFPlusForce = nodeF + force; + + // m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim; + float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim); + + nodeF = nodeFPlusForce; + if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) + nodeF = nodeFMinus; + + g_vertexForceAccumulator[nodeID] = nodeF; + } + } + } + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl new file mode 100644 index 000000000..fb65330d9 --- /dev/null +++ 
b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl @@ -0,0 +1,35 @@ +MSTRINGIFY( + +// Node indices for each link + + + +__kernel void +IntegrateKernel( + const int numNodes, + const float solverdt, + __global float * g_vertexInverseMasses, + __global float4 * g_vertexPositions, + __global float4 * g_vertexVelocity, + __global float4 * g_vertexPreviousPositions, + __global float4 * g_vertexForceAccumulator GUID_ARG) +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + float4 position = g_vertexPositions[nodeID]; + float4 velocity = g_vertexVelocity[nodeID]; + float4 force = g_vertexForceAccumulator[nodeID]; + float inverseMass = g_vertexInverseMasses[nodeID]; + + g_vertexPreviousPositions[nodeID] = position; + velocity += force * inverseMass * solverdt; + position += velocity * solverdt; + + g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f); + g_vertexPositions[nodeID] = position; + g_vertexVelocity[nodeID] = velocity; + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl new file mode 100644 index 000000000..ba3277667 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl @@ -0,0 +1,41 @@ +MSTRINGIFY( + +float dot3(float4 a, float4 b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +__kernel void +PrepareLinksKernel( + const int numLinks, + __global int2 * g_linksVertexIndices, + __global float * g_linksMassLSC, + __global float4 * g_nodesPreviousPosition, + __global float * g_linksLengthRatio, + __global float4 * g_linksCurrentLength GUID_ARG) +{ + int linkID = get_global_id(0); + if( linkID < numLinks ) + { + + int2 nodeIndices = g_linksVertexIndices[linkID]; + int node0 = nodeIndices.x; + int node1 = nodeIndices.y; + + float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0]; + float4 nodePreviousPosition1 = 
g_nodesPreviousPosition[node1]; + + float massLSC = g_linksMassLSC[linkID]; + + float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0; + + float linkLengthRatio = dot3(linkCurrentLength, linkCurrentLength)*massLSC; + linkLengthRatio = 1.0f/linkLengthRatio; + + g_linksCurrentLength[linkID] = linkCurrentLength; + g_linksLengthRatio[linkID] = linkLengthRatio; + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl new file mode 100644 index 000000000..fe7aec66e --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl @@ -0,0 +1,57 @@ + + + +MSTRINGIFY( + + +float mydot3(float4 a, float4 b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +__kernel void +SolvePositionsFromLinksKernel( + const int startLink, + const int numLinks, + const float kst, + const float ti, + __global int2 * g_linksVertexIndices, + __global float * g_linksMassLSC, + __global float * g_linksRestLengthSquared, + __global float * g_verticesInverseMass, + __global float4 * g_vertexPositions GUID_ARG) + +{ + int linkID = get_global_id(0) + startLink; + if( get_global_id(0) < numLinks ) + { + float massLSC = g_linksMassLSC[linkID]; + float restLengthSquared = g_linksRestLengthSquared[linkID]; + + if( massLSC > 0.0f ) + { + int2 nodeIndices = g_linksVertexIndices[linkID]; + int node0 = nodeIndices.x; + int node1 = nodeIndices.y; + + float4 position0 = g_vertexPositions[node0]; + float4 position1 = g_vertexPositions[node1]; + + float inverseMass0 = g_verticesInverseMass[node0]; + float inverseMass1 = g_verticesInverseMass[node1]; + + float4 del = position1 - position0; + float len = mydot3(del, del); + float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst; + position0 = position0 - del*(k*inverseMass0); + position1 = position1 + del*(k*inverseMass1); + + 
g_vertexPositions[node0] = position0; + g_vertexPositions[node1] = position1; + + } + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl new file mode 100644 index 000000000..488a58479 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl @@ -0,0 +1,44 @@ +MSTRINGIFY( + +/*#define float3 float4 + +float dot3(float3 a, float3 b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +}*/ + +__kernel void +UpdateConstantsKernel( + const int numLinks, + __global int2 * g_linksVertexIndices, + __global float4 * g_vertexPositions, + __global float * g_vertexInverseMasses, + __global float * g_linksMaterialLSC, + __global float * g_linksMassLSC, + __global float * g_linksRestLengthSquared, + __global float * g_linksRestLengths) +{ + int linkID = get_global_id(0); + if( linkID < numLinks ) + { + int2 nodeIndices = g_linksVertexIndices[linkID]; + int node0 = nodeIndices.x; + int node1 = nodeIndices.y; + float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ]; + + float3 position0 = g_vertexPositions[node0].xyz; + float3 position1 = g_vertexPositions[node1].xyz; + float inverseMass0 = g_vertexInverseMasses[node0]; + float inverseMass1 = g_vertexInverseMasses[node1]; + + float3 difference = position0 - position1; + float length2 = dot(difference, difference); + float length = sqrt(length2); + + g_linksRestLengths[linkID] = length; + g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient; + g_linksRestLengthSquared[linkID] = length*length; + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl new file mode 100644 index 000000000..9ad227b45 --- /dev/null +++ 
b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl @@ -0,0 +1,39 @@ +MSTRINGIFY( + + +__kernel void +updateVelocitiesFromPositionsWithVelocitiesKernel( + int numNodes, + float isolverdt, + __global float4 * g_vertexPositions, + __global float4 * g_vertexPreviousPositions, + __global int * g_vertexClothIndices, + __global float *g_clothVelocityCorrectionCoefficients, + __global float * g_clothDampingFactor, + __global float4 * g_vertexVelocities, + __global float4 * g_vertexForces GUID_ARG) +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + float4 position = g_vertexPositions[nodeID]; + float4 previousPosition = g_vertexPreviousPositions[nodeID]; + float4 velocity = g_vertexVelocities[nodeID]; + int clothIndex = g_vertexClothIndices[nodeID]; + float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex]; + float dampingFactor = g_clothDampingFactor[clothIndex]; + float velocityCoefficient = (1.f - dampingFactor); + + float4 difference = position - previousPosition; + + velocity += difference*velocityCorrectionCoefficient*isolverdt; + + // Damp the velocity + velocity *= velocityCoefficient; + + g_vertexVelocities[nodeID] = velocity; + g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f); + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl new file mode 100644 index 000000000..7bb233413 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl @@ -0,0 +1,102 @@ +MSTRINGIFY( + +float length3(float4 a) +{ + a.w = 0; + return length(a); +} + +float4 normalize3(float4 a) +{ + a.w = 0; + return normalize(a); +} + +__kernel void +ResetNormalsAndAreasKernel( + const unsigned int numNodes, + __global float4 * g_vertexNormals, + __global float * g_vertexArea GUID_ARG) +{ + if( get_global_id(0) < numNodes ) + { + 
g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + g_vertexArea[get_global_id(0)] = 0.0f; + } +} + + +__kernel void +UpdateSoftBodiesKernel( + const unsigned int startFace, + const unsigned int numFaces, + __global int4 * g_triangleVertexIndexSet, + __global float4 * g_vertexPositions, + __global float4 * g_vertexNormals, + __global float * g_vertexArea, + __global float4 * g_triangleNormals, + __global float * g_triangleArea GUID_ARG) +{ + int faceID = get_global_id(0) + startFace; + if( get_global_id(0) < numFaces ) + { + int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ]; + int nodeIndex0 = triangleIndexSet.x; + int nodeIndex1 = triangleIndexSet.y; + int nodeIndex2 = triangleIndexSet.z; + + float4 node0 = g_vertexPositions[nodeIndex0]; + float4 node1 = g_vertexPositions[nodeIndex1]; + float4 node2 = g_vertexPositions[nodeIndex2]; + float4 nodeNormal0 = g_vertexNormals[nodeIndex0]; + float4 nodeNormal1 = g_vertexNormals[nodeIndex1]; + float4 nodeNormal2 = g_vertexNormals[nodeIndex2]; + float vertexArea0 = g_vertexArea[nodeIndex0]; + float vertexArea1 = g_vertexArea[nodeIndex1]; + float vertexArea2 = g_vertexArea[nodeIndex2]; + + float4 vector0 = node1 - node0; + float4 vector1 = node2 - node0; + + float4 faceNormal = cross(vector0, vector1); + float triangleArea = length(faceNormal); + + nodeNormal0 = nodeNormal0 + faceNormal; + nodeNormal1 = nodeNormal1 + faceNormal; + nodeNormal2 = nodeNormal2 + faceNormal; + vertexArea0 = vertexArea0 + triangleArea; + vertexArea1 = vertexArea1 + triangleArea; + vertexArea2 = vertexArea2 + triangleArea; + + g_triangleNormals[faceID] = normalize3(faceNormal); + g_vertexNormals[nodeIndex0] = nodeNormal0; + g_vertexNormals[nodeIndex1] = nodeNormal1; + g_vertexNormals[nodeIndex2] = nodeNormal2; + g_triangleArea[faceID] = triangleArea; + g_vertexArea[nodeIndex0] = vertexArea0; + g_vertexArea[nodeIndex1] = vertexArea1; + g_vertexArea[nodeIndex2] = vertexArea2; + } +} + +__kernel void 
+NormalizeNormalsAndAreasKernel( + const unsigned int numNodes, + __global int * g_vertexTriangleCount, + __global float4 * g_vertexNormals, + __global float * g_vertexArea GUID_ARG) +{ + if( get_global_id(0) < numNodes ) + { + float4 normal = g_vertexNormals[get_global_id(0)]; + float area = g_vertexArea[get_global_id(0)]; + int numTriangles = g_vertexTriangleCount[get_global_id(0)]; + + float vectorLength = length3(normal); + + g_vertexNormals[get_global_id(0)] = normalize3(normal); + g_vertexArea[get_global_id(0)] = area/(float)(numTriangles); + } +} + +); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl new file mode 100644 index 000000000..3155a04e4 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl @@ -0,0 +1,34 @@ +MSTRINGIFY( + +__kernel void +updateVelocitiesFromPositionsWithoutVelocitiesKernel( + const int numNodes, + const float isolverdt, + __global float4 * g_vertexPositions, + __global float4 * g_vertexPreviousPositions, + __global int * g_vertexClothIndices, + __global float * g_clothDampingFactor, + __global float4 * g_vertexVelocities, + __global float4 * g_vertexForces GUID_ARG) + +{ + int nodeID = get_global_id(0); + if( nodeID < numNodes ) + { + float4 position = g_vertexPositions[nodeID]; + float4 previousPosition = g_vertexPreviousPositions[nodeID]; + float4 velocity = g_vertexVelocities[nodeID]; + int clothIndex = g_vertexClothIndices[nodeID]; + float dampingFactor = g_clothDampingFactor[clothIndex]; + float velocityCoefficient = (1.f - dampingFactor); + + float4 difference = position - previousPosition; + + velocity = difference*velocityCoefficient*isolverdt; + + g_vertexVelocities[nodeID] = velocity; + g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f); + } +} + +); \ No newline at end of file diff --git 
a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl new file mode 100644 index 000000000..97e708bc3 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl @@ -0,0 +1,28 @@ + +MSTRINGIFY( + + + + +__kernel void +UpdatePositionsFromVelocitiesKernel( + const int numNodes, + const float solverSDT, + __global float4 * g_vertexVelocities, + __global float4 * g_vertexPreviousPositions, + __global float4 * g_vertexCurrentPosition GUID_ARG) +{ + int vertexID = get_global_id(0); + if( vertexID < numNodes ) + { + float4 previousPosition = g_vertexPreviousPositions[vertexID]; + float4 velocity = g_vertexVelocities[vertexID]; + + float4 newPosition = previousPosition + velocity*solverSDT; + + g_vertexCurrentPosition[vertexID] = newPosition; + g_vertexPreviousPositions[vertexID] = newPosition; + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl new file mode 100644 index 000000000..a618d69cc --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl @@ -0,0 +1,45 @@ +MSTRINGIFY( + +__kernel void +VSolveLinksKernel( + int startLink, + int numLinks, + float kst, + __global int2 * g_linksVertexIndices, + __global float * g_linksLengthRatio, + __global float4 * g_linksCurrentLength, + __global float * g_vertexInverseMass, + __global float4 * g_vertexVelocity GUID_ARG) +{ + int linkID = get_global_id(0) + startLink; + if( get_global_id(0) < numLinks ) + { + int2 nodeIndices = g_linksVertexIndices[linkID]; + int node0 = nodeIndices.x; + int node1 = nodeIndices.y; + + float linkLengthRatio = g_linksLengthRatio[linkID]; + float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz; + + float3 vertexVelocity0 
= g_vertexVelocity[node0].xyz; + float3 vertexVelocity1 = g_vertexVelocity[node1].xyz; + + float vertexInverseMass0 = g_vertexInverseMass[node0]; + float vertexInverseMass1 = g_vertexInverseMass[node1]; + + float3 nodeDifference = vertexVelocity0 - vertexVelocity1; + float dotResult = dot(linkCurrentLength, nodeDifference); + float j = -dotResult*linkLengthRatio*kst; + + float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0); + float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1); + + vertexVelocity0 += velocityChange0; + vertexVelocity1 -= velocityChange1; + + g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f); + g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f); + } +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h index e71ae8778..8fa58cd16 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h @@ -17,7 +17,16 @@ subject to the following restrictions: #define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H // OpenCL support -#include <CL/cl.hpp> + +#ifdef USE_MINICL + #include "MiniCL/cl.h" +#else //USE_MINICL + #ifdef __APPLE__ + #include <OpenCL/OpenCL.h> + #else + #include <CL/cl.h> + #endif //__APPLE__ +#endif//USE_MINICL #ifndef SAFE_RELEASE #define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } @@ -25,22 +34,25 @@ subject to the following restrictions: template <typename ElementType> class btOpenCLBuffer { -protected: - cl::CommandQueue m_queue; - btAlignedObjectArray< ElementType > * m_CPUBuffer; - cl::Buffer m_buffer; +public: + cl_command_queue m_cqCommandQue; + cl_context m_clContext; + cl_mem m_buffer; + + + + btAlignedObjectArray< ElementType > * m_CPUBuffer; + int m_gpuSize; bool m_onGPU; - bool m_readOnlyOnGPU; - bool m_allocated; - // TODO: Remove this once C++ bindings are fixed - 
cl::Context context; - bool createBuffer( cl::Buffer *preexistingBuffer = 0) + + bool createBuffer( cl_mem* preexistingBuffer = 0) { + cl_int err; @@ -49,12 +61,11 @@ protected: m_buffer = *preexistingBuffer; } else { - m_buffer = cl::Buffer( - context, - m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE, - m_CPUBuffer->size() * sizeof(ElementType), - 0, - &err); + + cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE; + + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err); if( err != CL_SUCCESS ) { btAssert( "Buffer::Buffer(m_buffer)"); @@ -62,35 +73,31 @@ protected: } m_gpuSize = m_CPUBuffer->size(); + return true; } public: - btOpenCLBuffer( - cl::CommandQueue queue, - btAlignedObjectArray< ElementType > *CPUBuffer, - bool readOnly) : - m_queue(queue), + btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly) + :m_cqCommandQue(commandQue), + m_clContext(ctx), m_CPUBuffer(CPUBuffer), m_gpuSize(0), m_onGPU(false), m_readOnlyOnGPU(readOnly), m_allocated(false) { - context = m_queue.getInfo(); } ~btOpenCLBuffer() { } - cl::Buffer getBuffer() - { - return m_buffer; - } bool moveToGPU() { + + cl_int err; if( (m_CPUBuffer->size() != m_gpuSize) ) @@ -107,12 +114,12 @@ public: m_allocated = true; } - err = m_queue.enqueueWriteBuffer( - m_buffer, + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer, CL_FALSE, 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" ); @@ -122,20 +129,23 @@ public: } return true; + } bool moveFromGPU() { + cl_int err; if (m_CPUBuffer->size() > 0) { if (m_onGPU && !m_readOnlyOnGPU) { - err = m_queue.enqueueReadBuffer( + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + err = 
clEnqueueReadBuffer(m_cqCommandQue, m_buffer, CL_TRUE, 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { @@ -151,16 +161,17 @@ public: bool copyFromGPU() { + cl_int err; + size_t size = m_CPUBuffer->size() * sizeof(ElementType); if (m_CPUBuffer->size() > 0) { if (m_onGPU && !m_readOnlyOnGPU) { - err = m_queue.enqueueReadBuffer( + err = clEnqueueReadBuffer(m_cqCommandQue, m_buffer, CL_TRUE, - 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + 0,size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h index 6c270c5b5..cef924f6f 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h @@ -13,8 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. 
*/ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H @@ -25,7 +25,9 @@ class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData { public: bool m_onGPU; - cl::CommandQueue m_queue; + + cl_command_queue m_cqCommandQue; + btOpenCLBuffer m_clLinks; btOpenCLBuffer m_clLinkStrength; @@ -36,6 +38,24 @@ public: btOpenCLBuffer m_clLinksRestLength; btOpenCLBuffer m_clLinksMaterialLinearStiffnessCoefficient; + struct BatchPair + { + int start; + int length; + + BatchPair() : + start(0), + length(0) + { + } + + BatchPair( int s, int l ) : + start( s ), + length( l ) + { + } + }; + /** * Link addressing information for each cloth. * Allows link locations to be computed independently of data batching. @@ -45,9 +65,9 @@ public: /** * Start and length values for computation batches over link data. 
*/ - btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; + btAlignedObjectArray< BatchPair > m_batchStartLengths; - btSoftBodyLinkDataOpenCL(cl::CommandQueue queue); + btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx); virtual ~btSoftBodyLinkDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h index 4bc9215ea..e1094e38a 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h @@ -14,8 +14,8 @@ subject to the following restrictions: */ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H @@ -26,7 +26,7 @@ class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData { public: bool m_onGPU; - cl::CommandQueue m_queue; + cl_command_queue m_queue; btOpenCLBuffer m_clVertexIndices; btOpenCLBuffer m_clArea; @@ -41,10 +41,20 @@ public: /** * Start and length values for computation batches over link data. 
*/ - btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; + struct btSomePair + { + btSomePair() {} + btSomePair(int f,int s) + :first(f),second(s) + { + } + int first; + int second; + }; + btAlignedObjectArray< btSomePair > m_batchStartLengths; public: - btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ); + btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx ); virtual ~btSoftBodyTriangleDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h index 8f65c9de4..24997e726 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h @@ -13,8 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H #define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H @@ -24,7 +24,7 @@ class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData { protected: bool m_onGPU; - cl::CommandQueue m_queue; + cl_command_queue m_queue; public: btOpenCLBuffer m_clClothIdentifier; @@ -37,7 +37,7 @@ public: btOpenCLBuffer m_clVertexArea; btOpenCLBuffer m_clVertexTriangleCount; public: - btSoftBodyVertexDataOpenCL( cl::CommandQueue queue); + btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx); virtual ~btSoftBodyVertexDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp index 
31b52f679..8198a12d3 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp @@ -16,10 +16,18 @@ subject to the following restrictions: #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" #include "vectormath/vmInclude.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolver_OpenCL.h" -#include "BulletSoftBody/VertexBuffers/btSoftBodySolverVertexBuffer.h" +#include //@todo: remove the debugging printf at some stage +#include "btSoftBodySolver_OpenCL.h" +#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" #include "BulletSoftBody/btSoftBody.h" + static const size_t workGroupSize = 128; + + +//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it + +#if (0)//CL_VERSION_1_1 == 1) + //OpenCL 1.1 kernels use float3 #define MSTRINGIFY(A) #A static char* PrepareLinksCLString = #include "OpenCLC/PrepareLinks.cl" @@ -41,19 +49,43 @@ static char* UpdateNormalsCLString = #include "OpenCLC/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC/VSolveLinks.cl" +#else +////OpenCL 1.0 kernels don't use float3 +#define MSTRINGIFY(A) #A +static char* PrepareLinksCLString = +#include "OpenCLC10/PrepareLinks.cl" +static char* UpdatePositionsFromVelocitiesCLString = +#include "OpenCLC10/UpdatePositionsFromVelocities.cl" +static char* SolvePositionsCLString = +#include "OpenCLC10/SolvePositions.cl" +static char* UpdateNodesCLString = +#include "OpenCLC10/UpdateNodes.cl" +static char* UpdatePositionsCLString = +#include "OpenCLC10/UpdatePositions.cl" +static char* UpdateConstantsCLString = +#include "OpenCLC10/UpdateConstants.cl" +static char* IntegrateCLString = +#include "OpenCLC10/Integrate.cl" +static char* ApplyForcesCLString = +#include "OpenCLC10/ApplyForces.cl" +static char* UpdateNormalsCLString = +#include "OpenCLC10/UpdateNormals.cl" +static char* VSolveLinksCLString = +#include "OpenCLC10/VSolveLinks.cl" 
+#endif //CL_VERSION_1_1 -btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl::CommandQueue queue) : +btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) : m_queue(queue), - m_clClothIdentifier( queue, &m_clothIdentifier, false ), - m_clVertexPosition( queue, &m_vertexPosition, false ), - m_clVertexPreviousPosition( queue, &m_vertexPreviousPosition, false ), - m_clVertexVelocity( queue, &m_vertexVelocity, false ), - m_clVertexForceAccumulator( queue, &m_vertexForceAccumulator, false ), - m_clVertexNormal( queue, &m_vertexNormal, false ), - m_clVertexInverseMass( queue, &m_vertexInverseMass, false ), - m_clVertexArea( queue, &m_vertexArea, false ), - m_clVertexTriangleCount( queue, &m_vertexTriangleCount, false ) + m_clClothIdentifier( queue, ctx, &m_clothIdentifier, false ), + m_clVertexPosition( queue, ctx, &m_vertexPosition, false ), + m_clVertexPreviousPosition( queue, ctx, &m_vertexPreviousPosition, false ), + m_clVertexVelocity( queue, ctx, &m_vertexVelocity, false ), + m_clVertexForceAccumulator( queue, ctx, &m_vertexForceAccumulator, false ), + m_clVertexNormal( queue, ctx, &m_vertexNormal, false ), + m_clVertexInverseMass( queue, ctx, &m_vertexInverseMass, false ), + m_clVertexArea( queue, ctx, &m_vertexArea, false ), + m_clVertexTriangleCount( queue, ctx, &m_vertexTriangleCount, false ) { } @@ -108,16 +140,16 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator() -btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl::CommandQueue queue) : - m_queue(queue), - m_clLinks( queue, &m_links, false ), - m_clLinkStrength( queue, &m_linkStrength, false ), - m_clLinksMassLSC( queue, &m_linksMassLSC, false ), - m_clLinksRestLengthSquared( queue, &m_linksRestLengthSquared, false ), - m_clLinksCLength( queue, &m_linksCLength, false ), - m_clLinksLengthRatio( queue, &m_linksLengthRatio, false ), - m_clLinksRestLength( queue, &m_linksRestLength, false ), - m_clLinksMaterialLinearStiffnessCoefficient( queue, 
&m_linksMaterialLinearStiffnessCoefficient, false ) +btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx) +:m_cqCommandQue(queue), + m_clLinks( queue, ctx, &m_links, false ), + m_clLinkStrength( queue, ctx, &m_linkStrength, false ), + m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ), + m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ), + m_clLinksCLength( queue, ctx, &m_linksCLength, false ), + m_clLinksLengthRatio( queue, ctx, &m_linksLengthRatio, false ), + m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ), + m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false ) { } @@ -272,13 +304,13 @@ void btSoftBodyLinkDataOpenCL::generateBatches() if( m_batchStartLengths.size() > 0 ) { m_batchStartLengths.resize(batchCounts.size()); - m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); + m_batchStartLengths[0] = BatchPair(0, 0); int sum = 0; for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex ) { - m_batchStartLengths[batchIndex].first = sum; - m_batchStartLengths[batchIndex].second = batchCounts[batchIndex]; + m_batchStartLengths[batchIndex].start = sum; + m_batchStartLengths[batchIndex].length = batchCounts[batchIndex]; sum += batchCounts[batchIndex]; } } @@ -313,7 +345,7 @@ void btSoftBodyLinkDataOpenCL::generateBatches() // next element in that batch, incrementing the batch counter // afterwards int batch = batchValues[linkIndex]; - int newLocation = m_batchStartLengths[batch].first + batchCounts[batch]; + int newLocation = m_batchStartLengths[batch].start + batchCounts[batch]; batchCounts[batch] = batchCounts[batch] + 1; m_links[newLocation] = m_links_Backup[linkLocation]; @@ -336,11 +368,11 @@ void btSoftBodyLinkDataOpenCL::generateBatches() -btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ) : +btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl_command_queue queue , 
cl_context ctx) : m_queue( queue ), - m_clVertexIndices( queue, &m_vertexIndices, false ), - m_clArea( queue, &m_area, false ), - m_clNormal( queue, &m_normal, false ) + m_clVertexIndices( queue, ctx, &m_vertexIndices, false ), + m_clArea( queue, ctx, &m_area, false ), + m_clNormal( queue, ctx, &m_normal, false ) { } @@ -493,7 +525,7 @@ void btSoftBodyTriangleDataOpenCL::generateBatches() m_batchStartLengths.resize(batchCounts.size()); - m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); + m_batchStartLengths[0] = btSomePair(0,0); int sum = 0; @@ -547,18 +579,19 @@ void btSoftBodyTriangleDataOpenCL::generateBatches() -btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(const cl::CommandQueue &queue) : - m_linkData(queue), - m_vertexData(queue), - m_triangleData(queue), - m_clPerClothAcceleration(queue, &m_perClothAcceleration, true ), - m_clPerClothWindVelocity(queue, &m_perClothWindVelocity, true ), - m_clPerClothDampingFactor(queue, &m_perClothDampingFactor, true ), - m_clPerClothVelocityCorrectionCoefficient(queue, &m_perClothVelocityCorrectionCoefficient, true ), - m_clPerClothLiftFactor(queue, &m_perClothLiftFactor, true ), - m_clPerClothDragFactor(queue, &m_perClothDragFactor, true ), - m_clPerClothMediumDensity(queue, &m_perClothMediumDensity, true ), - m_queue( queue ) +btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) : + m_linkData(queue, ctx), + m_vertexData(queue, ctx), + m_triangleData(queue, ctx), + m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ), + m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ), + m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ), + m_clPerClothVelocityCorrectionCoefficient(queue, ctx,&m_perClothVelocityCorrectionCoefficient, true ), + m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ), + m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ), + m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true 
), + m_cqCommandQue( queue ), + m_cxMainContext(ctx) { // Initial we will clearly need to update solver constants // For now this is global for the cloths linked with this solver - we should probably make this body specific @@ -590,7 +623,7 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof using Vectormath::Aos::Point3; // Create SoftBody that will store the information within the solver - btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + btOpenCLAcceleratedSoftBodyInterface *newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody ); m_softBodySet.push_back( newSoftBody ); m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); @@ -712,51 +745,58 @@ bool btOpenCLSoftBodySolver::checkInitialized() void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices ) { - resetNormalsAndAreasKernel.kernel.setArg(0, numVertices); - resetNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexNormal.getBuffer()); - resetNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexArea.getBuffer()); + cl_int ciErrNum; + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 0, sizeof(numVertices), (void*)&numVertices); //oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 1, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexNormal.m_buffer);//oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 2, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexArea.m_buffer); //oclCHECKERROR(ciErrNum, CL_SUCCESS); + size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, resetNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 ); - int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); - cl_int err = 
m_queue.enqueueNDRangeKernel(resetNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" ); + btAssert( 0 && "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" ); } + } void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) { - normalizeNormalsAndAreasKernel.kernel.setArg(0, numVertices); - normalizeNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexTriangleCount.getBuffer()); - normalizeNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexNormal.getBuffer()); - normalizeNormalsAndAreasKernel.kernel.setArg(3, m_vertexData.m_clVertexArea.getBuffer()); - int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(normalizeNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexTriangleCount.m_buffer); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, normalizeNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); } + } void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int 
numTriangles ) { - updateSoftBodiesKernel.kernel.setArg(0, firstTriangle); - updateSoftBodiesKernel.kernel.setArg(1, numTriangles); - updateSoftBodiesKernel.kernel.setArg(2, m_triangleData.m_clVertexIndices.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(6, m_triangleData.m_clNormal.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(7, m_triangleData.m_clArea.getBuffer()); + cl_int ciErrNum; + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 1, sizeof(int), &numTriangles); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 2, sizeof(cl_mem), &m_triangleData.m_clVertexIndices.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 6, sizeof(cl_mem), &m_triangleData.m_clNormal.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 7, sizeof(cl_mem), &m_triangleData.m_clArea.m_buffer); - int numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateSoftBodiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + size_t numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, updateSoftBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum 
!= CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); } + } void btOpenCLSoftBodySolver::updateSoftBodies() @@ -807,6 +847,7 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath void btOpenCLSoftBodySolver::applyForces( float solverdt ) { + // Ensure data is on accelerator m_vertexData.moveToAccelerator(); m_clPerClothAcceleration.moveToGPU(); @@ -815,85 +856,30 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) m_clPerClothMediumDensity.moveToGPU(); m_clPerClothWindVelocity.moveToGPU(); - cl_int err; - err = applyForcesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - if( err != CL_SUCCESS ) + cl_int ciErrNum ; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(applyForcesKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(applyForcesKernel, 1, sizeof(float), &solverdt); + float fl = FLT_EPSILON; + ciErrNum = clSetKernelArg(applyForcesKernel, 2, sizeof(float), &fl); + ciErrNum = clSetKernelArg(applyForcesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 7, sizeof(cl_mem), &m_clPerClothLiftFactor.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 8 ,sizeof(cl_mem), &m_clPerClothDragFactor.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 9, sizeof(cl_mem), &m_clPerClothWindVelocity.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,10, sizeof(cl_mem), &m_clPerClothAcceleration.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,11, sizeof(cl_mem), 
&m_clPerClothMediumDensity.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,applyForcesKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(1, solverdt); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(2, FLT_EPSILON); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(3, m_vertexData.m_clClothIdentifier.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(6, m_vertexData.m_clVertexInverseMass.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(7, m_clPerClothLiftFactor.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(8, m_clPerClothDragFactor.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(9, 
m_clPerClothWindVelocity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(10, m_clPerClothAcceleration.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(11, m_clPerClothMediumDensity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(12, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(13, m_vertexData.m_clVertexVelocity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(applyForcesKernel)"); } - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - - err = m_queue.enqueueNDRangeKernel(applyForcesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } } /** @@ -901,22 +887,26 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) */ void btOpenCLSoftBodySolver::integrate( float solverdt ) { + + // Ensure data is on accelerator m_vertexData.moveToAccelerator(); - integrateKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - integrateKernel.kernel.setArg(1, solverdt); - integrateKernel.kernel.setArg(2, m_vertexData.m_clVertexInverseMass.getBuffer()); - integrateKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer()); - integrateKernel.kernel.setArg(4, m_vertexData.m_clVertexVelocity.getBuffer()); - integrateKernel.kernel.setArg(5, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - integrateKernel.kernel.setArg(6, m_vertexData.m_clVertexForceAccumulator.getBuffer()); + 
cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(integrateKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(integrateKernel, 1, sizeof(float), &solverdt); + ciErrNum = clSetKernelArg(integrateKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(integrateKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,integrateKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(integrateKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(integrateKernel)"); } } @@ -935,6 +925,7 @@ float btOpenCLSoftBodySolver::computeTriangleArea( void btOpenCLSoftBodySolver::updateConstants( float timeStep ) { + using namespace Vectormath::Aos; if( m_updateSolverConstants ) @@ -959,10 +950,12 @@ void btOpenCLSoftBodySolver::updateConstants( float timeStep ) m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; } } + } void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) { + using Vectormath::Aos::Vector3; using Vectormath::Aos::Point3; using Vectormath::Aos::lengthSqr; @@ -988,33 +981,34 @@ void 
btOpenCLSoftBodySolver::solveConstraints( float solverdt ) - // Prepare anchors - /*for(i=0,ni=m_anchors.size();igetWorldTransform().getBasis()*a.m_local; - a.m_c0 = ImpulseMatrix( m_sst.sdt, - a.m_node->m_im, - a.m_body->getInvMass(), - a.m_body->getInvInertiaTensorWorld(), - ra); - a.m_c1 = ra; - a.m_c2 = m_sst.sdt*a.m_node->m_im; - a.m_body->activate(); - }*/ + for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) + { + int startLink = m_linkData.m_batchStartLengths[i].start; + int numLinks = m_linkData.m_batchStartLengths[i].length; - // Really want to combine these into a single loop, don't we? No update in the middle? - - // TODO: Double check what kst is meant to mean - passed in as 1 in the bullet code + solveLinksForVelocity( startLink, numLinks, kst ); + } + } + // Compute new positions from velocity + // Also update the previous position so that our position computation is now based on the new position from the velocity solution + // rather than based directly on the original positions + if( m_numberOfVelocityIterations > 0 ) + { + updateVelocitiesFromPositionsWithVelocities( 1.f/solverdt ); + } else { + updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + } // Solve drift for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) { for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) { - int startLink = m_linkData.m_batchStartLengths[i].first; - int numLinks = m_linkData.m_batchStartLengths[i].second; + int startLink = m_linkData.m_batchStartLengths[i].start; + int numLinks = m_linkData.m_batchStartLengths[i].length; solveLinksForPosition( startLink, numLinks, kst, ti ); } @@ -1023,6 +1017,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + } @@ -1030,96 +1025,136 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) // Kernel dispatches void btOpenCLSoftBodySolver::prepareLinks() { - 
prepareLinksKernel.kernel.setArg(0, m_linkData.getNumLinks()); - prepareLinksKernel.kernel.setArg(1, m_linkData.m_clLinks.getBuffer()); - prepareLinksKernel.kernel.setArg(2, m_linkData.m_clLinksMassLSC.getBuffer()); - prepareLinksKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - prepareLinksKernel.kernel.setArg(4, m_linkData.m_clLinksLengthRatio.getBuffer()); - prepareLinksKernel.kernel.setArg(5, m_linkData.m_clLinksCLength.getBuffer()); - int numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(prepareLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numLinks = m_linkData.getNumLinks(); + ciErrNum = clSetKernelArg(prepareLinksKernel,0, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(prepareLinksKernel,1, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,2, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer); + + size_t numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,prepareLinksKernel, 1 , NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(prepareLinksKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(prepareLinksKernel)"); } + } void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) { - updatePositionsFromVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - 
updatePositionsFromVelocitiesKernel.kernel.setArg(1, solverdt); - updatePositionsFromVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexVelocity.getBuffer()); - updatePositionsFromVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - updatePositionsFromVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clVertexPosition.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,1, sizeof(float), &solverdt); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,4, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updatePositionsFromVelocitiesKernel, 1, NULL, &numWorkItems,&workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)"); } + } void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti ) { - solvePositionsFromLinksKernel.kernel.setArg(0, startLink); - solvePositionsFromLinksKernel.kernel.setArg(1, numLinks); - 
solvePositionsFromLinksKernel.kernel.setArg(2, kst); - solvePositionsFromLinksKernel.kernel.setArg(3, ti); - solvePositionsFromLinksKernel.kernel.setArg(4, m_linkData.m_clLinks.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(5, m_linkData.m_clLinksMassLSC.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(6, m_linkData.m_clLinksRestLengthSquared.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(7, m_vertexData.m_clVertexInverseMass.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(8, m_vertexData.m_clVertexPosition.getBuffer()); - int numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(solvePositionsFromLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,0, sizeof(int), &startLink); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,1, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,2, sizeof(float), &kst); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,3, sizeof(float), &ti); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + + size_t numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); + ciErrNum = 
clEnqueueNDRangeKernel(m_cqCommandQue,solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0); + if( ciErrNum!= CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(solvePositionsFromLinksKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(solvePositionsFromLinksKernel)"); } + } // solveLinksForPosition +void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst ) +{ + + cl_int ciErrNum; + ciErrNum = clSetKernelArg(vSolveLinksKernel, 0, sizeof(int), &startLink); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 1, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 2, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 3, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 4, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + + size_t numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,vSolveLinksKernel,1,NULL,&numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) + { + btAssert( 0 && "enqueueNDRangeKernel(vSolveLinksKernel)"); + } + +} + void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) { - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(1, isolverdt); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - 
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(5, m_clPerClothVelocityCorrectionCoefficient.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(6, m_clPerClothDampingFactor.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexVelocity.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(8, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 1, sizeof(float), &isolverdt); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 5, sizeof(cl_mem), &m_clPerClothVelocityCorrectionCoefficient.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 6, sizeof(cl_mem), &m_clPerClothDampingFactor.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 7, sizeof(cl_mem), 
&m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)"); } + } // updateVelocitiesFromPositionsWithVelocities void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) { - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(1, isolverdt); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(5, m_clPerClothDampingFactor.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(6, m_vertexData.m_clVertexVelocity.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), 
cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, sizeof(float), &isolverdt); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 4, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 6, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 7, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)"); } + } // updateVelocitiesFromPositionsWithoutVelocities // End kernel dispatches @@ -1133,15 +1168,20 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons // and use them together on a single kernel call if possible by setting up a // per-cloth target buffer array 
for the copy kernel. - btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + btOpenCLAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + const int firstVertex = currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) { - const int firstVertex = currentCloth->getFirstVertex(); - const int lastVertex = firstVertex + currentCloth->getNumVertices(); const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); float *basePointer = cpuVertexBuffer->getBasePointer(); + m_vertexData.m_clVertexPosition.copyFromGPU(); + m_vertexData.m_clVertexNormal.copyFromGPU(); + if( vertexBuffer->hasVertexPositions() ) { const int vertexOffset = cpuVertexBuffer->getVertexOffset(); @@ -1173,43 +1213,46 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons } } } + } // btCPUSoftBodySolver::outputToVertexBuffers -btOpenCLSoftBodySolver::KernelDesc btOpenCLSoftBodySolver::compileCLKernelFromString( const char *shaderString, const char *shaderName ) +cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelSource, const char* kernelName ) { - cl_int err; + printf("compiling kernalName: %s ",kernelName); + cl_kernel kernel; + cl_int ciErrNum; + size_t program_length = strlen(kernelSource); - context = m_queue.getInfo(); - device = m_queue.getInfo(); - std::vector< cl::Device > devices; - devices.push_back( device ); + cl_program m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&kernelSource, &program_length, &ciErrNum); +// oclCHECKERROR(ciErrNum, CL_SUCCESS); + + // Build the program with 'mad' Optimization option +#ifdef MAC + char* flags = "-cl-mad-enable -DMAC -DGUID_ARG"; +#else + const char* flags = "-DGUID_ARG="; +#endif + ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, flags, 
NULL, NULL); + if (ciErrNum != CL_SUCCESS) + { + printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__); + btAssert(0); + exit(0); + } + + // Create the kernel + kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum); + if (ciErrNum != CL_SUCCESS) + { + printf("Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__); + btAssert(0); + exit(0); + } - cl::Program::Sources source(1, std::make_pair(shaderString, strlen(shaderString) + 1)); - cl::Program program(context, source, &err); - if( err != CL_SUCCESS ) - { - btAssert( "program" ); - } - err = program.build(devices); - if (err != CL_SUCCESS) { - //std::string str; - //str = program.getBuildInfo(devices[0]); - //std::cout << "Program Info: " << str; - if( err != CL_SUCCESS ) - { - btAssert( "Program::build()" ); - } - } - cl::Kernel kernel(program, shaderName, &err); - if( err != CL_SUCCESS ) - { - btAssert( "kernel" ); - } + printf("ready. \n"); + return kernel; - KernelDesc descriptor; - descriptor.kernel = kernel; - return descriptor; } void btOpenCLSoftBodySolver::predictMotion( float timeStep ) @@ -1234,11 +1277,11 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep ) -btOpenCLSoftBodySolver::btAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +btOpenCLAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) { for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + btOpenCLAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; if( softBodyInterface->getSoftBody() == softBody ) return softBodyInterface; } @@ -1273,4 +1316,4 @@ bool btOpenCLSoftBodySolver::buildShaders() m_shadersInitialized = true; return returnVal; -} \ No newline at end of file +} diff --git 
a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h index 84d208488..b023d475c 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h @@ -16,204 +16,165 @@ subject to the following restrictions: #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H #define BT_SOFT_BODY_SOLVER_OPENCL_H +#include "stddef.h" //for size_t #include "vectormath/vmInclude.h" + #include "BulletSoftBody/btSoftBodySolvers.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h" +#include "btSoftBodySolverBuffer_OpenCL.h" +#include "btSoftBodySolverLinkData_OpenCL.h" +#include "btSoftBodySolverVertexData_OpenCL.h" +#include "btSoftBodySolverTriangleData_OpenCL.h" + + +/** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. 
+ */ +class btOpenCLAcceleratedSoftBodyInterface +{ +protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + +public: + btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { + return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + // TODO: All of these set functions will have to do checks and + // update the world because restructuring of the arrays will be necessary + // Reasonable use of "friend"? 
+ void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + +}; class btOpenCLSoftBodySolver : public btSoftBodySolver { private: - /** - * SoftBody class to maintain information about a soft body instance - * within a solver. - * This data addresses the main solver arrays. 
- */ - class btAcceleratedSoftBodyInterface - { - protected: - /** Current number of vertices that are part of this cloth */ - int m_numVertices; - /** Maximum number of vertices allocated to be part of this cloth */ - int m_maxVertices; - /** Current number of triangles that are part of this cloth */ - int m_numTriangles; - /** Maximum number of triangles allocated to be part of this cloth */ - int m_maxTriangles; - /** Index of first vertex in the world allocated to this cloth */ - int m_firstVertex; - /** Index of first triangle in the world allocated to this cloth */ - int m_firstTriangle; - /** Index of first link in the world allocated to this cloth */ - int m_firstLink; - /** Maximum number of links allocated to this cloth */ - int m_maxLinks; - /** Current number of links allocated to this cloth */ - int m_numLinks; - - /** The actual soft body this data represents */ - btSoftBody *m_softBody; - - - public: - btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : - m_softBody( softBody ) - { - m_numVertices = 0; - m_maxVertices = 0; - m_numTriangles = 0; - m_maxTriangles = 0; - m_firstVertex = 0; - m_firstTriangle = 0; - m_firstLink = 0; - m_maxLinks = 0; - m_numLinks = 0; - } - int getNumVertices() - { - return m_numVertices; - } - - int getNumTriangles() - { - return m_numTriangles; - } - - int getMaxVertices() - { - return m_maxVertices; - } - - int getMaxTriangles() - { - return m_maxTriangles; - } - - int getFirstVertex() - { - return m_firstVertex; - } - - int getFirstTriangle() - { - return m_firstTriangle; - } - - // TODO: All of these set functions will have to do checks and - // update the world because restructuring of the arrays will be necessary - // Reasonable use of "friend"? 
- void setNumVertices( int numVertices ) - { - m_numVertices = numVertices; - } - - void setNumTriangles( int numTriangles ) - { - m_numTriangles = numTriangles; - } - - void setMaxVertices( int maxVertices ) - { - m_maxVertices = maxVertices; - } - - void setMaxTriangles( int maxTriangles ) - { - m_maxTriangles = maxTriangles; - } - - void setFirstVertex( int firstVertex ) - { - m_firstVertex = firstVertex; - } - - void setFirstTriangle( int firstTriangle ) - { - m_firstTriangle = firstTriangle; - } - - void setMaxLinks( int maxLinks ) - { - m_maxLinks = maxLinks; - } - - void setNumLinks( int numLinks ) - { - m_numLinks = numLinks; - } - - void setFirstLink( int firstLink ) - { - m_firstLink = firstLink; - } - - int getMaxLinks() - { - return m_maxLinks; - } - - int getNumLinks() - { - return m_numLinks; - } - - int getFirstLink() - { - return m_firstLink; - } - - btSoftBody* getSoftBody() - { - return m_softBody; - } - - #if 0 - void setAcceleration( Vectormath::Aos::Vector3 acceleration ) - { - m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); - } - - void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) - { - m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); - } - - /** - * Set the density of the air in which the cloth is situated. - */ - void setAirDensity( btScalar density ) - { - m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); - } - - /** - * Add a collision object to this soft body. - */ - void addCollisionObject( btCollisionObject *collisionObject ) - { - m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); - } - #endif - }; - - - class KernelDesc - { - protected: - public: - cl::Kernel kernel; - - KernelDesc() - { - } - - virtual ~KernelDesc() - { - } - }; btSoftBodyLinkDataOpenCL m_linkData; btSoftBodyVertexDataOpenCL m_vertexData; @@ -228,7 +189,7 @@ private: * Cloths owned by this solver. 
* Only our cloths are in this array. */ - btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet; /** Acceleration value to be applied to all non-static vertices in the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver. @@ -262,37 +223,34 @@ private: btAlignedObjectArray< float > m_perClothMediumDensity; btOpenCLBuffer m_clPerClothMediumDensity; - KernelDesc prepareLinksKernel; - KernelDesc solvePositionsFromLinksKernel; - KernelDesc updateConstantsKernel; - KernelDesc integrateKernel; - KernelDesc addVelocityKernel; - KernelDesc updatePositionsFromVelocitiesKernel; - KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel; - KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel; - KernelDesc vSolveLinksKernel; - KernelDesc resetNormalsAndAreasKernel; - KernelDesc normalizeNormalsAndAreasKernel; - KernelDesc updateSoftBodiesKernel; - KernelDesc outputToVertexArrayWithNormalsKernel; - KernelDesc outputToVertexArrayWithoutNormalsKernel; + cl_kernel prepareLinksKernel; + cl_kernel solvePositionsFromLinksKernel; + cl_kernel updateConstantsKernel; + cl_kernel integrateKernel; + cl_kernel addVelocityKernel; + cl_kernel updatePositionsFromVelocitiesKernel; + cl_kernel updateVelocitiesFromPositionsWithoutVelocitiesKernel; + cl_kernel updateVelocitiesFromPositionsWithVelocitiesKernel; + cl_kernel vSolveLinksKernel; + cl_kernel resetNormalsAndAreasKernel; + cl_kernel normalizeNormalsAndAreasKernel; + cl_kernel updateSoftBodiesKernel; + cl_kernel outputToVertexArrayWithNormalsKernel; + cl_kernel outputToVertexArrayWithoutNormalsKernel; - KernelDesc outputToVertexArrayKernel; - KernelDesc applyForcesKernel; - KernelDesc collideSphereKernel; - KernelDesc collideCylinderKernel; + cl_kernel outputToVertexArrayKernel; + cl_kernel applyForcesKernel; + cl_kernel collideSphereKernel; + cl_kernel collideCylinderKernel; - static const 
int workGroupSize = 128; - - cl::CommandQueue m_queue; - cl::Context context; - cl::Device device; + cl_command_queue m_cqCommandQue; + cl_context m_cxMainContext; /** - * Compile a compute shader kernel from a string and return the appropriate KernelDesc object. + * Compile a compute shader kernel from a string and return the appropriate cl_kernel object. */ - KernelDesc compileCLKernelFromString( const char *shaderString, const char *shaderName ); + cl_kernel compileCLKernelFromString( const char *shaderString, const char *shaderName ); bool buildShaders(); @@ -306,7 +264,7 @@ private: void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); - btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); virtual void applyForces( float solverdt ); @@ -342,7 +300,7 @@ private: public: - btOpenCLSoftBodySolver(const cl::CommandQueue &queue); + btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx); virtual ~btOpenCLSoftBodySolver(); @@ -371,4 +329,4 @@ public: virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer ); }; // btOpenCLSoftBodySolver -#endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H \ No newline at end of file +#endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H diff --git a/src/BulletSoftBody/btSoftBodySolvers.h b/src/BulletSoftBody/btSoftBodySolvers.h index 24a742e49..79a55f706 100644 --- a/src/BulletSoftBody/btSoftBodySolvers.h +++ b/src/BulletSoftBody/btSoftBodySolvers.h @@ -23,7 +23,6 @@ class btSoftBodyTriangleData; class btSoftBodyLinkData; class btSoftBodyVertexData; class btVertexBufferDescriptor; -class btAcceleratedSoftBodyInterface; class btCollisionObject; class btSoftBody; diff --git a/src/MiniCL/MiniCL.cpp b/src/MiniCL/MiniCL.cpp 
index 9cb1ca331..1e0823a1c 100644 --- a/src/MiniCL/MiniCL.cpp +++ b/src/MiniCL/MiniCL.cpp @@ -30,6 +30,7 @@ subject to the following restrictions: //#define DEBUG_MINICL_KERNELS 1 static char* spPlatformID = "MiniCL, SCEA"; +static char* spDriverVersion= "1.0"; CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs( cl_uint num_entries, @@ -91,23 +92,24 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( cl_device_info param_name , size_t param_value_size , void * param_value , - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { switch (param_name) { case CL_DEVICE_NAME: { - char deviceName[] = "CPU"; + char deviceName[] = "MiniCL CPU"; unsigned int nameLen = strlen(deviceName)+1; btAssert(param_value_size>strlen(deviceName)); if (nameLen < param_value_size) { - const char* cpuName = "CPU"; + const char* cpuName = "MiniCL CPU"; sprintf((char*)param_value,"%s",cpuName); } else { printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); + return CL_INVALID_VALUE; } break; } @@ -120,6 +122,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); + return CL_INVALID_VALUE; } break; } @@ -132,6 +135,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + return CL_INVALID_VALUE; } break; @@ -149,6 +153,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + return CL_INVALID_VALUE; } break; } @@ -158,6 +163,142 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( *clock_frequency = 3*1024; break; } + + case CL_DEVICE_VENDOR : + { + if(param_value_size < (strlen(spPlatformID) + 1)) + { + return CL_INVALID_VALUE; + } + strcpy((char*)param_value, spPlatformID); + if(param_value_size_ret != NULL) + { + 
*param_value_size_ret = strlen(spPlatformID) + 1; + } + break; + } + case CL_DRIVER_VERSION: + { + if(param_value_size < (strlen(spDriverVersion) + 1)) + { + return CL_INVALID_VALUE; + } + strcpy((char*)param_value, spDriverVersion); + if(param_value_size_ret != NULL) + { + *param_value_size_ret = strlen(spDriverVersion) + 1; + } + + break; + } + case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: + { + cl_uint* maxDimensions = (cl_uint*)param_value; + *maxDimensions = 1; + break; + } + case CL_DEVICE_MAX_WORK_GROUP_SIZE: + { + cl_uint* maxWorkGroupSize = (cl_uint*)param_value; + *maxWorkGroupSize = 128;//1; + break; + } + case CL_DEVICE_ADDRESS_BITS: + { + cl_uint* addressBits = (cl_uint*)param_value; + *addressBits= 32; //@todo: should this be 64 for 64bit builds? + break; + } + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: + { + cl_ulong* maxMemAlloc = (cl_ulong*)param_value; + *maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ? + break; + } + case CL_DEVICE_GLOBAL_MEM_SIZE: + { + cl_ulong* maxMemAlloc = (cl_ulong*)param_value; + *maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ? 
+ break; + } + + case CL_DEVICE_ERROR_CORRECTION_SUPPORT: + { + cl_bool* error_correction_support = (cl_bool*)param_value; + *error_correction_support = CL_FALSE; + break; + } + + case CL_DEVICE_LOCAL_MEM_TYPE: + { + cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value; + *local_mem_type = CL_GLOBAL; + break; + } + case CL_DEVICE_LOCAL_MEM_SIZE: + { + cl_ulong* localmem = (cl_ulong*) param_value; + *localmem = 32*1024; + break; + } + + case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: + { + cl_ulong* localmem = (cl_ulong*) param_value; + *localmem = 64*1024; + break; + } + case CL_DEVICE_QUEUE_PROPERTIES: + { + cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value; + memset(queueProp,0,param_value_size); + + break; + } + case CL_DEVICE_IMAGE_SUPPORT: + { + cl_bool* imageSupport = (cl_bool*) param_value; + *imageSupport = CL_FALSE; + break; + } + + case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: + case CL_DEVICE_MAX_READ_IMAGE_ARGS: + { + cl_uint* imageArgs = (cl_uint*) param_value; + *imageArgs = 0; + break; + } + case CL_DEVICE_IMAGE3D_MAX_DEPTH: + case CL_DEVICE_IMAGE3D_MAX_HEIGHT: + case CL_DEVICE_IMAGE2D_MAX_HEIGHT: + case CL_DEVICE_IMAGE3D_MAX_WIDTH: + case CL_DEVICE_IMAGE2D_MAX_WIDTH: + { + size_t* maxSize = (size_t*) param_value; + *maxSize = 0; + break; + } + + case CL_DEVICE_EXTENSIONS: + { + char* extensions = (char*) param_value; + *extensions = 0; + break; + } + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: + { + cl_uint* width = (cl_uint*) param_value; + *width = 1; + break; + } + default: { printf("error: unsupported param_name:%d\n",param_name); @@ -486,7 +627,7 @@ extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* co } CL_API_ENTRY cl_context CL_API_CALL 
clCreateContextFromType(cl_context_properties * /* properties */, - cl_device_type /* device_type */, + cl_device_type device_type , void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, void * /* user_data */, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 @@ -502,14 +643,18 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" }; -#ifdef DEBUG_MINICL_KERNELS - SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); - SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); -#else + btThreadSupportInterface* threadSupport = 0; + + if (device_type==CL_DEVICE_TYPE_DEBUG) + { + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + threadSupport = new SequentialThreadSupport(stc); + } else + { #if _WIN32 btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports); - Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( + threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( // "MiniCL", sUniqueThreadSupportName[sUniqueThreadSupportIndex++], processMiniCLTask, //processCollisionTask, @@ -518,10 +663,10 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie #else ///todo: add posix thread support for other platforms SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); - SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); + threadSupport = new SequentialThreadSupport(stc); #endif -#endif //DEBUG_MINICL_KERNELS + } MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); diff --git a/src/MiniCL/cl.h b/src/MiniCL/cl.h index 
b0cda4237..053491ee2 100644 --- a/src/MiniCL/cl.h +++ b/src/MiniCL/cl.h @@ -155,8 +155,10 @@ typedef struct _cl_image_format { #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_DEBUG (1 << 4) /* compared against device_type in clCreateContextFromType to pick SequentialThreadSupport */ #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + // cl_device_info #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 diff --git a/src/MiniCL/cl_MiniCL_Defs.h b/src/MiniCL/cl_MiniCL_Defs.h index ffdac1026..4a7a84526 100644 --- a/src/MiniCL/cl_MiniCL_Defs.h +++ b/src/MiniCL/cl_MiniCL_Defs.h @@ -140,6 +140,8 @@ static float4 operator+(const float4& a,const float4& b) return tmp; } + + static float4 operator-(const float4& a,const float4& b) { float4 tmp; @@ -159,6 +161,17 @@ static float4 operator*(float a,const float4& b) return tmp; } +static float4 operator/(const float4& b,float a) /* component-wise division of b by the scalar a; a is not checked for zero */ +{ + float4 tmp; + tmp.x = b.x/a; + tmp.y = b.y/a; + tmp.z = b.z/a; + tmp.w = b.w/a; + return tmp; +} + + static float dot(const float4&a ,const float4& b) { @@ -170,6 +183,22 @@ static float dot(const float4&a ,const float4& b) return tmp.x+tmp.y+tmp.z+tmp.w; } +static float length(const float4&a) /* square root of the sum of squares of x, y and z; the w component is not included. NOTE(review): dot() sums all four components, w included - confirm that omitting w here is intended */ +{ + float l = sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + return l; +} + +static float4 normalize(const float4&a) /* scales a by 1.f/length(a), so every component including w is scaled; there is no guard for a zero length */ +{ + float4 tmp; + float l = length(a); + tmp = 1.f/l*a; + return tmp; +} + + + static float4 cross(const float4&a ,const float4& b) { float4 tmp;