From 4f9b450200e34525ef02b47046923279104394b2 Mon Sep 17 00:00:00 2001 From: "erwin.coumans" Date: Sat, 14 Aug 2010 00:56:17 +0000 Subject: [PATCH] added OpenCL cloth demo, contributed by AMD. updated GpuSoftBodySolvers updated DirectCompute cloth demo --- Demos/CMakeLists.txt | 3 +- Demos/DX11ClothDemo/btDirectComputeSupport.h | 4 +- Demos/DX11ClothDemo/cap.h | 15 + Demos/DX11ClothDemo/cloth.h | 26 +- Demos/DX11ClothDemo/cloth_renderer.cpp | 20 +- Demos/DX11ClothDemo/cylinder.h | 15 + Demos/OpenCLClothDemo/AMD/CMakeLists.txt | 102 + Demos/OpenCLClothDemo/Apple/CMakeLists.txt | 60 + Demos/OpenCLClothDemo/CLClothDemo.sln | 20 + Demos/OpenCLClothDemo/CLClothDemo.vcproj | 233 +++ Demos/OpenCLClothDemo/CMakeLists.txt | 15 + Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt | 86 + Demos/OpenCLClothDemo/NVidia/CMakeLists.txt | 102 + Demos/OpenCLClothDemo/amdFlag.bmp | Bin 0 -> 1396038 bytes Demos/OpenCLClothDemo/atiFlag.bmp | Bin 0 -> 1396038 bytes Demos/OpenCLClothDemo/bmpLoader.cpp | 325 +++ Demos/OpenCLClothDemo/bmpLoader.h | 201 ++ Demos/OpenCLClothDemo/bmpLoader.hpp | 189 ++ Demos/OpenCLClothDemo/btOpenCLSupport.h | 84 + Demos/OpenCLClothDemo/cl_cloth_demo.cpp | 470 +++++ Demos/OpenCLClothDemo/cloth.h | 183 ++ Demos/OpenCLClothDemo/clstuff.cpp | 53 + Demos/OpenCLClothDemo/clstuff.h | 10 + Demos/OpenCLClothDemo/clstuff.hpp | 10 + Demos/OpenCLClothDemo/fragment.glsl | 7 + Demos/OpenCLClothDemo/gl_win.cpp | 272 +++ Demos/OpenCLClothDemo/gl_win.h | 49 + Demos/OpenCLClothDemo/gl_win.hpp | 34 + Demos/OpenCLClothDemo/shaders.cl | 535 +++++ Demos/OpenCLClothDemo/texture1.bmp | Bin 0 -> 786486 bytes Demos/OpenCLClothDemo/vertex.glsl | 7 + Demos/ParticlesOpenCL/AMD/CMakeLists.txt | 8 +- .../btParticlesDemoDynamicsWorld.cpp | 4 +- Demos/SharedOpenCL/btOclCommon.cpp | 2 +- Demos/SharedOpenCL/btOclUtils.cpp | 15 + Demos/SharedOpenCL/btOclUtils.h | 14 + Demos/VectorAdd_OpenCL/VectorAddKernels.cl | 9 - src/BulletMultiThreaded/CMakeLists.txt | 6 +- .../GpuSoftBodySolvers/CMakeLists.txt | 
22 +- .../GpuSoftBodySolvers/DX11/CMakeLists.txt | 4 + .../DX11/HLSL/SolvePositionsSIMDBatched.hlsl | 128 ++ .../btSoftBodySolverLinkData_DX11SIMDAware.h | 173 ++ .../DX11/btSoftBodySolver_DX11.cpp | 8 +- .../DX11/btSoftBodySolver_DX11.h | 354 ++-- .../DX11/btSoftBodySolver_DX11SIMDAware.cpp | 1793 +++++++++++++++++ .../DX11/btSoftBodySolver_DX11SIMDAware.h | 432 ++++ .../OpenCL/AMD/CMakeLists.txt | 82 + .../OpenCL/Apple/CMakeLists.txt | 73 + .../GpuSoftBodySolvers/OpenCL/CMakeLists.txt | 79 +- .../OpenCL/MiniCL/CMakeLists.txt | 75 + .../OpenCL/MiniCL/MiniCLTaskWrap.cpp | 40 + .../OpenCL/NVidia/CMakeLists.txt | 79 + .../OpenCL/OpenCLC10/ApplyForces.cl | 91 + .../OpenCL/OpenCLC10/Integrate.cl | 35 + .../OpenCL/OpenCLC10/PrepareLinks.cl | 41 + .../OpenCL/OpenCLC10/SolvePositions.cl | 57 + .../OpenCL/OpenCLC10/UpdateConstants.cl | 44 + .../OpenCL/OpenCLC10/UpdateNodes.cl | 39 + .../OpenCL/OpenCLC10/UpdateNormals.cl | 102 + .../OpenCL/OpenCLC10/UpdatePositions.cl | 34 + .../UpdatePositionsFromVelocities.cl | 28 + .../OpenCL/OpenCLC10/VSolveLinks.cl | 45 + .../OpenCL/btSoftBodySolverBuffer_OpenCL.h | 85 +- .../OpenCL/btSoftBodySolverLinkData_OpenCL.h | 30 +- .../btSoftBodySolverTriangleData_OpenCL.h | 20 +- .../btSoftBodySolverVertexData_OpenCL.h | 8 +- .../OpenCL/btSoftBodySolver_OpenCL.cpp | 581 +++--- .../OpenCL/btSoftBodySolver_OpenCL.h | 398 ++-- src/BulletSoftBody/btSoftBodySolvers.h | 1 - src/MiniCL/MiniCL.cpp | 167 +- src/MiniCL/cl.h | 2 + src/MiniCL/cl_MiniCL_Defs.h | 29 + 72 files changed, 7524 insertions(+), 843 deletions(-) create mode 100644 Demos/OpenCLClothDemo/AMD/CMakeLists.txt create mode 100644 Demos/OpenCLClothDemo/Apple/CMakeLists.txt create mode 100644 Demos/OpenCLClothDemo/CLClothDemo.sln create mode 100644 Demos/OpenCLClothDemo/CLClothDemo.vcproj create mode 100644 Demos/OpenCLClothDemo/CMakeLists.txt create mode 100644 Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt create mode 100644 Demos/OpenCLClothDemo/NVidia/CMakeLists.txt create mode 100644 
Demos/OpenCLClothDemo/amdFlag.bmp create mode 100644 Demos/OpenCLClothDemo/atiFlag.bmp create mode 100644 Demos/OpenCLClothDemo/bmpLoader.cpp create mode 100644 Demos/OpenCLClothDemo/bmpLoader.h create mode 100644 Demos/OpenCLClothDemo/bmpLoader.hpp create mode 100644 Demos/OpenCLClothDemo/btOpenCLSupport.h create mode 100644 Demos/OpenCLClothDemo/cl_cloth_demo.cpp create mode 100644 Demos/OpenCLClothDemo/cloth.h create mode 100644 Demos/OpenCLClothDemo/clstuff.cpp create mode 100644 Demos/OpenCLClothDemo/clstuff.h create mode 100644 Demos/OpenCLClothDemo/clstuff.hpp create mode 100644 Demos/OpenCLClothDemo/fragment.glsl create mode 100644 Demos/OpenCLClothDemo/gl_win.cpp create mode 100644 Demos/OpenCLClothDemo/gl_win.h create mode 100644 Demos/OpenCLClothDemo/gl_win.hpp create mode 100644 Demos/OpenCLClothDemo/shaders.cl create mode 100644 Demos/OpenCLClothDemo/texture1.bmp create mode 100644 Demos/OpenCLClothDemo/vertex.glsl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl 
create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl create mode 100644 src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl diff --git a/Demos/CMakeLists.txt b/Demos/CMakeLists.txt index 7d70282ca..35d90a508 100644 --- a/Demos/CMakeLists.txt +++ b/Demos/CMakeLists.txt @@ -14,7 +14,7 @@ IF(BUILD_CPU_DEMOS) CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo DoublePrecisionDemo ConcaveDemo CollisionDemo ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo - MultiMaterialDemo SerializeDemo InternalEdgeDemo + MultiMaterialDemo SerializeDemo InternalEdgeDemo ) ELSE() SET(SharedDemoSubdirs @@ -28,6 +28,7 @@ ENDIF() MultiThreadedDemo VectorAdd_OpenCL ParticlesOpenCL + OpenCLClothDemo ) ELSE (USE_GLUT) diff --git a/Demos/DX11ClothDemo/btDirectComputeSupport.h b/Demos/DX11ClothDemo/btDirectComputeSupport.h index 52843484a..69360865a 100644 --- a/Demos/DX11ClothDemo/btDirectComputeSupport.h +++ b/Demos/DX11ClothDemo/btDirectComputeSupport.h @@ -1,6 +1,6 @@ /* Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ +Copyright (c) 2010 Advanced Micro Devices This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages arising from the use of this software. @@ -13,6 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ + + #ifndef BT_DIRECT_COMPUTE_SUPPORT_HPP #define BT_DIRECT_COMPUTE_SUPPORT_HPP diff --git a/Demos/DX11ClothDemo/cap.h b/Demos/DX11ClothDemo/cap.h index 38cfae21b..e2d3d8e81 100644 --- a/Demos/DX11ClothDemo/cap.h +++ b/Demos/DX11ClothDemo/cap.h @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + class cap { diff --git a/Demos/DX11ClothDemo/cloth.h b/Demos/DX11ClothDemo/cloth.h index fd1983811..c130548c1 100644 --- a/Demos/DX11ClothDemo/cloth.h +++ b/Demos/DX11ClothDemo/cloth.h @@ -1,4 +1,22 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include +#include +#include class piece_of_cloth { @@ -171,7 +189,8 @@ public: pd3dImmediateContext->PSSetShaderResources(0,1,&texture2D_view); - pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart ); + //pd3dImmediateContext->DrawIndexed( (width*3*2+2 + height*width*3*2), 0, ( UINT )pSubset->VertexStart ); + pd3dImmediateContext->DrawIndexed( ((height-1)*(width-1)*3*2), 0, ( UINT )pSubset->VertexStart ); } SAFE_RELEASE(pd3dImmediateContext); @@ -246,7 +265,7 @@ public: //unsigned int indices[] = {0,1,2, 1,3,2}; - unsigned int* indices = new unsigned int[width*3*2+2 + height*width*3*2]; + unsigned int* indices = new unsigned int[(height-1)*(width-1)*3*2]; for(int y = 0; y < height-1; y++) { @@ -265,7 +284,8 @@ public: } } - bufferDesc.ByteWidth = sizeof(unsigned int)*(width*3*2+2 + height*width*3*2); + + bufferDesc.ByteWidth = sizeof(unsigned int)*((height-1)*(width-1)*3*2); bufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; InitData.pSysMem = indices; diff --git a/Demos/DX11ClothDemo/cloth_renderer.cpp b/Demos/DX11ClothDemo/cloth_renderer.cpp index 9d65257cb..a5c95e51c 100644 --- a/Demos/DX11ClothDemo/cloth_renderer.cpp +++ b/Demos/DX11ClothDemo/cloth_renderer.cpp @@ -32,18 +32,15 @@ class btDX11SIMDAwareSoftBodySolver; #include "BulletSoftBody/btSoftBodySolvers.h" #include 
"BulletSoftBody/btDefaultSoftBodySolver.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" -//#include "BulletSoftBody/Solvers/CPU/btAcceleratedSoftBody_CPUVertexSolver.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h" -//#include "BulletSoftBody/Solvers/DX11/btAcceleratedSoftBody_DX11SIMDAwareSolver.h" -//#include "BulletSoftBody/btAcceleratedSoftBody_DXVertexBuffers.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h" #include "BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h" -//#define USE_SIMDAWARE_SOLVER -#define USE_GPU_SOLVER -//#define USE_VERTEX_SOLVER +#define USE_SIMDAWARE_SOLVER +//#define USE_GPU_SOLVER #define USE_GPU_COPY -const int numFlags = 2; +const int numFlags = 5; const int clothWidth = 40; const int clothHeight = 60;//60; float _windAngle = 1.0;//0.4; @@ -206,6 +203,7 @@ btSoftRigidDynamicsWorld* m_dynamicsWorld; btDefaultSoftBodySolver *g_defaultSolver = NULL; btCPUSoftBodySolver *g_cpuSolver = NULL; btDX11SoftBodySolver *g_dx11Solver = NULL; +btDX11SIMDAwareSoftBodySolver *g_dx11SIMDSolver = NULL; btSoftBodySolver *g_solver = NULL; @@ -454,12 +452,17 @@ void initBullet(void) #ifdef USE_GPU_SOLVER g_dx11Solver = new btDX11SoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() ); g_solver = g_dx11Solver; +#else +#ifdef USE_SIMDAWARE_SOLVER + g_dx11SIMDSolver = new btDX11SIMDAwareSoftBodySolver( g_pd3dDevice, DXUTGetD3D11DeviceContext() ); + g_solver = g_dx11SIMDSolver; #else g_cpuSolver = new btCPUSoftBodySolver; g_solver = g_cpuSolver; //g_defaultSolver = new btDefaultSoftBodySolver; //g_solver = g_defaultSolver; #endif +#endif @@ -1260,6 +1263,9 @@ void CALLBACK OnD3D11DestroyDevice( void* pUserContext ) delete g_cpuSolver; if( g_dx11Solver ) delete g_dx11Solver; + if( g_dx11SIMDSolver ) + delete g_dx11SIMDSolver; + for(int i=0; i< m_collisionShapes.size(); i++) delete m_collisionShapes[i]; diff --git 
a/Demos/DX11ClothDemo/cylinder.h b/Demos/DX11ClothDemo/cylinder.h index a9c6edb7e..517fcdf51 100644 --- a/Demos/DX11ClothDemo/cylinder.h +++ b/Demos/DX11ClothDemo/cylinder.h @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + class cylinder { diff --git a/Demos/OpenCLClothDemo/AMD/CMakeLists.txt b/Demos/OpenCLClothDemo/AMD/CMakeLists.txt new file mode 100644 index 000000000..4140d59f4 --- /dev/null +++ b/Demos/OpenCLClothDemo/AMD/CMakeLists.txt @@ -0,0 +1,102 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + + +IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ENDIF() + + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_AMD + BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_ATISTREAMSDK_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_AMD + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + 
../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_AMD pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/Apple/CMakeLists.txt b/Demos/OpenCLClothDemo/Apple/CMakeLists.txt new file mode 100644 index 000000000..e89513c18 --- /dev/null +++ b/Demos/OpenCLClothDemo/Apple/CMakeLists.txt @@ -0,0 +1,60 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + + +IF (APPLE) + FIND_LIBRARY(OPENCL_LIBRARY OpenCL DOC "OpenCL lib for OSX") + FIND_PATH(OPENCL_INCLUDE_DIR OpenCL/cl.h DOC "Include 
for OpenCL on OSX") +ENDIF (APPLE) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_Apple + BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${OPENCL_LIBRARY} + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_Apple + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Apple pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/CLClothDemo.sln b/Demos/OpenCLClothDemo/CLClothDemo.sln new file mode 100644 index 000000000..48af26cde --- /dev/null +++ b/Demos/OpenCLClothDemo/CLClothDemo.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLClothDemo", "CLClothDemo.vcproj", "{A61906AF-B5DE-454E-99F6-B653C250D221}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.ActiveCfg = Debug|Win32 
+ {A61906AF-B5DE-454E-99F6-B653C250D221}.Debug|Win32.Build.0 = Debug|Win32 + {A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.ActiveCfg = Release|Win32 + {A61906AF-B5DE-454E-99F6-B653C250D221}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Demos/OpenCLClothDemo/CLClothDemo.vcproj b/Demos/OpenCLClothDemo/CLClothDemo.vcproj new file mode 100644 index 000000000..1023daf69 --- /dev/null +++ b/Demos/OpenCLClothDemo/CLClothDemo.vcproj @@ -0,0 +1,233 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Demos/OpenCLClothDemo/CMakeLists.txt b/Demos/OpenCLClothDemo/CMakeLists.txt new file mode 100644 index 000000000..1f378a3e1 --- /dev/null +++ b/Demos/OpenCLClothDemo/CMakeLists.txt @@ -0,0 +1,15 @@ +IF(BUILD_MINICL_OPENCL_DEMOS) + SUBDIRS( MiniCL ) +ENDIF() + +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() + +IF(BUILD_NVIDIA_OPENCL_DEMOS) + SUBDIRS(NVidia) +ENDIF() + +IF(APPLE) + SUBDIRS(Apple) +ENDIF() diff --git a/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt new file mode 100644 index 000000000..e6e216763 --- /dev/null +++ b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt @@ -0,0 +1,86 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +ADD_DEFINITIONS(-DUSE_MINICL) + +IF (WIN32) + IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) + ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) + ENDIF(CMAKE_CL_64) +ENDIF() + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_Mini + BulletSoftBodySolvers_CPU + MiniCL + BulletMultiThreaded + BulletSoftBody + BulletDynamics + 
BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_Mini + ../cl_cloth_demo.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + ${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp 
${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_Mini pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt b/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt new file mode 100644 index 000000000..d41b8f377 --- /dev/null +++ b/Demos/OpenCLClothDemo/NVidia/CMakeLists.txt @@ -0,0 +1,102 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + + + + +IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/x64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/Win32 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/x64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/Win32 ) + ENDIF(CMAKE_CL_64) +ENDIF() + + + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletSoftBodySolvers_OpenCL_NVidia + BulletSoftBodySolvers_CPU + BulletMultiThreaded + BulletSoftBody + BulletDynamics + BulletCollision + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_NVSDKCOMPUTE_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppOpenCLClothDemo_NVidia + ../cl_cloth_demo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + 
${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ../gl_win.cpp + ../clstuff.cpp + ../bmpLoader.cpp + ../bmpLoader.h + ../clstuff.h + ../gl_win.h + + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +ADD_CUSTOM_COMMAND( TARGET AppOpenCLClothDemo_NVidia POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/amdFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenCLClothDemo/atiFlag.bmp ${CMAKE_CURRENT_BINARY_DIR} + ) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppOpenCLClothDemo_NVidia pthread) +ENDIF(UNIX) + diff --git a/Demos/OpenCLClothDemo/amdFlag.bmp b/Demos/OpenCLClothDemo/amdFlag.bmp new file mode 100644 index 0000000000000000000000000000000000000000..dd1d394ec7ef603cd68b9956780c5430106dc9f8 GIT binary patch literal 1396038 zcmeI*39#jNSttCHeFGutt7wUZVu58b&MQ`FQR9jOF6B%yRceie0zyc-JDrWq5(C1^ z7Ls(*=}u=M3qg>Xw|GaQpcaC_C?au$aWHIw1VVri5X{m`db{U+&V7;}CwJpX)0r{P 
z{T$oF|D5}r|M@?k=j40-_kWiA)c^4G|L}iJW~ZNs|Nr6m|NTcyCYK&HnH({B% zd+)tBu8eEqls_HkSrgZfd(Y)ju-g4bS+us~r+M2YSI%D?1!H0UrXlmtu2dDQ;r*}=KyJq*Ye|m7= zbl;v8R-9Muzh`>yp6S8q&iJ=~Z`{w~TBp1B$B_5MwoMQ2nI4$U{A+vf-MRSVRafN) zZ3JX~n^At#Mk3-eMI(qZi{d<@I8Jff$dEYSyEk&-L7#H9xrPkVi1GPE-MxGFSn0Sv zZe_ecH#f`b6ei}A20p0U%2%8U)}QmK7Ggazr5vb|9<6vZ~c!~|F<*m z-WyL7`AoNP;Nbq*lj|y{#lWBLo9^Cw@a}j9j`ZKZ=Ro}L;`Mv}=}YhUmwRrHCyr@6 zj~tvHJTQA0IJFm$`RzwM8jSFXd>ctM_Q0bcGBEa6=Ob&}s<@C5ovWJPZy(pkk9O_a zwd7Zc2*&x%$PE~qF@M|DHY0AH>x_a}&H&HkUw)pOH$pzvKQ?uF@fz3c+!=etvs2u4 zG?mNaJhp#4$Hjfcdd82IEqv+BYma>PMU%~!ov`5zlhd|MPKo8qC!2Sy{O{DuCmSw$ z^aa=b`d#JdXyJCdizsj}#H`bqP7*82%Rp0-6O>WHEmCJvc zw;Fr$Q4o2W_defdwzk$?EbRN8*B^7rmLt#D@omrFG1<8F=+m|wv+t_dirVIP_9+dg3F*dti<~JMY8tvS3&pki><3E1XQAfqlPdxF& zIFHj2M;wvY9CzGtpZe6N*0}lFKZ*c+?sK0TD>?GWBXiBU+GCD6CNI9`HLv;SfBxrO z$xB}HlH7o-$%3PgJ~|fS`dC%uWHG=qQAg6g;~no9_n51lKhNS=b1p==H$s50F*3;Z~` zHoJKDEqA>8y8eWo0W|9qW(%_QyWew zXU2ZCyhie&gc8bi>x;Pu(#&b?ao~ zm6OxA9dYVKlTDXR&bV}yf0rEh{0k;$UwOpFD<&Ja9dW^b`S&~b-ZWj~Fnomk!T23y z=Y3BN2ln4~=U=|!nxA;?*2(58CY!fE>ZZ?rVBg(wT<5NMR#@Y8fp2WErH8*jJaixa zP3Yj82;}}}oZofVT_>M>a%OU-)`lJUZ+Le*^v8nUFQ{=PT&-~2Ky!+knp1-5f&e+axEbN<}zV*b-mmP7&r6-(m z+2qvoj@^9G(dR6Nf8_u2e;Y5E#4(>2?0D1}J0_=WpKQ2f^2*D9b~?TF8!KG0bm0CM zZD)sc?vB8J$z@Ocor@=DT{(HdmB&5%|2gtAU;N9(TkMJV@a$cX`b+qFUW|uy&&G7M ziokFG_HSp7McQZT=2JjM-yi?+ALkD6Dz{$w>bS|1pZw%l?ff%HtT1C(qdhDBJLj6%IiH-q z<*4VMKRN4)BR0hH)%H!(Y_q}L4JrIXMXYV-b%!^Ohbnywd z-u?CivzG%79NZhPvFq=RNPqb2e-_;f#2@P8`zNF*#%FQ5!BkD)M~ew#hkH9)0>HlhZGa<2jQTUU@`RuK4fO z|Me&KEZjETHI3u;2d4Ytvk?blzkRwZ4$()L$Fy#{^F6EzjdrV^i;6SUK}I#d|fcm__~O zOO83?qT|lodh9vdPTF+dai?E$)QbP&6@iyu`S0%9{i(RGd3KX#*J_zVVH34C(P4 zkqZwt^K(|QhoAADD;cYO=rTX=clmj|IWyj(`D?%SYnkoAnC<6tXf6+jCp_T^U;gr! 
zVmRzzk2SA&iU(5evBxJ4Ui6|DMau~?1mahx zFo}!#CfKvjK6^a&#`Z5>vvTIY;L0P;+IqqnmmIh8vdMEUjD7fT-*oZG8@3-Cuk)_> z@2stl-gM!K8!nza_rl5fZ+hCo^s5U8?z-ci55zJ2IHVJM_!0Q=yThqlCNH_>q|@SM zT*vdrJl5YrJS^8=)9bP05u4fkEw|hfqaUaFsLv#i5L@vyZW0S|tEid3CW9{55Z6bx zkNX|TA1V4^Ge5R@$xd=y6Kl?u#|pElxOjMv=jUoyJkK}eWb(%c!Q#+;I?NyDdc-3h z5m(01o=E-=fB3_(p0TO-AM9E=^JibbaPd)_w;sPCUdFxbF(3WnbvJ$SuRig`KfC4j z_pJEu=CA+J&0oI$3p?NW@vpowUY7mZZSg&9```Y-bK^zZIEEjGbYc&G7Wmg46L0T4 zeVfc*air_z%6Lq!msZzzpP4qfr=9Wp3%~FS8POxU<6>Tu@wNPXNy5j)xNY2I$@O`| z2)f&DyDi_4AFp7Jo&31w5Hf#!cSI~?)WzsykZp&)IR(8COl7citnW)7#?+{oeTA7IkrJKpx+x5V2rXYbP465r%-WE|4jynXf-&CT17b8KhKVjV8TgL55rt>qNR zH`T{uU?l%|3*!8h7V{7)|{E2cah7H`MHu<&+^lJWiGG%{`{<|pa0+w{$Si`a;0tJ#3sc; zem-|RLoKfM)2F`jhvJ=78CFF$reyaOlp@OMn&T{>}KXVdoMpMTlnf1zu> zdw%f~w6iZMn!V3{AwGN?>%RB;8~$LH?W_Is{`~q}dsW6mv{ysAc@P3Ged$Z%0gySL zahm6OQ2IxI^he`eI!}N4)8i;mzE>wM#%^}Jz5gjsc}jfASlo6zYvc_x;^O+aRR;Bn zTeDi`#|`7TBX)yh$3FM%0^=!iY;E)vr}&n+Fv%yf*yhFX-~a4apSbbr$!S}UnH|HAx7KVu`h}OB zAoJHi_M>l6ARda};u_ri;VbazPk%ZxYJT5-#8e!Mc%NN$j{8-7TkA%$RUvd~~7NCkS#qAm z`6QCdhj%~!^Pm5G-g}?&QxtB>Taew{P5W>~k)N*J>Xf@5$K^ zAH}`sIGI1jYT>X8#D5freVtbLa1n@5{l#DW#ra?ynIBii&U`eIsXJbQjcejo@$%)5 z{K${w4aYlGGT9=u<8xS%z~d#(Z+gj`F^K`Imn= zUlkoO9>-7STbOSxmUBx+aa=sSnI9|s-tYb1xT8F}HJ$`xYh(L=;0JzS*#^Yx5eGhW z%Vp0wKMuki_q+=yXJ0*;eUX}DJ1OP-ArKho{Z|>v6dTicr!mX;8%b3S7Y+aNKFs z#3{CR_SCa)@sHmgJn-SqULBvMJ>}xb3obu-b9@d<=8wmUg~Kfn56Hv4O6z+#2}EGV z{%7XWh^g3B&i(2$&pb1l$xzL%a)&>@U}ro{L~)$*wsEWYwt=sG?Q3fUeynG;%+KB9 z@teihzy9@n)gU(}Lq2Qr?(&7>%PzYt?k=AxKJbALWG^e8XA2MAetxVuKB6@!VjD*I zXA7|b;~8y6w0C-M{K=9?{^`Gbb~Q=aWlZANP{U8Oynmu@J{k;#uI5OD>69MdZHsz3+`HS3BjJAG*ws^~aw8 zOOpf{TasPm2IP4(7=M#|8h?3y@okoI6zwB7zwx=RIp)kAC(8VJD9eXeARd#4cZJsW z@Dhjsj89HwA}xujd{53>-}=^w`jL~dpZ~hoy)GU$k-%{rCKg8iXAXb%v!DH@0&8S` zqIEXEBGTheyN=!ISco{yixE@tZj)$Yb|7N$6q*0H$2~3{HX~Zc4da^e z_4{kQfE%k>Bl9yDpCOA4$g=wuVn=`9>`nD?n*G_5>E2y?zkb7q|9aE5N5vPT z#aC<2zBA#qC&Z2J`w;F5AHiMxQ0{Cvu`nx~EyNeB9o+NI54~>HH|P5t?hI?4p5niY 
zb=tYM=LGVV+}Ov?bj=;q%&xdH-hvplu@@bAb=6f@&8OMO)kxa7ZR~7EWyad<(8VgJ zH8MX|5(jF&{oB7i*F2w}@uRqy_a47u#689-%i?(_PCxczKlb*wzkO`d@`c=-k-Bm5 z(Dw5)@Z-5MPPxu(Hr}lizj4gpZ*1_`$ap%);=I*tGZsHIo0-3NU;L5sSlcZ2|K@*P z_2R1^^UMn-FT6UwNHac%`}Ku|dxJARd$r=f`;HDR#0vvqzBeu&T-f<%|Kn9MKf#^P z3&i7d{-Wh|B{0G#7Gm!`_C@FODqbPYyt?qh3kS~=kGtTZ~uY#uAEuy$Ju|s@q(9Jb8`H# z^7zA8)9Iai@7)=H0Bcsc;=kFmYdmGdZwhhqdl#PqcK!K>W&U{BTKI-Q{HO7a3zpWI zz4l?nK2bf8^QZSKlDRc`LU0EEDoVW&G=~)#3^qTzbuR+9&1Ccwx1vS>SF`) z#e#87W@&tV<@_to@zcCM&-2+~tYP^=Zq?z+{8(EY!pgN}j6df&&xtz?ob(C0*uq?n z>*IHjS=#U53pZ+OeHnL;gAwv7b+fpZUyZ@~LOsX>40;VYU!& zVU5?yV*|1{){y)8k@oG?qXhYh=0J<6A1q@7e)BhfbN*Iw z+sOH`B@z3xz5Q|AW|sJiM{D*U_~@-~j>U-c{fi&qT8Mr4<^N`H_&l)qotujb@r%OV zU4Qzwua)^}?tES#9+UGIEw3wqjGYY9$e-)4zdrY$<54m8oMSn5Ut=Hp>Z`BL<$PYq z^W2qZ@>M?PkwUVdu&xK zsF2}{?r`(wNZ9BBgk2Pe}=OLX4{P>dR*z7oE+7G?BCblP@E5{##pTGCWFKUS4r>=}o2Vh?|8@ciASRs85FPkBn-b(&-gc}+Am+n)HpXfBV>%({%w-ak8< zvoQVI-Wxvo(VIX1xf?$A)pvj53-4X=-zPtF!%bg!|4pC&z^6XY}wCq52v?z!jY zm#W43gC$O}8F3yLV+d`slk2Z(9X~)IQz*hMu8r?B zi;Rj#MTB|oKj(6s=V9|$h*QMch~=}+Ix7Ng9CeA6WKKTdJ|1{swam|u%wHd3ISw?% z0i3UV%m@DbPX1!x#~W^T?>!g?WM*&I-@oU;?0@&h+3qjydjAVAep37q^Z1*# z@h1*vfAMOy06&E5W&Q&mN9*W9JVe(~w_05$5CNERJ#r~tZOpy*%-#`VxiA8G{4|~> z##%G&4>|MWF7mEpA)Z#^y+HY1m$Clk3vsmM&2N75*vPR}wIV;fdHjVG5_ zhz*XKaWAnPPXT#6KNjNKhsH6kxZnGZ?kdd7JF1R35E`xnZ+K-D*deW2Tb8IXp%aV)pR;y)x+-ZLN_PXn? 
zi~3l@8aw$>5I-GH3K9EHed<$V4P*1Nh57U48@6`lNBb+Da-H#`Nc%X?i}A8TzOwL3 zzw}EV{pd&IzGB;=g|UU>-oLTgi~IJoLpt$Zomf7w|F%2-@)g(oM7(Gl$MEBj&g?tZ zBJi&}X2T_u)8kK~AHs3_)o$W=e$He4oyWs<{WY!Q`vfwMM&{??ortMB?zrRRlTVJv zM()r?ism7g%(oG$@t*luQQkam6{n)?YMGyh!LPjX$_Vh0_G9ywuRQN@*RD7MK0D=` z_A`N3JkPTE?OZ$aS6oS48NUPt!f+T1@z>!$_`wg3_2-r^$^5u7uDP$N_?m>p7i||0 z>D(QG|B}m|_&XQR-lY>S+Qu>bIHVJM_!0OUFP&`KcC^gT2|E8)1mXewt=!ygAJzhq zP9u?)1k^L1`OHlC@ht}#fN^C;T)as#dXJlrh>MG{oTs%}R?Gaji&IZMH9y-G>sfmx z`O_?mjn9KrAOHBr$A`IcgXhoZZ<5Q0mif8P*vSuy_&i#CemS20;*I*T&fJXHs)Ws_ z(Y&uE=ZoOG?|ad9dLRzz#2$VG{-!ri;%%9;cj;`I#4-Fhq_cVZ5fS*Cw;w0-ml(Qu z2m#Zx{*jpwOYf4pG5V&&RbzVxLpjZKX%KKHrLjo(KyH#Idj{iZ~FXSF2w5y@!iYQ{d?E?LZ7v| z@Uw@PfXsh*&Fg>L)<%NOf0shuZ^U~faQrAfgf+5mEIg=fi)_!J9#I)9&%lrM%c@@sE@@s&^foh_5+T|9fO zcH{X+&ko}}l>Z{|H(V934op7tmFwddjrg}WzS48{g0fS80h!-_#t**3`52FEjO>k2 zT`_?pb>l}5{vKvWf$~-(^G67ew~vmRHP%0BVm)J3Yt)Y)#SI@+{kUN+#LeRr+Z?A{ zUQ$rG7+)rF2lq&(+UM{U03qzyYJ59z z-ZwPc(?2l#jl*dj2*~_(FyupQG14r;KLTz3_9NmBIq2rs7V95RB^jaPVlj~CH)9t1 z2i8ii`00{+jJsR=Ci7c6mIuPP=Fk=n?wNk!%b)pQxBlt>yyb1T-2TB&-u%~}{=&OH zd*^@q^ylCAkp7E9I^Hl%pwdIv`-@EwrZnG_zeH_@Sy@1Sb zKjK4cF!C_7G-53l@@UU|{Kl1;nNe_PcM)sI8^+4>?&7G1;{&h-4hYK_-C+o?%wzH#k4Qn^jj&3iRI81 zzt3~_ci3YT_mAUSS+U#x>lcvu|N3Y2e`E_I6*B|#-sdl7Y{vQgQ^14T$Xs(Id@RJ8 zS9}#Ue^gv;W`5kH61;Jb^IIMli}$jmEbp#zk4tViF7A)NdmdqXAaXzUu@CIpyEAfk z@e9^w->mu2{hPf@=fJ)=h98G?X1`<10)O$}*fI-!1!R8T*&cdlBRnI}a^HA9*+yVS zS=`6Bl9}_H7^_|JUdBhR=8JPV?mD(1&f_$$iG{dw#Z#<5t{>|e7gsD^*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj z0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+ zfB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR z5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq z0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu z7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmv zh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriM zx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQ 
zH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92 z)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|i zNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH z9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfC zN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj z=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgX zijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qa zk2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2 zSCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8U zd(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI z`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_ z{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t z%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV z=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|F zWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsH zkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO z0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8pp zfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ z2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5 zAt3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvC zDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY z4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1H zP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiK zdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF z2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|M zfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvg zK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX8 z2=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv z2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ 
z0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BY zK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU z2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M z0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q z0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSP zL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf z2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S z3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u z8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1 z>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ z)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7 zQ5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j% zq=o*6 zB{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^% zbJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_o zD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*D zn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#x zMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBw zJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7 zUr|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W z?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q z{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw z^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}o zGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrc zncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(y zkogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p z$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D z1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW z0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLH zA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*3 
z5Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0p zs0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!Kq zJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+ zKt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{ z=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*# zAW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux4 z1bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl} z0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rY zAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt- z1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v z1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs z009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHc zAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T6 z1PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK z1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E z1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^ zAwYmY4*{9qV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MR zE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S z4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64 zs0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP> zQbT|MfgS=fzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXweP zM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3 zk{SX82=ow;`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6 z%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXU zqNIiZ0RlY)WPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9q zV~)BYK!89+K;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*% z6(uzU2oUHYAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q; z9&^+M0RjXn0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Q zenm+Q0RjYi2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=f 
zzsDSPL4W{(ih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ z^D9bf2oNC9LqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow; z`90>S3jzcPR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U z$oz_u8Uh3e^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY) zWPXo1>Vg0P0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+ zK;~DJ)DR#*pof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHY zAoF|7Q5OUV5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn z0y4j%q=o*6B{c*H5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi z2*~^%bJPU^0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{( zih#_oD5)VpfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9 zLqO*Dn4>NT5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcP zR0L#xMM(_-0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e z^bnBwJ?5wj0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P z0u=$7Ur|y+fB=CW0y4kH9Cbl}0D+2t%&#b^AwYmY4*{9qV~)BYK!89+K;~DJ)DR#* zpof6W?=eSR5FkLHA|UfCN@@rYAkafV=J%MRE(j1HP!W*%6(uzU2oUHYAoF|7Q5OUV z5U2>q{ECtq0t5*35Rmyj=BNt-1PD|FWPU|S4FLiKdI-q;9&^+M0RjXn0y4j%q=o*6B{c*H z5a=Nw^Lxxu7X%0ps0hgXijo=v1PJsHkoi64s0#uF2vh`Qenm+Q0RjYi2*~^%bJPU^ z0t6}oGQXmvh5!KqJp^Qak2&gs009CO0hwP>QbT|MfgS=fzsDSPL4W{(ih#_oD5)Vp zfItrcncriMx*$M+Kt(|2SCrHcAV8ppfXwePM_mvgK%gQZ^D9bf2oNC9LqO*Dn4>NT z5Fk(ykogrQH3SF{=pi8Ud(2T61PBnQ2*~`3k{SX82=ow;`90>S3jzcPR0L#xMM(_- z0t9*p$ow92)CB*#AW#vI`4uHK1PBo5At3X6%uyEv2oR_U$oz_u8Uh3e^bnBwJ?5wj z0t5(D1Y~|iNeux41bPU_{2p`E1pxvCDgrXUqNIiZ0RlY)WPXo1>Vg0P0u_PC{Qn=z CQsM#t literal 0 HcmV?d00001 diff --git a/Demos/OpenCLClothDemo/atiFlag.bmp b/Demos/OpenCLClothDemo/atiFlag.bmp new file mode 100644 index 0000000000000000000000000000000000000000..2be4847dd830b84f51b3a3fdd07f6e0d98a5315c GIT binary patch literal 1396038 zcmeF42YeLO_Qz*tLntX&01@zs_4#~O>+7UeDs+ zL-==9Rg@t%MNyTSY_W0+{(ifU!vFCFaw0t-Js>?GJs>?GJs>?GJs>?GJs>?GJs>?G zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?G zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?G 
zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?G zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?G zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?G zJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJs>?GJ>bRz1N!!9-Kyo!ZGQaqr?$WS z@^gpZe(lu0U6+m>x^?c1 zh75`sHe_(zu%QWYvBMMMM+{FKH6m%usFBHIMvqG#J7HYPqzU7vOqw`t%H$c-rp}r% zea@_zsdHwh%}ZS{KW)*%1xpq$TAIEjGh^xUWtl6pmaks9V$JGRx!G&h=47v5o4axS zy3HFmY~8$R`_?Txw{OeexnoyB{+`{tiVFAa+go&C|Gq;94;(psXz`+jBa)K-EW_UE z05HwlxdX>@_|QQZj_u%q{W!?|`}X2!_ZICb+OxZG_pUv=3w9Ue@5;|B$lHlC+PP!< zj_uoaY}>kR>z1usHgDOyY4fIyn>KFPxMBT<_3PHJTf1&;?%Ld(+??#3>^0eIR>ME5 zSFK#Na>dFOSu3)ZWi8KKzAR%|=F-fJ^o*rTmZmRGU$SV);)RPBE?Bf+{=x-m3+B(8 zKQA>cb?&^mbKv>8vuEKWm_2g_K8u;tr{P1HK6T2pDU+s7o`jES^2G6zCXAanK4s$g zuH;!2|mT4d^$p zf8PQ9`tbRzVIje|h<^I9HSCwRZGQXtr}n@8f+d|hwC~!fBd)cc-MaSf z(Y zV@Hol89N5RpE!Qpl!+6}z)zjKAZ;Fqv}Do3rAroPE=^ySnUS?Da}{TPHkhB24bXvj zKpz+g7=ntxX+eJ8?p+0idv@UQs>T|J7?CM*)wO)nlWqU z^qDgl@a@c>Y-Ij;d}{dY#-}9Xt_|QPj~O*~^vE%zMvNYrG-|}~kx7Xoh9@K?#t%=3 zON@_Ah#M9kJ2Y&bI10WqS=s`7&l*R`6uZC=>d}m21Q4M;cZ+0@O_KGHa~n%L4G>~ z`R#t|)B%@OM+)+Lx(MFML|AJ&?(4Yw_*L-_3Luit<71Ro1L4pCMSD! 
z_8L3zSFOmxZ=w~;m*GaaJTqfi#!~nYp2Q6lF2$YH9^}tY!@U(hsq`R!)|@#rXV0E7 zYu5CcGpEg%p#y*FWIf2AfFETOCybjgK4tv4J}S9^@}vFdvXiU$Pj{DX0Z}fn$hZ5z1PqIy<(Psp|Y$*cbaHRp$sK zol;e2uUXaM5xz;)u>!s;RcGbuRV!Al%v!l(dD*JY{IrE>Y4hjJBjC@Q17XK8$e%uS z`m`z20Q@PFrc9nVdD4VQ6B+O!>_F9Fz)v2NTuxPIC{&$6e_YieVaHO{f!GMh)#=rv zd!JrC`;|}C8KJ8>$xwBY$GQyiQ|Hc4o5z5!2l)cjUDa8FpBtsB4ubshDfttpOqw8+e4~tCrm7Pc%Vd0`sxv4i zDkeG-QGt`H<4(pGst%NVNIK|9f|73$)2Dk)1 zx^9J%Z|J%$Q`O1O+v%$7R#5l9Onb3A9KiJh-<7JfVbjL-8#g$rIwa#m)mhDCd|lO9 zp0#Y*^32R-85x;NNy*o{ZVk+z@2upb>sD9tdDpF@s#7ZCvwmAgRR=Qu%~W;z_v?dx zTVvO4d4hZZpMw1Mzx{$BAKzPaLg80xo7UgYPfIg>;pJQD0Z;b8$`vbqXxRdg7r?g% z`N-9wu3M-&rmkC)suL2-R2}5%L`4pXiN1kcop}QIde^P9s^i>s%T%5Gygj=Lko2;z zsPMqPy{259KMVV`-!g%3&($gIw{-=4t6ZI2$oMQ*hj-nsSiLf9)r#dSvzDz`o|&~Q z!&%A4!5|firR(TQzIE5F3sr~7_|$I;RflAJ=UknHcy!hyN6Vh8V^?+D$@t-+L8f$_ z0sZ@mTpeS&4yihpUALvGPP<sBq$ukFd#xIB010pWqon>PXY zKmF(geB|ozAip#}vrFe5-MW^lI+UMzJ-IsOAb-a6*+|Z`>bfrEO=mf@-tbkj-H=c9#!YJUw&fD|EVqFd}#Y^e?&q;06P7}*Iv#@4|uEx zc=|m{$Se)=sq5Ae_$)t@cilq9?+@Ce(>6HB)M-muI+TiU&~rwcQSUlShoOFXCPFeK znInQlS|-vFSz6|%4I)LyjhNG`XU|t(edT}WqpVxEE{>=9x8LHxT;Brp{QN@P0(gG@ zuG0McydB#y^neNb`YnL+^KtJhD?i_DPp(PiDI-5$7kMU(pEQ2l#BnJTQj*6fj~zF5 zOv;#1$)iV(9W`Q%AyIRLi$qO(-cEEB@_3n~&)|_ z=mQV~3^b3T>V+?Z=zi6s^e)|a_p3;-#22YU`*t08-)p*%@tss1G+;qK0iXYj#an#g|C9?&?o@i%_>QU$Qg`s(MpOg-8&{X} z(gPmlfzBP;@n62I_ZF9fD^&*-b-w%Nn>w{?+ceGOAIW^_f&ab-OkYBLEo;@R`Ne0S zBDIWib&RTx2=d!}4_&Aw3~fwXkZ+|2Jj4T{Z;L`8OH~K;)81*^__kYa!KF>F|Nb>D z_b5H!$^+s%c}vx*Z#Ha*dOIRl$Ef79Z}<*-8&63hJjJu%`$gQ#%z+l@iKl`jk_3GrUKkFAt%&ded|0bMnW*n+z?0{zxC!#{>lppE z4PSfJ%VvY8{`6~E9)R>fIX!@{E55Y#Uwi>MJmMQ}S9N%V589JS1V|5fng@icV-NCw z{;8e*NR)1!I=27y*V}HjXW*Cfa+A%Z2mW*q&{Yg!A7IB9oAujrRfkDBE$tCL{)AB9 z)2?@UzS0A(R2@{v$4J{oZ@%eWp#r@0r(ebL0Hg=5%LDk*R`BvdsE>pMJMekGElD~p z1OAN}nYtqZ=z$)<<>1_P3snb?L_rm`yXw@zg@)I4U2d|m^uV9)0lJo}-F6$w29^f- zT*D_-2LV6ieQGAB(gU930RUgr)q$#m=N@+E4>w1FgU~G?h)64W%dh4oJy!Jge*Lvq5!ev(L3^R*%%Fs?J;k9nf^*| zUA1wwTx`VDK)e+V#Ekfp<&0=1W-OaoE)=U^rp%YIv1z`{bc+qRC+4xo 
z3N~B4CmzR_TtxUS1OH8`4uCJ2@BE?2CI9a|fI%x3#WTBh!86Std*q)~TIXh6k8o!7 zYSo^9?zv~4e)`ELo~U)_omgVpIF_L6t?u1-Kl{uxPe1jPd&3s6I&Fmiui$`D?Xzv$ zw!i)M+pqeoef#$Byz|Zr&p%()-=CU^o#O5N_uYqm;gGN*H4uNL8MLW+#~nCq9L#gi zK6_8yx>$lg;f2f)b~;ZzRqx3s(f2{i#p-ygsVY`6p2bs7KI!Y@V{D0eyx@@c-+%v? zUw#os-mYD{&p-eCrI%i!1~e6i^!EoIaPJ~IR5%kWinTCbqgu7M8Z~PD>C(oM+i$xqJUsm9(WCp^zV`0j3+50{uokZFHb4HjZQC}i zbo}`76NcBxlP3=yI<$ZPe*9_Vh!J=H?Qir6)4IbG6VVn8a&vNCd-YZP$@!34wQ6g36&s3>ZK>6>r7asK@I0|yS|<>mG6)yudp<{>8I?sM$eF>xL^hJu0u+ycZF zFazGMUcDNpbNKLK!%?%@BS(&45I=lj|ak{0sL=r5p4>`h7$V^Jn#VRh5SIP(sfAKY1>*dpFS=*^&k(h zAfLK!JGAeDXJB#QBV#A1f4@in1I%yPvgPmmb>YH=KD~Rx(Wa{#OYmD@)TmL^%>6Vt zI2fzbD=;8nZ&8u^_W0}N%a_Ncr2Or!yRa6mJ9FktTIeQ(NLW&h>rntEHr59hM2H49NUwlzij>A5C_Ux#U zBjFjWNQb>NJ>9fr=eN_QP8D~G2KDP_XJXFuz~iwT z9*NSm3q1xactHP0{{z}iU|=Az;`Hg$6cd^*b2uMlOG-+jW{9p65)%6L?c3k!)vsT_ zhOfOw0K}k6MEHV2c$hl^eMMb{R+P_Odbjg4Ta=FnO8nO zJ|Q6?Vn_M;`S>Me_iDd>1N!&JO;+5VaQ}*kh`=srUw*#63Gwl?E)E94JpL*U5XK(KM^mMs`chALYq1WKzwb;lEF)0Zqkj7`k>=IgKN)UfVNXFj$G z86PqcZdd}9Q>ILzdBJ=f`GXHUAnpY_ckX=kl~*9eh||PjLjajEefp(KmuTIA0|(;C zQ0#;a%!erS&f9O}79&=q8Egu`$C04%!~gh4YHBKNq_D8CO`A6OD>fxofq&5dK@Bk7 zw6XLl;pof2ZH4}XnI?@JV~JQ1GuY0XufM)$&mQp+IQI+S82)tTld6OKOaXj!W-(XF zm-K)qdB7pa@75Kn4n4{iPq2OLe}MVWeo6Rs|CxOB(MJ#^h)Y8T52hP|X_a2RdI`Yd zCkDb-Y9N>op&K^~(^~Wv9S4Gk)^Yy%^U(&KA5;{K#ZDJ5UJMO~wqRsFb|*HAZUxI- zzx&QRP|K+qey!ZeJHF9U#QD(Jg;->{1uT5}>8CR?GH4@9mMjrH8pifmh}$K#0l=wC z8V&H294rLSaUyHhtTC=)oS!vombj(jhYa0o@IjEFB|4q+cJ8F@=jY|2#~ceqga5<~ zwoh?BVtbQ}j|d;YM`xDt6y&`0fG2oBJife#uIk|NCE4VDY$qqp?K4?aMmi@%?r z(a@$_*RHg2Of8vz?(Es{urRd53qsS^UwE-GOV{;(JXhU0V+w6**Qzys{CGM@ z{1(8kJlYYK)55D)ui{4sEyVT_-s86n84h4xYN}xVr=NUcIv1>kdp9&C+7x6o91<29 zkMpL_WBbH>XgkiT&M$3Q#u}ZDoJtRPjt87o9b{?ZSrQRpp^rcI=QAHUAprpaKnipZ zlTn~ZpH%nm-9aSc2?ArwLZHr{DIkNFU_OM4#Kgp(fBLCI`}Rg_+osLzS+i*4m|8Ob z__1T1I(DR8ptCYZ?=8ihD2Dg#e)Qpov;|kp4+;u$9u0vFq9+~d4Xt}~n7?2FLU(N0 z$XL9^jbO~!v9u#F|DO*(Os8XHKKwun5ys;l3h93Lu3covv!Zn|#mO3(PphE&9!edx zxmsM@vPBE$^EDpQAD_p5iTMz7@Q@Tr&P2bhJ;?tF;}9kD>BEsz5ApzVb-H!w+{2*i 
zAWJi7K>x__FhKRsjq^neEMQ`sS+jbz_?)_S?JAi6ezRtl%(r9+wm{5xzc1`pG_Yho zg89!r`wUxyWYo7$AJKmMwrvne;70s{po9NkX8zdgVgBn48(u?XufK42L%#!cJ>VlS zF(1EHcJA0={P^IR`uflV%a7)dFT}?|%m?R1a;8vq5ai28;)*Ls&UlUox^?bkRCW6I z?Smj6NjuRI;ZOY8%qMaJlE{HKz0f^6cg`H~F&UZP?7jCanJ*gC3?T*cBF+cZfh^OG z@D}?Lodq}~OXedKhhmSdVRRRQSb`gVq_qB?Wq<>KAAkHYE&Tt%e0cA%M<0djrcRk+ zvRN}{V%(O97RcLBeyB4Aw`R=H4HQ3H{$>~}@y8)=+5&dXn1A2B;JgU(e{RDb{Yk(_ z(i$C}oJtRPjt7vd!-D+o-SBvnetmjVkRK8>FeWPEiN_zOEB7XP!#QuqdE>@hP2{By z9yF*+moD8LUx55qUwtLkH8Nks`QooMgLA>XfDCXDFTs4Ik0NEUW5u@ARj}qkfj+G95g5@@<}kCKOyTTo>94&U%mS9;lqiX zNLj()9aUAmoL zimp!wJ-=p6`qQFCi)df05BKGlMSIJcdd5CnSInsl(Bu<5k>^R#m7Xr+Uy@(4(2~_-+jNdZCxtk zQ;`2l+xCq4GS2^l4wkF`^*n$w3zU#gUAItmLW2f|hlUJ}iAITlN|h=BfMC&$y|6CG ztm!X!r$vhvbg?6-M)D+<=znDYqdlf~)~tyN3DgYCfBf;s(F_c6e$laE+Ne<@A-^Cw zr%IJ7Xe^Ey4X_0$2@q;1!fe>EA(o&4rqicSr+r~i67qSl1nrG4EHN@4+Y#$FYu1d` zMb!cfE8ehS1HHxguOUN*h)rRP5=QCLpTPY3_3LB%*gFneFdsiLu*CVWai{?PfF~9ZobI*K{qZ+I>5ZoQSsq}jPOqLu zRVOSY7)d)r2E{!8+_Qdue&FW~zpy&+f*|t={z4%a42h*H9U?j4eED+ zKp~MR2p}Uc2C1+v+KAc%oIup#?m4`A2>xD z2uHLonE&RRZ-T?7!xnFWKKO0!+_}^qutoU@EUCn+oMBTqPK4i~p`qgVH*MNP8^pfu zy6dhjdfq-hHPpL!dHfz?b>Fw<;j`?QI`1<-H0STNZ<~sl%B}abyRh)&Kksk0650I*ZgnXgu zM1+P!g@+9rJgELlFJe?E#=v4g=nWV$SP&utQ#*F-SkGUmGa!EEL+%Egv4sAFGGa4k z%s{*cMBaDry?_z{0ofp^#Gly_QFO) z0{^niObqz=`;TM2{Vvn`Z1}%}RL(35ZU(u6ApDZmpfLPWz+r>uIBe83+p>AHIBe|V?YG~?iZC32 zsJ8> zLDfN)W^`mk?2y68hri>FI}pX8Q}qT6LHk1wJv4jvY=H~+Ge9l`VZ`JZq(5=uME8y9 zuOEK+Ar1^L1pE+8mM&dd=2{5IQGOE&30A?n5S|t+SU_umX2`vP_HH^^#Gifp_7z)3 z6$Pw?Hn?jT$6kpBX=!P-YSjYYXgk;uhPWo_JrrmL1qloJfmeI~gAY)3L9FX)2Elpn z-o5eL0T83ZUb%85H3l&8^8^8{>9ECH>Z!R^~vN+>oq7C&leo7gO7&;~#BF6$%d)^FH=0(a;HXM4O!hE3I| zQ3G|Zk%v#CZdtm{FF!%X@9-NdInt@UWWFGQobeP7FjYqc`CQeBiU^O1ii{gN2SQ=74IuszVgOjut4glTU8`iJKHjz^V;(pQMTCcAN2^w@H2DWRf`2AWnuNk@7#+-xA20s7;|l#x85udlv>!0z5qS0DUDcN-#bXgjaJ_S#ope(8Rd&p-PN z^#P0rR-<}#+-Q(uin0Hub-(=la}=3G$|4$IV5wLO7(vk+>#Csy6LOQpTE-bH$6?@S2#y4vfv`h=#X-Vjfcf8?ju38a+^7+K&~PKF zx8Y37JQ35exp|9I#(weC0F1rjs?JZ8pGm4trw;A9bZoC*2l9XE0Z;J&jlg2LIw8T4 
zVWAj`g)B{U;3mYyzTKoTYTbaqf5dRz;nD`L$~1uD-h1vrG>hF~tuouA0Z?f=Hf#Y9 zaJ4S}WNMGyi6g`TV1sl3#%5T873nN6MSE(4DcYOPj?TikI?Wr8%K5Od#MLQdg8-Ll zgV;6LNT=!QuuZGbTlmM-j<7Xn|Cly{Bf;)0-D^Bf9G-D6I8^M{v?+Qk&Vn54s+r}w z(6c5IjB1AI(NhaBRY7>buQ0SdO-I8$}C_67Uk-Ys)H=ep@Rn{ z#>c()?z^bLi`W~{mo({tznll~)rF4t>MJi}>=iQJsuOk~91=%}HCV$gw`lo0H!p;c&AAdg!e30hbL!t=>R3aV^_(12MowJ z9^?U(Sul3pLe)W*COU9Oj~wyUmtVZ}!V51v_nb88fxoB+kf_=8owuRq*qPsvzZ+ zdrlC00CczBJ-d6_^)Am>dSD3U>I@x{5IbymeB6k{1OR``sFCASl3N4>Ah#J;jWp?j zzo-Y^eEs!LKmG`vwkWe8RGpqo)nUNz)2oLhfb@W;c>rY=M6M30I>^`=J9^ZFaVg)o z{O;|>uf6r=D~%id7wpx>js6n$TH{84b!*tTkw@5DZ#Hc5*6W{t`Vod-*>iPt8NYWA zJP)*cU&(w=9Dc?2k@aoIoG4eFchYrX_i-Cp{-fjj#b z+lWj1s=-UHt^QXH>VN6lzGzV2y?x%Gewp@JgZl36(+2h3+b0d`m&ZPCP~V-sT>oQk zAH7`Py?yv{{c_p|FV`=>z5jCka$2*O>)(LA_j3LJw!PAz*@y3c)x8UnGok9RP`1`@KxvlZ{gRS=WTg7dqzaMM`w=91@*m8frW!y6T{a_jXeoMKf`}@I`a9ix} zw}{(9e?Qm)Zu9;9U}@au`TN0Axy|+Wo5O9kzaMNCx0(KaGq_Fn_k&I2Hr3xBHbu9| z{{EA=P4xGlz-_$0KWrSg6n}qMGPkk*{;)A*RsBbE8&%aGHj>+js{XK~s{X_EX<}7> zSOT~Bs{XJzZn0JUVZ*o$t?ExUq^dt`Ft zRmnoDR%Lce)ezl+Z>dTabW2s(K-~u1QdL<0TdKnP-BPu$!TQ`%m8|zIRfYAsr7Bs^ zTdJ1VZmrtW%6i>e)xmDRwbwIGb?@1|1DElUoLQ>s^zDtO1+gbG%a`gD~d2I!E$PV##rSt zw@Zq0QBf{%JFh6`xSds$Gm3IrQBH9?sVFD79aofNigHx9!-_(7P*Gq96lFiReTuS| zTals^D#{*iyA@>@w*p1U=a#1^I~8RIx9y6ujoVg5*`g?$Rc4!1Wh1u@s)YOrhHbT>qbQ`W|utaVNnwDVG;!<4u9mI z1A2Auh%yV5oM~5edgB?;z54ga!@K-Kr4E|s^NY44M7ORZ#0{#SiVm{p+&q+)<^%Y|d;p)A58xB?O~42C!TeI}+Zg%XU?0pU>;w2y zIQB6W%m?s6dzymzCg6kig83%kkFYb}2IdRe6Y~Ll&_2P7O$G3QdSIWJZ^AwS--vzC z9^@C;2kilWOhJ2^0_K><6sV_Zs1x>Si5>f3eg(olXzvB)hjHwe0w3_N=pFv(KL_^i z!E<$Zawf_6P<15pB>+9s0|36U>lUgGGIl18A2(}8N`sdW=Y#p@ZQ6NFyMSmPQwD!9 zU%jlVmlgGj3Pa2f=Bpy$7vwiFANL4>ePf*OfPK#Vld5t;RgQBzrYc8O^@yq*QPsn$ zdWhRWRXw1p`&D%xx4o(gE7EO`s_y2tOH~WF<*RBQx1FlGgWGmh-KMHrxouI@%`jEn zq^TQKb)%+k(A4$Z)@kZmZn>J8!!27=*Kk{{sjD<~CASrtlBKCxnzme1muVU-lUs(S zE#;Q3X-l{*)->26O^n zahqz>rr0!^PPS>2xJ|Ta6S$4HX|QoNEk&Ou+qALV#@IC2Xqz@lpN_PtBW&6Tn=Q$v zCE0Alxh2w6vnAMU@h~r298JA!v0k=ed^(iRU<~NMz?4_gK3m*kvt}+ 
zDWDBp3v|;=C?`2!PKLL%B;3ZM`U;qG@?)lgS6BsGSOqW4M9@?)zfaFjJ-c@8*}YpY zUs8A)2Lgcs)HbZ(uDD|W{hi4-+}oI`<(fjia7rgm=E9!>~rQ5 z_KEord4#+pLVaiK8?_y~yyF`CU_N0V%y-7Vk@;m}pO{bB-=eAv`(VC~eK5Z~*a!0o z`(XYG#(Z^o`LGY>6ZXM;!akT!ma1caF0jv;Z^FJ{K4BlUCj?ZwJ0i2O`a@&WvT{rY(J)i2N96FuOp>HzptCQY0(YXWEf z8N~UTW)JnD^FZF=I*-6UMf(W#uc~%!U&Q&qK85VIQGBMf(6gF&_axm~X^BnC}Mr2=yu2 zcg8+KeTw#P82dzi5$9WCpQ3$%ePgJ9UDy|qKSF&+>?72tXrG1pEZQ$+es5N0!KCC5 z=!Yjk_w~eUUY@gOdjMq?C?Q{{I+G_(m^yjV+}RUfZh$x+%s*$-&bh!oQ+VvrK2vy{ zu`fdXGO_QV@Q|?2nQy?pg~EeqpRiBl7a|W~-;Kfp?1TA)eY?V=mMT05_(|a@C-xl` z9v1CG-k~W(9%t;kQh1>AFp)>K%R87Vc(?iK<6Qa$B2DWcrYb}$B2D| z`aIx=$ioyKnj+MvXg{8G9xtx*7!)2v`xNjCc?UB>;SnMaurCbcr%>NA;3uSu*d8-v z*4T%(&zX-YDLnYe!1R2?`TcscQMaY4PT$@H{Gb8-Jp1aGXYYv~a8`8?2h4ZIzO%w((Vy#D;V}yPkoO_%o3wpnwC@J{ z5cVxY{r`}#Z;JMvv2PT4?83gyrLb>OctCz5^Nqqji9AMOAE7=)`_95XLVb$%IreRs znuUEX@-Xa^uy3=G$WtC+Utphf9-;6M`4N^=z|UB15cW-+H@$Tf_U)m*9s73XKRU2~ zAN#0VblsAS4^;=i4<6Xx6R&xB&YtZ7Q;_1C-(sl07y{0FA(+Tpdz%rca$RbNaOT^QI!sXUx|a^ED08K7@VfJS>ODQP?Mi$BnRW8S4K* zVV|OXq3sj%EfpU3!oIV@Zo#`<$9)HinNy28G=1-QgM?iZZ-i0He~s9PlD3som%VE<6b zd{1Par=N4Oo{&#nw-n^hnlXJr+O$_+$*<^r%BG#M+0NL2eMg1I3i~{bhf+5k`g4sr zJVMwvh5C?pEEFCmVIK*c$5izwPu(<%JR*(968mK;Jg$WOa;9#gGuJA0lhSyYwoloc zCSkv9?CYJmH%r(T3Xij}FH$#+!oD$$M+p0vVcI@&cpL+MXY5mFE+Tvp?OTTWrfA;{ z_66sSp+37QSoY^~=7&B0SkS=!-JMk(1o>gX13mrvm*?+!9-t9eCg9JRHFME|8Lz!+ zXTF!6`F4dz^yijO*e8X@h<#V7o5Xxm4$o;PVc!+@|Ddq%fPJ32d0oQ3V-An%oUw1h zJ_7!7rSbd$?3+?IEroqjcrc~TT+9AkJ$uvMp9|m<_F3vC3Hy!;k1>JM4fcV0qp%Ou z|3P7&ei87VT;89nGk@T~{ym&kozUQcVIe`Dcg4#y_H+*jRfoE6XH1`lARo+Myl`eI z^R?3m^_jw>Itcr0ST1GkqcfLh>?`(xT1epG1GVguz8hg*jM8$Gx``oLY;dlgy2+W( z25RwWUomEHy2AdA3;UXVpq8_+e_g4YB8SJQ@LVtUg|J^{#y;d7>e4k~UkucuI9~|+ z0KN(PdS|Xk+IKNfOJwXDurE?KN!~Z6ZX(V%3i~GPyJ5aico6DaM*EQc&HcH8`9b}A z2vsL|KtHDH1P=%c85kZK?CICPJb%yg07hV8)NQGfKV$mrSu^L(nZ0E3Y|i|XT-!PA z#T6coeFtG5p}sw1pJAW!HZA*eA?cSV)W`T->&{#q`-Z`}gngF4X~I5bZ(@k9DRtAm zug;<2FM0X^9Wtp0r|SBF%c@Q|R0Fv)z+L!QT(6D(XQ`X0z=lxYP?Ld$`pDsNti$65`{fy> 
zWmS^_L$n;2f5XB)W$a^QuEXG5#bR*o^^DSDH5tk?IG3<525O-)gF{&cq3~#IaIUL? zS^&N!^DPJGy6ManH5n*x(|~=4)J>MIFPN{FWnf*pPD6BE_2-hdLsJC#BV;$;)`Nof7t^hy)%t%ohW-u5WOz z!2S*9@K|8q)R`;dd`jK4D8)m}*X!`OV!oTfxyDdm2>WiZk09T%Gq2T zftS2IWe@iN67n(X776(z;}h@~q|IBFG4J))3MzVo`KP>WFed5q&Riz!JB`vp`88gX zK@ar}gL4t;1MifgPeZhvQ#XxOH{Fz9L#1U3^&JQ2+T|T8zh*f~iz=`Q<|B>ASboj2 zCWG_f+;ZmdpaPpy4v!uCkavWz?+W`O)EA|A#Nb?G`8D_07lU&tZ_{#cuB#j#A?%y5 zKf<20ZwEdpJT$dqpGWw{bbaT}Tm<}-!$S!?B<~RR0eoUUfbWX=M0;oEL+uy0fKu%1 zaei=UFe|fQQ1XMqLz%D>86N84m%luH5A*aN9<(X-3G%TpZnQ!CF z2lk=x*z-2+*gwzH^;yQgD8Hr;&Mhs!#;R_*9GvSahsRRbch2Dvb$CeCC+uI>C@mD; zEW7F^$};HX*Z!kbH%$tUz&@2<6Qi_P>ZVosHN^R3r0`e{&P9-KOxHIG`>qD(Itlv@ z3JJ|z4$j>AtKQcTd zDnc^f1DWUPrz{fkk*mXk{CTMh=BF)MFh6VAf;V0-sO){hi^cgT3Hw~w*A*T%O3N{hKcAQ@!Os*qMuZ%Vn2evl^T$a(Ga0Srl6~^yikV>Smeg`c#UCMf+Ce z*HCX6L$utF(y|oxjo3$!Z`q%#3;PaLH%$tUeSEHCv@b$^OY9q?eE^>Vev0!c)F(+_ zU|$G36zUtpd(4Zsdb&Q>^PhYyI6MTSZu>*VCshYBeqM$8U1Tub9L|AlW z_@Jl=Prv@<`FoxROkKB7b(SnzxOBk3eS3c{AEkw=n<&LoW`8cJ`le`~ z6dnroiTnh7nu7D?754FrMPhzrI8$|)jBikNVxl4jMMrwx6)(@&(>*{Vu;!)Cov*7p z0Dk7u^z7B?jo##OK7j89?1T9T@?8r1sIhOTy2)~Q48p$UC@tgQTw|z@yiHRn9?Sk* zC_F5U$1pyZ`g6-E?1TI_J~&tJ&n?g3T%_@sgnc|w%b2lms>$FyO6w+8-84n}0KSO! zN!1r3k1Jsx(Z2C!NalMw06p;hX#^JK>X53lG<`{C#?qWMOB=npyK*HS=X)Jz(Y^~|U$44Z z`dk%@!MXMarc*Fqtf|*3R4C#0JD=ihF}JG~DqMkid5MgDQh3DRTw~Hc!@fQ~S7dK8 z<{O@wjt8cTXQs2KrI$H4*VWU~>B+g`xhnjjD(2^^sLEPJ$yJmbZrO^mh8r=TJu_WV zRwzoAqAcgOOi^H&+%gnpDYtY*S;B3xqAcRJP*E0eo3AKo+~z4ts;b(nZmP;0h1qOH znZ<3UqRdbg%N!oFu#ZQjE6N0J;}r!qPEk@+rF49*rjAvVG2BKg$|!ClRRxm1PJTrD z5P9@W9#u_H6j(e=RpK;-JuuyBl-5v99il3OxeZbkSPTr$N!QbO2>3?q1N9W@6Zt`V zH`r&wJ|3ZtIKSQ#A<+>7jH*ryS9JjVA%mhk@RFCO?BN~|3HgiWr=jaMeaYgCrRmEu zGjek>-)cn6KVHFhyn@#Wfqe&IpVegG#g-lWa|QNU9Ug@Z)B?$?RJrQwd-c{^iywdd zTC-*)@4Q3yJD<7s&O5MT*t_o*zxCGD2OhXwrOG8=U)aUUl`nXEpI23gJj8rVPjjP; zeWSv2Oi_+%+L6kYk5s7wJH+i^l`01-RXU()`>n8FT6I%V^1ZwYDp$_0QU#V*rOM7K zRd)FJ?C|#9t|(<>?5oN;n{A!1&pMwfu(du_a=GPHshq7U06xgSvZD7YpDL?;s$ja( z#|O58Tb7T{a&F6fd@{LZ`1rt<`uL=CTjJxhnA;*BpM@}A--TXY^BDHk)XJ6S`TC^! 
z`e2&MZLXj1+=||_G_?%uYwDy5UX%QMr~3L#;Wo+FhgqeH<299GUsIF4Y{|Yp<9&U` z`TC@A!`tM_mBwh$b}&lI`21X3lBOm4_>A=R9l+2Ke z=Ns$iOVe0CzhM=<2XpKL^&*GI68oS%Mf()k6Z}O7E-ehznI9SxIgqJ3JUJ7&I;83h ziHRCIINHN6e|h>I=mC^jFb4S<=}VSnEX`V$xh{8^BlC}Qg$Jpdkat)bkA0LD)nuUZ zYwQtD3@y1yxh9=)g?==ojm#b@4uHgzRd59e?{v{mrCZ%D;_Z5%0mw! z<~PRqz&@DILVa!&>K{jt|KNisX3e^^bm^7!^vmh#mtdKhmq(92U%NJ9evHqeXx~EE zN09%)2M1@(I+vMwAwB&(*_t&Me)s_^;!)|qKBmr{xvUhAqO5)RAKP+ru+|xFr_<9< zXJnk2JQ;_y63nmYz50jmcV%W``6=B_rl&jGiS%@5JD#3iYPq?mo_Y$u7v>_c@7FJH zpeXfcMNnZn{FE(g!W%cT7*RGWm7ngwHPA|)a4y~?UExCNTLbqo@yDY_ zj}~M1v?8q=8j8(gSe8B1N2O(6Y?*4W*+*$9%DQKt-C*7^ZgkwgWzq)dzHK8E~ zObx_du4@JxdHr<=M~wMXrW`bF6K~u7wpmf2-Xqj^#y*xakq69g`tCBry3Awe&Rva* z+o>qf?VZ*Z-+m<%7pwbWFoWF+tKP@l^?iZZ-evju0)T;V5TZ;y39|7>C9 zN{OaWUs0ligL6-xz6}59TfT+@JZXDsfqa(*ynM6XuT&x21T?3=JCGdzipLgam>?_K#r|O+Nd4e&-_}aeR$b9QiAIxvt_Sngj=5@ovu~|g; zrt)hJgLA?B=bqhZ+#}8J-%re61LlW^G3MWt*Bfu3zW~giIeq#e=M8pfzeQ01b^^X7 z_8IfvYr5RD?!}A62?=?6oNs5ou?_AE^XEe%1M~SK6j>b}mcxS}fBEu5rq2s2jvt>_ zz1k>ML3m%9w+ZHlhORw#?uv2Cm~Yh*y%?Z3B78DnpA5{uZtM%@4~`+`hchLgcilqC zhp;njh-ALU5l@~l$}CXVtx$DVW-VX6a>d5=E1NWC%&%Yr@PU0E?Neti&)c*frNuM$ zb%lp@zcz1v<;)pId1o4!u6Oru#p)RuNb*6Kt~2&eP!11g{;3nz%t!aGsI+W>eMKo~ z)8^QT6ULhqJ1ztB9k7oC9x+NwQPx{Bf8RbZf3>QB`Pny{`KgHWr%kh9e*0496Ywcr z-=XTJqD*vV{)G$0@$tlbit}ffJ{N@XX=%iKp2NdBb4O_EhzA~+gdpG47;n-3>8GY> zD(lP@m6oCHgoLa;d)C~(C1XC+8ycTW+73;L`9|!c84NcFGHCb|>SM+(>=W~!dNOoK z42`-Chmt=yI+B!p06%uLc(|}{SM?R; z_*3;xyI}rNo%zm>P-FwOTr(dx6bL&G&(eanuPZ!E#Q- zoATmuzI*0F!#5UWFk_!HKg)D5I`dIWpXKn>z1tP@>7gpTCWAH-!cJu5W~emSnz%cQ z`RM44x7>nkA5!&Mv|n!K6Y!lwedh!o0zORz^Fxuh!RJ|~K_*HT3 z6J7)p&Yk=Hz=4v32bmo>fO#@Df8+?YwEzgZbf0(v#r7%Gr)VDv4@PNG-X=2k-80|a z;9OVC4-I9^R}^ILY~wSShj;?b9JWbO0Q?QG_nPjuA2cA)$qZAx_o9>y9 zI3G3&IXt!QoUm`-IalX$>eS`O9-j>6i%{P+^O2`2;(S+CHvxP%!am}Bd#G=Z^Xr8T z83ZNYsOrST4jB|Tba4EzA(8;n1D@uAC5snPLOybJNY%++y(%|*&DJg1Z#O}lf4G8; zm~X~D>&$h;zNzXauw3(w3uDJJ=8G3$p|G$dG_<&N>*8;}E&lGiYt5U(O1}NJ1k>it zf5*(f|6S6x>$Q3FEJPtN|MSnGmVo&%5$!|U2lEO0C)~#QZmMn;D#|WPZD+_3WGn5g 
zRxQ6;wY+N8Faz7cZF{w9+i$;pTlH#NUwW}%>eN%>OvTLh?YKj&QI#B5aXtk9>@{cC ztU1F?%+On!o?f%Y!FKF8_w2K)c&`2&OXm0NxxJ$I-0IcRxXr6xjV!f#H7tb9sa|ci zug{FHzFaMilV)`0V@wuH*LNM~+kp@4E6Rv2opWe)XYA9G?%mh;`;XwEJ{z3tmigfM z4OHE}0*rA?wz032J9-vXT6y&d3k+o*k%A7T;*XHJI+nW2% z+r)gxJ4bk^uPZz}jYl7x%gAq0eobRczN5y4;{`-t`#_Mz&#V1Dua_aDO$Emd{-TopyxZOQ!X?2G;S z9S;jT5f+9i*|D%NVMik)kEN!baV8!zj?&U_U`Wvi^K&h=opC8g-)^#{@mpIOH->HI zwyAMr*hbwpG;X|Jw{?vhuY2#k_0_5^Rh0P{jb+LFrRm28^v??q%hwI_M%x)4wlgSb z`?P8MT{9nJwZQzDrUts659T9Ff3&8Ky8X8Cd3mRu&)v9UQPJ54AH)V@Z5nDafcb-5 z==q5Ajm-Z;!oE#|wiEhPy|7_J5ae?ipXF!TRh{_QAqjEABmtxcJk0|%0*i8WNY%;B z$zHcMcgME1jQN!j=O6aA0sDydq3sy3PouQV!oK~ zZ>+H|`g841&Q+8>ZQ2|+_2IgwnUQRmr^aHHOP8*7>vjOca&_ig^tblvS)>ok6=QN~ zST4ro(y(0gY{C{Pig8RXitym=+-cJeTONaHp{uvZ!!iEJ5a+vRJ`(uHYU&tO8TG~I z8MFm)*2WEDo|b?1*+OscMAn&Wi!Y1$?#r)1-WQ#@Lgz7d>Dv2qDb9boURdlgcKt1OCv&c*%TE1D-rzg+^c*Rh@OYIUClk-MM|;yTp7i1o;B{jQN)RxekMKdDTq_ zFaCb#0{(q@%$TbQ3C!Z+*fcok>Ps&I+DN%XoR1k8qJ1#m0sCHFC67FEZ71s#b$}9> z-?b|-pRg}P9>_Z|7VW!WK5{jYa_Q!AT3|1jf5Q2GVS-pW-vXy=*PgF?_huAiP?g-u zmDYxbSunqEpS>8OYgLm0us@fl?Wy$<<9-d`0N9K>Od-p6;Gd<2nkk6i(UcqbZ zoY{MgHx48Djq~&772SO|#(6>22lEGqgsij5+o3pLJWlJ6lwS)6^YLuNXX=H=4JF{S z+*JPX!;J~{6iJI4p*pf z*c;@xNBhpgJ|CQGFSdLdL$peTeU(M~x&hG_DpWurg^N}FFWr9Ic868XB9KwmA<|!bj$MfUCwQpuWu%q zpC7BqV6!bI?4$6GbDYoIbfYiM`~vjeqWl^v@VI4uGO+*HqxPGE@pD3w|K-bUK$__& znlzcqo|#?-^AW=f=0n>tVjmH{$mBsYiuMuVQgyLtHZ-aY28!zLSW$Kq@?2X^lK|uUR%EW+UCtA*UW>TjF4c&Fk*w|p$OPAO%2O>XB$B)miQzsdrzUw$2S{6tyYU~?3b198SU|%qwfKOBOggRFeS;Pn}xl<6_y6e89nI;D8b7 zCbp2IV{Ab47cP`M{IFoYG0qqLxu@&ZJ7*E+ZzF97%y;=Xt((YvoX+Xf#r^snLLDAW zU5ApJocU($9nIhqIH#P3_B(f;f9$c<(DpSABLBSqOq`FR%B^(VT`?adH^%wYpDPl02>1f~B=6AFjD5lU#P}Fp z$sdZY+u>ZvN02{aIC>W3OM1YwJb*F_l#owdw@`JqY}&YO%jUw}rOdAg<|Ep-W8W!t z({XUF=*&f&j~>;}Kf7?~kX@y5CIE4uJX3w$l8A_FyxSMdC+xFs0$t$&@QL~7t>XOR z`|rbuE$2rlmZol6G9QPxapUEoLr)JMerEV^Ovz618BCcaB%GW)8H0b#Y6db(Q<8C` zv1WdGPTaW-XFdw75%bfWgC%TUFQB--t*)v!4&xIW(%?Ac%RrDT>k-4o}W>GZ;T29l$hcEo|pOblhzI)~y zu`eWj%V?i~FBG0qk%wzL;m@YU!Ha-^2A4W<(fIl)R!LzP*dA8C67%W5y 
z`36;I>z2*iw{0yd+}7;9BA?0!IPDK|>{Dkh>(A8(YL%+`yyWHyK1z$8mVU~nA%*62 zuU?m&sa5_Zy0&8%5HL!sMh&F*Aa934eG~TWIXw01xn;gnm5*$mNzTH{X8yqPy-&zw53ge`B_o+oHeSwUFC_zuh&zdbQN=zFTYBDQEsJ z^xoPukQvOMV`=~~CoK)yPRe)9SJ0n~*e@(RSEu$QB<V7>|aBo!xNYYJ8MZw1P<~NSsgOly%4;tm&bMGb8c-WoCvLg? z*s;s?>Y=D6F@OHFX?mdwz4gI^m)?JWEq`*>TwZ$<(>eStwDeiLAj3>vkYUQ(O;+kF z+5gv>j|ZcJ`IB5R-@C&2b?c5B*QFB)8o05dH_|v+H6F-2{rj&!f4;Q%2DRC|`NW-f zj_uPY*SW4{%a&k1kd89@^aw>t;30WOMEipIm;v^Q`9|!6`3(DZ=06jel*q~~P(nV* z_!Q(z=6f3MGS4t3p9kiT=s_I-PI>dZd?=6eh5GlhpjeW$^>j#W32Cx2(n zGs(%N3kTmyu51*78FmIfXcmB6BCOKb}ccHtRyb3BqXGyZQGK6{j21K7k;l& z1xh}Gd>AobV4tG>(j1<$m`~Uj!hY!}EmYUHWPW%!ezO#)>Mm}W$>)}*syn&uK>0P4 z;(74Fg6-SQziamG!*PQ78+e>=<}-YcIj5osnf0& zEq2tXk%i|cY8nV>!TdvqE`R*-I?!Iz(ll+Js?F21RGWskb8Xrjn>L#p;6LrXrmKwH z#=L>~D98im&ownboR8b1*6_-5v2+DY}!MU9IBOZKkD(dh!xBTRjg^`gP z&zxcLfbrF`}=gg0I_L-;=!ImSE9ywe>KzhK_ zJg^q3&f46K>(_1GxMAz&P20C_*|~jNe%{Ul`}00%wwG%=2=Wcsw+s75g-0Kp>nQAl z`8Dr2J$8)wx<;DH;>E=ueR%2l=Py0*z{NZ6xLC6$dytlH$k@Mn#~oMiytDY8dx~q< zLSTPQ)2>ylSmNjR`>nTLt5iu~-x%!^_U)m5y?Pg1==liO>Cr0m!1N=IIXsXo+%q4K zR&j}aF#rDh3$|^uWIi%B!TgQkVHV8q))jrntMo_Z0_My4b8_iXxt7DSAnD*y=}S5D z7dXfH2M%2N@Ix>^)yRBj?4#=1dremxk&Ahq`FJ)um_ON)`I(tV(&n=V0pJc!H*uQI znZ3`u!dR6(Z-p7i4^@BMxB}$ri@i|G*|Q6$Oxbn(I7{p?z8slvdRn@%GuJ5W8?g^D zpNxh2HZ7u}H)H8*hdWnOXi63;wsJ;%uh8k8S}Vh@7Z(Cl%h#(UT?Sn!*WrO0poKC`zXKm<{N28 zkCrBFQcI-n@7Qt5MVt?w;{hw|5sF;W7oomTcqrN@g@@!FXY7Odk;MGxo{1WjWS8+N zS7%g`9r%*@5`Z4)0g;f8Tpe`X=Iz*CkhgPpLH^-`yFPq>uPQv_;TPUywg@<9kTD9U@wXSlzT&ouBQmtAS zVff+j)?1fQX5R#>{Ji3qUOZEy#>v{X$WGL*b-Z?M*s(fwj#jBc1pI_-S z(|xLB`&3!Ojh>di%JFIG?1AZ(DlN0wGJt(#={qwYI~_Kx=<&x_+cE!N zuCoNwI&~J;sk4aN!rO04YyRyz(*f&o{v=wRuS_wQ&4cGf>*N29;C}^ zGH~XjCwEi@uTk0AM~!9&cN=#P1p)T`=FhurH#0L4MNq?V-Mb`4+0q*wG`T zw&Q`slc&s7olP5=s()gZXzt+Zk)tc5rE@re1yc9~WxfdH$|C7w)Qau@19K%xc3f*RFk~PMs@F zniLlln7CbX>C*3=I{l9FYfyVSbhws~APoHitZM)kACH^GRg_#dat8Cb3rv`BIWF!} zLIT;vgao^#q+ERNIRyWR^R1b`VZ)WdgU=)+oJ~l;RM_c+gwyfyNZmX&d-l1)LVM4J 
z;26yyMIWX0IrBG%hdI!Xj&SD8liz)}yLofiE^YJ(ZwFUBUHFLhQGp>R$HGucbJHe!Sq5q>5CM|6 zYlk@hcT`|oVNVqle>Be+*EP}sjC%opqROVqmB4(5 zI3JCj+Z(s$YJS?Zqjl>-2m+`7GK;s9(9#n5|nZk~W>UrpNj8L@i^qZ)Cm@c>sK8=7azC&Rje5pN$?p zl9gFt$(aUK2SL8f+wnNw$rHxYo+!wtuG^hEcID>*_(g?#jvp)f_(R0`d%^rl-h_Qi zVc%SJQ?JQz+-AdAq!YnG=6Ug z<==?0B^@v_l;+Mo!=I%E=5H}E-`M`T<|j@(a`)X}K9rEfp`nGSX@=Du{B>QMGc@MR zN4Ldfn?}r^XZWLe7JHX~Eb`P6m0 zdsji>p51$k3Qrv0`|*eSbms3D*w=G-xUf%kczD%K=TTZ-jQPjE|NcBGJrH;D zl;Hu9kPjuFRGqzrd-m@wI(cFrXFei)W+vwIQCdjaH+Sapfm-_DTrbA_e>`+5HI)h7 z?k{$ytKVS(t&8cL*$DEP{b!iZa(LRbxe3e{C%b3Q6@P!E^lahG-x_A6=R2Hg`QA*N zc=YbOQGS7Ca^8AtR%)t!c$C<0`8H$GiZg!?R2`d!=jVd?X%>wf7a>Kna;1q}*g=&Z z3+D5r&7_o+U6(IAcgj0#i55@UWV1DxG1gJV9 zKa(r@EXar2BOxF?;AtMPC*(ub;W9o{oqcl}y z_55>#lE*OSQ$jv~PfGrX;Rz7=J?(mz=PNycG7BbEXaByv2M_E&ed?fr`K7A91@_&P zU-M!EpnQE!zTM>PsF9cU?=K_FbA|vd!Q&J|LW=9vLqdMB%9wu*eYoq_yJ~(T%fI^$ z#o3Utf4F`7vp2H7<2rlyUYRuMOw*?OyuDf8zN&5o^D9^0Hgu>%y#mLTuH)U@xhL+q z2jv$sH4RDoc#6uhS6*2k8@nH6mak(gE}EvK?B#L(3=Gu0_uly~TG^mwv#%N!J`o?IVuy0S-C+5HK+~Ab4 zEXX%0`3UldCrIW?1bUnYL_&Vy?p>tnAjm&-@W9@p9iOr|zo=pb#QDsG!UIWPWbErX zJQkz0?2l0NVq>6?#`C~^r{8_&Y|9qsd-S+CXwc>8D7!^PU5<*nVz8@GQFe=rEbiU= zYV+n--)@2jX(9WCGhfAktZSND^3FRY0Rgbz`Se;q09kQB0PHHaD**vymjeP|m$+RF z2)Ov~08B3g1e_;p-TFK)xyc4{LL&N!?Ba1dtsM2!I_62so$A@mv}mzEAYfmh-S!3s6zNtN7(lis zFaWk&w_Sk&umW!RfdTogTIDxvxY^GSgSM7|`GkE$`*rKCdh^WmUD^gz{_bSz5LPL#S^Q_ARkbow>$npJCt5{O1RcOCBzQe3J12d<6N4@v$Cv!OIi& zcn>fcAGtca3X1mZ-d9w3VBcN<|H$D(%a$$t^y7VgjQK?sIrF)^Qx0L@x$36Zaht|+ zc(|Rc=*{Zz+);zgUtR62WFdX4HAbh}u+I)^@NsQ0MGt*UIUoBQS_PMan zbRK=iI5-yryJ$rW@5RVm7{-4gl7~UVL5dCxbr?`unU0FxnDCQCRe^n`@KD|+W$a^c zE^0EM{2I3+JXZyeP~=9#atrifxfqj6!*X}%wbxLGhn3wd|=6XCr3w3x< za+6ywo|CR92;E^2>XDNJGmXhbLOu*Yh5&-;a!p&Vsh9%$B=V$NJt{p7wKwr-6~lns zIfi;`vkWEIAmGEMVQSN6f%!IVrplIp!xOnpK)p?F;}iv!qA1CjYAOn{kpcSze3~NE zM>j4_3HwPXyrw7#{IMy}(qVCm0`S6yLDk{FPZZ4O*cUlGB6XAUHc8bN(LP}xaXuL_ zKf0oK^b5}q9-oqgAfHqnlJNn2)ZRf|Wse7dbb!Zs0A&_Lt`4a>hYuZu+B0@+(x;yY z<`+?@Uxu)6uDV$&?BhXN^jsA$?WD>`3nTD?z+|8?uuMjj2CiY3H1(3IUIy?LHoXe$ 
zGv+&BpN-O@!MS=_2K*=h_F*7DZRb2M^?>aQ@?*ci{waPuY%~F5zRRkcrO#DS)xF$` zR25dJs(Vz`Sciuyu%Y}KRbV6R+oOF|-2zk8&5F87RX1tOHmd3djoErUPKz6!tA$7C z!g4eKNy>j#BbhjcZ+3C16_xk*WwtZz`Z-@2=}A7HqpJu7zN)tE+2SJ6!}Rq4%CpLqhK@4T%5u zJKwqIo?B)zNrrXzx#M%6*$5%z-hJkm*Y|tB_ekt(4v)`rt~SrbmKM6#tok*apXhAe zbe)_VmM7=#3(?`Z`$M6UkXd4eN<(I;X_keI@{q)S1+Z^0`KfhB>HKKe2lE;G_&MUm zKC6AgzF@wNeV^7%lr4*^Si?S;j|!e ze1&~5pRo_-Gxov!T4J9oH~m{T1@og}AMkf!AHa8E-|O(`*r(#nYGL0p_7(PZwGT_5 zFJ0}!+p(Dsk*{1G*!TcGs+DmVdf)+&d;o!a0_5sgSsexZ_uqSum=A9!t1vx6m%u&; zcwEN5yM8Sy_MNSp>Nu@%cn_VN3+9LRhUvg`9s8yJ*hhIjun&_*VxP$m?BfGmF-#s{ zpZ`-rseQse|EGr~^)0n;x>`5Yaav$LW8Wq98T-)r{$6X;j)xK<6wpur#^IcM3$3CP!t9<~Uu@9-wYM+76uWamt`Cjak z!^8D!g84f3q4W7tY9E%qGWOZwiDB%s+GpnDl^hcGZ_s{`QU zAeF$q;gdHU_$R;{SjyFblMj(^F~5kJpAmt`FH9%3&+}Yb%jNW@8s^eLTHLX(IXvD) zEzfaUyHmouQ$o8zehvH3`B3{MQ2VBdSE&6mlT02``@-RYvG0q0c6fB`yBr=?`wIJP zEA2LYu#Y1Y`4mO{TopJxZ1Tj3eP4%%)jp&?d>)ee7WQK}JgoLT4iA|;EcGWVW4}7s zR}K$J{pi?tIXpGMzT4Q>urKT#HhFA^2i^|I&khfbbg`KWoo_ijFm_n&>r!7iJRIX; z=D(VjO3a6!50NjznYOF*=Ig`Wcx`Cl0g!wEfqMcDkuRKl5A)YT=Vw6eYYvYx_MNSp z@;EJ&@SuSWM<}9!jU66Qzos9jwKFAzBNTU94$q#H(4KIZq<#oe-vsa>_{rgc&S$9~ z0`m>s*r$0e+56Qt_I! zFqh`J@fiCqwXd)*nJ;v{OX`F9toCCV`%3M*93BYmizj!B=ZADyyOj|W}01V(0;H_KT$p`S| z443?T0sOSW42gX;%!R$f_CA2m!(1}<+1?R8kCgh{w49m(k-t^eucgQgk3aUE!(75X z4|5^-X_%|9FNe7l-IO*yhc@xb#y(tqHujO>;TVs~@c2r7W$f!29_u(Q(}n$LQs0%~ zi9O7v3=ixb&hTi%-0Ej|q{EXKsc&U?l0lmFHm(#Z$&U(0Vuy;7IFR||( z=2nS)eV8k;?VlXIhIc;dvq4Dbj`pRg~|J8?>VW$b%1JZ$eMsgLw#oY76! 
zFn5GC%mw+s_h8?5n9I3MyvEM(K;-k~9p-}h!rqs_SHoP}*!L{w>c&23&%kF(KQ|m2 znwHAUN6o?;Dxb+t{_DeD9X9lpfWQ5dw;#YKfSLskc#uJRxQ5n{arb`AglyT*_@yTwgdm?CP-K zuQJ1<)V_}W>SlN#_+vyjy~aK}JaS8mbDKiyv#U?JO^?Iljc$VZQLwLVX?YwTg!W@6 z_FdTbW_WaCKL+-zo8f__&lemXmik_YM`2$LbM*|5?=TmBzF_`^!_ceAz~_7>JNXd# z!-l>bz_)zzmIM9-l&hnhe1<==nmxNAwd@;HuS10$?mxsQRWPSjd7xY_3R)-<)r}M%5)#<6L ziTQ+mc6cE874|u%ekVK!K&hM zZ)y2rUuAgM-r+?phr=TRJe=VXJv`jWU~g%$w~q`DF7G^-mviGW_M@6S@i{y(u&;8P z)$L?(IXv-T-$j1!Jhx_BS{&VESD)khw!`Bw_Emb513V=41@ni3`8XR`z9Ovz&>SoNbO@wOGy1_*ss#q*OznUmR3xMC;oYEjKscf?6cY@pGV|27h79e zuIQ$(vCp~9>9UZ)-qM1@1D}V7xzQY+XjMEe?0b!U#PvC`&jC*M_N%p=%MK4G_Tya6 zl^GswIhVJzq8j^3=i>}?-nzA19a8uJ{wpuN7`%6U@{WW41bjt4eo^@O(D|z}i1};M zz1SBHk8?RUj@C_UOG`+7Y3z&TT-ZB2&n4!^VeHd9m!g}rs73W_ma*^J((;|>x*Q%g z%;g3)7xwk&=2p|F{coCxh>OAD9pqE;fdw4||LCHB3Y3=W5fO&(8TA4wj4o~ukA zU+lY#{b*ZSPGeu0Joa*~it)%o22bmzTk3~H!_rcRLFa$)-fQgYST;VGPr!fq??ZzA z{!iY2@SXrO-}|aDe^q9hl=|tg^kdF*xsahc#y)kgA-$=ho0`KTa+}&bR}?Zp?dyqs zq4te(9s8nzjg5U3-Sl;MKz^0j_Zs_@+qBfaANC!IeRlPIGd#WykB)uVJM8e--oCY^ z6=&;a42MUb=T@tA6KxDo`>K#ZcX(8K(--^7+xI#=$Z*2sQNvuo-!;$WwOr?NZcSP@ zrLixW-*njf@1kY_Tel8Z=arX+y!7G=!F$Ii?>OjBz)O4n6=Ht(LZS0lWu)oYXJg;m z(u&f$sdqA1Jv_9WONsq3jD56j$}KHg%cZNTd9GH!MzsvIs71!UD&DmEmZLd5zSytY z*cX+XzRS6BIy}sLy?%{Hy83>u!oD5ax3Ev?&8k{AIk(9sk2A5)oebpg)IPDV93C)V z$3DofJ3L^14D81+_C3qFw$zVh>~j^5)w;=2p9>jY3x{5V&cC2(?tAZ0%>surm8%%wL(A>cGA=&qaDibTUL~-9-IbYKm&z>@ct6YD(qf%2n6JW_wyOh?pJaaUfS0^uqYCENS(%Xv=C8D*J{kLh`4rtGtA`e~ zu$;?PJQU**1^asICYA7{M!@`yvWJJT&r+Y&K4CxhaxTcP4RbNcbuu58zU*Y6c`ke& zv7AfBzNlqjg&)n>S5hB6Jj&Qt86JvmYE8>rurJD%?Fb4~mE#sqd&?Q}#}@3{R|K zZdLVbjC}?^XLx+2zCO%lW8XW^RT&GB0FtTvL%B|LVm)AEBtEzHf%d zPV5^79hh#NoV&?7Id_9$#JQ#Ab@iiWcr5J4DA<=x%hnOPdE(?;K0FsE=T0_dZj)X8 znqVIz-Izxx0{dV-UyiuGVSH&ApK0gk(h<7i6kQ(Xa_gpRn9CU+Rmf1oBNT@6+WJ1L=!z$2J{(M3D%x?}IlFO|GFq~wL?_uqg2zRND%UAK-riyTrv-quYO z~m<7?VV^_TBP<_=QnLK?5#I= z2}|ZPS>yxwFFg0x0KVmuw;b>%5D)V+(|xe-x11ZbbyF1VgZ3NhWNzxyd*>&gl+Kt@ zQCN6z@1DxtyASQ!b*Q-b(5zVpzWk!>#vAq>(Qt=hK=4y1gR6DZe@lzjayi3O#{0Qd 
zWq4Tjd!;@*JdpZI?JMkq`E;C?IK6$xDJSn5{6xuw2^GtiAK1M4&>nFY+qWNDw(P)< zKbAfB+};Z>#D4<#VqY2iF*+HT{5}~TUx&vKz(4c!#e)VFj~Kpl(W3oZwp8rdb8z?W zgIl*&tXNT+m$&=nm)AFKIv?1_RpSg#b&UP54daWWj{M=a+va}#Rq?`wyEbkt-L}%BOD%%ZaPj&x3P~5k2p(98vEhU z8)>PpgZXd1&aMsx@|CLt;6MMjX9NEBPu_k2pFm9JuYu0bOrt9!9j{XDJB@wS$q=h` z6B!=ESbzHOwhS4vub|+-FVX*5yY}E4ua`D$0>(1-UB*5RSWzeEVwlTG9&wx&W8X2) zh1BDqPGci-;ZyZ2zUZ|T5+%AbDPdE<=*4H`@n*a!LJcX-BxLRi$IX# ziwqCan|g-Fj_Vu7=dD^!e*gV7#lEiL4O(!_G59Znk3%NZK1+Qy%+(zp z&hW%C_A$)0=DA=#VPEKcX1AAYd^mtQ0*D=Xd1)PLo>Z_Cg9gTTI+vkIUFMjCo*w_wK8gEh|-M)xQVVcePb`w!j|h==)E=>q$n6GS0R9QbZ+z>`eJ%tk zCg>k1WPSHtdG;yrQXFN=qIDC^4Aj6DGWS|KU!^xm@XL;U=R7y=<=m*z%}{7h_wFTY z*B-)mtG-G7J69j8--Tz@w=aAu-Pq@ob7Abq_V!`#_&Yqp-jQ2c0Dh}hE0!;pz*m2- z`cBjb_$~fjV1K}Xf|Qgg7WQKp`%zEM1@=W;AIx`S-!Q&8{j?ufudd$z*Uvn&xM2f~ zdfDWujj{i>j(uW&7>t~;p|MnQf;|cpNV;=!dk=WmhvSlvV=g_`9 zND=c~3UEU0v*7o2c=!lKR{N0pO6@D`v(%TZn>jiA^YTdH$A1g>A%pJl_+y_>)H00u$NqNl)TwdNm7g3d9F3mRoGY2O=n^suD(1r*Q@r);gKd! z=*`r~8##@Ky_Wl;vhgvx6|T-Re|z@nzdrL+(BJ>b`w!j|h=ck0tn(TBUSr>{evP)Y zD6t#YkHR^Xi;4=r^%!5IJeV^aGeV@H^lbM?eNjD5mBseKbVpKJDMl55X% z?M1B`82f6TOU%b{U4sXg>#(ZwW4>7!>0fV(%R1@>>I{+*L0duQBn1O3h7Ue zzxnawH~rz95t`J8=Jz%BVehB|)8DhmZ)bQMt(%hhxx6~eyU&jt1=z^Pg{X*sv5)=g#XL+$&_bDK9Woj*Tbk*{uz zZ(--oLl<96F&^s_MSY$d+u?~i&ozvNXPl8ge?Cn})q_%hZ;TK4lUJ@R?cRMEq<-`a z51*nKZBYx@AIaDk2d0z56EcbUD+=~j>-*8a)&Bi4U;ao8bA63{eM?Ii`(f_kVX2Sw zj#|zohX-TaaOkbn$eTHhbBDc3E4LiZL{^7g9RUBCrv?Z8{hz%5;5~tu%wJu<4w%1O zFh4(2VV@lyZ06e7r-J=-0=}H*W}s;~QfL~5O>#E#1C8lZsx2-N=g@fCG4MRb$Y zJ~b^<505-N*T%kfcy8^DeSMz0|BgG#bUIb*Kl+~^uW5AB zx7IK>8umqihoruh;i0l+PV58wl-m?iUpPFZ^WV%lKR5T)=UL?IuFf-0J^A#LPXzBB zpSo;~x@=j!x+WaZ(NSJrIbuXbDkb*8 zd_Gh~Nqs~&$$Np+SIn2LKKVRC?USo7>(^+QD-TRp0UkE?b*Zm98E84TL4(pa-l#cU z4;`w^%fn9)WdF*YU?=jSIkMher)_t9^tb z&vOmqhx_keQdU+yV~4*~4?AQtm`K<6)4Iv;9ZNPUsmuX316*e?Y0AADd(Z1}|fUG-lF zrKOdhe_k$(ed+28sm~qzG|3G)TQ_44a}St~BNVGx&OQJ9@}GXHwy~q%*M<%FfkGP! 
zmc4_KE?fFIMbV}6V;r6=na1^b#c!%zUsuX+6%)V_77 z3at1azWHV&<`?mN55AwXcJ02lZN>}cb0Gs`UpMwW%el7NhtyXlkKED@8lKENb%d*lP*ymxcwxz|nO;_tC9heU0KRI|O-uQ~! z)rA0mzWU>P_Ex_5qQbs7LXnJpxtyyqJTkYbhPk46Q{*;X!(7O3I6Q3fz~KS;eWII9 znv_qRNR~y7{{i!xHbo~}?D{oj^2Ct(l;JUqC8wUcV%|J^532^>SV4YCr;Zro3Y~BF z@Zeuz^1$AS*WqDfAFqZneB{WQGoSx2zU5}vO~5<1ZdO^p_MxmipMEL< z%m>pH_IK^7d~^_OcBp;rT$Pa6(n568Y3xJl%PlQCt}n6g+0s&i-)-zSX;M*p=I7*q z`8Zc42KHSh590d%M<{~%+1ZI={$z1fx_*RW4Y6NK=JSuOn+JNC?=|-IP6jKt>4$xI zJCVrS=bxAR^535Y^BMSv>pcA=Tpa@b;Kv^gJOGjpAaGA0KISXzb8geh@I=|tvRXIS zg82{JS1UhXSFj5Ekj=N>Rs!r(Xg_3+bm8!aO(ga?w2Aa49j)SP?9=(Vw1mZYS5n{K z;Yld-y~k-OZ=adZYM)IW|1B-USej7g=Sl3-FxNHDjpsNmuHu2tA5j~fuQ4C7eCT{# z>gx^-$4pSHBzM<|9u?=bUUdhr?E)q#xfH<5QVRRf`J$6SZfV6b z_C*4pwzL{GI&gW5LswpTs8y@V^UkZRTNljtad;BMeAF*{kJIvv@l=8Rgff3h$N=;6 z$kLY%PyE;)8w$xSEi|wtl=*yGI%6NeS5hB84s>H*kMRiRGxn|4%~0rFX8zxYP*&&J zr=P;y7B)Uaz5xECk3afI-~o_)0D*e~F`2(ocsomB@6^eFvEx$va!X4y_A%9!r={aW zEnq)vt~uxI;t?ZiHEoQBeUuotYr78z7sBKThY0w>*jLNBgnjpxmdU*e<)@xnaou$X z-+uehqD7TO>naNhei=Ek^3g{tFTS`k5@A>0Hujq|Igki%hncUi?`!fv>PK^U>`n&3 z{FRCD^Oxgjl@N~80`uum6@`7+JFy)epDLaNF@KiG=Y#o-eOUT@MZ-SNb9p&eNqt$! 
zAejHodFST7_`*}1)lsg_6ORG-gB}}{WPad?m%QPsm=EAX=QH-1`Ox|C8vALHHIdMo z!H<)rkAIX(=YOs7tNJ?Y*B@%$Y;Q(7IXnXU9NiSy_Za(B%V1y-PP#!qjGX|<|||0o8F9peH^NSE8j4dWoOr(`K6sY;!A<{ zQ=?!%T4LY7iYIZ*2j2Bl6usDo)YlyzR{K82J~RK^b90A0Pc;i1&J?cBqi}Tq{6`;o zI7xuy6A0Q9h=ch{zf#`5g!L@^)6j$l5i zeaqpYNp3V(U+>{@I6MhrJ`Q<=uxH@Mz`jfBTg-QKGBESsJ@*f}&;Jd;msuU<>OA_$ zUmp3(LrDT8pFq%_Kvm3Nl9djfPg0+m&&Iyk(sDRFS(M@N)~~Gw^HWpS9CbwDkRft8 z%b&Im^v3ls`qx2#^TG@EMpA%%VeE@S9$B``%?u$7d`qsou6)A=7Ww+0w*NZoAeYK# zpZ&!&rNh&t$)Sl8Ywhhc2J^Q>(fKU(^?9yxczm#L82Q=Rs}rR2nfaE~m)HmJtF@(t zVeVMN7?lW}&&>bAgMH=dv#}56t9h;~!}Goj?T6nJI-i-3xh-dPY*z;&f6&88<_8UU z$@?|pVE$rezH;?Nj0am<%Gl@STHEW)`COvgEuz%dK>vD%yqeb5aarBP> zZpH468xNg#E-W2s>`Qwm#8RJHHw|O|=+V-~x7hCUrTMq=@_xDSLh0}%i1|B-`Tp3C zQN?punUDWu0PUv>sc&K5k>ROwnENAWFRn3$F?!_4t+m|4y?G#*4?l;QuefdBAAe-7YVK6%Rle*&?Y->~jtFh7gD9qI5u=N$4v%51$j(j( z^H&%KfIl;2GWH?$HHU{XJk>Dv6UzL5JF(AkeLb-csjoXcz8M}c{{v$FIl0e1O*IQD zp9veEf&b8he+uARK6%Rle*!U?zXCeHPWmFje0F$fn5#KF?kz2CIafG5&elzsJdFK| zF5K|rkF=U_Y)cEzWI@GmNo`)A?Y&3;V7N z4>MoKzEA5WF+b(~Kb$l4*{2BjaCH#KXB!{P2k`&=rw0Q5_D|k^0G~ir%wNRJkB)s< z`th`G!qw+XV*m2y8>dZ6aA=>nY#BH2zEghuajf>Ke$6!Yrl;+jHM8nUWc~unmshrH z%gjF{bbgI_Pk&G_A37h**RjtIk1~0f`7ts)@>~_eSecz&mYo9|q<# z_TxA9xo_FAsAU*GCP?Q)(lhgYrM|Dj<8SPT%nzaS!ThJ6R9PL{)dBPWbpL$;fBPqI zKY&jl4(2bcli|faIXvQ`3m!*kSXUHL`&%28X?5j{Pg@ zOAQ)S+UO)KYo`~VU!dCOHZKVxCt45eJK*_#5AQ z@6ZV+plO*-&PCsHQ`*U@@i#($aL10yoW__V1Ne&hz&wGPwzOTdMYwSbk^W{kFhs=LO zQa(86?A)gYWBrQK_`0hD;6HHRy-5NjpFq%_KuqQrkheq3Ur6>2OdewXk}UD+T-1WG zqgU}zCj%$;k_hE0ZHkUHpC1tMe<#rS zjT>VTcqf{lEmxn1xf=Ezp?!Ux>t56{jMa(I`OJJMd}h9q`p#jltB1$r9v)@y*bYyE zm_N@jFt`22DfL~(zGA*w&gB4)&U|usnEAgyD|hhY4?*O+Tpa-azI*OY5+L~mg7yUB zVE%%-nHu(m!(&N(iG8bnO&{i>kb%{{80PYU=~m@tCNJlT^Apokq4w7{tiSH|TV)U1 z;b1=a{P^Q#_3I(PL-X8}@ZJXX_qS?UI%rV&3ojga^2v(Moy(6orYsyP3z=o+;bQ(S zi}_0D3+9u_qi1+j$G*R@FPL9cYi~zqepOOm)vporb?nQVK_(J60on(H{aF@JaJb}2FZ%KVI%q2_Tli`7@5B|G`xw3~R zD~-B0Go0AxLsg*jaa1~E|L7yv{po&rkmBLMKECFI50=)+B3B=$rH71tsgZqkvi8@j 
zTT-`9Nm^QoX`)@f)D$P@hD=EP3P^pJJc-l!h-(t|g{!Y&-^bNwW8atgg*bgIJG=JG zho8UNFhKjcLhAcsU)$1B*eB*Q_8rT)iDN!gJ#>DRQePSSF6_h7XTcBVe*|v_%zymR z2ffU{@1A?^z5A{t1d>l6Xip#p^ZWFKpTD3^hS2#E`>^-n@VKx~0Ujmwof#gze$8h~ zi!(e5`^O$r^u%KcK466%82%wrQc`)>ouwJ+gnbn3n+E(AzNmE*%*VMZArnWdKB{h^VJ;{3ZHGrNzj)p}8FP)_V$wgeAYW%b zV;}a8?(q0!c;uFrPwS>(j8Blx2lKzRn9svp{0d)sCxh4FQO3T+zGD7kgZ`w99Uk4X zv2)K|N#+L)c**;XrStpr%s)afzfPvYK4*Af@AGmlm+-h;eOCKi$Y2k1#Sw}E`_kdD z>eqzSXY3=rdE)U!FAhm?{icq6OmAK-x^=s=4V{LYJqL>fb&l6JL7yD{UYnpYI zmN&!0mj2I)X1*`>!F*qb$KTiwnIA_|{&Du1LmwOTfXZj`=oYRHME>1(-kC%|@(Bd( z2}INRPe1jz#r*koGZ#qg!`O%5cf|E6$zx^p^&=E%KNkmSiS&-ux@p(1*>Qc?J3{Sa zo=YZAYIyA_jf&rXEAjQ4AU~`9b?XlG=~I##0fG0@nYre^kacoy$OQI<)VHvYd2T|P z9~=9=86LO8V;F_m+3ORe^O^aKeb_s`4iCt0wQeeh$7Adp#)L%Z{P~7~tj>3q)c19G ze6jCxc*x#S%zyNe2b7b4|GoDz@b9_n&b#ioBWUmUL9nnouwhXnO%GfWAgx8*aYVqfv?bG$S?n~6>W5)in??0yY-v@soSQi2uQ3eJe!h_UHNZYQ zJkr}g49sV#?~8rpIk|O{9Ui>0)MtA~#&|;JKO-q0opt8WK@Z=rjc#G%-woj3edir_ z-hO+M0Ldp1v?mY`^Xp}5Qa={aG0c@}U&Fp>mIz}X_D)%%nQvp?G|;++=%<XQ{7uGPoTc!&sLfoxj#F z0DQh;I6NL>-{&~3Nrv(B$dTJ?ozI8PhldZH&r;vl;eodw!`Qd6Z!tgjk-yvr<||hR zHa>uV*B!UtdHZcad%q{|H~9pjG2ilbp!4Sm=DV=(GWOZwslwP7%emHZT7-Qh_E))< zbEVpk6sCp?&pl_u^uy4@rm#P0(t);Zq4sU;OQ{cI-@?9Olr?QyQR_3%k&!4K?vM!I0WufxNMeKPh1^EV{I+hOJ}s6O_^;ki7|m6gkhW&ZaX_FWmC*jrk< zvHyu;{=*O5i_xvi)q%*r{%=z`m-qElh)!27o-;vl? 
z%emS-S8Zug>!vdHL4MRS0Q*fEZ&v7Zp;=TmM6%wJ%dU_N6X zQs3q9=rJBO&js*toR&CIi;v9}-o9Z>OoYx~U>JbBVt#C6UpYLS+jKiTA@h?+%ExD& zk^7fF-vf~^!kMG4bs$%{;q4Pc1R}N3~<=h$`r$xp-h4wR2 z3xWN#l){TI+*n+c_#QS8o3W2lePADD%ZP3Y>`P-GQXkkyV*i4s<%wp#Qu`d^@xwlK zujvktIz@54VH9O&Z>*)x2lG1$=7-FM7W3KI$5fXyJTd0EBE~}wkBIRI!Jkm(|Da(X z_KwTpVX5zOczm$0nE%j&cdKxwa&>OM_14>NNisi)z`#9$IG8`TUMB1v#y(4ZtmQ7C z0FUToP-V+jCxaa3T8Vvk4-dw;6y{0OI~nX4kCggk>=z2PkJok0FWa?e--SnmSx7n)JA zuP655>g&e7hJCVkrX-H}AUrc)seLc@Ra`%ou^%!&h0Z_ojNFI*L@T!lXL42tHa=V( zi2Pe`zBzDj_~Z=-{t3jz{0xbGHujO*v=aM^sFp!%-LyIxbcaXPuW@2u<~A+t%VDlC z_NBe!#6Hx1VOILO)~z>}l^#ay&z*bll8ar~r%5h$wGyQB!F+suo%x*M;n!*y`-V}R zoxLd$I$tthU|)CjT@DYr*Yx@|YdIGpA5!0%=NiVOM0h(34FiN9?!>+?^Odo$93FT( zj^$jz{C|S^fBw^541CV&u&Z<1EjQmXaA4p8kbD4vdjc_;zwC(m@OI|blgx*!FXy=y z^EIh&Z)w?yeUxpAGt()npP2^c>xq3H=B|Y7=L`>xbY%|@r8jw)D;yqRe_h?Ib)7nH zsT4hIhr`&1=AW8(FsCuF45^RMCcoleOD?#eY|^CK`}v4{(pU4tzV9&CCH3v&v@GV! zF-QDk1^qLHw*%%cGR;MCOMSm16v2Gq>L-r*@bJCNSH?b4J!(1E7yIn$(=gX&{sVVv zSsjdS$;k)vZ@oFm{J;?}dBasPe{S7OA@V7`1AB*=Z^iYgdrhop`EO~F+V^0eH*j~jRJ59g2+2lhF`lThZI4UC%JZj z$BOH7>!#YzMO@#T;n`>y>$9^r&zqMR<{PHMz8dDbjD6kVakg$^m@9QYyq!epd}h8E z`%wF?43EpR$71X<2IDlKPghk9lsx`bD?iav1A3(W(Fa zcL&Zm-Nrs-d4hC4>e^@`tVw+~_Bp^4&oEa?eZhQqJGEqfdB={Zaad%Siy`${?TcY< zv<#2S;jxx;t+Hjqn4BPQ=lf7-geCQThq=C6T0YCUA@g66luyn$ZRnrw2lHv`mYsYH z_ycdg>BgYF-;?*7d;+nVed4~fpOZaouzC`X{qpk4ciyfz;s1Evy6=LfrHRK{(EoZty|00fx;)?-*m$bfqTOzZ#eKzz?bxm29*427)IK{r!rR9win3te9_A`z-#RcII-f7AevP7=dLe_W zb&~`?8rT^7k?^|T9E~2fy=o|#aO|UX`Q?`@eseToA1kzzC)a+43+7{t>%u-8`?|>! 
zWlKvl_6=iGf^e!M z3H%!e30chY$OE_TZ?*ir=4Ia`#>3MeAzcpAY8Ksf(7>SH?d7*Mx5J zM8m$^(h|(ynh0-)nZH!SzIUGMUCz~weLJyl7eZfB(R};gdHU_$T0HK14n-|0DEjD(26Iy<=nFitEQT z_LbV_c`lE1Ytp)@*RNTNTIK*`pnQ1cirLiB!JXduxfcX;phA}m<%(t-bb$B?p z>5}?B#=d0!?{e?C>lUx8bK`*i1N!v~JOGjpAaGB>#e4=nUIp`)u+FcSC8R#=ow$ts zSTP=JIoH?N=jB}C@VJeAv@yWm5eF%Xd2U8TV!zRe8)nTc2a%clwIQ?m+2Q?CwfwRc z^XWu&L4J04bnN@5H{Gq9qJB*InCDs!kHo%U{`PtE zWY?(v!zKQY^YgKj#mvw5N`1fO+$x5-hA}-6I)8~_F!NdJkF@L^9_IQ+H(gTSc6dVO zmywjuPCIqzowwhJm0J#H^5_=8PclE~>f`a*x#|^m&T1N4Fk+)>_g|PVXnTV#w~w zsPFsaeFygmF!No&-@Iw#uALiukq&`GGUCu2n1ohFtrp5JD({hyMT&;(P z%9d4JUpq_7u3xi^eb_tNmX_#bU{_yYzbGTE_^KNn$;(4Nn%w3rWj$Ax`Y zT%VZ_W8YQ9!)43T;W3PD*{AH7H#Z?VAHXlrun&93?eJL3xm6sYD0F@TnQvnsQs1fe zqhVj)(jsG@ng6>}b8o-(2EqIREb`gO2k@`!+c&uH{N$Yn^$F-YALNIt1K@AkxMAmx z4ZV6S(wNV#J{%tD>cif#uunZa;v(kD)6%Jg2R@Hl&c!fyaTb;ED3ix_c!b*L6)o2= z7uXlYYpL{#LIyF+W$eog4|G0dc+kv{T9lenl$l=KyVth;`~Iu2&&>bDFd+3A`!0t^ z1$b2JrWgD6$+?MR{szMU@R|8a?Mv)ecRANEW{ez}5az?lA8Sc{dzj1E7n`}h4v#Ck zNe)lQ{C6bfi{EAE-ge6XVeEkUl+R?5e_h{eukDj$eo(;|;QI#Wy&|8P-`i$BOMN%? 
z>v9i|drM2hzEA6>&HU)bK4o~gibr#J1pIC6i+L`59y&#lun(z^d2S?3*uU-89i=7z zMc5ZJTmU}?_SxaVE2O^5;Xx+@bUqjClhiki?b)a7thLUs=-3ezodEs{4g2m4k8*gb zJVMbhW+sC9OAUjW@4`MyeY{q~*q6*d70mB{(+zG{2f**!yH9}M`pH`l=o9b-{>BaK zcWhtZyZgeU>d%C?Q!h&~pRq57xn$|bUe0ByA8k38Gd#XqS~A1K=}pS;h!_t$JoY@7 zjD4tmgf_X8fsB2^eyAubqxg}Bca@bUKD6)gZQ}X`E4QPt?{au}o~zWp7yDelCb2J= zzbiq^2k@Esy2DdlW1liSiDN#r{f|QG+t?QYp4iK|KGDsP`IXf9H}_Mn4zfDz>Rj8W zPv73X1Nyd4-gbbW0K6SwAK^^QZQ0cU@Heb4-oCB47nt9Gm|x$4eRw;T!((G#W_Z** z*K&BOZrxNFo+$HN#CTS^68pfuZR`{IMJI!Z>kI4`rG$#=WEBs2eotB1e+Bj}=EK-w z?CZupd>%E=trGhf=2FLg_9?YzK5{$I`2hY(KkUbE>~mrt%%7DYozKi??8DWsxv?*p z|K+Kt@)MTPszPy;PuMJhsYPMPHzCech8;yecLB*JHSuCSLClRF50$rU9av7p!4fz&1jG{ zqka|yKa3q^@@Q%w#hXs-)1oeAcr@(uNY_!n=G(fd7PaIsS6j{%F`iURavjFLV^NE+ z56#csK4HH>{q>)Jw!fnMS62IAeokW$o^?K6bz@(pH{-!R8T*LqXP>e=5#G)w!(isS zq(0`kRT}#)?1TBUM~>W8>#U|?el+ZpvG3Qqsk+yg`Lw7NGQW5#YCe^40_T1Uw4A7{D(sTDNtJ#{4>x`IfN{;7em)#dyR#7pV6=PAi_)O=R`i z+sDbduIQ%dWI$ryHui1oizE*wxpF_3n9qrQ^7aY)M;*Cg)X35U2YyZL6Z3Pxc7=U* zcv$W0-o6X_(Tx4XGJlnj`Y!A{q&}A|t9dT=bG@N`!TjBcU_Na8pDd~Gc6gN9kFuQW zs9zJz|9AE&LkHe?EwVb-Uwa+Gnab7a)uU(6?%f0Wwol%6fS&-{_?nYnw7zIv@w&n- zTMB!1gU$!-XVkAVlcYXe{mfYw_U&OVH7%3Q=XtKLu`iZ$Rbrny892JB(wpurEe-qm z%Hi=Gr$uL`!`Rn~H`&-%*e}AOR*G44>~A+rnpk#7oS%-F>wkVr*J8fHJ_~-Ref}xH zQokC;e&TdKfX|o9;Q{kq*jLACEfk4;#=afeH;g$WN7kPC%MAmZ2lJz1U(9oHs7kbR zRc!2s%x@wo#Qd9X=*uEscXfJp@6n@Mw*bHOleZqwCjjO|>srmI~}+q+5K_i5sbY^Os*o?F;4u=?6`R)b};^edoD|>pK$r*{AH8 zH&<>P$CVuNd{TZs{Cs#j0RC!$eKvW#86KZuuGPcizGkW(XrQo9 z#*S_5a}SSD`!1=E&0IG2MInQ|rL|BDbJdoXZS1qXQzt`WKlXC2zNMu}eO13^8~bUI zqDZ*-^wTyim=EmJKmTE=Z!_Oysn4#CVm{kDO6~J7moq#_?0e_AyH7b~&)ixwze9)3 zh5_I+^V#76^J5Kj#XQ&l2t~t~n;@MJ&NK64Nqw!8!FQNTYCmLtn-c!&l#_FB=-)@V zI=y?+=oU_XlKDaAUXbryb9HRsuPa=;dDB|K{OR?xrZ=dgNqx}Xc6g+*FR{bK1+S}_NCg_rGBhoE+3vN;`%K0f&J8!;>PE1UbeL2 zKaYKj`3Fp5zQR73uMcza9|C+SS6^YDx3rYh-+jUfd*{rlmCj$a>OlAITLkmh#KgXL znCpLXZlakVL+Z==HOu6Qwx#u5$owuP{Iy_yZ`kMcL*=^oY4J_tE=SznN3ZFAPQ2XrabA|_tTKbk293CoU zP(3^%v9GqYaEg}q2u0=akiGAT>vNJvv~Fs{TzEUYrKJsXX-i8l-h{maV;{{7z<#sK 
zwyaq3pT@pmK1`bfhIznrJ3Ls6g%Uf-|TBPqpKx8JsQjVvhp&zL+G^D9iozNw6THOytVhwPmO4Gx@i(t$>eDjGE^7gt%M zMx~7!mEbz*r2VI!y1!q)(gh0+s*!YcX|F!Nu<*bWPwYJV?DZ#|RCMym>%>)f^2xZ? zifhfuC$BmFc>Ew(!k1OQCR;Z{CdiL@E<&4>+cb=MiSTw5^BMb94|8K+UoyW@ZvX3h zf%(eG@6nB%e2Dz6ojV8ly`Q}IU_Jpq;ICP`X7$Dmt9x{vD|9}XZ)2Z~eF%Ox_BE+b zF&=m8raP{$>ep=S<6ITHiYKeC;1Q)^Uv*weRh>cRu*wmNU)({1*fMoZB=_>F`+d z+yx0`{?9_{R~P%r*q8HM!TfJdZj{^ax}FgE!qt&Bz5srgF4tTW%y)hAu7mspP_y7v z_yqhlt5xi_kokQ|n3#Y4wLKv6g^f@7Ot?B-I|KNgJ9p|7x)+^=GP}<$Cmn*u`d_3Nb{4iFTH)|mX>YuSjIjt=khRDkUy%iZs=eHeq9Nd1!KAAkG3FZ^_1@rfKce3=apnpTYzdmpvGk>DR z{Ag0&u3yumo37SP$^4UsUe~ufXLT%Brwd%2&YiBgrel)%!Ngvm@7f7`De_kn^H+fR zN0ZKS>_By_$3@*5Sven!T@JMUDZ zSYHb2nEcf~*1xltUfO=-k#4mQW8W|qj2!7;zWP(8rH59nESouV-_)sl7cJbsaidJk z;(pZU@VyDSxo4lM$#(tV= z-6X-Uu&){W_HwRe?1T1{J{G(%!>;#){5-fqn3wu;0D={{ZmSPa4JN!GpI( zA}G4IPt=0ccQKzo8M42yu>7Nswq4tI)$f15=#*0yw7g>3gAW#t|7q9m-9G)=DE_?SIqC*yBpH@E?4K8jvYH*-64=~`s7Uq`UxoD7p|4A&gzvbR;?%~ zT367c3o*Yz*3<@di21_d(Xg*A=W2<4nc-0mk2Lln^+j}3ZD~1;eV9D)w{9}_En{Ew z@W`@dInR~HX=&KEmvb%b6Z4_-f&IE!8>oKsaQ3hX=2O_-6jDD#BES5IBMz)y4NHey zol4!>QIP%b`#@p8dGodG@K_^VFn^)a`RdcA=Iy)U3fMK&u5Vp4aB?oLXP+tFyxAR^ z9XJri5jYR#*9!Y=?1#+ZDdF!LotWFVcUP~g)2Ty;4(+cF^qW6<^MQNm5!%$|xuS~4Y3yT+E9bc)x+w}7?8H9o9oo{e7PYJ`E!o3^ z43E8>>vx=%*VyMX)8X)}w4}Zk<4IG;Y1zxUEcJ!jUw=e{jsN_3f2CE(lW^85=7-F( zkWp?L<>7F}i!UB_PbuLy!e09Lv5pSE~WNu!hS>vK#4hpN!I zDj{>0xMrFL$Uo)oyVoe}^B=L8KiOh_{0@&?)Y7FsGykL$bNlq_qQaTV)q%+G(7t^j z-}K3w4)haHI-lb@aPnd7tXxsBt}wql>3lFhuVEb<`y!~%*teK(wQgF>2kJ@f^Kz~{ zODhq}xt_Sb9Ohb8Je=5P=1XH=NPRv|OLj6~SBsC+63l0{50hv8Z;#&i7_evEXroa7geA$YM%F8ZW3*fWEgJG^=EK)ije>x*;*2v5zrs{W|b@82Fajr!6h3b(2u9XL#rcMG@V^mKLQq z$=DI|Tz2(ECj+EDj2*6Jz!=xJlY!Q9d8F&XKIJw!y$O3qD_bV!^L{R2pPQD|DT)&N z1blQdh}UA%Sl{f@ZS&@l+mLVwgZWKz;O&%zLZzlz77mqPemS-S{znWQ=g$m&H~{tQ zuj6E1sq+!ncQOC*$2MkVQfMF851Gt-uhgH`;_{`77w=ckTrhuP$OP~e^Q)Blc7_LS z3|bG*2*v!~J+Hyat;p(7K2tIO>Z_7EKd9gf@O`g?tFy-8#C}P`1p>kB0rJUwpnz zeWL?!f}an_1Nc*_gMCMa$Hu-nG9~;ybbhZMV18$n&txYbz;A!$RRMnMCvQEVPXJE7 
zQ{>~E#N`_E^BUA4Z%0Xe3;SxAE7o$W!aj8}$g{NMmX_#VbDpK8mUHO{MU~sMjeQOK zdi|Ol<|4gmXL#)8T+DO5TUv1S+1Q83mpUJ6KP9w2EwZ6ommLKK3C6y~d;&hMw6wA> zzo=D1nu6r7^#OiV?b(Ck2(t9KlL1@0BS-G#e11Ve+0jSA_EF5&u@8HP9iEvX6NBBn z=bl@SFel&0z?)F`gX!&*kg1qo(Y5rQ+uKC}~j|Ea+Fdq$U6x!68KWouUNCDjK71t)mzFS(p$7zZ5rrgr9 zY8goF%Xw~^)wi5lrLnI&JZ$U}_6=iw{kj|f{NV1bTZzoIQTzO7!Tfr4_lL}V;SgR+ z($Y#k|NMX6+u5}XEudJ)m8U3z`QLvp_i)#(D{oLA%*R1mj@>btOCMeC+u{%|nATbHYO>$c<7wyOi?w{6`v$nX8+y$ACN zSirYk9fx5A6$?`%@z&_unrALbdsU!hXoF z@(N=oFApZ{w4;pFe<59bt6*|Nch=Th0SxE#j5Va&p!)@T2cGu-_A zk`^uSlLGR7s=z)wJXIR|(T~$IM}@<~PxwDDzrE(_T*asm)BR%PsCo(p4# zvCj?<)IP1{TG(fwM>srMCxhD3vgf%@V_#p+a(#guwS*YPp2qmh}KPHc*M!M0KPQ#74}a&Ve<#?)7}vNiJ#B>nRvxv<@M|B4Vinw zA-uxUC+tU3_78oP*RuYX)V{i@S+lTW#s{XWGt&wCPd~j?F?`abec9Osra|>1CH<5Da-gT`*t6K4taU;fa4yi?DB+W5VGPV1DOL zS7Ph->Z=^C4uIe4ik3ls?&Zy-oCLQ9GZ_SBYplOkF4LeO#&Y`_{=lw>(!fM7yv$Bg88KKGiA?L;7-!kKL2L*%!*qE*WlR|NB2pS>z?Ax6T?CM*c41|4=-m#epfqL-I+J&WLwi!fyTf72-VstCu71xRvPr(mFBt7F`cM8kf0MAe-g|G4 zVc@7l1$>tJ@b(G&J$n}8hXQ|N_#=Dv9DM)%9qrq%O-qH~pLf<-%Wu76xnBS>GTiEzKx@8+5BEMyemPzIZ z8GV7h_Z9ie3zj>a{3VMPE?T%CfBF2botXKP8VZpwv5)kQ^m$@n-x}uf;kk5RIz&Eu z`x^H72t^J1S`QDkZnC8>9Uh+y4~!k@?T8~3!GE7&Zl=ocP`kduJ}>7=W1n69j8yJq z(3f+`)d%x&rGz$|b;g!Y|7olJXuMSbpET*f(MRnzja>kKN@x$59}e%;UH$a5{rBG& z^#GA*U;kJ6b#+spe6lw^eGQmznz$79d6Em}FAs&5-*CeQ72v^l3%3Vt`CGRh*s}S+ zjvWW~?mdVGHgy+Emz6YchCevVG`Mw>na>UnTz$*ful|;nWd3nrew(Y>5%VGPq43$s zhp}_T0%Sn>t;#(C@3a&;ChS-f!Zq6Lc<&d*;yuWP5NLg!C9f|$>0AFjR#ZIaYy zwJ(=*g{!YDed^(fa)cr=pDle+zov${(&5ptFY4DM`DNL%$JLj+TB4Q#g$#~P263(m znLM(9{Cx=JFzG>{MU$109|8gwqDh8@AqJHI9{J{0iy!z_yBad8X7^_Va z7c-x+&y!pPcu4A}r7iE@e*?!{V}J9bNAJG$kNM#cTpb2JUX|3>u9X~=zb!V5 zHKvIx8un#|Co?m@S+l~o-r9)(kMCc*a%I^=55X~88V=Ds*D70<1^XU{M~iM!16y3f zTxR}p$K`ft-`eZsUvYVh7R@gY>ia%<-@$zX*t*3A7H4(X)dBDq%%8Vx>D(@zrXJmp zm=B$QL|v!D1D(%;-!k?^O5Z2LqjH({L4 zCX9V@^{osK4|8RPN8i#?*ms_#MdbHw-Q)}pun(z^k#1UKQ{(ftU4PxKCm!GXkAIYm z9aBDfbotj`l?@)e|N86p*heBj-84-OxGXyk}PBOF)dh!N`gWyA=+)UV(J{qqlw7@=PWMvPEb#fTBE 
zt9--={VE$Vg0Iry!%IK?wB*Sr_jK&I?c8%W83q)7p=sh$*mpTRjtmbH`;8i{Y}2Oj zPk-9<=9@dPr8{cWo^QU{otwM$h5_rEH(S}T0X%#FADg+3;wC0k&Th?M#G@1e%;MyoWAYii*{Ue;g0jq-O-@lHpAG8D`akqq->9* z?7)>m*oV|7!EYJ+uzIAiA2w0KL+MRmA6G_3=_#j_=QJ*Fl2g$n=Rl5q9n8tWbtosN z(s|)m__y$i&#w?yc}`B5xJq+!O2oB4CkNL)aqZ2?*(0vqIXSp?iEC$0&JJ;H&&lCy zTjR!C&p2aqy?XdD2intRnz+FHwSL%ldHWRI1oqLrmYTZs#1jk7KYvw|CM(bVL&5RK z;n%Ty1=@4FzE2O2g?*LbnMTGwIXrps*RPr8xNvy%amVIfePt_l@;RRg8y~I?fZzPG zWEv~brbN1!=oGfcKw=tu1XYRU&MGY$#od};%F6!eD{`?z`obmx3KSA&ULkJvebvp zXY7OgsVSSnCe$xqn+;=2$k-AIgZA4}QUvqEJ5oS?sD0`10Q8=SyJC|u8QUG)X>;h%x`~XOT=|rw?saZvN{m?mp8k-`DM+U zU3yt?-}%Wq59$-}1^&FbbCxWb-KFD{qZ>|aSO?5^W8cTv*RXHRbJa+DtD= zb2-<>zWX>W>RYxF`!@DtZD~2?xrp(|BNQ>owU5)nmR3w-UtoVDF+YOWO{sKkj-+f! zO#$%nx)sc~u}?am93Bh%YMv_+`xxn(yr>1>L+}&!O|!%dm4?u?j|0&M_2p4*q8NdqHLM^mgzVxbFqmY9_n6W?ECcaSOxp~JlE;)xXYIPqnoDrQ#d>Z zI{&J6SMcZ-`Ap^NG`qCfWq-UhsPFsaeFygmVEqbdd?@@y?CQ*$JAdw+d2?njS)?-` z3ZJp>XY8v^hMKl+!qT@|H(Bl5TUs7tA8rpVYKc=6rLoWC7h_zGZpr|s$JiIAC`Os* z`WgG))=i3Tva8Q(AHXN}RQ8rP{Z#59UMdTgQOo74SZCc-Z7&3y4i2 zc6ef$Jbu^*@PU1o)MxA~lZTD{_^~fL_J!16!crftn^Ef5yz^XVV&7iQt>QQ>(;OcT zkNxd$hqY_d!t3flc_oYk2>f9||_bLI->FIqIS+clGaa|FDd zpN}AWCnokasV|IuS-)1F5}WMs&~mPw*vFPurejM>=zQDQr>0FT=gM>skLB>_^W3Fkp3BU)jeQJrd7f*ZqG%`fX*pL*?5|CaL{03crqH66GWOZwiAd~k zNtF%{nLHNurPQ~go0_pNcD2aZM>EFMY1OZZ z7>_te5mH|ydEzzpeOoud{B)Y+TE}VG^=nwsRVU}#%eip%g|ROj9=Q5!?1vpvpT@Wp z;1SVH409>NgA0B>&2xEIi)$GK_HBp9W9-A#m)NhqvCnGXXP&EJANCGAJie~JF7*}g zmDG=o{a6l<$JmcH&s7;71bBEkS1=!0eX3=Ez2kCt#+&AZNM!WyPJO&htIPGQ4!b&k zy!et!F1k3dZ~Wwq2lfdp_Bi=-XV02FYv%m9GoF1a?}TFo^Xm%iOM8bjKN6cl`D;}? 
zo=yf4;NkOgHMK9cv`FxK3K=x1PxWg^ZwiNpUOih{Lh4go-)6q$>cifV(M^3(%gXRz zEtl7|M8`g#t71ugk>1pXxoKAGrrpB>dxy)GCH4jLH}Ei5sC{;LSLbk zi+%4fHyZQ3wG3|T!{M<^o|}VWxpSsodeOuq>iyi16uuz8H1^@`!S928cspV_SL8N*U43QjL*$drmsLF6x+$x8WYeY*(yk>L?rS{C-< z@Q7iqh;C|9-#N@>;Op4uVXk7nO6)6R-+_H0^<5brcJ<@Q@aWj*T86kXJVNJFbW_8A zjOZpZJXJ2|+76Ft(pg#!8hq5cW%Da82lF|QPx(xC@&WvdF1RR&@A>3C2m1-o)~%J# zWLHN3fBKB+#QaAdnsnT6ekM!5o{oJU>FUe5i19?NUlS$|i+tYFV(iZ(hlfti)z4MY zu=^FqaeioZ%6}T%PChNLNqn$H2b7vG0j)dWX44 z>_;>9tBmVM9p+XO`Vc&mCOUv+Z(>|&FxLP;mFqa%2#CWuV zed=Cgdq?kNaBgXFZj-jO+{QixzqX~t*jL8BcRAP3;fXfPRf3U9&@B{_3QZ>`?lKmSo?fJrIAEABT(t_Go^IX?{ zuCDgua(EQ>V>&#(oeY|>FUyu;>CuZ3-qgG)Q}ZT|8#}4RWj{BpGr{BV*ol2k^1#fuupghXU&pnj z1n zTUyn1c+_$(75+zYc)Z4b47DGvcvCIss<=MQb8BSmD~AW<*UnWT;7`GIzpQPG z=FP1@zRc<@}7hJ1mNc&jqh@GrcWi2KXuCFDU&D7nlUl=m7g1* zGXcw4w!?#|u3rz2*1GAfUxTHOGt+frU)$0mV;{$9MQ`1d4v#YS@qtS0JB)qa&(%&* z)R%Jsf1c;c`Zcwsr5XGBxhj^iU&TC^)P8)%z7^d>2@mJ?Mc<~5{geZZ)ri^v$4-ozs3$vjCrn7`(VB=_N#Pwsxi9ft&>4_ zc+k4(O6+?VwRG$&lLw1hysK3$V;_Q_qnomm!J6mBkA2(WF^xRaoSK#SJu>-iT3u|F zXHr&2b9F9cR|m|$w$HUee9tHEIq*+F*!TcGWp$*hGj&Q{-lQp0CQcU2pZN2H*|UCn z4^n=-vAqnkX>MO>2toDPRa>tx`#euFyx*a!SYhDVqBuzCpiT4H}LEo#w$ z>73Y?#=b~z0{D!5>SQ3PUxVdbadIx5qKMPdWsC=BX_3^Y&^}3hYfH;H&()99if-)t zFKUVCCS%`yq86|p%h-3dZu%Je(Xg+Kea>zAwr)b_v(!gu-&McnHF=_8KZe5tlPBst zR}6EjSHET&({Q0(KP&TxGtYPkz}IRP;OhJl8(1z^Cu!^io_xW50CVTenKfg^%<0o- zOtYAuH-$w0WMclLi4!LNJbuFXpQh%Gdt=zR3!6+hx*;X@(XmW!552mLedIRX#(tJO zFkRR?uH&>&#RFqsL^nkbkLB=CZqr`Q1?{uuXo-Dk>~k}NF!qJR!^VDwNbEbXFXy?+ z*pJq_DeKof*r$_o1@_bNN}UXd>vLis0iGf~x|x~+kxxgfsMgJh*wRvoeQQx`8zuI+ zc#}K!9VcpuvgO1W`_TC;^%3LoCicC|ms?s|5057G9j%)f=|*#SSnbEaK07>BE^0+h z?3>0+)0klz)5SF{9Gcvq{^w0|2DE8)nK}|BnyZs@eh&KcgZrLO-g)4k03wqr!rtAlVREFCEP@Fz|f|MU2tCj2yR{J0-~ z`f=>Iv13M!82#@rMm;%rOoyw+HTu5^bsp>3!Idwrg7ip1TFQ#Fl$F-&s zd|`A8ODEdJ>3w{rOZMYy>d}4vtMNUkqqU&?`J@#p;S?7R*uv>r>VPfR>00uDEyE}h z7tU2I6xUiDp)Rh~hOtUqD-C0XVHAif-!PV2*D`Ug;!^93t;O0gTMG;W7tY(7m!3M| z_~YI=|J?gpG{2_R6>U97qPSe03y{?T^C9xjJ?Gq@zvq+pANVJL>8$|1a&>-YSLf#m 
zKTQ}vZv0O_{xojvxF5$r=>Ir&)Yvg2fB1gHm!FUL_`~5JygU5E_kVcysSyv}Kl0BH zjCc^&{lousAFdxBxbOS>@BRM1d%nN#?(gos>)U&9-TBSkcYJgA?cdyW``35g{?#3~ zeRapJ|GxbeTwmUH^Ov{Y{KYK;KfmRs&u_l*^MN;hcGC@?-8A4|H}(JY#sUAjq5r1? z`h7Cs`cL{__s@RUeSH13A79`1sPjY?aDT4{O^hw+*hAxvj4l+6q^T zp)FhB75_S{P0QEXwtBs7>(|@0exqHRH?M5-)>UoaZr|>m_E)}p^;Pd(-TwU!SAEd2 z{Rf?{{;*Sre_Ye?qiZ^S+_~dFyL9@b%Qc^N?fhxCE}wPl`dRm`pLg&2MUQS@^z8m+ zuO9#I)$^-fJ-_bV^P4`szU|xlyKDP=e{J6%uIoGex@(7Df9;5V*NyCd{iy!^Mi1yW zWhX2Y?qBuj{w2N{d{_APx_0?j zm(HJd?)*vTYyOFEzEj7KI(GO+#||HMxcY+*?cc{e;!dw>_s*4V-)`6TEj)p?ZQf|p z`t{bWUTf8ISj(2VEw31QMT=KkT>i@C&0lWb?4@SS{@(1e7n@x=ovw}qK4kuw(IZDg>yH{fX7mqZMt?tg)OVvtef#Y<-+cMSH(z}I_2-}c ze|z`Q9O-qP0r==M0m4htV6e*GuI5~4NVk76AbIv(u3y67BYh`V$ zpW)L1J(wXVij+4lKUG~Q3|{l+nak^8@rq9 zI~!{|>#JL9D_g6}n=4D}%ZqDE3#*IsD+_bW^Rr8HGmEp+3o}#m)01;k6SI>OGZW)8 z<73m~qf=uelcU2EBSYgugJVMjql5h;1AW7Ny+ge{gFRgX-JShioqe6{y&Y{m?QPv{ ztzE4xoh{8BEsgEX$y-}fLu+GwOJiM2LtS%yZBt!MV_kJ)ZB=7URYP@UeN{zWRe5b? zd2K~mO?hc`c}Z1SaaCziWl2#*Nur`SQC?h7mMADq6qFR^mlWoe6yz4?=M?4TB=R06 zavv1t+%I@|FF#(!>+zQOuXu-ex1xg5;==Oy4DqSr6PA@!$LFsstF0=ptFCCMscfvR zj(ZaKC+=8lQ+-==Lwie8M{9FuTT54aOLs?WPiI?iS4UrW=Ri-_U~l(OU(fJB@5o@^ z=+MB}@Zk8!(8TEQ)cDBs#OTcA*zD9qpv_NDEzC|W&P^}P&n_>_tt`&3E-kDrFK)zb zX?`vK8E>4MS)QFF*ux>ly0p z9*jE>cO%W)*3#YD+||<5+1%LCSl`}I*B19b9z{Hsctr8Is>i6jc=` zDvJus69r`jd8PTeC3!hTxepT$9~3^gS8)Gs{=K_-cW>w1`Q+j4TMs|E`QYaDdpEA# zy?*u1)yuc9T)K7n;>}AJZd^Qn{rtz*&YivT@tMnKPhUE7>f)&nFP#49?Ag=rpZ@6m zQy<2Yo4@y~_uhN|m%mI6`S1SY&ofVM9)D)x@BcP)J~I{g$=|w-A^)d8`RO}<`Og3A zA9W9&KuX{jzxYKQ%ly}`Uf1|vZ*~6p*I)kT%P)TY#phps{@E9wz54vqSD(Fl`RU8& zFJC-+`Qquz7f)V1KYae|@w2A~PoF$`a=3r^c<=GS?!lv-NBi6Rds}JetDEa98|%yKt4nJui>u2ED@*gsi?d4$GmG;x3v*NRvy*c(6SLFfGgD*JlcQ4; zBa`F96JtZ;ql0531Ea(JBSU?|gS|roJ%jz-1ASfny`6nM9lhP{JzZ_xovmFREuHPn z9c@kRt&Qz1jcqLrt6}8pn5yuhA)s>}H z6(v>WC6UyT*X5;&vXa8G;=hk)UiiXQ^;d8 zQ=`+9!^eb!w>Qgs+%^l_*3#J8+z<~T9!O(-O+#IEeQi};O=V40d39x3RYhrKc}aO` zaalh-Hvu3fo&_436lmo8ksc>dz~a~IB?J^%5Ub7xPVJ$>rTsZ*yu{OH49eGs>Z 
z{G%r~|EFVrF!^UEHu(|wKm3ay{`RZi`k^O4AO!yKhyRY*K9c{NTAgEo|LIZSzj*cH z`OD|eUOapH{OOZtPY$0RK7R7};PBwl<41c3`@4_!clP(T_jb2-cQ$vnH@COeH#gTd z*Vi`IR@YZoR#%r-mzP$S7MB+nmKNq0=Vuq@X69#S=4Pg5rzdBoCZ;FHrzXZG$44i| zM#e{n$3})mhX+T728IXwhX(ov`+Eoadis02`+B;1yE}WjI=VaCyE@uB+gm%@TH0Hi z*wEZq-_%goSYO*vSJO~iQ(s$MS5sA6U0GXISyNe2U0GgLQC3x6 zR#{eBQJT)HqO#(m(xOC3BAHsng#|?g$>2)l>uQ?nYnvP5+M}VhDb}M{m|}T~`M$L~u2o{B z?vC{K`2*h=(5tK`}KR5gkE2K07@TR`avdi*qx{;eK&0md;o} zV;PObbYpEf7S`n2y0)BNSmPby7S8b@^RsUuzae~_x3{}1YunM*+TPj{_qnC1vAMCK zsiD5HzAilLYN~6it7@t$t1Bxi%gZXtO3O=2N{fq2ii(O8g++x0h57jfd3pJ{xp_Gc za~?c+c>n%`d-v|&y?gie?K_`*a{Jb;Pj244dHu$XYuBz{y?X7+l`EI7T)uee^2G}m z&!0d4@%eLSKR$Oh#{1K!KZ@I_k3RVDgZDr9;Qe34lbgTy?t5Qr@{@l&^Vs>!%!L1= zAN=UqvuA$T2@uE%{Qmd<6$O4A&P>(#>9aa-w>n>Z{^{qRz549am!H0R@hUaszj*%i z`Lid_o*pJM|MBA|2M33b_8;%>9qjEs+TGdT+1}gP+TGsV*;?P)T;JSS+gM-SSX)_- zz+YKfSzcUTT3A|~UtE}5h%B0$nVX%Sotc`Mnw*}Tn3@=$93Pt)8yz1V`A&hK&ir)X zN3KTZX9K^nE}8l1z^|*WN(O#aGVmjrGl5@LlFa;)m{E&LixZj5kH9a?PiB68Ch)VF zAFsp`5N|Jz_U9<@~!KHqB6c#e9`#QvFJ26#1%(~Z1RrQS}HM^enbai%jcBB{6RNyzKm(ioZudT1G z$p(H^MR`S8SpoI7{^<8zVuAD=mM=2SB9W5`cuegyv4pWOWT`OIvq6E%MP z*6s4ra`s!@D+CCn1%CIt|BREFv0WQ4vS)SPYIQP!|LXb6m(O0jc>4VLlV{HkpFTZ& za(Hl<8S?iJ_V*s`?e6dH?CostZg1^uZESC@Z*8Q9{AA#-tt_oBFRm;tEH5Ur=xE4K zXVKe3{@Cd7=*ZAWYRDfN>>nKH8|drJPWUGd`4RYS&6y!T0zc;J?2sSXTVGS14E*Y3 z;Ga0;Co?}i79-e8|rRetO6+Es7$jC=>YUAwL=T6?L(5kR0-3FCm(x=tCm# z8!~~PF7Z2}l8Ry~>Z#8D*rkXjy}u^{Kdxxvx+bn}(hdIP_{h{m4Ef_TlM{2%$j?qi z54Jczy|^%wZ19f_`9~9eP-oh+Bj>2jjw8Qu8Fl;`D$5)9s;i?tef8AV5`mu{@{^to zb;-a_5BV|SN8rbhUtU@ofnQo&R8o`<{221{3i5LEbJKzUAf5RU`1kH4hx|`&-nw<; z=B*poZ(O^6?MiaUkIauDKQcdt{K))N;Kz`ECWicU;76+yL;g{#^X^fr6VG+~564=a zec9fS+*!1WpwAkAIIptN(8i_!0GSI5TePR_E8BfBxlXpM8<3 z@l&nNi)Yy(|LK#*smy=$_+bCw(cYu|-Tl3tz1{8Ioe2ER?ahtMke_OG)>ckzb&hAz z|9-2}6fHzF&quA!2}6E)X+?I(kH9ZZloS;vi=Aw%b2Q}t8LduTvek)w<=9<*y=(h+ zt26j!s}t*8v^tZ~>Ldeybb4xhCR&|L=0`I>6VrRT*oiIM6I-3sc1d<&{m!jUXM0LiPubgPpZ^3$208S*3ZQ$v2VIuZCWc^4!iNZu}bfT6zKvp203_X5DD+c~JsFS>9 zwr;U^!nk5;Fr 
zGp6hQY^yU6$8`E*&w70Hc;>%u@)x4XKNk4uV&`~`ANilUn2P=J@7?NTwn)CA)p=dx zXIq`vx=q#iMbYX+A(k5QvrYc}bd#TMb<$0K4EfROq?`P!m!rv#AwS*fMBvAe|2pvh zc&l^lDN+6=e(@^)NPg@FCkhhTZ)vX(An>gM@ke;#e106!jPsc%H2LYR+o#93ZvRND zld18)wbhxL9G{#Rn@DZlW?P-%!GWQH{?xwh>sBYbxA^wf?NO`qJ+^LNw>rnSZvU)S zr~0H;C%JE%uJO~YPMpi^@99jpI)ekpn*5Q$$o$y4jV6Ek%_cv#yrUC)tJO(oe!ToH ztXw~_hTt=nkwqt!`o z-5zgsj%xf&=D*qMy!YO_aX$0iqgLnmQ=U5uUT<5JOKWgV_b>0~Ak2m>WAM(F% zt5Y6l`rkUMld193XLVk;I-$8nvbCO?(=Z#Vgwv-rua+f0+6Jd6LeRwsE7{~ONg z9BXxwXLaUgj_;M6d{(Egr#rhv@~5;q-`eEo +#include +#include + +namespace amd +{ + +static const short bitMapID = 19778; + +void +BitMap::releaseResources(void) +{ + if (pixels_ != NULL) { + delete[] pixels_; + } + + if (colors_ != NULL) { + delete[] colors_; + } + + pixels_ = NULL; + colors_ = NULL; + isLoaded_ = false; +} + +BitMap& BitMap::operator=(const BitMap& rhs) +{ + if (this == &rhs) { + return *this; + } + + // Copy header + id = rhs.id; + size = rhs.size; + reserved1 = rhs.reserved1; + reserved2 = rhs.reserved2; + offset = rhs.offset; + + // Copy header info + sizeInfo = rhs.sizeInfo; + width = rhs.width; + height = rhs.height; + planes = rhs.planes; + bitsPerPixel = rhs.bitsPerPixel; + compression = rhs.compression; + imageSize = rhs.imageSize; + xPelsPerMeter = rhs.xPelsPerMeter; + yPelsPerMeter = rhs.yPelsPerMeter; + clrUsed = rhs.clrUsed; + clrImportant = rhs.clrImportant; + + numColors_ = rhs.numColors_; + isLoaded_ = rhs.isLoaded_; + + pixels_ = NULL; + colors_ = NULL; + if (isLoaded_) { + if (rhs.colors_ != NULL) { + colors_ = new ColorPalette[numColors_]; + if (colors_ == NULL) { + isLoaded_ = false; + return *this; + } + memcpy(colors_, rhs.colors_, numColors_ * sizeof(ColorPalette)); + } + + pixels_ = new uchar4[width * height]; + if (pixels_ == NULL) { + delete[] colors_; + colors_ = NULL; + isLoaded_ = false; + return *this; + } + memcpy(pixels_, rhs.pixels_, width * height * sizeof(uchar4)); + } + + return *this; +} + +void +BitMap::load(const char * filename) +{ + // 
Release any existing resources + releaseResources(); + + // Open BMP file + FILE * fd = fopen(filename, "rb"); + + // Opened OK + if (fd != NULL) { + // Read header + fread((BitMapHeader *)this, sizeof(BitMapHeader), 1, fd); + + // Failed to read header + if (ferror(fd)) { + fclose(fd); + return; + } + + // Confirm that we have a bitmap file + if (id != bitMapID) { + fclose(fd); + return; + } + + // Read map info header + fread((BitMapInfoHeader *)this, sizeof(BitMapInfoHeader), 1, fd); + + // Failed to read map info header + if (ferror(fd)) { + fclose(fd); + return; + } + + // No support for compressed images + if (compression) { + fclose(fd); + return; + } + + // Support only 8 or 24 bits images + if (bitsPerPixel < 8) { + fclose(fd); + return; + } + + // Store number of colors + numColors_ = 1 << bitsPerPixel; + + //load the palate for 8 bits per pixel + if(bitsPerPixel == 8) { + colors_ = new ColorPalette[numColors_]; + if (colors_ == NULL) { + fclose(fd); + return; + } + fread( + (char *)colors_, + numColors_ * sizeof(ColorPalette), + 1, + fd); + + // Failed to read colors + if (ferror(fd)) { + fclose(fd); + return; + } + } + + // Allocate buffer to hold all pixels + unsigned int sizeBuffer = size - offset; + unsigned char * tmpPixels = new unsigned char[sizeBuffer]; + + if (tmpPixels == NULL) { + delete colors_; + colors_ = NULL; + fclose(fd); + return; + } + + // Read pixels from file, including any padding + fread(tmpPixels, sizeBuffer * sizeof(unsigned char), 1, fd); + + // Failed to read pixel data + if (ferror(fd)) { + delete colors_; + colors_ = NULL; + delete tmpPixels; + fclose(fd); + return; + } + + // Allocate image + pixels_ = new uchar4[width * height]; + if (pixels_ == NULL) { + delete colors_; + colors_ = NULL; + delete tmpPixels; + fclose(fd); + return; + } + // Set image, including w component (white) + memset(pixels_, 0xff, width * height * sizeof(uchar4)); + + unsigned int index = 0; + for(int y = 0; y < height; y++) { + for(int x = 0; x < 
width; x++) { + // Read RGB values + if (bitsPerPixel == 8) { + pixels_[(y * width + x)] = colors_[tmpPixels[index++]]; + } + else { // 24 bit + pixels_[(y * width + x)].z = tmpPixels[index++]; + pixels_[(y * width + x)].y = tmpPixels[index++]; + pixels_[(y * width + x)].x = tmpPixels[index++]; + } + } + + // Handle padding + for(int x = 0; x < (4 - (3 * width) % 4) % 4; x++) { + index++; + } + } + + // Loaded file so we can close the file. + fclose(fd); + delete[] tmpPixels; + + // Loaded file so record this fact + isLoaded_ = true; + } +} + +int +BitMap::colorIndex(uchar4 color) +{ + for (int i = 0; i < numColors_; i++) { + if (colors_[i].x == color.x && + colors_[i].y == color.y && + colors_[i].z == color.z && + colors_[i].w == color.w) { + return i; + } + } + + return 0; +} + +bool +BitMap::write(const char * filename) +{ + if (!isLoaded_) { + return false; + } + + // Open BMP file + FILE * fd = fopen(filename, "wb"); + + // Opened OK + if (fd != NULL) { + // Write header + fwrite((BitMapHeader *)this, sizeof(BitMapHeader), 1, fd); + + // Failed to write header + if (ferror(fd)) { + fclose(fd); + return false; + } + + // Write map info header + fwrite((BitMapInfoHeader *)this, sizeof(BitMapInfoHeader), 1, fd); + + // Failed to write map info header + if (ferror(fd)) { + fclose(fd); + return false; + } + + // Write palate for 8 bits per pixel + if(bitsPerPixel == 8) { + fwrite( + (char *)colors_, + numColors_ * sizeof(ColorPalette), + 1, + fd); + + // Failed to write colors + if (ferror(fd)) { + fclose(fd); + return false; + } + } + + for(int y = 0; y < height; y++) { + for(int x = 0; x < width; x++) { + // Read RGB values + if (bitsPerPixel == 8) { + fputc( + colorIndex( + pixels_[(y * width + x)]), + fd); + } + else { // 24 bit + fputc(pixels_[(y * width + x)].z, fd); + fputc(pixels_[(y * width + x)].y, fd); + fputc(pixels_[(y * width + x)].x, fd); + + if (ferror(fd)) { + fclose(fd); + return false; + } + } + } + + // Add padding + for(int x = 0; x < (4 - (3 * 
width) % 4) % 4; x++) { + fputc(0, fd); + } + } + + return true; + } + + return false; +} + +} // amd diff --git a/Demos/OpenCLClothDemo/bmpLoader.h b/Demos/OpenCLClothDemo/bmpLoader.h new file mode 100644 index 000000000..301ad0d12 --- /dev/null +++ b/Demos/OpenCLClothDemo/bmpLoader.h @@ -0,0 +1,201 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef BMPLOADER_H_ +#define BMPLOADER_H_ + +#include +#include + +namespace amd +{ + +//! @fixme this needs to be moved to common types header? +#pragma pack(1) +typedef struct +{ + unsigned char x; + unsigned char y; + unsigned char z; + unsigned char w; +} uchar4; + +typedef uchar4 ColorPalette; + +//! \struct Bitmap header info +typedef struct { + short id; + int size; + short reserved1; + short reserved2; + int offset; +} BitMapHeader; + +//! \struct Bitmap info header +typedef struct { + int sizeInfo; + int width; + int height; + short planes; + short bitsPerPixel; + unsigned compression; + unsigned imageSize; + int xPelsPerMeter; + int yPelsPerMeter; + int clrUsed; + int clrImportant; +} BitMapInfoHeader; + +//! 
\class Bitmap used to load a bitmap image from a file. +class BitMap : public BitMapHeader, public BitMapInfoHeader +{ +private: + uchar4 * pixels_; + + int numColors_; + + ColorPalette * colors_; + + bool isLoaded_; + + void releaseResources(void); + + int colorIndex(uchar4 color); +public: + + //! \brief Default constructor + BitMap() + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + {} + + /*!\brief Constructor + * + * Tries to load bitmap image from filename provided. + * + * \param filename pointer to null terminated string that is the path and + * filename to the bitmap image to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + BitMap(const char * filename) + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + { + load(filename); + } + + /*! \brief Copy constructor + * + * \param rhs is the bitmap to be copied (cloned). + */ + BitMap(const BitMap& rhs) + { + *this = rhs; + } + + //! \brief Destructor + ~BitMap() + { + releaseResources(); + } + + /*! \brief Assignment + * \param rhs is the bitmap to be assigned (cloned). + */ + BitMap& operator=(const BitMap& rhs); + + /*! \brief Load Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + void + load(const char * filename); + + /*! \brief Write Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be written. + * + * \return In the case that the bitmap is written true is returned. In + * the case that a bitmap image is not already loaded or the write fails + * for some reason false is returned. 
+ */ + bool + write(const char * filename); + + /*! \brief Get image width + * + * \return If a bitmap image has been successfully loaded, then the width + * image is returned, otherwise -1; + */ + int + getWidth(void) const + { + if (isLoaded_) { + return width; + } + else { + return -1; + } + } + + /*! \brief Get image height + * + * \return If a bitmap image has been successfully loaded, then the height + * image is returned, otherwise -1. + */ + int + getHeight(void) const + { + if (isLoaded_) { + return height; + } + else { + return -1; + } + } + + /*! \brief Get image width + * + * \return If a bitmap image has been successfully loaded, then returns + * a pointer to image's pixels, otherwise NULL. + */ + const uchar4 * + getPixels(void) const { return pixels_; } + + /*! \brief Is an image currently loaded + * + * \return If a bitmap image has been successfully loaded, then returns + * true, otherwise if an image could not be loaded or an image has yet + * to be loaded false is returned. + */ + bool + isLoaded(void) const { return isLoaded_; } +}; +#pragma pack() +} + +#endif // BMPLOADER_H_ diff --git a/Demos/OpenCLClothDemo/bmpLoader.hpp b/Demos/OpenCLClothDemo/bmpLoader.hpp new file mode 100644 index 000000000..2daae0a47 --- /dev/null +++ b/Demos/OpenCLClothDemo/bmpLoader.hpp @@ -0,0 +1,189 @@ +// +// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. +// + +#ifndef BMPLOADER_H_ +#define BMPLOADER_H_ + +#include +#include + +namespace amd +{ + +//! @fixme this needs to be moved to common types header? +#pragma pack(1) +typedef struct +{ + unsigned char x; + unsigned char y; + unsigned char z; + unsigned char w; +} uchar4; + +typedef uchar4 ColorPalette; + +//! \struct Bitmap header info +typedef struct { + short id; + int size; + short reserved1; + short reserved2; + int offset; +} BitMapHeader; + +//! 
\struct Bitmap info header +typedef struct { + int sizeInfo; + int width; + int height; + short planes; + short bitsPerPixel; + unsigned compression; + unsigned imageSize; + int xPelsPerMeter; + int yPelsPerMeter; + int clrUsed; + int clrImportant; +} BitMapInfoHeader; + +//! \class Bitmap used to load a bitmap image from a file. +class BitMap : public BitMapHeader, public BitMapInfoHeader +{ +private: + uchar4 * pixels_; + + int numColors_; + + ColorPalette * colors_; + + bool isLoaded_; + + void releaseResources(void); + + int colorIndex(uchar4 color); +public: + + //! \brief Default constructor + BitMap() + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + {} + + /*!\brief Constructor + * + * Tries to load bitmap image from filename provided. + * + * \param filename pointer to null terminated string that is the path and + * filename to the bitmap image to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + BitMap(const char * filename) + : pixels_(NULL), + numColors_(0), + colors_(NULL), + isLoaded_(false) + { + load(filename); + } + + /*! \brief Copy constructor + * + * \param rhs is the bitmap to be copied (cloned). + */ + BitMap(const BitMap& rhs) + { + *this = rhs; + } + + //! \brief Destructor + ~BitMap() + { + releaseResources(); + } + + /*! \brief Assignment + * \param rhs is the bitmap to be assigned (cloned). + */ + BitMap& operator=(const BitMap& rhs); + + /*! \brief Load Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be loaded. + * + * In the base of an error, e.g. the bitmap file could not be loaded for + * some reason, then a following call to isLoaded will return false. + */ + void + load(const char * filename); + + /*! 
\brief Write Bitmap image + * + * \param filename is a pointer to a null terminated string that is the + * path and filename name to the the bitmap file to be written. + * + * \return In the case that the bitmap is written true is returned. In + * the case that a bitmap image is not already loaded or the write fails + * for some reason false is returned. + */ + bool + write(const char * filename); + + /*! \brief Get image width + * + * \return If a bitmap image has been successfully loaded, then the width + * image is returned, otherwise -1; + */ + int + getWidth(void) const + { + if (isLoaded_) { + return width; + } + else { + return -1; + } + } + + /*! \brief Get image height + * + * \return If a bitmap image has been successfully loaded, then the height + * image is returned, otherwise -1. + */ + int + getHeight(void) const + { + if (isLoaded_) { + return height; + } + else { + return -1; + } + } + + /*! \brief Get image width + * + * \return If a bitmap image has been successfully loaded, then returns + * a pointer to image's pixels, otherwise NULL. + */ + const uchar4 * + getPixels(void) const { return pixels_; } + + /*! \brief Is an image currently loaded + * + * \return If a bitmap image has been successfully loaded, then returns + * true, otherwise if an image could not be loaded or an image has yet + * to be loaded false is returned. 
+ */ + bool + isLoaded(void) const { return isLoaded_; } +}; +#pragma pack() +} + +#endif // BMPLOADER_H_ diff --git a/Demos/OpenCLClothDemo/btOpenCLSupport.h b/Demos/OpenCLClothDemo/btOpenCLSupport.h new file mode 100644 index 000000000..5b03e14c5 --- /dev/null +++ b/Demos/OpenCLClothDemo/btOpenCLSupport.h @@ -0,0 +1,84 @@ +#ifndef BT_OPENCL_SUPPORT_HPP +#define BT_OPENCL_SUPPORT_HPP + +// OpenCL support +#include + +namespace BTAcceleratedSoftBody +{ + class OpenCLSupportHelper + { + private: + cl::Context m_context; + std::vector m_devices; + cl::CommandQueue m_queue; + public: + OpenCLSupportHelper() + { + } + + virtual ~OpenCLSupportHelper() + { + } + + cl::Device getDevice() + { + return m_devices[0]; + } + + cl::CommandQueue getCommandQueue() + { + return m_queue; + } + + cl::Context getContext() + { + return m_context; + } + + bool InitOpenCLDevice() + { + cl_int err; + + std::vector platforms; + err = cl::Platform::get(&platforms); + checkErr(platforms.size() != 0 ? CL_SUCCESS : -1, "Platform::get()"); + + std::string platformVendor; + platforms[0].getInfo(CL_PLATFORM_VENDOR, &platformVendor); + //std::cout << "Platform is by: " << platformVendor << "\n"; + + intptr_t properties[] = { + CL_CONTEXT_PLATFORM, (intptr_t)platforms[0](), + 0, 0 + }; + m_context = cl::Context( + CL_DEVICE_TYPE_GPU, + properties, + NULL, + NULL, + &err); + + if (err != CL_SUCCESS) + { + btAssert( "Context::Context()" ); + } + + m_devices = m_context.getInfo(); + if( m_devices.size() <= 0 ) + { + btAssert( "devices.size() > 0" ); + } + + m_queue = cl::CommandQueue(m_context, m_devices[0], 0, &err); + if (err != CL_SUCCESS) + { + btAssert( "CommandQueue::CommandQueue()"); + } + } + }; + + +} // namespace BTAcceleratedSoftBody + +#endif // #ifndef BT_OPENCL_SUPPORT_HPP \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp new file mode 100644 index 000000000..b7e22c714 --- /dev/null +++ 
b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp @@ -0,0 +1,470 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifdef _WIN32 +#include +#endif + +#include "clstuff.h" +#include "gl_win.h" +#include "cloth.h" + +#define USE_GPU_SOLVER + + +const int numFlags = 5; +const int clothWidth = 40; +const int clothHeight = 60;//60; +float _windAngle = 1.0;//0.4; +float _windStrength = 15; + + + +#include +using namespace std; + + + + +#include "btBulletDynamicsCommon.h" +#include "LinearMath/btHashMap.h" +#include "BulletSoftBody/btSoftRigidDynamicsWorld.h" +#include "vectormath/vmInclude.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h" + +using Vectormath::Aos::Vector3; + +class piece_of_cloth; +class btBroadphaseInterface; +class btCollisionShape; +class btOverlappingPairCache; +class btCollisionDispatcher; +class btConstraintSolver; +struct btCollisionAlgorithmCreateFunc; +class btDefaultCollisionConfiguration; + +namespace Vectormath +{ + namespace Aos + { + class Transform3; + } +} + + 
+btAlignedObjectArray m_collisionShapes; +btBroadphaseInterface* m_broadphase; +btCollisionDispatcher* m_dispatcher; +btConstraintSolver* m_solver; +btDefaultCollisionConfiguration* m_collisionConfiguration; + +btCPUSoftBodySolver *g_cpuSolver = NULL; +btOpenCLSoftBodySolver *g_openCLSolver = NULL; + +btSoftBodySolver *g_solver = NULL; + +btAlignedObjectArray m_flags; +btSoftRigidDynamicsWorld* m_dynamicsWorld; +btAlignedObjectArray cloths; + +extern cl_context g_cxMainContext; +extern cl_device_id g_cdDevice; +extern cl_command_queue g_cqCommandQue; + + +const float flagSpacing = 30.f; + + +// Helper to test and add links correctly. +// Records links that have already been generated +static bool testAndAddLink( btAlignedObjectArray &trianglesForLinks, btSoftBody *softBody, int triangle, int *triangleVertexIndexArray, int numVertices, int vertex0, int vertex1, int nonLinkVertex, btSoftBody::Material *structuralMaterial, bool createBendLinks, btSoftBody::Material *bendMaterial ) +{ + if( trianglesForLinks[ numVertices * vertex0 + vertex1 ] >= 0 && createBendLinks) + { + // Already have link so find other triangle and generate cross link + + int otherTriangle = trianglesForLinks[numVertices * vertex0 + vertex1]; + int otherIndices[3] = {triangleVertexIndexArray[otherTriangle * 3], triangleVertexIndexArray[otherTriangle * 3 + 1], triangleVertexIndexArray[otherTriangle * 3 + 2]}; + + int nodeA; + // Test all links of the other triangle against this link. The one that's not part of it is what we want. 
+ if( otherIndices[0] != vertex0 && otherIndices[0] != vertex1 ) + nodeA = otherIndices[0]; + if( otherIndices[1] != vertex0 && otherIndices[1] != vertex1 ) + nodeA = otherIndices[1]; + if( otherIndices[2] != vertex0 && otherIndices[2] != vertex1 ) + nodeA = otherIndices[2]; + + softBody->appendLink( nodeA, nonLinkVertex, bendMaterial ); + } else { + // Don't yet have link so create it + softBody->appendLink( vertex0, vertex1, structuralMaterial ); + + // If we added a new link, set the triangle array + trianglesForLinks[numVertices * vertex0 + vertex1] = triangle; + trianglesForLinks[numVertices * vertex1 + vertex0] = triangle; + + } + + return true; +} + +btSoftBody *createFromIndexedMesh( btVector3 *vertexArray, int numVertices, int *triangleVertexIndexArray, int numTriangles, bool createBendLinks ) +{ + btSoftBody* softBody = new btSoftBody(&(m_dynamicsWorld->getWorldInfo()), numVertices, vertexArray, 0); + btSoftBody::Material * structuralMaterial = softBody->appendMaterial(); + btSoftBody::Material * bendMaterial; + if( createBendLinks ) + { + bendMaterial = softBody->appendMaterial(); + bendMaterial->m_kLST = 0.7; + } else { + bendMaterial = NULL; + } + structuralMaterial->m_kLST = 1.0; + + + // List of values for each link saying which triangle is associated with that link + // -1 to start. 
Once a value is entered we know the "other" triangle + // and can add a link across the link + btAlignedObjectArray triangleForLinks; + triangleForLinks.resize( numVertices * numVertices, -1 ); + int numLinks = 0; + for( int triangle = 0; triangle < numTriangles; ++triangle ) + { + int index[3] = {triangleVertexIndexArray[triangle * 3], triangleVertexIndexArray[triangle * 3 + 1], triangleVertexIndexArray[triangle * 3 + 2]}; + softBody->appendFace( index[0], index[1], index[2] ); + + // Generate the structural links directly from the triangles + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[0], index[1], index[2], structuralMaterial, createBendLinks, bendMaterial ); + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[1], index[2], index[0], structuralMaterial, createBendLinks, bendMaterial ); + testAndAddLink( triangleForLinks, softBody, triangle, triangleVertexIndexArray, numVertices, index[2], index[0], index[1], structuralMaterial, createBendLinks, bendMaterial); + } + + return softBody; +} + +/** + * Create a sequence of flag objects and add them to the world. 
+ */ +void createFlag( btSoftBodySolver &solver, int width, int height, btAlignedObjectArray &flags ) +{ + // First create a triangle mesh to represent a flag + + using Vectormath::Aos::Matrix3; + using Vectormath::Aos::Vector3; + + // Allocate a simple mesh consisting of a vertex array and a triangle index array + btIndexedMesh mesh; + mesh.m_numVertices = width*height; + mesh.m_numTriangles = 2*(width-1)*(height-1); + + btVector3 *vertexArray = new btVector3[mesh.m_numVertices]; + + mesh.m_vertexBase = reinterpret_cast(vertexArray); + int *triangleVertexIndexArray = new int[3*mesh.m_numTriangles]; + mesh.m_triangleIndexBase = reinterpret_cast(triangleVertexIndexArray); + mesh.m_triangleIndexStride = sizeof(int)*3; + mesh.m_vertexStride = sizeof(Vector3); + + // Generate normalised object space vertex coordinates for a rectangular flag + float zCoordinate = 0.0f; + + Matrix3 defaultScale(Vector3(5.f, 0.f, 0.f), Vector3(0.f, 20.f, 0.f), Vector3(0.f, 0.f, 1.f)); + for( int y = 0; y < height; ++y ) + { + float yCoordinate = y*2.0f/float(height) - 1.0f; + for( int x = 0; x < width; ++x ) + { + float xCoordinate = x*2.0f/float(width) - 1.0f; + + Vector3 vertex(xCoordinate, yCoordinate, zCoordinate); + Vector3 transformedVertex = defaultScale*vertex; + + vertexArray[y*width + x] = btVector3(transformedVertex.getX(), transformedVertex.getY(), transformedVertex.getZ() ); + + } + } + + // Generate vertex indices for triangles + for( int y = 0; y < (height-1); ++y ) + { + for( int x = 0; x < (width-1); ++x ) + { + // Triangle 0 + // Top left of square on mesh + { + int vertex0 = y*width + x; + int vertex1 = vertex0 + 1; + int vertex2 = vertex0 + width; + int triangleIndex = 2*y*(width-1) + 2*x; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)] = vertex0; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex+1)/sizeof(int)+1] = vertex1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex+2)/sizeof(int)+2] = 
vertex2; + } + + // Triangle 1 + // Bottom right of square on mesh + { + int vertex0 = y*width + x + 1; + int vertex1 = vertex0 + width; + int vertex2 = vertex1 - 1; + int triangleIndex = 2*y*(width-1) + 2*x + 1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)] = vertex0; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)+1] = vertex1; + triangleVertexIndexArray[(mesh.m_triangleIndexStride*triangleIndex)/sizeof(int)+2] = vertex2; + } + } + } + + + float rotateAngleRoundZ = 0.5; + float rotateAngleRoundX = 0.5; + btMatrix3x3 defaultRotate; + defaultRotate[0] = btVector3(cos(rotateAngleRoundZ), sin(rotateAngleRoundZ), 0.f); + defaultRotate[1] = btVector3(-sin(rotateAngleRoundZ), cos(rotateAngleRoundZ), 0.f); + defaultRotate[2] = btVector3(0.f, 0.f, 1.f); + btMatrix3x3 defaultRotateX; + defaultRotateX[0] = btVector3(1.f, 0.f, 0.f); + defaultRotateX[1] = btVector3( 0.f, cos(rotateAngleRoundX), sin(rotateAngleRoundX)); + defaultRotateX[2] = btVector3(0.f, -sin(rotateAngleRoundX), cos(rotateAngleRoundX)); + + btMatrix3x3 defaultRotateAndScale( (defaultRotateX*defaultRotate) ); + + + // Construct the sequence flags applying a slightly different translation to each one to arrange them + // appropriately in the scene. 
+ for( int i = 0; i < numFlags; ++i ) + { + float zTranslate = flagSpacing * (i-numFlags/2); + + btVector3 defaultTranslate(0.f, 20.f, zTranslate); + + btTransform transform( defaultRotateAndScale, defaultTranslate ); + + + btSoftBody *softBody = createFromIndexedMesh( vertexArray, mesh.m_numVertices, triangleVertexIndexArray, mesh.m_numTriangles, true ); + + + for( int i = 0; i < mesh.m_numVertices; ++i ) + { + softBody->setMass(i, 10.f/mesh.m_numVertices); + } + softBody->setMass((height-1)*(width), 0.f); + softBody->setMass((height-1)*(width) + width - 1, 0.f); + softBody->setMass((height-1)*width + width/2, 0.f); + softBody->m_cfg.collisions = btSoftBody::fCollision::CL_SS+btSoftBody::fCollision::CL_RS; + + + flags.push_back( softBody ); + + softBody->transform( transform ); + + m_dynamicsWorld->addSoftBody( softBody ); + } + + delete [] vertexArray; + delete [] triangleVertexIndexArray; +} + + +void updatePhysicsWorld() +{ + static int counter = 0; + + // Change wind velocity a bit based on a frame counter + if( (counter % 400) == 0 ) + { + _windAngle = (_windAngle + 0.05f); + if( _windAngle > (2*3.141) ) + _windAngle = 0; + + for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { + btSoftBody *cloth = 0; + + cloth = m_flags[flagIndex]; + + float localWind = _windAngle + 0.5*(((float(rand())/RAND_MAX))-0.1); + float xCoordinate = cos(localWind)*_windStrength; + float zCoordinate = sin(localWind)*_windStrength; + + cloth->setWindVelocity( btVector3(xCoordinate, 0, zCoordinate) ); + } + } + + //btVector3 origin( capCollider->getWorldTransform().getOrigin() ); + //origin.setX( origin.getX() + 0.05 ); + //capCollider->getWorldTransform().setOrigin( origin ); + + counter++; +} + +void initBullet(void) +{ + +#ifdef USE_GPU_SOLVER + g_openCLSolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext); + g_solver = g_openCLSolver; +#else + g_cpuSolver = new btCPUSoftBodySolver; + g_solver = g_cpuSolver; +#endif + + m_collisionConfiguration = 
new btDefaultCollisionConfiguration(); + m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration); + m_broadphase = new btDbvtBroadphase(); + btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver; + m_solver = sol; + + m_dynamicsWorld = new btSoftRigidDynamicsWorld(m_dispatcher, m_broadphase, m_solver, m_collisionConfiguration, g_solver); + + m_dynamicsWorld->setGravity(btVector3(0,-10,0)); + btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.))); + m_collisionShapes.push_back(groundShape); + btTransform groundTransform; + groundTransform.setIdentity(); + groundTransform.setOrigin(btVector3(0,-50,0)); + + + + + + + m_dynamicsWorld->getWorldInfo().air_density = (btScalar)1.2; + m_dynamicsWorld->getWorldInfo().water_density = 0; + m_dynamicsWorld->getWorldInfo().water_offset = 0; + m_dynamicsWorld->getWorldInfo().water_normal = btVector3(0,0,0); + m_dynamicsWorld->getWorldInfo().m_gravity.setValue(0,-10,0); + + + +#if 0 + { + btScalar mass(0.); + + //rigidbody is dynamic if and only if mass is non zero, otherwise static + bool isDynamic = (mass != 0.f); + + btVector3 localInertia(0,0,0); + if (isDynamic) + groundShape->calculateLocalInertia(mass,localInertia); + + //using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects + btDefaultMotionState* myMotionState = new btDefaultMotionState(groundTransform); + btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,groundShape,localInertia); + btRigidBody* body = new btRigidBody(rbInfo); + + //add the body to the dynamics world + m_dynamicsWorld->addRigidBody(body); + } + +#endif + +#ifdef USE_GPU_SOLVER + createFlag( *g_openCLSolver, clothWidth, clothHeight, m_flags ); +#else + createFlag( *g_cpuSolver, clothWidth, clothHeight, m_flags ); +#endif + + // Create output buffer descriptions for ecah flag + // These describe where the simulation should send output data to + 
for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { +// m_flags[flagIndex]->setWindVelocity( Vectormath::Aos::Vector3( 0.f, 0.f, 15.f ) ); + + // In this case we have a DX11 output buffer with a vertex at index 0, 8, 16 and so on as well as a normal at 3, 11, 19 etc. + // Copies will be performed GPU-side directly into the output buffer + + btCPUVertexBufferDescriptor *vertexBufferDescriptor = new btCPUVertexBufferDescriptor(reinterpret_cast< float* >(cloths[flagIndex].cpu_buffer), 0, 8, 3, 8); + cloths[flagIndex].m_vertexBufferDescriptor = vertexBufferDescriptor; + } + + + g_solver->optimize( m_dynamicsWorld->getSoftBodyArray() ); + +} + + + + +btClock m_clock; + +void doFlags() +{ + //float ms = getDeltaTimeMicroseconds(); + btScalar dt = (btScalar)m_clock.getTimeMicroseconds(); + m_clock.reset(); + + ///step the simulation + if( m_dynamicsWorld ) + { + m_dynamicsWorld->stepSimulation(dt/1000000.); + static int frameCount = 0; + frameCount++; + if (frameCount==100) + { + m_dynamicsWorld->stepSimulation(1./60.,0); + CProfileManager::dumpAll(); + } + updatePhysicsWorld(); + } + + + for( int flagIndex = 0; flagIndex < m_flags.size(); ++flagIndex ) + { + g_solver->copySoftBodyToVertexBuffer( m_flags[flagIndex], cloths[flagIndex].m_vertexBufferDescriptor ); + cloths[flagIndex].draw(); + } +} + + +int main(int argc, char *argv[]) +{ + + + initCL(); + + cloths.resize(numFlags); + + for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex ) + { + cloths[flagIndex].create_buffers(clothWidth, clothHeight); + } + + initBullet(); + m_dynamicsWorld->stepSimulation(1./60.,0); + + preInitGL(argc, argv); + + std::string flagTexs[] = { + "atiFlag.bmp", + "amdFlag.bmp", + }; + int numFlagTexs = 2; + + for( int flagIndex = 0; flagIndex < numFlags; ++flagIndex ) + { + cloths[flagIndex].create_texture(flagTexs[flagIndex % numFlagTexs]); + cloths[flagIndex].x_offset = 0; + cloths[flagIndex].y_offset = 0; + cloths[flagIndex].z_offset = 0; + } + + goGL(); + return 0; 
+} + diff --git a/Demos/OpenCLClothDemo/cloth.h b/Demos/OpenCLClothDemo/cloth.h new file mode 100644 index 000000000..cce971b22 --- /dev/null +++ b/Demos/OpenCLClothDemo/cloth.h @@ -0,0 +1,183 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + +#include "gl_win.h" //for OpenGL stuff + +#include "bmpLoader.h" +#include +#include "LinearMath/btScalar.h" + + +struct vertex_struct +{ + float pos[3]; + float normal[3]; + float texcoord[2]; + +}; + +class btVertexBufferDescriptor; + +class piece_of_cloth +{ + public: + + void destroy(void) + { + if(created) + { + if(cpu_buffer) delete [] cpu_buffer; + } + } + + piece_of_cloth() + { + created = false; + cpu_buffer = NULL; + m_vertexBufferDescriptor = NULL; + } + + bool created; + + vertex_struct* cpu_buffer; + unsigned int* indices; + btVertexBufferDescriptor *m_vertexBufferDescriptor; + + double x_offset, y_offset, z_offset; + + int width; + int height; + + GLuint texture; + + void draw(void) + { + glEnable(GL_TEXTURE_2D); + glBindTexture (GL_TEXTURE_2D, texture); + + glEnable(GL_DEPTH_TEST); + + glColor3f(0.0f, 1.0f, 1.0f); + + glEnableClientState(GL_VERTEX_ARRAY); + //glEnableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glBindTexture(GL_TEXTURE_2D, texture); + + glVertexPointer( 3, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].pos[0])) ); + //glNormalPointer( 3, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].normal[0])) ); + glTexCoordPointer( 2, GL_FLOAT, sizeof(vertex_struct), reinterpret_cast< GLvoid* >(&(cpu_buffer[0].texcoord[0])) ); + + glDrawElements(GL_TRIANGLES, (height-1 )*(width-1)*3*2, GL_UNSIGNED_INT, indices); +// glDisableClientState(GL_NORMAL_ARRAY); + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + + glBindTexture(GL_TEXTURE_2D, 0); + } + + void create_texture(std::string filename) + { + amd::BitMap texBMP(filename.c_str()); + if ( texBMP.isLoaded() ) { + glEnable(GL_TEXTURE_2D); + glGenTextures(1, &texture); + + glBindTexture(GL_TEXTURE_2D, texture); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL); + + glTexImage2D( + GL_TEXTURE_2D, + 0, + GL_RGBA8, + texBMP.getWidth(), + texBMP.getHeight(), + 0, + GL_RGBA, + GL_UNSIGNED_BYTE, + texBMP.getPixels()); + + glBindTexture(GL_TEXTURE_2D, 0); + } + else { + std::cout << "ERROR: could not load bitmap " << "texture.bmp" << std::endl; + exit(1); + } + } + + void create_buffers(int width_, int height_) + { + width = width_; + height = height_; + + created = true; + + cpu_buffer = new vertex_struct[width*height]; + memset(cpu_buffer, 0, width*height*sizeof(vertex_struct)); + + + // Initial test data for rendering + for(int y = 0; y < height; y++) + { + for(int x = 0; x < width; x++) + { + double coord = btSin(x/5.0)*0.01; + //coord = sin(y/); + + cpu_buffer[y*width+x].pos[0] = (x/((float)(width-1)))*1; + cpu_buffer[y*width+x].pos[1] = coord; + cpu_buffer[y*width+x].pos[2] = (y/((float)(height-1)))*1; + cpu_buffer[y*width+x].normal[0] = 1; + cpu_buffer[y*width+x].normal[1] = 0; + cpu_buffer[y*width+x].normal[2] = 0; + cpu_buffer[y*width+x].texcoord[0] = x/((float)(width-1)); + cpu_buffer[y*width+x].texcoord[1] = y/((float)(height-1)); + } + } + + + // Generate and fill index array for rendering + indices = new unsigned int[width*3*2+2 + height*width*3*2]; + + for(int y = 0; y < height-1; y++) + { + for(int x = 0; x < width-1; x++) + { + // *3 indices/triangle, *2 triangles/quad + int baseIndex = (x + y*(width-1))*3*2; + indices[baseIndex] = x + y*width; + indices[baseIndex+1] = x+1 + y*width; + indices[baseIndex+2] = x+width + y*width; + + + indices[baseIndex+3] = x + 1 + y*width; + indices[baseIndex+4] = x+(width+1) + y*width; 
+ indices[baseIndex+5] = x+width + y*width; + } + } + } +}; diff --git a/Demos/OpenCLClothDemo/clstuff.cpp b/Demos/OpenCLClothDemo/clstuff.cpp new file mode 100644 index 000000000..0ee8b1ad4 --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.cpp @@ -0,0 +1,53 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2008 Advanced Micro Devices + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + + +#include "clstuff.h" +#include "gl_win.h" + + +#include "btOclCommon.h" +#include "btOclUtils.h" +#include "LinearMath/btScalar.h" + +cl_context g_cxMainContext; +cl_device_id g_cdDevice; +cl_command_queue g_cqCommandQue; + +// Sets up the global OpenCL state used by the demo: creates g_cxMainContext for CL_DEVICE_TYPE_ALL, +// picks g_cdDevice with btOclGetMaxFlopsDev, prints that device's info and creates g_cqCommandQue on it. +// Both creation results are passed through oclCHECKERROR against CL_SUCCESS. +void initCL(void) +{ + int ciErrNum = 0; + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum); + //try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel +//#ifdef USE_MINICL +// g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum); +//#else + g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); +//#endif + + + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext); + + btOclPrintDevInfo(g_cdDevice); + + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} diff --git a/Demos/OpenCLClothDemo/clstuff.h b/Demos/OpenCLClothDemo/clstuff.h new file mode 100644 index 000000000..09f6313eb --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.h @@ -0,0 +1,10 @@ +#ifndef __CLSTUFF_HDR__ +#define __CLSTUFF_HDR__ + + + + + +void initCL(void); + +#endif //__CLSTUFF_HDR__ \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/clstuff.hpp b/Demos/OpenCLClothDemo/clstuff.hpp new file mode 100644 index 000000000..09f6313eb --- /dev/null +++ b/Demos/OpenCLClothDemo/clstuff.hpp @@ -0,0 +1,10 @@ +#ifndef __CLSTUFF_HDR__ +#define __CLSTUFF_HDR__ + + + + + +void initCL(void); + +#endif //__CLSTUFF_HDR__ \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/fragment.glsl b/Demos/OpenCLClothDemo/fragment.glsl new file mode 100644 index 
000000000..6a265d348
--- /dev/null
+++ b/Demos/OpenCLClothDemo/fragment.glsl
@@ -0,0 +1,7 @@
+uniform sampler2D tex;
+
+// Output colour is the texel of `tex` at texture coordinate set 0.
+void main()
+{
+    gl_FragColor = texture2D(tex, gl_TexCoord[0].st);
+}
\ No newline at end of file
diff --git a/Demos/OpenCLClothDemo/gl_win.cpp b/Demos/OpenCLClothDemo/gl_win.cpp
new file mode 100644
index 000000000..1826330a3
--- /dev/null
+++ b/Demos/OpenCLClothDemo/gl_win.cpp
@@ -0,0 +1,272 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2008 Advanced Micro Devices
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/ + + +#include "clstuff.h" +#include "gl_win.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +//#ifndef _WIN32 && !defined(__APPLE__) +//#include +//#endif //!_WIN32 + + + +static GLuint vbo = 0; + +#ifdef _WIN32 +#include +#endif + + +static unsigned int windowWidth = 1280; +static unsigned int windowHeight = 1024; + +// mouse controls +int mouseOldX; +int mouseOldY; +int mouseButtons = 0; + +float rotateX; +float rotateY; + +float translateZ; +float translateX; +float translateY; + +static GLuint glProgram; + + +void doFlags(); + + +void render( void) +{ + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); +// glDisable ( GL_CULL_FACE ); + + glMatrixMode( GL_MODELVIEW ); + glLoadIdentity(); + + glTranslatef( translateX, translateY, translateZ ); + glRotatef( rotateX, 0.5f , 0.0f, 0.0f ); + glRotatef( rotateY, 0.0f, 0.5f, 0.0f ); + +// glDisable (GL_BLEND); + + doFlags(); + // TODO: + //glBindBuffer(GL_ARRAY_BUFFER, vbo); + //glVertexPointer(4, GL_FLOAT, 0, NULL); + //glEnableClientState(GL_VERTEX_ARRAY); + + //glDrawArrays(GL_POINTS, 0, 4*4); + +// glDisableClientState(GL_VERTEX_ARRAY); + // glBindBuffer(GL_ARRAY_BUFFER, 0); + + +// glUseProgram(0); +} + +static void initGL(void) +{ + //glClearColor( 0.05f, 0.0f, 0.1f, 0.1f ); + glClearColor( 0.0f, 0.45f, 0.45f, 1.f); + +#if 0 + GLfloat mat_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + GLfloat mat_shininess[] = { 50.0f }; + GLfloat light_position[] = { + -10.f, + 5.f, + -1.f, + 1.0f }; + + glEnable ( GL_COLOR_MATERIAL ); + glShadeModel( GL_SMOOTH ); + glEnable( GL_LINE_SMOOTH ); + + + glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular ); + glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess ); + glLightfv( GL_LIGHT0, GL_POSITION, light_position ); + + //glEnable( GL_LIGHTING ); + //glEnable( GL_LIGHT0 ); // Switch on and crashes! 
+ glEnable( GL_DEPTH_TEST ); +#endif +#if 0 + + + glEnable ( GL_COLOR_MATERIAL ); + glShadeModel( GL_SMOOTH ); + glEnable( GL_LINE_SMOOTH ); + + glMaterialfv( GL_FRONT, GL_SPECULAR, mat_specular ); + glMaterialfv( GL_FRONT, GL_SHININESS, mat_shininess ); + glLightfv( GL_LIGHT0, GL_POSITION, light_position ); + + glEnable( GL_LIGHTING ); + glEnable( GL_LIGHT0 ); + glEnable( GL_DEPTH_TEST ); +#endif + rotateX = 0; + rotateY = 30; + translateX = 0.0f; + translateY = -30.0f; + translateZ = -120.0; +} + +void display(void) +{ + render(); + + glutSwapBuffers(); + glutPostRedisplay(); +} + +void keyboard( unsigned char key, int /*x*/, int /*y*/) +{ + switch( key) { + case('q') : +#ifdef _WIN32 + case VK_ESCAPE: +#endif //_WIN32 + exit(0); + break; + case('a'): + translateY += 0.1f; + break; + case('z'): + translateY -= 0.1f; + break; + case('d'): + translateX += 0.1f; + break; + case('s'): + translateX -= 0.1f; + break; + case('f'): + translateZ += 0.1f; + break; + case('g'): + translateZ -= 0.1f; + break; + } +} + +void mouse(int button, int state, int x, int y) +{ + if (state == GLUT_DOWN) { + mouseButtons |= 1< +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#endif //_WINDOWS +#endif //APPLE + + +#include + +void goGL(void); +void preInitGL(int argc, char ** argv); + +//int getVBO( std::string, int size ); + +#endif //__GL_WIN_HDR__ diff --git a/Demos/OpenCLClothDemo/gl_win.hpp b/Demos/OpenCLClothDemo/gl_win.hpp new file mode 100644 index 000000000..e7d3f9388 --- /dev/null +++ b/Demos/OpenCLClothDemo/gl_win.hpp @@ -0,0 +1,34 @@ +#ifndef __GL_WIN_HDR__ +#define __GL_WIN_HDR__ + +#ifdef _WIN32//for glut.h +#include +#endif + +//think different +#if defined(__APPLE__) && !defined (VMDMESA) +#include +#include +#include +#include +#else + + +#ifdef _WINDOWS +#include +#include +#include +#else +#include +#endif //_WINDOWS +#endif 
//APPLE
+
+
+#include
+
+void goGL(void);
+void preInitGL(int argc, char ** argv);
+
+int getVBO( std::string, int size );
+
+#endif //__GL_WIN_HDR__
diff --git a/Demos/OpenCLClothDemo/shaders.cl b/Demos/OpenCLClothDemo/shaders.cl
new file mode 100644
index 000000000..27e2d219f
--- /dev/null
+++ b/Demos/OpenCLClothDemo/shaders.cl
@@ -0,0 +1,535 @@
+#pragma OPENCL EXTENSION cl_amd_printf : enable
+
+// Three-component vectors are carried in four-component ones in this file.
+#define float3 float4
+#define uint3 uint4
+
+// Scale applied to the unit quad corners in g_positions.
+// Float literal and no trailing semicolon: the macro is used inside expressions.
+#define PARTICLE_RADIUS 0.05f
+
+#define width 1280
+#define height 1024
+
+// Screen bounds (bottom/top/left/right) used by rasterizerXX.
+#define B 0
+#define T height
+#define L 0
+#define R width
+
+// Fixed-point format used by rasterizerXX: shiftNumber fractional bits,
+// shiftValue == 1 << shiftNumber, shiftMask == shiftValue - 1.
+#define shiftNumber 4
+#define shiftMask 0xF
+#define shiftValue 16.0f
+
+#define screenWidth1 width
+#define screenHeight1 height
+// Expression macros are parenthesized so they expand safely inside larger expressions.
+#define halfScreenWidth1 (screenWidth1/2)
+#define halfScreenHeight1 (screenHeight1/2)
+#define screenWidth1SubOne (screenWidth1-1)
+#define screenHeight1SubOne (screenHeight1-1)
+// Single definition of stride (an earlier conflicting "#define stride 4" is dropped;
+// this value is the one that was in effect).
+#define stride screenWidth1
+#define screenPixelNumber (screenWidth1*screenHeight1)
+// NOTE(review): depthComplexity is not defined in the visible part of this file;
+// it must be defined before depthBufferSize is expanded.
+#define depthBufferSize (screenPixelNumber*depthComplexity)
+
+#define WGS 1
+
+//---------------------------------------------------------------
+
+// Per-particle record written by vertexShaderSprite and read by geometryShader.
+struct __VSSpriteOut
+{
+    float4 position;
+    float4 particlePosition;
+};
+
+typedef struct __VSSpriteOut VSSpriteOut;
+
+// Per-corner record written by geometryShader and read by rasterizerUnOpt.
+struct __GSSpriteOut
+{
+    float4 position;
+    float2 textureUV;
+//  float4 viewSpacePosition;
+//  float4 particlePosition;
+};
+
+typedef struct __GSSpriteOut GSSpriteOut;
+
+//------------------------------------------------------------------------------
+
+// Corners of a unit quad, scaled by PARTICLE_RADIUS in geometryShader.
+__constant float4 g_positions[4] =
+{
+    (float4)(-1.0f, 1.0f, 0.0f, 0.0f),
+    (float4)( 1.0f, 1.0f, 0.0f, 0.0f),
+    (float4)( -1.0f, -1.0f, 0.0f, 0.0f),
+    (float4)( 1.0f, -1.0f, 0.0f, 0.0f)
+};
+
+// Texture coordinates matching the corner order of g_positions.
+__constant float2 g_texcoords[4] =
+{
+    (float2)(0.0f,0.0f),
+    (float2)(1.0f,0.0f),
+    (float2)(0.0f,1.0f),
+    (float2)(1.0f,1.0f)
+};
+
+//------------------------------------------------------------------------------
+
+// Copies the 16 floats of matrix0 (constant memory) into matrix.
+void copyMatrix(
+    float matrix[16],
+    __constant float matrix0[16])
+{
+    for (uint i = 0; i < 16; i++) {
+        matrix[i] = matrix0[i];
+    }
+}
+
+// Computes element i of the 4x4 product matrix0 * matrix1 and stores it in
+// matrix[i]. Storage is column-major: element i is row i%4, column i/4.
+void matrixMulLoopBody(
+    uint i,
+    float matrix[16],
+    __constant float matrix0[16],
+    __constant float matrix1[16])
+{
+    matrix[i] = 0.0f;
+    matrix[i] += matrix0[(i%4) + (0*4)] * matrix1[(0) + ((i/4)*4)];
+    matrix[i] += matrix0[(i%4) + (1*4)] * matrix1[(1) + ((i/4)*4)];
+    matrix[i] += matrix0[(i%4) + (2*4)] * matrix1[(2) + ((i/4)*4)];
+    matrix[i] += matrix0[(i%4) + (3*4)] * matrix1[(3) + ((i/4)*4)];
+}
+
+// matrix = matrix0 * matrix1 for column-major 4x4 matrices.
+// NOTE(review): presumably unrolled by hand so that every index is a
+// compile-time constant; kept that way.
+void matrixMul(
+    float matrix[16],
+    __constant float matrix0[16],
+    __constant float matrix1[16])
+{
+    matrixMulLoopBody(0, matrix, matrix0, matrix1);
+    matrixMulLoopBody(1, matrix, matrix0, matrix1);
+    matrixMulLoopBody(2, matrix, matrix0, matrix1);
+    matrixMulLoopBody(3, matrix, matrix0, matrix1);
+    matrixMulLoopBody(4, matrix, matrix0, matrix1);
+    matrixMulLoopBody(5, matrix, matrix0, matrix1);
+    matrixMulLoopBody(6, matrix, matrix0, matrix1);
+    matrixMulLoopBody(7, matrix, matrix0, matrix1);
+    matrixMulLoopBody(8, matrix, matrix0, matrix1);
+    matrixMulLoopBody(9, matrix, matrix0, matrix1);
+    matrixMulLoopBody(10, matrix, matrix0, matrix1);
+    matrixMulLoopBody(11, matrix, matrix0, matrix1);
+    matrixMulLoopBody(12, matrix, matrix0, matrix1);
+    matrixMulLoopBody(13, matrix, matrix0, matrix1);
+    matrixMulLoopBody(14, matrix, matrix0, matrix1);
+    matrixMulLoopBody(15, matrix, matrix0, matrix1);
+}
+
+// Returns matrix * vector for a column-major 4x4 matrix.
+float4 matrixVectorMul(float matrix[16], float4 vector)
+{
+    float4 result;
+
+    result.x = matrix[0]*vector.x + matrix[4+0]*vector.y + matrix[8+0]*vector.z + matrix[12+0]*vector.w;
+    result.y = matrix[1]*vector.x + matrix[4+1]*vector.y + matrix[8+1]*vector.z + matrix[12+1]*vector.w;
+    result.z = matrix[2]*vector.x + matrix[4+2]*vector.y + matrix[8+2]*vector.z + matrix[12+2]*vector.w;
+    result.w = matrix[3]*vector.x + matrix[4+3]*vector.y + matrix[8+3]*vector.z + matrix[12+3]*vector.w;
+
+    return result;
+}
+
+// Returns matrix * vector.xyz for a column-major 3x3 matrix.
+// float3 is four lanes wide here (see the #define above), so the result is
+// zero-initialized to give the lane that is not computed a defined value.
+float3 matrixVector3Mul(__constant float matrix[9], float3 vector)
+{
+    float3 result = (float3)(0.0f);
+
+    result.x = matrix[0]*vector.x + matrix[3+0]*vector.y + matrix[6+0]*vector.z;
+    result.y = matrix[1]*vector.x + matrix[3+1]*vector.y + matrix[6+1]*vector.z;
+    result.z = matrix[2]*vector.x + matrix[3+2]*vector.y + matrix[6+2]*vector.z;
+
+    return result;
+}
+
+//------------------------------------------------------------------------------
+
+//#define DEVICE_CPU 1
+#if defined(DEVICE_CPU)
+// Debug helper: prints the 16 floats of matrix four per line, each line prefixed with name.
+void printMatrix(char * name, __constant float matrix[16])
+{
+    printf("%s[0] = %f, %f, %f, %f\n", name, matrix[0], matrix[1], matrix[2], matrix[3]);
+    printf("%s[1] = %f, %f, %f, %f\n", name, matrix[4], matrix[5], matrix[6], matrix[7]);
+    printf("%s[2] = %f, %f, %f, %f\n", name, matrix[8], matrix[9], matrix[10], matrix[11]);
+    printf("%s[3] = %f, %f, %f, %f\n", name, matrix[12], matrix[13], matrix[14], matrix[15]);
+}
+#endif
+
+#if 1
+// One work-item per vertex:
+// outputPrimitives[id] = projection * modelview * inputPrimitives[id].
+__kernel void vertexShader(
+    __constant float modelview[16],
+    __constant float projection[16],
+    __global float4 * inputPrimitives,
+    __global float4 * outputPrimitives)
+{
+    float matrix[16];
+
+    uint id = get_global_id(0);
+
+    // gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex
+    matrixMul(matrix, projection, modelview);
+
+    outputPrimitives[id] = matrixVectorMul(matrix, inputPrimitives[id]);
+}
+
+#else
+
+// Pass-through variant: copies each input vertex to the output unchanged.
+__kernel void vertexShader(
+    __constant float modelview[16],
+    __constant float projection[16],
+    __global float4 * inputPrimitives,
+    __global float4 * outputPrimitives)
+{
+    uint id = get_global_id(0);
+
+    outputPrimitives[id] = inputPrimitives[id];
+}
+
+#endif
+
+//-----------------------------------------------------------------------------------
+
+// One work-item per pixel: writes color at (get_global_id(0), get_global_id(1)).
+__kernel void
+clearImage(
+    __write_only image2d_t image,
+    float4 color)
+{
+    int2 coords = (int2)(get_global_id(0), get_global_id(1));
+    write_imagef(image, coords, color);
+}
+
+// OpenGL viewport transformation
+// The site
http://research.cs.queensu.ca/~jstewart/454/notes/pipeline/
+// contains a description of this process
+//
+// Maps the x/y of v from [-1, 1] to window coordinates and stores them in *output.
+// NOTE(review): (vp.s2-vp.s0) + vp.s0 reduces to vp.s2, so the origin vp.s0/vp.s1
+// never offsets the result. rasterizerXX uses 0.5*(v+1)*(R-L) + L instead; confirm
+// the layout of `viewport` against the host code before changing this arithmetic.
+void
+viewportTransform(float4 v, __constant int4 viewport[1], float2 * output)
+{
+    int4 vp = viewport[0];
+    *output
+        = 0.5f *
+        (float2)(v.x+1,v.y+1) *
+        (float2)((vp.s2-vp.s0) + vp.s0,
+                 (vp.s3-vp.s1) + vp.s1);
+}
+
+#define PARTICLE_WIDTH 32.0f
+#define PARTICLE_HEIGHT 32.0f
+
+// Unoptimized triangle rasterizer function
+// Details of the algorithm can be found here:
+// http://www.devmaster.net/forums/showthread.php?t=1884
+//
+// Fills the triangle formed by entries v1Offset, v2Offset and v3Offset of this
+// work-item's group of four outputPrimitives with opaque white.
+//   outputPrimitives  corner records; positions are run through viewportTransform
+//   viewport          forwarded to viewportTransform
+//   screen            destination image
+//   particle          currently unused (texture lookup is not implemented)
+//   debugOut1         currently unused
+void
+rasterizerUnOpt(
+    __global struct __GSSpriteOut * outputPrimitives,
+//  __global float4 * outputPrimitives,
+    __constant int4 viewport[1],
+    __write_only image2d_t screen,
+    __read_only image2d_t particle,
+    uint v1Offset,
+    uint v2Offset,
+    uint v3Offset,
+    __global float4 * debugOut1)
+{
+    uint id = get_global_id(0);
+
+    float2 v1, v2, v3;
+    viewportTransform(outputPrimitives[id*4+v1Offset].position, viewport, &v1);
+    viewportTransform(outputPrimitives[id*4+v2Offset].position, viewport, &v2);
+    viewportTransform(outputPrimitives[id*4+v3Offset].position, viewport, &v3);
+
+    // Bounding rectangle, clipped to the image: write_imagef must not be
+    // called with coordinates outside the image.
+    int2 min_ = convert_int2(min(v1, min(v2, v3)));
+    int2 max_ = convert_int2(max(v1, max(v2, v3)));
+    min_ = max(min_, (int2)(0, 0));
+    max_ = min(max_, get_image_dim(screen));
+
+    // TODO: interpolate texture coordinates and sample `particle`; every
+    // covered pixel is written as white for now.
+
+    // Scan through bounding rectangle
+    for(int y = min_.y; y < max_.y; y++) {
+        for(int x = min_.x; x < max_.x; x++) {
+            // When all half-space functions positive, pixel is in triangle
+            if((v1.x - v2.x) * (y - v1.y) - (v1.y - v2.y) * (x - v1.x) > 0 &&
+               (v2.x - v3.x) * (y - v2.y) - (v2.y - v3.y) * (x - v2.x) > 0 &&
+               (v3.x - v1.x) * (y - v3.y) - (v3.y - v1.y) * (x - v3.x) > 0) {
+                write_imagef(
+                    screen,
+                    (int2)(x,y),
+                    (float4)(1.0f,1.0f,1.0f,1.0f));
+            }
+        }
+    }
+}
+
+// Optimized rasterizer function
+// Details of the algorithm can be found here:
+// http://www.devmaster.net/forums/showthread.php?t=1884
+//
+// One work-item per triangle: reads three float4 vertices from
+// outputPrimitives[id*4 + 0..2], converts them to fixed-point window
+// coordinates and fills the covered pixels of screen with opaque white using
+// incrementally updated half-edge functions. debugOut1/debugOut2 receive
+// intermediate values.
+//
+// NOTE(review): the original author flagged this kernel as work in progress.
+// The bounding-rectangle maxima are corrected here (they took max(a, min(b, c)))
+// and the rectangle is now clipped to the image; nothing else has been verified.
+__kernel void
+rasterizerXX(
+    __global float4 * outputPrimitives,
+    __write_only image2d_t screen,
+    __global float4 * debugOut1,
+    __global int2 * debugOut2)
+{
+    uint id = get_global_id(0);
+
+    float4 v1 = outputPrimitives[id*4+0];
+    float4 v2 = outputPrimitives[id*4+1];
+    float4 v3 = outputPrimitives[id*4+2];
+
+    // [-1, 1] -> window coordinates
+    float y1 = 0.5f* (v1.y+1) * (T - B) + B;
+    float y2 = 0.5f* (v2.y+1) * (T - B) + B;
+    float y3 = 0.5f* (v3.y+1) * (T - B) + B;
+
+    float x1 = 0.5f * (v1.x+1) * (R - L) + L;
+    float x2 = 0.5f * (v2.x+1) * (R - L) + L;
+    float x3 = 0.5f * (v3.x+1) * (R - L) + L;
+
+    // Fixed-point coordinates with shiftNumber fractional bits
+    const int Y1 = convert_int(shiftValue * y1);
+    const int Y2 = convert_int(shiftValue * y2);
+    const int Y3 = convert_int(shiftValue * y3);
+
+    const int X1 = convert_int(shiftValue * x1);
+    const int X2 = convert_int(shiftValue * x2);
+    const int X3 = convert_int(shiftValue * x3);
+
+    debugOut1[id*4+0] = v1;
+    debugOut1[id*4+1] = v2;
+    debugOut1[id*4+2] = v3;
+
+    debugOut2[id*3+0] = (int2)(X1, Y1);
+    debugOut2[id*3+1] = (int2)(X2, Y2);
+    debugOut2[id*3+2] = (int2)(X3, Y3);
+
+    // Deltas
+    const int DX12 = X1 - X2;
+    const int DX23 = X2 - X3;
+    const int DX31 = X3 - X1;
+
+    const int DY12 = Y1 - Y2;
+    const int DY23 = Y2 - Y3;
+    const int DY31 = Y3 - Y1;
+
+    // Fixed-point deltas
+    const int FDX12 = DX12 << shiftNumber;
+    const int FDX23 = DX23 << shiftNumber;
+    const int FDX31 = DX31 << shiftNumber;
+
+    const int FDY12 = DY12 << shiftNumber;
+    const int FDY23 = DY23 << shiftNumber;
+    const int FDY31 = DY31 << shiftNumber;
+
+    // Bounding rectangle in whole pixels, rounded up
+    int minx = (min(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
+    int maxx = (max(X1, max(X2, X3)) + shiftMask) >> shiftNumber;
+    int miny = (min(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
+    int maxy = (max(Y1, max(Y2, Y3)) + shiftMask) >> shiftNumber;
+
+    // Clip to the image before the edge functions are seeded from minx/miny:
+    // write_imagef must not be called with coordinates outside the image.
+    minx = max(minx, 0);
+    miny = max(miny, 0);
+    maxx = min(maxx, get_image_width(screen));
+    maxy = min(maxy, get_image_height(screen));
+
+    // Half-edge constants
+    int C1 = DY12 * X1 - DX12 * Y1;
+    int C2 = DY23 * X2 - DX23 * Y2;
+    int C3 = DY31 * X3 - DX31 * Y3;
+
+    // Correct for fill convention
+    if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
+    if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
+    if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
+
+    // Edge function values at the top-left pixel of the rectangle
+    int CY1 = C1 + DX12 * (miny << shiftNumber) - DY12 * (minx << shiftNumber);
+    int CY2 = C2 + DX23 * (miny << shiftNumber) - DY23 * (minx << shiftNumber);
+    int CY3 = C3 + DX31 * (miny << shiftNumber) - DY31 * (minx << shiftNumber);
+
+    for(int y = miny; y < maxy; y++) {
+        int CX1 = CY1;
+        int CX2 = CY2;
+        int CX3 = CY3;
+
+        debugOut2[id*3+0] = (int2)(minx, maxx);
+
+        for(int x = minx; x < maxx; x++) {
+            debugOut2[id*3+0] = (int2)(CX1, CX2);
+
+            if(CX1 > 0 && CX2 > 0 && CX3 > 0) {
+                debugOut2[id*3+0] = (int2)(1, 1);
+                write_imagef(
+                    screen,
+                    (int2)(x,y),
+                    (float4)(1.0f,1.0f,1.0f,1.0f));
+            }
+
+            // Step one pixel in x
+            CX1 -= FDY12;
+            CX2 -= FDY23;
+            CX3 -= FDY31;
+        }
+
+        // Step one pixel in y
+        CY1 += FDX12;
+        CY2 += FDX23;
+        CY3 += FDX31;
+    }
+}
+
+//------------------------------------------------------------------------------
+
+// Expands this work-item's particle into a quad and rasterizes it.
+//   modelview, projection  4x4 matrices; their product transforms each corner
+//   inverseView            3x3 matrix applied to the scaled corner offsets
+//   viewport               forwarded to rasterizerUnOpt
+//   vsOutputPrimitives     per-work-item records; entry get_local_id(0) is read
+//   outputPrimitives       receives four corner records at id*4 .. id*4+3
+//   screen, particle       forwarded to rasterizerUnOpt
+//   debugOut1              forwarded to rasterizerUnOpt
+//   debugOut2              not used here
+void geometryShader(
+    __constant float modelview[16],
+    __constant float projection[16],
+    __constant float inverseView[9],
+    __constant int4 viewport[1],
+    __local struct __VSSpriteOut * vsOutputPrimitives,
+    __global struct __GSSpriteOut * outputPrimitives,
+//  __global float4 * outputPrimitives,
+    __write_only image2d_t screen,
+    __read_only image2d_t particle,
+    __global float4 * debugOut1,
+    __global int * debugOut2)
+{
+    float2 texcoords[4] =
+    {
+        (float2)(0.0f,0.0f),
+        (float2)(1.0f,0.0f),
+        (float2)(0.0f,1.0f),
+        (float2)(1.0f,1.0f)
+    };
+
+    float matrix[16];
+
+    uint id = get_global_id(0);
+    uint lid = get_local_id(0);
+
+    float4 vsPosition = vsOutputPrimitives[lid].position;
+
+    matrixMul(matrix, projection, modelview);
+
+    //
+    // Emit the four quad corners (consumed below as two triangles)
+    //
+    for (uint i = 0; i<4; i++) {
+        // Scaled corner offset, rotated by inverseView and moved to the particle
+        float3 position = g_positions[i] * PARTICLE_RADIUS;
+        position = matrixVector3Mul(inverseView, position) + vsPosition;
+
+        // Transform by projection * modelview
+        position.w = 1.0f;
+        position = matrixVectorMul(matrix, position);
+
+        //perspective division
+        position /= position.w;
+
+        struct __GSSpriteOut output;
+        output.position = position;
+        //output.textureUV = g_texcoords[i];
+        output.textureUV = texcoords[i];
+        outputPrimitives[id*4+i] = output;
+    }
+
+    // Render QUAD - Triangle 1
+    rasterizerUnOpt(
+        outputPrimitives,
+        viewport,
+        screen,
+        particle,
+        0,
+        1,
+        2,
+        debugOut1);
+
+    // Render QUAD - Triangle 2
+    rasterizerUnOpt(
+        outputPrimitives,
+        viewport,
+        screen,
+        particle,
+        2,
+        1,
+        3,
+        debugOut1);
+}
+
+__kernel void vertexShaderSprite(
+ __constant float modelview[16],
+ __constant float projection[16],
+ __constant float inverseView[9],
+ __constant int4 viewport[1],
+ __local struct __VSSpriteOut * vsOutputPrimitives,
+ __global float4 *
inputPrimitives, + __global struct __GSSpriteOut * outputPrimitives, +// __global float4 * outputPrimitives, + __write_only image2d_t screen, + __read_only image2d_t particle, + __global float4 * debugOut1, + __global int * debugOut2) +{ + float matrix[16]; + + uint id = get_global_id(0); + uint lid = get_local_id(0); + + // gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex + matrixMul(matrix, projection, modelview); + + float4 position = inputPrimitives[id]; + vsOutputPrimitives[lid].position = position; + vsOutputPrimitives[lid].particlePosition = + matrixVectorMul(matrix, position); + + geometryShader( + modelview, + projection, + inverseView, + viewport, + vsOutputPrimitives, + outputPrimitives, + screen, + particle, + debugOut1, + debugOut2); +} \ No newline at end of file diff --git a/Demos/OpenCLClothDemo/texture1.bmp b/Demos/OpenCLClothDemo/texture1.bmp new file mode 100644 index 0000000000000000000000000000000000000000..1d3da81c1018175fc27d941d9932f1d082beb521 GIT binary patch literal 786486 zcmeI$OS0@Rav0FI#|n=<^w2BNSmC)>?TvUTuJ)Bm2!KR^AVA`CI@A)0!YA`2nD6QR zumAn8|M-`G{PREm`uXqw{O7}e{rm5K`Nx0%=iBE$|MHLj^WXmQ|Nr~?K!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t6-z__zP@f4~1T zNyx$^K!5;&y8_?N|MzfrJL?G$AV6SvfvEHA|HD^IfCLB-Xds~ZH$b!n0RjXj6iEC3 zgh>dQ009C7t_eiV|Jq`+2@oJaU`~Ok`F~#L%tO=!2oNA}K|u4rfMqHH0t8kO(EL}a zq&x@^AW$lhc7FftQ)*`(0RjZ370~>rEuFv#5Fk)5p!wI^SxJBZfoTQO=09yJ0w+L# z0D)40wE2HVN*9|)fB*pka|)!*f6hEaO@IIa0v7}{{|i{A64+HBZ4?iT?27$R-f0CN zI@mD;2n-{T_JQd}hOwL=kh9`W5V{Zv5V$CC_PsATe$i&T>1RdX4no2NmJm4mqKz+6 z-j+!mUWt|m*oy#xM+6SP<$i;Y)ha!(=CjTZjbq7C8KJ4+r;lx{&>$UO0GRui>YLd4B>48sgc9!7NaX60vfdYZE4>lby*vyntfs{8IDcxw8c~Sh~>o742gaCnm z7YM)5vu9Yn1I8eZIN5~KjSQ;X=QOWXFI9L4s1 z?R~EO2@oh0_n)I}QTS8uFn;bI1`|I2TxbFT0uKm;UuFLJfnXeQNg!XZ z=+{d--<%vB9lhS%+;Rd0?g&KRWB&EdLDoJdkS}8NYxAh0{m)*TpKC(`1bzyfeUFFZ 
zKX)@>aDl_JzB@R$nD7WJCQ$LcFD9^aIu+ILv}ETh$eqBG0u`U|li@k*ErE)QadXLA znL3xih60zq)(w56{3mefo?Mztm;iyt1bmy1Q9FnL0RpcJ==ragKmY^?5O`8R+kaBq zSp*0Wcvs+3&;Lw*H(I9?AV7e?O@Z>ZfAb*A2@oJa;B^5#|Me0GfB*pkj|r6beE+ud zSWz5AfB=Es0wvAAcMSF?K!5;&X9P-mzQ5`}QwygMAVA;&fs(fWKp2i7K!5;&X9TqU zXPlivfB=C(1WMYzf8-gYSi&GcfWUnLZU4TZ9S9I0FpGemKTD;=Nq_)>h5|pEf5WqE zMSuVS0wW0IJO2m~ih%$D0tA`}}8G2)r&(((^xaUXN1%1PBly&`O}N z^K1UC!0khT0D;#93OoPnaSDI{0RjZ>3zYQy`={7}009E!0wsN^Zccd&YtQ_tw|4rn zBS2sTfuD`&fe9lxJQG6+<};Bvg#dw_1xk9+^K+J|$W^*!5|;=80%HnXZAx#=A5*U| zs!M2vdzwao0D(IK@Ajojr$>|TWc~2kDDuqf!+|-D0D%n!GA-!urx#6r#uiO@G1Sb_ zQIMI}%qD&!K!8A7flM#@{M7cWo4e&hO@F<4+@0lVFoz>|j(;xu?1f~;+y5Ds13zz@_0tD&=^#3|Hs|XMvFtvc@KXv&8 zPk;b{Y61Pfn$AK31PIJ4p!v^RJCPG0K;Vvm{(nc$S^@+J%qtM~|9P_zIROF$2wWA= z|F4RfPk;b{H3ankHR>n}0t5(L70~~$ikVM<0D(0G^#3*LC<_7v2wWA=|F4RfPk;b{ zH3Y)`|9M^`DOnI8&{iO76wP1TMr*f1fvAp!t4t$6U_F7T8@%$mp6cKkMO7X=THz5O z&`==y{Kt81n4_)o0%6hfGyO_{z@!4#lyzA93^7 zlBjMTVR=R%^3Tll69EE)3q+rD`RnU>npX#OOxirpFn=6Z1qK&j?30s^5rVxm}uZLf!AUX^3(CTcm@j8JB zxo#7y-V%t)*Y)+S9H%?CYZWq^PY3lg;KO8pt}@}0K!kj0t;r1qqCyP&+A!tWw;G{F zvm55)KNQX{@#`glRQS?zlkW(m_|uKtNq_vcLzQEEu1*oARp@lkp4&#edm{ACIznFi zwKrz({aaP1cbxm|pH^g_+#kp^;(g%6^^S-PpLu;s!- zdUK>4dAFH2VINq&TSeB$H@z5Z&c&-pQRQ(KAY}IecS&-3qzX0q^fWYel(f9uM`g&Be2XLtR zu5Biy^rzYV#ZQ1hp+Nfie?|(|nsz2|sQNc}wxs!ZCWiE<=BL|`0D)NqG=xr-N@1gs z>T{!gs$Z)Eu#fd2&O!nN2uv*y^?(0gz|_?gJOKg(E(t_`1YBBcG64bv z2uv*y{t+;BB7!GCfB=C@0%8C6Pe7OGOeR2pz|;cz|J3CZJOKg(?g;oJ;EtZP1PBnA zS|I!*VCqB!Pk;ac0+$5z|4UpZ6Cglf8G*3>f1a1gOCkga5Fn5j(Esy3ekDMFz`_FI z9{~%eDK!EF2oQ(_^#2IND**xoHWASOHz~9<2@oI<3F!Y3h*tsx2y7yt|8G)gX%Zkn zAQI63BM`3y2oTsrK>y#Q(9$G8fIuXm|3@HR2@oK#iGcpUNui}lfB=C=K>v?Gyb>Tl zU=soTf0IH>lK=q%k%0amfp{fAfWRgK`u`?{mL>rL1VVwZ`G20npF9&FK!Cui0#WnV z|5q)vya*5=5D7&6Kl;@x0RjXFtS1n4et+{{uf#GUK!89hkT!q)KUFe9fB=D&1P(jD z=D$*jeb_A+y|11PBlyFoi&-$)Ei+MP7m=K!5;&Gl8@Ael~u#k8uJ72oRV;;HY$-^#B92oNAJzCgaue}5f65fKs~K!8Arz_+pgd??w?90CLg z5ExeAXP*xz44Z|(2oNAZ;F3Vt+cVFX_B5FQ0RjX@6UcP<^rz8+7Y_jfe*)>(+GB+F 
z)B<%^V(OFyPheGny3e`Qs#R4N%d4*3iy=sdz#0N|pL4%eYp5=h#r;ArLm?3Y%Lv^6 zuDkBAjPs<4R2O&Be1%P56M@1{ySK*AFnR}I|387Bg~&``NPxf;0-3Kk{OOfk!T`eO zSAyB$&=C3R^A0|8B(Rb|{u>`8)3YE432)Lc&QuG>C(T;e1PJ^IoPFtI`kY1k86Pv? zp`yy@UHxgOyaYplK!w2JS6{Ku!%Ggs{tP}G#uUe$S(T4Zk)|LC5V$0ec7RLE-<_NW zAC9~m+ry`ai8pzEIDW?wAn>q2+9)0l;Qn!G3Wp>24|;2d!*tpXzLlDD2@rToAnGA+ zh0*HVD4ny{R>w8p=Pcmnk*9sT-2Y3@UfyzV0a z0t6NoNZZ}Q$sVSb3Oq1!7>Pjw1PDAPkT$xrmkdh5!Kq1Ud;s{lC*Gwk1G-0D++dH2 zAs7M#2s|dB|360UAOZvk3?&fu|DggC3;_ZJ2y_+D|GVnaf5kgf;WoB)9y0^uk8{Bw_t?D?2Lcwrtp)IkIYR0xD$ z>wwP{8CmkKKzNJZjl}5$2z&&>Pjc#K)f`_SykO%;D?$RR3WQ(co#%c&by4+;$IkZ! z!kXUS%?<JU##I8S;r_ePFn^y~j6|COGLIMQ70^v8+_W66nfDb=29WZ7g zg;k?LfIyu<_#NGRt_xtORc8W21u2-1Kv*dt1PJ5>!tbT+bN<{Je?9b>F_DF14r{b< zno`?SAiUm#JnvSBlzlgWk@F3!Gjg0_BajM&w|AW9G|tUO($Z`WGEVwoX~v04B(n>I z_iBgd+3VFi{`3y?KGFUq0%2WB=9@zx5(ux*4$o2gUJC`%yHU8%w0;6nQTiQZ=QRYP z3p4oZ8s^Jnk=~YNQkyi9s3MbQA?ybPqN_5(>jPOkqOCxBZQAZ{x2pnCMXoM4e?5Wd zN>seAXCk9k0?~D9wXJ>X1fp_$UhDR;YHES>8WfI9oweX!5s0qVD=|3qD-f0A^ZNbC z!>R&l(F;db1(Da20_kOY@^oiK0#P}lU%jp&5LNlo>ly;G7)Kzwe&YnTXC%t8UfrXb z7QU|MAfqt_q6;}@kefsm)#CHIN#bjynU?R;$QlT;m`fnNqI1Q+ShP_&7Rzp>bfQXC zyso4nmw5%!t2=Kl3q&53V}Xp;NFl0A-Rl|@vRF(Yz08XxI8i!kF+#D@D%6dPkc$`y z{1iy9_s=CJbP|Ya(diW1rUH@trPp*fBLpTAh;I8tk*^o3T0BtG2>H_M1K~M>z{3L3 zC-Cs;j{6Ekh4{RFfAVl&Am#lza{o{}5Ext_{R##Tc;WEWqG54U%=}2hqiscCB7yX~ zm?-$gLRE|Ef~IsOBh|-QNMIR(=u259uNf1GYVr3vVXIa#}8P91PBly&`Q7`0j(tMLx2E*xdin8x#}fa0t5&& z6bSzaXt=Gd2oNAZU~mEbfA9hcj{pGzodm-E|9S3oifsuHAV6S1f$)!j0mBm#0RjXF zbP~}2JHgtP009Dn3xxgOKLHJ1Ea4F#K%k+3{@)PORs;wTm`fo1BVew`MN5DH0Rndf z!v61{fbK|IOMn1@xdin8x#}fa0t5&&6bSzaXt=Gd2oNAZU~mEbfA9hcj{pGzodop% zPO!EmK!CvD0{Z{p1riMHyZA*Xvfx!jz|G^6+JOTs=^biP}|L3{K5%wfN zfB=C31@!*`iy|Zf1PJsN(ENJ~+n)dd0s{&}&EG%%3|J!}5gJ@ zl|bfGdh(~O%q4qA;Otu7+0$AA%L|-+IHQd(UkC|S2xM+)#cq}m*g+uk)lB!(4)u~_ zB#^(Q(PXa#HW$c$FYEofdAZk4KYv}<&Ruo{$_4UYO8(dKQ+mwI$5!}jkHGBtmcY-V zzZIBs35+1{^8-1XFhcP87|tTE7|$Q%*8v6cRUR;4ArWXOkiY*Gzc!5R#jPq5KV0%+ z3XaYQ92P4a%&hejfk_3z8{X=9Qi{5;>(J`XvZ}d2N%@)|YC{5V36xy;>N#&E_3pXV 
zT;GN7#=qg|U-`_#hAdKEOdxZ;Z~n9xRF8BjgnNWx&p(0kau6dhwLtk*PiIb@_{hPh z)IUc?PG_RnemZ|9Oq7mL2}A-VH~ef)w3XN10%wId9Pb^>!uub3MuQ8dFSWG=qMKa) zx;D#P*_Bsqy_sz*b;+mg(RL&7n82lb{+aw(U=F$|aJ90V%r8+bp(xqM90J1%lw9)f zoME#NSc$;5jvnp{lz^K*%JXVFc_2-KNdMPM|6!%IF|km4awC~$b6 zn+z5PU>bob1P(unDIymnftvz{m;Y?==2?~#7){{ts*e_=cnA<6K%k+(Vdwu0Hax;s z1PBlyFq*(&^B*lp@em+DfIvfm!_M!o{SAd}MSuW-(FAn<(JCSy0t5*37SQ>73)`On z0RjUGX#N8hMMwk)5a=gx*7iT+{SLJ=0RjXFyen|l{NIhr=>!N6Aka_Xtn>S8e?N0O z6Cgm~b%C?y|9W5oAV7csfqnvq&A;Dqb|yf80D;#9bpF>%AOHdc2s|&K`9E*&L;?f| zyen|n`9Fj2M&)z@1PBo5C!q89v$Zn;0tChsIP3iW+COH!L`8rAfhGcH&A-X!HX%TO z0D;j2&f31e^N&_3@em+Dpou`H?d$wagl$5A0D%DoH2(pMA|wI?2=o)kw|#%@?9NvvJQj^8(0*9BgdT@4)JMm}G?rdxAyaI<;V%~H_ zKD)r-g}gsFJ4?g(&oCPvVXG+w4sXH~kqdGefx}C9X>b{ehZ9MW9}d8AFAAvsi};%# zo!-5|kCs8$k4Z(ZTYN(iIn5|v=I@Wj(rAS(CYYuh92 zMj#Y8ymFbraINRw0%;+7pJ@NL1kzf6M&1g>xl0Nh-m%Q!k_dAp6;)@h;6+;~5LNl} zTDZ_O0%rp0r79db+tBzc0#VgoiNT>=1(die?^c^fcd77oE26oQjVd@-@S^=GpuRte zOh^TyYg6}{Zfj&ffvC6xMku6e0mZGR+i+oYTQ0qB$hAhwQI*$7N)~?tVR4xdNN+`W zL&~2?t|SoFeWkoM&m}DD=XrAt>Hi5tx8l+(!zBfxJFsMq8z!Z!8x}oGIl2Jlui+-1 zmludG#`5`Yl3-ZYO;X-G&9LHi&zl2on|^p(wnbPe@vxwu=ao{EODYhR`{!%AgOQyD zq6@P##R9oU)%?6Jkdzc6fwamcBhe;a348>io5N&k0o9zke3{^*%I04)`}m1KBoN-E z=qImp2`J@U^-7}+D|qQS-O31o5`plVmCQG1Oo6bDpXV`y6jeqbs$j`$W;Z_(C=m#+ zS;>5J#uNza_<0^PNKu6XQOU|*!%aLBxFZnWx;y(=`>udGzUyCpdRVE#=km?XBydR} zyn&Y%nmmp`Sj5lsIDv`eBM?<5|4NL&lLFyweDYjpH53So_<3%)v#st3M0Lr(-Z{ux z0(Ao6ovhozs@DY+ai|i6<;Xn09-U@{1gZ;@XyI6 z3@D(2U5gU(=AXNUVRHgK1i~xbV}E=07Er-X!Xa$_xl;hPCD2Op! 
zGL}C#Ioc)!-Vz9}`dcwL_l|%H-qHK44FP{X-#p850z(Oe7k;R~1XC!Wf`w4uX`xuj z^LL>K0*eWR_kXd}Hc3YX3kw}Wl{^=2YZ`%h1;P(u-Yj;GTm{qWhbD#3=_W=9tSu0I z8fz!JYIcek30!?$^s|-RMHmD|5Qsja5ke9}oj_E|&uiU2udhlIsUCSf7Xc6$Ss?ndMh;bM zKLw&{{=NQO^~!|9P;U;tl88eI%qtLnar0&&@~=Qx(DeB`?fHi^nCg+|gLfi<^#r01 zaJ>Zg%qS}B&)1tuKlg7n;bxqc6Ce-@L?34Oljqq5qTzFK}BCw%A^!aZnAmu-S=q6OWk|aQ2B7yW9 zpD1{t5+Fc;K(#>B|Eu@0kN^P!1SS&D{3j}wPzewq&`Ka`{-4)YJKKi<0RjYu70~>L zEsekk5FpT1AnN~J&$2lI0t5(*AP{wa|LikDQN%!i0D-OoY3KhN>3XQm2@oJa;9Y^k z=KpR;PA5Qs0D;~DntyL$`x78QU^D@pf3%8-hX4Tr4+v=f43uD;v}5ZF*a+uyM0QYJuPLxFs^4}aYdN6G{U5Li_pZ10)pRnwOj0RjZ}5y&+8 z^rwATWJ-Vlfh7gf#-1Np5T z;AgLHHDMMIaS|XvU?PE5ExP$W6B!DX009DH3N-J`JvJPZPE-U45ST)sM_cZ_=M=Dl zBtU?`AOgL6@}>O;F%bp<0t8kSxYUy`On@x;mw;#q5FoI;KwIuu!Nss^m0<#EIck;}_S#lRA0RjZ} z6v#C2@}KsklQjVX1jZLA@8l&j$In-U1PBn=P@trNm(SS{P|5@d5SU1yyq{-gPL!`u z2@oK#n?R;}SO2t|p?nDtATYQLx~e0Kwu()iuQiE zWTKRXN`L@?4FwKc_ocxN(WFd(0DSW@6;e{MTrNkd5yAV6Sn zfwrCe>TZMM2#){(0)q%#?XE-3AH+}?1PBlqM_{NfeJ7Z4NJT<`0D++d?zGnD*A7J| z7y<+c3@h+_e{Oi!pQ0y_wl_wACIJBZ1V009CE2$Zz+bj|{aOMw6Z0-Fn@y*(P)+(-I71fo0E z9z}a`VQjT(|6F9pYQ;*kCSVdq-f#_RaGDk@r3Ph#%dZ=WOz`_FISHEzY zQVRvbqI*6R2)R5HSXCgr{Hx};ZC+t<2Yl{VmsZL4i_p#lMi2^0^t{Ne>Xe4A`lj_{P~qo$2{~W+L&RBioi{Q@aEs#%JRnq!m3p~ zKNjbV2mSP%F)1+$`uRFY*fWOl)AloA#=OK#AQVXNdHAd67X{MN z{2X~P+9i($zGPV?6$(U`Jp9RXZ-EHE?zMN2JMUjuk)4U;KD$74$!AYzkoY6jhOdJp zxLX+I72HimzEcZC7klblUJO2BZ2tOUYK{&CE>$*M=$XKn0@0NoGsvN$io}|~4wbB6 zB7r;Qj<)hj;B|rMKEEE%H~~a5&0ojKR3sGw&F{l!!-_C0A#hV5y2&?pn|gUfQT;kK zg5WCzs;|o7!ioSaAy6lf-oUzj=313zT|F{a;-c*>P<@fYg}sBcKY_16c=5i^-(S~w z_~v!@ESvw0HUeOsr4il*xoH2bd&nS?&pEIT-<{1Stm+9H&qvq$2>TN_6PUN>pJim6009C7t_n=?XTjB^%qKvA0D(GzxBgJ5i`FUv1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs y0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FoIO!2bgsD41vf literal 0 HcmV?d00001 diff --git a/Demos/OpenCLClothDemo/vertex.glsl 
b/Demos/OpenCLClothDemo/vertex.glsl new file mode 100644 index 000000000..516983023 --- /dev/null +++ b/Demos/OpenCLClothDemo/vertex.glsl @@ -0,0 +1,7 @@ +void main() +{ + //gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex; + + gl_TexCoord[0] = gl_MultiTexCoord0; + gl_Position = gl_Vertex; +} \ No newline at end of file diff --git a/Demos/ParticlesOpenCL/AMD/CMakeLists.txt b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt index 727006017..f4c4422b6 100644 --- a/Demos/ParticlesOpenCL/AMD/CMakeLists.txt +++ b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt @@ -7,6 +7,8 @@ ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL ) ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) @@ -53,15 +55,17 @@ IF (USE_GLUT) ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h - ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp - ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ) ELSE (USE_GLUT) diff --git a/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp index 
4da7f523c..f35e44d37 100644 --- a/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp +++ b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp @@ -329,7 +329,9 @@ void btParticlesDynamicsWorld::initCLKernels(int argc, char** argv) if (!m_cxMainContext) { // m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); - m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + + m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); + //m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext); diff --git a/Demos/SharedOpenCL/btOclCommon.cpp b/Demos/SharedOpenCL/btOclCommon.cpp index d412ef3c6..48fe105d7 100644 --- a/Demos/SharedOpenCL/btOclCommon.cpp +++ b/Demos/SharedOpenCL/btOclCommon.cpp @@ -85,7 +85,7 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* /* Use NULL for backward compatibility */ cl_context_properties* cprops = (NULL == platform) ? NULL : cps; cl_context retContext = clCreateContextFromType(cprops, - CL_DEVICE_TYPE_ALL, + deviceType, NULL, NULL, &ciErrNum); diff --git a/Demos/SharedOpenCL/btOclUtils.cpp b/Demos/SharedOpenCL/btOclUtils.cpp index 7af73b92a..6e0823227 100644 --- a/Demos/SharedOpenCL/btOclUtils.cpp +++ b/Demos/SharedOpenCL/btOclUtils.cpp @@ -1,3 +1,18 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + #include #include #include diff --git a/Demos/SharedOpenCL/btOclUtils.h b/Demos/SharedOpenCL/btOclUtils.h index fba65d8c6..309deca50 100644 --- a/Demos/SharedOpenCL/btOclUtils.h +++ b/Demos/SharedOpenCL/btOclUtils.h @@ -1,3 +1,17 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ #ifndef BT_OCL_UTILS_H #define BT_OCL_UTILS_H diff --git a/Demos/VectorAdd_OpenCL/VectorAddKernels.cl b/Demos/VectorAdd_OpenCL/VectorAddKernels.cl index e224eb6ff..f3d5b3486 100644 --- a/Demos/VectorAdd_OpenCL/VectorAddKernels.cl +++ b/Demos/VectorAdd_OpenCL/VectorAddKernels.cl @@ -1,13 +1,4 @@ -#ifndef GUID_ARG -#define GUID_ARG -#endif - - -#ifndef MSTRINGIFY -#define MSTRINGIFY(A) A -#endif - MSTRINGIFY( diff --git a/src/BulletMultiThreaded/CMakeLists.txt b/src/BulletMultiThreaded/CMakeLists.txt index b4207a72b..6267a5307 100644 --- a/src/BulletMultiThreaded/CMakeLists.txt +++ b/src/BulletMultiThreaded/CMakeLists.txt @@ -67,10 +67,8 @@ ADD_LIBRARY(BulletMultiThreaded ) -#for now, only Direct 11 (Direct Compute) -IF(USE_DX11) - SUBDIRS(GpuSoftBodySolvers) -ENDIF(USE_DX11) + +SUBDIRS(GpuSoftBodySolvers) IF (BUILD_SHARED_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt index aaf2e4bef..63cc88b7a 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt @@ -3,20 +3,12 @@ INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src ) -LIST(APPEND SubDirList "CPU") +SUBDIRS ( + OpenCL + CPU +) -# Configure use of OpenCL and DX11 -# Generates the settings file and defines libraries and include paths -OPTION(USE_OPENCL "Use OpenCL" OFF) - - - -if( USE_OPENCL ) - LIST(APPEND SubDirList "OpenCL") -endif( USE_OPENCL ) -if( USE_DX11 ) - LIST(APPEND SubDirList "DX11") -endif( USE_DX11 ) - -SUBDIRS( ${SubDirList} ) +IF( USE_DX11 ) + SUBDIRS( DX11 ) +ENDIF( USE_DX11 ) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt index 5fea665b0..3bfffcdcb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt @@ -14,14 +14,17 @@ ${VECTOR_MATH_INCLUDE} 
SET(BulletSoftBodyDX11Solvers_SRCS btSoftBodySolver_DX11.cpp + btSoftBodySolver_DX11SIMDAware.cpp ) SET(BulletSoftBodyDX11Solvers_HDRS btSoftBodySolver_DX11.h + btSoftBodySolver_DX11SIMDAware.h ../cpu/btSoftBodySolverData.h btSoftBodySolverVertexData_DX11.h btSoftBodySolverTriangleData_DX11.h btSoftBodySolverLinkData_DX11.h + btSoftBodySolverLinkData_DX11SIMDAware.h btSoftBodySolverBuffer_DX11.h btSoftBodySolverVertexBuffer_DX11.h @@ -37,6 +40,7 @@ SET(BulletSoftBodyDX11Solvers_Shaders UpdatePositions UpdateNodes SolvePositions + SolvePositionsSIMDBatched UpdatePositionsFromVelocities ApplyForces PrepareLinks diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl new file mode 100644 index 000000000..5106f612d --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl @@ -0,0 +1,128 @@ +MSTRINGIFY( + +cbuffer SolvePositionsFromLinksKernelCB : register( b0 ) +{ + int startWaveInBatch; + int numWaves; + float kst; + float ti; +}; + + +// Number of batches per wavefront stored one element per logical wavefront +StructuredBuffer g_wavefrontBatchCountsVertexCounts : register( t0 ); +// Set of up to maxNumVertices vertex addresses per wavefront +StructuredBuffer g_vertexAddressesPerWavefront : register( t1 ); + +StructuredBuffer g_verticesInverseMass : register( t2 ); + +// Per-link data layed out structured in terms of sub batches within wavefronts +StructuredBuffer g_linksVertexIndices : register( t3 ); +StructuredBuffer g_linksMassLSC : register( t4 ); +StructuredBuffer g_linksRestLengthSquared : register( t5 ); + +RWStructuredBuffer g_vertexPositions : register( u0 ); + +// Data loaded on a per-wave basis +groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER]; +groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; +groupshared 
float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; + +// Storing the vertex addresses actually slowed things down a little +//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER]; + + +[numthreads(BLOCK_SIZE, 1, 1)] +void +SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) +{ + const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1)); + const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE); + const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER; + const int localWavefront = wavefront - firstWavefrontInBlock; + + // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier + if( wavefront < (startWaveInBatch + numWaves) ) + { + + // Load the batch counts for the wavefronts + // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier + if( laneInWavefront == 0 ) + { + int2 batchesAndVertexCountsWithinWavefront = g_wavefrontBatchCountsVertexCounts[firstWavefrontInBlock + localWavefront]; + wavefrontBatchCountsVertexCounts[localWavefront] = batchesAndVertexCountsWithinWavefront; + } + + + int2 batchesAndVerticesWithinWavefront = wavefrontBatchCountsVertexCounts[localWavefront]; + int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x; + int verticesUsedByWave = batchesAndVerticesWithinWavefront.y; + + // Load the vertices for the wavefronts + for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) + { + int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + + //vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress; + vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress]; + 
vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress]; + } + + // Loop through the batches performing the solve on each in LDS + int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE; + + //for( int batch = 0; batch < batchesWithinWavefront; ++batch ) + + int batch = 0; + do + { + int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch; + int locationOfValue = baseDataLocation + laneInWavefront; + + + // These loads should all be perfectly linear across the WF + int2 localVertexIndices = g_linksVertexIndices[locationOfValue]; + float massLSC = g_linksMassLSC[locationOfValue]; + float restLengthSquared = g_linksRestLengthSquared[locationOfValue]; + + + // LDS vertex addresses based on logical wavefront number in block and loaded index + int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x; + int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y; + + float3 position0 = vertexPositionSharedData[vertexAddress0].xyz; + float3 position1 = vertexPositionSharedData[vertexAddress1].xyz; + + float inverseMass0 = vertexInverseMassSharedData[vertexAddress0]; + float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; + + float3 del = position1 - position0; + float len = dot(del, del); + + float k = 0; + if( massLSC > 0.0f ) + { + k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst; + } + + position0 = position0 - del*(k*inverseMass0); + position1 = position1 + del*(k*inverseMass1); + + vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f); + vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f); + + ++batch; + } while( batch < batchesWithinWavefront ); + + // Update the global memory vertices for the wavefronts + for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE ) + { + int vertexAddress = 
g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + + g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex]; + } + } + +} + +); \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h new file mode 100644 index 000000000..92864a159 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h @@ -0,0 +1,173 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_DX11.h" + +#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H +#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H + +struct ID3D11Device; +struct ID3D11DeviceContext; + + +class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData +{ +public: + bool m_onGPU; + ID3D11Device *m_d3dDevice; + ID3D11DeviceContext *m_d3dDeviceContext; + + const int m_wavefrontSize; + const int m_linksPerWorkItem; + const int m_maxLinksPerWavefront; + int m_maxBatchesWithinWave; + int m_maxVerticesWithinWave; + int m_numWavefronts; + + int m_maxVertex; + + struct NumBatchesVerticesPair + { + int numBatches; + int numVertices; + }; + + // Array storing number of links in each wavefront + btAlignedObjectArray m_linksPerWavefront; + btAlignedObjectArray m_numBatchesAndVerticesWithinWaves; + btDX11Buffer< NumBatchesVerticesPair > m_dx11NumBatchesAndVerticesWithinWaves; + + // All arrays here will contain batches of m_maxLinksPerWavefront links + // ordered by wavefront. + // with either global vertex pairs or local vertex pairs + btAlignedObjectArray< int > m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront + btDX11Buffer m_dx11WavefrontVerticesGlobalAddresses; + btAlignedObjectArray< LinkNodePair > m_linkVerticesLocalAddresses; // Vertex pair for the link + btDX11Buffer m_dx11LinkVerticesLocalAddresses; + btDX11Buffer m_dx11LinkStrength; + btDX11Buffer m_dx11LinksMassLSC; + btDX11Buffer m_dx11LinksRestLengthSquared; + btDX11Buffer m_dx11LinksRestLength; + btDX11Buffer m_dx11LinksMaterialLinearStiffnessCoefficient; + + struct BatchPair + { + int start; + int length; + + BatchPair() : + start(0), + length(0) + { + } + + BatchPair( int s, int l ) : + start( s ), + length( l ) + { + } + }; + + /** + * Link addressing information for each cloth. + * Allows link locations to be computed independently of data batching. 
+ */ + btAlignedObjectArray< int > m_linkAddresses; + + /** + * Start and length values for computation batches over link data. + */ + btAlignedObjectArray< BatchPair > m_wavefrontBatchStartLengths; + + + //ID3D11Buffer* readBackBuffer; + + btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ); + + virtual ~btSoftBodyLinkDataDX11SIMDAware(); + + /** Allocate enough space in all link-related arrays to fit numLinks links */ + virtual void createLinks( int numLinks ); + + /** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */ + virtual void setLinkAt( const LinkDescription &link, int linkIndex ); + + virtual bool onAccelerator(); + + virtual bool moveToAccelerator(); + + virtual bool moveFromAccelerator(); + + /** + * Generate (and later update) the batching for the entire link set. + * This redoes a lot of work because it batches the entire set when each cloth is inserted. + * In theory we could delay it until just before we need the cloth. + * It's a one-off overhead, though, so that is a later optimisation. + */ + void generateBatches(); + + int getMaxVerticesPerWavefront() + { + return m_maxVerticesWithinWave; + } + + int getWavefrontSize() + { + return m_wavefrontSize; + } + + int getLinksPerWorkItem() + { + return m_linksPerWorkItem; + } + + int getMaxLinksPerWavefront() + { + return m_maxLinksPerWavefront; + } + + int getMaxBatchesPerWavefront() + { + return m_maxBatchesWithinWave; + } + + int getNumWavefronts() + { + return m_numWavefronts; + } + + NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) + { + return m_numBatchesAndVerticesWithinWaves[wavefront]; + } + + int getVertexGlobalAddresses( int vertexIndex ) + { + return m_wavefrontVerticesGlobalAddresses[vertexIndex]; + } + + /** + * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally. 
+ */ + LinkNodePair getVertexPairLocalAddresses( int linkIndex ) + { + return m_linkVerticesLocalAddresses[linkIndex]; + } + +}; + + +#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp index 9c9b325a8..7877aa6a0 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp @@ -622,7 +622,7 @@ void btDX11SoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softB using Vectormath::Aos::Point3; // Create SoftBody that will store the information within the solver - btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + btDX11AcceleratedSoftBodyInterface *newSoftBody = new btDX11AcceleratedSoftBodyInterface( softBody ); m_softBodySet.push_back( newSoftBody ); m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); @@ -1451,11 +1451,11 @@ void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float -btDX11SoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +btDX11AcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) { for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + btDX11AcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; if( softBodyInterface->getSoftBody() == softBody ) return softBodyInterface; } @@ -1466,7 +1466,7 @@ void btDX11SoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const { checkInitialized(); - btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + 
btDX11AcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); const int firstVertex = currentCloth->getFirstVertex(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h index a61e5166c..ea5b3d462 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h @@ -13,6 +13,9 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ +#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H +#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H + #include "vectormath/vmInclude.h" #include "BulletSoftBody/btSoftBodySolvers.h" @@ -22,185 +25,184 @@ subject to the following restrictions: #include "btSoftBodySolverTriangleData_DX11.h" -#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H -#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H + +/** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. 
+ */ +class btDX11AcceleratedSoftBodyInterface +{ +protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + +public: + btDX11AcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { + return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + // TODO: All of these set functions will have to do checks and + // update the world because restructuring of the arrays will be necessary + // Reasonable use of "friend"? 
+ void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + +#if 0 + void setAcceleration( Vectormath::Aos::Vector3 acceleration ) + { + m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); + } + + void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) + { + m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); + } + + /** + * Set the density of the air in which the cloth is situated. + */ + void setAirDensity( btScalar density ) + { + m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); + } + + /** + * Add a collision object to this soft body. + */ + void addCollisionObject( btCollisionObject *collisionObject ) + { + m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); + } +#endif +}; + class btDX11SoftBodySolver : public btSoftBodySolver { public: - /** - * SoftBody class to maintain information about a soft body instance - * within a solver. - * This data addresses the main solver arrays. 
- */ - class btAcceleratedSoftBodyInterface - { - protected: - /** Current number of vertices that are part of this cloth */ - int m_numVertices; - /** Maximum number of vertices allocated to be part of this cloth */ - int m_maxVertices; - /** Current number of triangles that are part of this cloth */ - int m_numTriangles; - /** Maximum number of triangles allocated to be part of this cloth */ - int m_maxTriangles; - /** Index of first vertex in the world allocated to this cloth */ - int m_firstVertex; - /** Index of first triangle in the world allocated to this cloth */ - int m_firstTriangle; - /** Index of first link in the world allocated to this cloth */ - int m_firstLink; - /** Maximum number of links allocated to this cloth */ - int m_maxLinks; - /** Current number of links allocated to this cloth */ - int m_numLinks; - - /** The actual soft body this data represents */ - btSoftBody *m_softBody; - - - public: - btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : - m_softBody( softBody ) - { - m_numVertices = 0; - m_maxVertices = 0; - m_numTriangles = 0; - m_maxTriangles = 0; - m_firstVertex = 0; - m_firstTriangle = 0; - m_firstLink = 0; - m_maxLinks = 0; - m_numLinks = 0; - } - int getNumVertices() - { - return m_numVertices; - } - - int getNumTriangles() - { - return m_numTriangles; - } - - int getMaxVertices() - { - return m_maxVertices; - } - - int getMaxTriangles() - { - return m_maxTriangles; - } - - int getFirstVertex() - { - return m_firstVertex; - } - - int getFirstTriangle() - { - return m_firstTriangle; - } - - // TODO: All of these set functions will have to do checks and - // update the world because restructuring of the arrays will be necessary - // Reasonable use of "friend"? 
- void setNumVertices( int numVertices ) - { - m_numVertices = numVertices; - } - - void setNumTriangles( int numTriangles ) - { - m_numTriangles = numTriangles; - } - - void setMaxVertices( int maxVertices ) - { - m_maxVertices = maxVertices; - } - - void setMaxTriangles( int maxTriangles ) - { - m_maxTriangles = maxTriangles; - } - - void setFirstVertex( int firstVertex ) - { - m_firstVertex = firstVertex; - } - - void setFirstTriangle( int firstTriangle ) - { - m_firstTriangle = firstTriangle; - } - - void setMaxLinks( int maxLinks ) - { - m_maxLinks = maxLinks; - } - - void setNumLinks( int numLinks ) - { - m_numLinks = numLinks; - } - - void setFirstLink( int firstLink ) - { - m_firstLink = firstLink; - } - - int getMaxLinks() - { - return m_maxLinks; - } - - int getNumLinks() - { - return m_numLinks; - } - - int getFirstLink() - { - return m_firstLink; - } - - btSoftBody* getSoftBody() - { - return m_softBody; - } - - #if 0 - void setAcceleration( Vectormath::Aos::Vector3 acceleration ) - { - m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); - } - - void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) - { - m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); - } - - /** - * Set the density of the air in which the cloth is situated. - */ - void setAirDensity( btScalar density ) - { - m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); - } - - /** - * Add a collision object to this soft body. - */ - void addCollisionObject( btCollisionObject *collisionObject ) - { - m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); - } - #endif - }; - class KernelDesc { @@ -344,7 +346,7 @@ private: * Cloths owned by this solver. * Only our cloths are in this array. 
*/ - btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + btAlignedObjectArray< btDX11AcceleratedSoftBodyInterface * > m_softBodySet; /** Acceleration value to be applied to all non-static vertices in the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver. @@ -429,7 +431,7 @@ private: void updateConstants( float timeStep ); - btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + btDX11AcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); ////////////////////////////////////// // Kernel dispatches diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp new file mode 100644 index 000000000..c72dead3e --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp @@ -0,0 +1,1793 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include + + +#define WAVEFRONT_SIZE 32 +#define WAVEFRONT_BLOCK_MULTIPLIER 2 +#define LINKS_PER_SIMD_LANE 16 + +#define STRINGIFY( S ) STRINGIFY2( S ) +#define STRINGIFY2( S ) #S + +#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" +#include "vectormath/vmInclude.h" + +#include "btSoftBodySolverLinkData_DX11SIMDAware.h" +#include "btSoftBodySolver_DX11SIMDAware.h" +#include "btSoftBodySolverVertexBuffer_DX11.h" +#include "BulletSoftBody/btSoftBody.h" + +#define MSTRINGIFY(A) #A +static char* PrepareLinksHLSLString = +#include "HLSL/PrepareLinks.hlsl" +static char* UpdatePositionsFromVelocitiesHLSLString = +#include "HLSL/UpdatePositionsFromVelocities.hlsl" +static char* SolvePositionsSIMDBatchedHLSLString = +#include "HLSL/SolvePositionsSIMDBatched.hlsl" +static char* UpdateNodesHLSLString = +#include "HLSL/UpdateNodes.hlsl" +static char* UpdatePositionsHLSLString = +#include "HLSL/UpdatePositions.hlsl" +static char* UpdateConstantsHLSLString = +#include "HLSL/UpdateConstants.hlsl" +static char* IntegrateHLSLString = +#include "HLSL/Integrate.hlsl" +static char* ApplyForcesHLSLString = +#include "HLSL/ApplyForces.hlsl" +static char* UpdateNormalsHLSLString = +#include "HLSL/UpdateNormals.hlsl" +static char* OutputToVertexArrayHLSLString = +#include "HLSL/OutputToVertexArray.hlsl" +static char* VSolveLinksHLSLString = +#include "HLSL/VSolveLinks.hlsl" + + + +btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : + m_d3dDevice( d3dDevice ), + m_d3dDeviceContext( d3dDeviceContext ), + m_wavefrontSize( WAVEFRONT_SIZE ), + m_linksPerWorkItem( LINKS_PER_SIMD_LANE ), + m_maxBatchesWithinWave( 0 ), + m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ), + m_numWavefronts( 0 ), + m_maxVertex( 0 ), + m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ), + m_dx11WavefrontVerticesGlobalAddresses( 
d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ), + m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ), + m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ), + m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ), + m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ), + m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ), + m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true ) +{ + m_d3dDevice = d3dDevice; + m_d3dDeviceContext = d3dDeviceContext; +} + +btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware() +{ +} + +static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec ) +{ + Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() ); + return outVec; +} + +void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks ) +{ + int previousSize = m_links.size(); + int newSize = previousSize + numLinks; + + btSoftBodyLinkData::createLinks( numLinks ); + + // Resize the link addresses array as well + m_linkAddresses.resize( newSize ); +} + +void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex ) +{ + btSoftBodyLinkData::setLinkAt( link, linkIndex ); + + if( link.getVertex0() > m_maxVertex ) + m_maxVertex = link.getVertex0(); + if( link.getVertex1() > m_maxVertex ) + m_maxVertex = link.getVertex1(); + + // Set the link index correctly for initialisation + m_linkAddresses[linkIndex] = linkIndex; +} + +bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator() +{ + return m_onGPU; +} + +bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator() +{ + bool success = true; + + success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU(); + success = success && m_dx11WavefrontVerticesGlobalAddresses.moveToGPU(); + success = 
success && m_dx11LinkVerticesLocalAddresses.moveToGPU(); + success = success && m_dx11LinkStrength.moveToGPU(); + success = success && m_dx11LinksMassLSC.moveToGPU(); + success = success && m_dx11LinksRestLengthSquared.moveToGPU(); + success = success && m_dx11LinksRestLength.moveToGPU(); + success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU(); + + if( success ) + m_onGPU = true; + + return success; +} + +bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator() +{ + bool success = true; + success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU(); + success = success && m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU(); + success = success && m_dx11LinkVerticesLocalAddresses.moveFromGPU(); + success = success && m_dx11LinkStrength.moveFromGPU(); + success = success && m_dx11LinksMassLSC.moveFromGPU(); + success = success && m_dx11LinksRestLengthSquared.moveFromGPU(); + success = success && m_dx11LinksRestLength.moveFromGPU(); + success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU(); + + if( success ) + m_onGPU = false; + + return success; +} + + + + + + + + + + + + + + + +btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context) : + m_dx11Device( dx11Device ), + m_dx11Context( dx11Context ), + m_linkData(m_dx11Device, m_dx11Context), + m_vertexData(m_dx11Device, m_dx11Context), + m_triangleData(m_dx11Device, m_dx11Context), + m_dx11PerClothAcceleration( m_dx11Device, m_dx11Context, &m_perClothAcceleration, true ), + m_dx11PerClothWindVelocity( m_dx11Device, m_dx11Context, &m_perClothWindVelocity, true ), + m_dx11PerClothDampingFactor( m_dx11Device, m_dx11Context, &m_perClothDampingFactor, true ), + m_dx11PerClothVelocityCorrectionCoefficient( m_dx11Device, m_dx11Context, &m_perClothVelocityCorrectionCoefficient, true ), + m_dx11PerClothLiftFactor( m_dx11Device, m_dx11Context, &m_perClothLiftFactor, true ), + 
m_dx11PerClothDragFactor( m_dx11Device, m_dx11Context, &m_perClothDragFactor, true ), + m_dx11PerClothMediumDensity( m_dx11Device, m_dx11Context, &m_perClothMediumDensity, true ) +{ + // Initial we will clearly need to update solver constants + // For now this is global for the cloths linked with this solver - we should probably make this body specific + // for performance in future once we understand more clearly when constants need to be updated + m_updateSolverConstants = true; + + m_shadersInitialized = false; +} + +void btDX11SIMDAwareSoftBodySolver::releaseKernels() +{ + SAFE_RELEASE( integrateKernel.constBuffer ); + SAFE_RELEASE( integrateKernel.kernel ); + SAFE_RELEASE( solvePositionsFromLinksKernel.constBuffer ); + SAFE_RELEASE( solvePositionsFromLinksKernel.kernel ); + SAFE_RELEASE( updatePositionsFromVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updatePositionsFromVelocitiesKernel.kernel ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ); + SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel ); + SAFE_RELEASE( resetNormalsAndAreasKernel.constBuffer ); + SAFE_RELEASE( resetNormalsAndAreasKernel.kernel ); + SAFE_RELEASE( normalizeNormalsAndAreasKernel.constBuffer ); + SAFE_RELEASE( normalizeNormalsAndAreasKernel.kernel ); + SAFE_RELEASE( updateSoftBodiesKernel.constBuffer ); + SAFE_RELEASE( updateSoftBodiesKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.kernel ); + + + SAFE_RELEASE( addVelocityKernel.constBuffer ); + SAFE_RELEASE( addVelocityKernel.kernel ); + SAFE_RELEASE( applyForcesKernel.constBuffer ); + SAFE_RELEASE( 
applyForcesKernel.kernel ); + SAFE_RELEASE( outputToVertexArrayKernel.constBuffer ); + SAFE_RELEASE( outputToVertexArrayKernel.kernel ); + SAFE_RELEASE( collideCylinderKernel.constBuffer ); + SAFE_RELEASE( collideCylinderKernel.kernel ); + + m_shadersInitialized = false; +} + +btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver() +{ + releaseKernels(); +} + + +void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ) +{ + if( m_softBodySet.size() != softBodies.size() ) + { + // Have a change in the soft body set so update, reloading all the data + getVertexData().clear(); + getTriangleData().clear(); + getLinkData().clear(); + m_softBodySet.resize(0); + + + for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) + { + btSoftBody *softBody = softBodies[ softBodyIndex ]; + using Vectormath::Aos::Matrix3; + using Vectormath::Aos::Point3; + + // Create SoftBody that will store the information within the solver + btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + m_softBodySet.push_back( newSoftBody ); + + m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); + m_perClothDampingFactor.push_back(softBody->m_cfg.kDP); + m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF ); + m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); + m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); + m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); + + // Add space for new vertices and triangles in the default solver for now + // TODO: Include space here for tearing too later + int firstVertex = getVertexData().getNumVertices(); + int numVertices = softBody->m_nodes.size(); + int maxVertices = numVertices; + // Allocate space for new vertices in all the vertex arrays + getVertexData().createVertices( maxVertices, softBodyIndex ); + + int firstTriangle = getTriangleData().getNumTriangles(); + 
int numTriangles = softBody->m_faces.size(); + int maxTriangles = numTriangles; + getTriangleData().createTriangles( maxTriangles ); + + // Copy vertices from softbody into the solver + for( int vertex = 0; vertex < numVertices; ++vertex ) + { + Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ()); + btSoftBodyVertexData::VertexDescription desc; + + // TODO: Position in the softbody might be pre-transformed + // or we may need to adapt for the pose. + //desc.setPosition( cloth.getMeshTransform()*multPoint ); + desc.setPosition( multPoint ); + + float vertexInverseMass = softBody->m_nodes[vertex].m_im; + desc.setInverseMass(vertexInverseMass); + getVertexData().setVertexAt( desc, firstVertex + vertex ); + } + + // Copy triangles similarly + // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene + for( int triangle = 0; triangle < numTriangles; ++triangle ) + { + // Note that large array storage is relative to the array not to the cloth + // So we need to add firstVertex to each value + int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0])); + int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0])); + int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0])); + btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex); + getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle ); + + // Increase vertex triangle counts for this triangle + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++; + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++; + getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++; + } + + int firstLink = getLinkData().getNumLinks(); + int numLinks = softBody->m_links.size(); + int maxLinks = numLinks; + 
+ // Allocate space for the links + getLinkData().createLinks( numLinks ); + + // Add the links + for( int link = 0; link < numLinks; ++link ) + { + int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]); + int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]); + + btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST); + newLink.setLinkStrength(1.f); + getLinkData().setLinkAt(newLink, firstLink + link); + } + + newSoftBody->setFirstVertex( firstVertex ); + newSoftBody->setFirstTriangle( firstTriangle ); + newSoftBody->setNumVertices( numVertices ); + newSoftBody->setMaxVertices( maxVertices ); + newSoftBody->setNumTriangles( numTriangles ); + newSoftBody->setMaxTriangles( maxTriangles ); + newSoftBody->setFirstLink( firstLink ); + newSoftBody->setNumLinks( numLinks ); + } + + + + updateConstants(0.f); + + + m_linkData.generateBatches(); + m_triangleData.generateBatches(); + + + // Build the shaders to match the batching parameters + buildShaders(); + } + +} + + +btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData() +{ + // TODO: Consider setting link data to "changed" here + return m_linkData; +} + +btSoftBodyVertexData &btDX11SIMDAwareSoftBodySolver::getVertexData() +{ + // TODO: Consider setting vertex data to "changed" here + return m_vertexData; +} + +btSoftBodyTriangleData &btDX11SIMDAwareSoftBodySolver::getTriangleData() +{ + // TODO: Consider setting triangle data to "changed" here + return m_triangleData; +} + + +bool btDX11SIMDAwareSoftBodySolver::checkInitialized() +{ + if( !m_shadersInitialized ) + if( buildShaders() ) + m_shadersInitialized = true; + + return m_shadersInitialized; +} + +void btDX11SIMDAwareSoftBodySolver::resetNormalsAndAreas( int numVertices ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + 
constBuffer.numNodes = numVertices; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( resetNormalsAndAreasKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::resetNormalsAndAreas + +void btDX11SIMDAwareSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + constBuffer.numNodes = numVertices; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexTriangleCount.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( normalizeNormalsAndAreasKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::normalizeNormalsAndAreas + +void btDX11SIMDAwareSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdateSoftBodiesCB constBuffer; + + constBuffer.startFace = firstTriangle; + constBuffer.numFaces = numTriangles; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateSoftBodiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) ); + m_dx11Context->Unmap( updateSoftBodiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateSoftBodiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_triangleData.m_dx11VertexIndices.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_triangleData.m_dx11Normal.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_triangleData.m_dx11Area.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( updateSoftBodiesKernel.kernel, NULL, 0 ); + + int numBlocks = (numTriangles + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::executeUpdateSoftBodies + +void btDX11SIMDAwareSoftBodySolver::updateSoftBodies() +{ + using namespace Vectormath::Aos; + + + int numVertices = m_vertexData.getNumVertices(); + int numTriangles = m_triangleData.getNumTriangles(); + + // Ensure data is 
on accelerator + m_vertexData.moveToAccelerator(); + m_triangleData.moveToAccelerator(); + + resetNormalsAndAreas( numVertices ); + + + // Go through triangle batches so updates occur correctly + for( int batchIndex = 0; batchIndex < m_triangleData.m_batchStartLengths.size(); ++batchIndex ) + { + + int startTriangle = m_triangleData.m_batchStartLengths[batchIndex].start; + int numTriangles = m_triangleData.m_batchStartLengths[batchIndex].length; + + executeUpdateSoftBodies( startTriangle, numTriangles ); + } + + + normalizeNormalsAndAreas( numVertices ); + +} // btDX11SIMDAwareSoftBodySolver::updateSoftBodies + + +Vectormath::Aos::Vector3 btDX11SIMDAwareSoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ) +{ + return a*Vectormath::Aos::dot(v, a); +} + +void btDX11SIMDAwareSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ) +{ + float dtInverseMass = solverdt*inverseMass; + if( Vectormath::Aos::lengthSqr(force * dtInverseMass) > Vectormath::Aos::lengthSqr(vertexVelocity) ) + { + vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/dtInverseMass; + } else { + vertexForce += force; + } +} + +void btDX11SIMDAwareSoftBodySolver::applyForces( float solverdt ) +{ + using namespace Vectormath::Aos; + + // Ensure data is on accelerator + m_vertexData.moveToAccelerator(); + m_dx11PerClothAcceleration.moveToGPU(); + m_dx11PerClothLiftFactor.moveToGPU(); + m_dx11PerClothDragFactor.moveToGPU(); + m_dx11PerClothMediumDensity.moveToGPU(); + m_dx11PerClothWindVelocity.moveToGPU(); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + ApplyForcesCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverdt = solverdt; + constBuffer.epsilon = FLT_EPSILON; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(ApplyForcesCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexArea.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothLiftFactor.getSRV()) ); + m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11PerClothDragFactor.getSRV()) ); + m_dx11Context->CSSetShaderResources( 6, 1, &(m_dx11PerClothWindVelocity.getSRV()) ); + m_dx11Context->CSSetShaderResources( 7, 1, &(m_dx11PerClothAcceleration.getSRV()) ); + m_dx11Context->CSSetShaderResources( 8, 1, &(m_dx11PerClothMediumDensity.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( applyForcesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL 
); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 6, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 7, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 8, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + + +} // btDX11SIMDAwareSoftBodySolver::applyForces + +/** + * Integrate motion on the solver. + */ +void btDX11SIMDAwareSoftBodySolver::integrate( float solverdt ) +{ + // TEMPORARY COPIES + m_vertexData.moveToAccelerator(); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + IntegrateCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverdt = solverdt; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(IntegrateCB) ); + m_dx11Context->Unmap( integrateKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( integrateKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::integrate + +float btDX11SIMDAwareSoftBodySolver::computeTriangleArea( + const Vectormath::Aos::Point3 &vertex0, + const Vectormath::Aos::Point3 &vertex1, + const Vectormath::Aos::Point3 
&vertex2 ) +{ + Vectormath::Aos::Vector3 a = vertex1 - vertex0; + Vectormath::Aos::Vector3 b = vertex2 - vertex0; + Vectormath::Aos::Vector3 crossProduct = cross(a, b); + float area = length( crossProduct ); + return area; +} // btDX11SIMDAwareSoftBodySolver::computeTriangleArea + +// Update constants here is a simple CPU version that is run on optimize +void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep ) +{ + using namespace Vectormath::Aos; + + if( m_updateSolverConstants ) + { + m_updateSolverConstants = false; + + // Will have to redo this if we change the structure (tear, maybe) or various other possible changes + + // Initialise link constants + const int numLinks = m_linkData.getNumLinks(); + for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) ); + m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 ))); + float invMass0 = m_vertexData.getInverseMass(vertices.vertex0); + float invMass1 = m_vertexData.getInverseMass(vertices.vertex1); + float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex); + float massLSC = (invMass0 + invMass1)/linearStiffness; + m_linkData.getMassLSC(linkIndex) = massLSC; + float restLength = m_linkData.getRestLength(linkIndex); + float restLengthSquared = restLength*restLength; + m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; + } + } +} // btDX11SIMDAwareSoftBodySolver::updateConstants + + + +void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt ) +{ + + //std::cerr << "'GPU' solve constraints\n"; + using Vectormath::Aos::Vector3; + using Vectormath::Aos::Point3; + using Vectormath::Aos::lengthSqr; + using Vectormath::Aos::dot; + + // Prepare links + int numLinks = m_linkData.getNumLinks(); + int numVertices = m_vertexData.getNumVertices(); + + float kst = 1.f; + float ti = 0.f; + + + 
m_dx11PerClothDampingFactor.moveToGPU(); + m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU(); + + + + // Ensure data is on accelerator + m_linkData.moveToAccelerator(); + m_vertexData.moveToAccelerator(); + + // Solve drift + for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) + { + int it = iteration; + + for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i ) + { + int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start; + int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length; + + solveLinksForPosition( startWave, numWaves, kst, ti ); + } + + } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) + + + + + updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + +} // btDX11SIMDAwareSoftBodySolver::solveConstraints + + + + +////////////////////////////////////// +// Kernel dispatches + + +void btDX11SIMDAwareSoftBodySolver::updatePositionsFromVelocities( float solverdt ) +{ + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + UpdatePositionsFromVelocitiesCB constBuffer; + + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.solverSDT = solverdt; + + // Todo: factor this out. 
Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updatePositionsFromVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdatePositionsFromVelocitiesCB) ); + m_dx11Context->Unmap( updatePositionsFromVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updatePositionsFromVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( updatePositionsFromVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::updatePositionsFromVelocities + + +void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti ) +{ + + + m_vertexData.moveToAccelerator(); + m_linkData.moveToAccelerator(); + + // Copy kernel parameters to GPU + SolvePositionsFromLinksKernelCB constBuffer; + + // Set the first wave of the batch and the number of waves + constBuffer.startWave = startWave; + constBuffer.numWaves = numWaves; + + 
constBuffer.kst = kst; + constBuffer.ti = ti; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) ); + m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) ); + m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 ); + + int numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER ); + m_dx11Context->Dispatch(numBlocks , 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, 
&pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } +} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition + + +void btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) +{ + // Copy kernel parameters to GPU + UpdateVelocitiesFromPositionsWithVelocitiesCB constBuffer; + + // Set the first link of the batch + // and the batch size + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.isolverdt = isolverdt; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) ); + m_dx11Context->Unmap( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothVelocityCorrectionCoefficient.getSRV()) ); + m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothDampingFactor.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + + // Execute the kernel + m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks , 1, 
1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + +} // btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithVelocities + +void btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) +{ + // Copy kernel parameters to GPU + UpdateVelocitiesFromPositionsWithoutVelocitiesCB constBuffer; + + // Set the first link of the batch + // and the batch size + constBuffer.numNodes = m_vertexData.getNumVertices(); + constBuffer.isolverdt = isolverdt; + + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) ); + m_dx11Context->Unmap( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) ); + m_dx11Context->CSSetShaderResources( 3, 1, 
&(m_dx11PerClothDampingFactor.getSRV()) ); + + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL ); + + + // Execute the kernel + m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks , 1, 1 ); + + { + // Tidy up + ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + +} // btDX11SIMDAwareSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities + +// End kernel dispatches +///////////////////////////////////// + + + + + + + + + +btDX11SIMDAwareSoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SIMDAwareSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +{ + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + if( softBodyInterface->getSoftBody() == softBody ) + return softBodyInterface; + } + return 0; +} + +void btDX11SIMDAwareSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ) +{ + checkInitialized(); + + btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + const int firstVertex = 
currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); + + if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) + { + // If we're doing a CPU-buffer copy must copy the data back to the host first + m_vertexData.m_dx11VertexPosition.copyFromGPU(); + m_vertexData.m_dx11VertexNormal.copyFromGPU(); + + const int firstVertex = currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); + const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); + float *basePointer = cpuVertexBuffer->getBasePointer(); + + if( vertexBuffer->hasVertexPositions() ) + { + const int vertexOffset = cpuVertexBuffer->getVertexOffset(); + const int vertexStride = cpuVertexBuffer->getVertexStride(); + float *vertexPointer = basePointer + vertexOffset; + + for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) + { + Vectormath::Aos::Point3 position = m_vertexData.getPosition(vertexIndex); + *(vertexPointer + 0) = position.getX(); + *(vertexPointer + 1) = position.getY(); + *(vertexPointer + 2) = position.getZ(); + vertexPointer += vertexStride; + } + } + if( vertexBuffer->hasNormals() ) + { + const int normalOffset = cpuVertexBuffer->getNormalOffset(); + const int normalStride = cpuVertexBuffer->getNormalStride(); + float *normalPointer = basePointer + normalOffset; + + for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) + { + Vectormath::Aos::Vector3 normal = m_vertexData.getNormal(vertexIndex); + *(normalPointer + 0) = normal.getX(); + *(normalPointer + 1) = normal.getY(); + *(normalPointer + 2) = normal.getZ(); + normalPointer += normalStride; + } + } + } else if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::DX11_BUFFER ) + { + // Do a DX11 copy shader DX to DX copy + + const btDX11VertexBufferDescriptor *dx11VertexBuffer = static_cast< btDX11VertexBufferDescriptor* 
>(vertexBuffer); + + // No need to batch link solver, it is entirely parallel + // Copy kernel parameters to GPU + OutputToVertexArrayCB constBuffer; + ID3D11ComputeShader* outputToVertexArrayShader = outputToVertexArrayWithoutNormalsKernel.kernel; + ID3D11Buffer* outputToVertexArrayConstBuffer = outputToVertexArrayWithoutNormalsKernel.constBuffer; + + constBuffer.startNode = firstVertex; + constBuffer.numNodes = currentCloth->getNumVertices(); + constBuffer.positionOffset = vertexBuffer->getVertexOffset(); + constBuffer.positionStride = vertexBuffer->getVertexStride(); + if( vertexBuffer->hasNormals() ) + { + constBuffer.normalOffset = vertexBuffer->getNormalOffset(); + constBuffer.normalStride = vertexBuffer->getNormalStride(); + outputToVertexArrayShader = outputToVertexArrayWithNormalsKernel.kernel; + outputToVertexArrayConstBuffer = outputToVertexArrayWithNormalsKernel.constBuffer; + } + + // TODO: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup + D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; + m_dx11Context->Map( outputToVertexArrayConstBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ); + memcpy( MappedResource.pData, &constBuffer, sizeof(OutputToVertexArrayCB) ); + m_dx11Context->Unmap( outputToVertexArrayConstBuffer, 0 ); + m_dx11Context->CSSetConstantBuffers( 0, 1, &outputToVertexArrayConstBuffer ); + + // Set resources and dispatch + m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) ); + m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) ); + + ID3D11UnorderedAccessView* dx11UAV = dx11VertexBuffer->getDX11UAV(); + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(dx11UAV), NULL ); + + // Execute the kernel + m_dx11Context->CSSetShader( outputToVertexArrayShader, NULL, 0 ); + + int numBlocks = (constBuffer.numNodes + (128-1)) / 128; + m_dx11Context->Dispatch(numBlocks, 1, 1 ); + + { + // Tidy up + 
ID3D11ShaderResourceView* pViewNULL = NULL; + m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL ); + m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL ); + + ID3D11UnorderedAccessView* pUAViewNULL = NULL; + m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL ); + + ID3D11Buffer *pBufferNull = NULL; + m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull ); + } + } + +} // btDX11SoftBodySolver::outputToVertexBuffers + + + + + +btDX11SIMDAwareSoftBodySolver::KernelDesc btDX11SIMDAwareSoftBodySolver::compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros ) +{ + const char *cs5String = "cs_5_0"; + + HRESULT hr = S_OK; + ID3DBlob* pErrorBlob = NULL; + ID3DBlob* pBlob = NULL; + ID3D11ComputeShader* kernelPointer = 0; + + hr = D3DX11CompileFromMemory( + shaderString, + strlen(shaderString), + shaderName, + compileMacros, + NULL, + shaderName, + cs5String, + D3D10_SHADER_ENABLE_STRICTNESS, + NULL, + NULL, + &pBlob, + &pErrorBlob, + NULL + ); + + if( FAILED(hr) ) + { + if( pErrorBlob ) { + btAssert( "Compilation of compute shader failed\n" ); + char *debugString = (char*)pErrorBlob->GetBufferPointer(); + OutputDebugStringA( debugString ); + } + + SAFE_RELEASE( pErrorBlob ); + SAFE_RELEASE( pBlob ); + + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + + // Create the Compute Shader + hr = m_dx11Device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, &kernelPointer ); + if( FAILED( hr ) ) + { + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + + ID3D11Buffer* constBuffer = 0; + if( constBufferSize > 0 ) + { + // Create the constant buffer + D3D11_BUFFER_DESC constant_buffer_desc; + ZeroMemory(&constant_buffer_desc, sizeof(constant_buffer_desc)); + 
constant_buffer_desc.ByteWidth = constBufferSize; + constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + m_dx11Device->CreateBuffer(&constant_buffer_desc, NULL, &constBuffer); + if( FAILED( hr ) ) + { + KernelDesc descriptor; + descriptor.kernel = 0; + descriptor.constBuffer = 0; + return descriptor; + } + } + + SAFE_RELEASE( pErrorBlob ); + SAFE_RELEASE( pBlob ); + + btDX11SIMDAwareSoftBodySolver::KernelDesc descriptor; + descriptor.kernel = kernelPointer; + descriptor.constBuffer = constBuffer; + return descriptor; +} // compileComputeShader + + +bool btDX11SIMDAwareSoftBodySolver::buildShaders() +{ + // Ensure current kernels are released first + releaseKernels(); + + bool returnVal = true; + + + if( m_shadersInitialized ) + return true; + + + updatePositionsFromVelocitiesKernel = compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) ); + if( !updatePositionsFromVelocitiesKernel.constBuffer ) + returnVal = false; + + char maxVerticesPerWavefront[20]; + char maxBatchesPerWavefront[20]; + char waveFrontSize[20]; + char waveFrontBlockMultiplier[20]; + char blockSize[20]; + + sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront()); + sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront()); + sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize()); + sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER); + sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize()); + + D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 }; + + 
solvePositionsFromLinksKernel = compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros ); + if( !solvePositionsFromLinksKernel.constBuffer ) + returnVal = false; + + updateVelocitiesFromPositionsWithVelocitiesKernel = compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) ); + if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer ) + returnVal = false; + updateVelocitiesFromPositionsWithoutVelocitiesKernel = compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB)); + if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer ) + returnVal = false; + integrateKernel = compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) ); + if( !integrateKernel.constBuffer ) + returnVal = false; + applyForcesKernel = compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) ); + if( !applyForcesKernel.constBuffer ) + returnVal = false; + + // TODO: Rename to UpdateSoftBodies + resetNormalsAndAreasKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) ); + if( !resetNormalsAndAreasKernel.constBuffer ) + returnVal = false; + normalizeNormalsAndAreasKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) ); + if( !normalizeNormalsAndAreasKernel.constBuffer ) + returnVal = false; + updateSoftBodiesKernel = compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) ); + if( !updateSoftBodiesKernel.constBuffer ) + returnVal = false; + outputToVertexArrayWithNormalsKernel = 
compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithNormalsKernel", sizeof(OutputToVertexArrayCB) ); + if( !outputToVertexArrayWithNormalsKernel.constBuffer ) + returnVal = false; + outputToVertexArrayWithoutNormalsKernel = compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithoutNormalsKernel", sizeof(OutputToVertexArrayCB) ); + if( !outputToVertexArrayWithoutNormalsKernel.constBuffer ) + returnVal = false; + + + if( returnVal ) + m_shadersInitialized = true; + + return returnVal; +} + + + +void btDX11SIMDAwareSoftBodySolver::predictMotion( float timeStep ) +{ + // Fill the force arrays with current acceleration data etc + m_perClothWindVelocity.resize( m_softBodySet.size() ); + for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) + { + btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); + + m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); + } + m_dx11PerClothWindVelocity.changedOnCPU(); + + // Apply forces that we know about to the cloths + applyForces( timeStep * getTimeScale() ); + + // Itegrate motion for all soft bodies dealt with by the solver + integrate( timeStep * getTimeScale() ); + // End prediction work for solvers +} + + + + + + + + + + + + + + + + + + + +static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray > &wavefrontBatches ) +{ + // A per-batch map of truth values stating whether a given vertex is in that batch + // This allows us to significantly optimize the batching + btAlignedObjectArray > mapOfVerticesInBatches; + + for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex ) + { + btAlignedObjectArray &wavefront( linksForWavefronts[waveIndex] ); + + int batch = 0; + bool placed = false; + while( batch < wavefrontBatches.size() && !placed ) 
+ { + // Test the current batch, see if this wave shares any vertex with the waves in the batch + bool foundSharedVertex = false; + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] ) + { + foundSharedVertex = true; + } + } + + if( !foundSharedVertex ) + { + wavefrontBatches[batch].push_back( waveIndex ); + // Insert vertices into this batch too + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + (mapOfVerticesInBatches[batch])[vertices.vertex0] = true; + (mapOfVerticesInBatches[batch])[vertices.vertex1] = true; + } + placed = true; + } + batch++; + } + if( batch == wavefrontBatches.size() && !placed ) + { + wavefrontBatches.resize( batch + 1 ); + wavefrontBatches[batch].push_back( waveIndex ); + + // And resize map as well + mapOfVerticesInBatches.resize( batch + 1 ); + + // Resize maps with total number of vertices + mapOfVerticesInBatches[batch].resize( numVertices, false ); + + // Insert vertices into this batch too + for( int link = 0; link < wavefront.size(); ++link ) + { + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] ); + (mapOfVerticesInBatches[batch])[vertices.vertex0] = true; + (mapOfVerticesInBatches[batch])[vertices.vertex1] = true; + } + } + } + mapOfVerticesInBatches.clear(); +} + +// Function to remove an object from a vector maintaining correct ordering of the vector +template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove ) +{ + int currentSize = vectorToUpdate.size(); + for( int i = indexToRemove; i < (currentSize-1); ++i ) + { + vectorToUpdate[i] = vectorToUpdate[i+1]; + } + if( currentSize > 0 ) + vectorToUpdate.resize( currentSize - 1 ); +} + +/** + * 
Insert element into vectorToUpdate at index index. + */ +template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element ) +{ + vectorToUpdate.resize( vectorToUpdate.size() + 1 ); + for( int i = (vectorToUpdate.size() - 1); i > index; --i ) + { + vectorToUpdate[i] = vectorToUpdate[i-1]; + } + vectorToUpdate[index] = element; +} + +/** + * Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness. + * ie it treats vectorToUpdate as an ordered set. + */ +template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray &vectorToUpdate, T element ) +{ + int index = 0; + while( index < vectorToUpdate.size() && vectorToUpdate[index] < element ) + { + index++; + } + if( index == vectorToUpdate.size() || vectorToUpdate[index] != element ) + insertAtIndex( vectorToUpdate, index, element ); +} + +// Experimental batch generation that we could use in the simulations +// Attempts to generate larger batches that work on a per-wavefront basis +void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray &numLinksPerVertex, int &maxLinks ) +{ + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + numLinksPerVertex[nodes.vertex0]++; + numLinksPerVertex[nodes.vertex1]++; + } + int maxLinksPerVertex = 0; + for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex ) + { + maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex); + } + maxLinks = maxLinksPerVertex; + + btAlignedObjectArray< int > linksFoundPerVertex; + linksFoundPerVertex.resize( numVertices, 0 ); + + listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices ); + + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + 
btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + { + // Do vertex 0 + int vertexIndex = nodes.vertex0; + int linkForVertex = linksFoundPerVertex[nodes.vertex0]; + int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex; + + listOfLinksPerVertex[linkAddress] = linkIndex; + + linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1; + } + { + // Do vertex 1 + int vertexIndex = nodes.vertex1; + int linkForVertex = linksFoundPerVertex[nodes.vertex1]; + int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex; + + listOfLinksPerVertex[linkAddress] = linkIndex; + + linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1; + } + } +} + +static void computeBatchingIntoWavefronts( + btSoftBodyLinkData &linkData, + int wavefrontSize, + int linksPerWorkItem, + int maxLinksPerWavefront, + btAlignedObjectArray < btAlignedObjectArray > &linksForWavefronts, + btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray > > &batchesWithinWaves, /* wave, batch, links in batch */ + btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */ + ) +{ + + + // Attempt generation of larger batches of links. 
+ btAlignedObjectArray< bool > processedLink; + processedLink.resize( linkData.getNumLinks() ); + btAlignedObjectArray< int > listOfLinksPerVertex; + int maxLinksPerVertex = 0; + + // Count num vertices + int numVertices = 0; + for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex ) + { + btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) ); + numVertices = btMax( numVertices, nodes.vertex0 + 1 ); + numVertices = btMax( numVertices, nodes.vertex1 + 1 ); + } + + // Need list of links per vertex + // Compute valence of each vertex + btAlignedObjectArray numLinksPerVertex; + numLinksPerVertex.resize(0); + numLinksPerVertex.resize( numVertices, 0 ); + + generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex ); + + for( int vertex = 0; vertex < 10; ++vertex ) + { + for( int link = 0; link < numLinksPerVertex[vertex]; ++link ) + { + int linkAddress = vertex * maxLinksPerVertex + link; + } + } + + + // At this point we know what links we have for each vertex so we can start batching + + // We want a vertex to start with, let's go with 0 + int currentVertex = 0; + int linksProcessed = 0; + + btAlignedObjectArray verticesToProcess; + + while( linksProcessed < linkData.getNumLinks() ) + { + // Next wavefront + int nextWavefront = linksForWavefronts.size(); + linksForWavefronts.resize( nextWavefront + 1 ); + btAlignedObjectArray &linksForWavefront(linksForWavefronts[nextWavefront]); + verticesForWavefronts.resize( nextWavefront + 1 ); + btAlignedObjectArray &vertexSet( verticesForWavefronts[nextWavefront] ); + + linksForWavefront.resize(0); + + // Loop to find enough links to fill the wavefront + // Stopping if we either run out of links, or fill it + while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront ) + { + // Go through the links for the current vertex + for( int link = 0; link < numLinksPerVertex[currentVertex] && 
linksForWavefront.size() < maxLinksPerWavefront; ++link ) + { + int linkAddress = currentVertex * maxLinksPerVertex + link; + int linkIndex = listOfLinksPerVertex[linkAddress]; + + // If we have not already processed this link, add it to the wavefront + // Claim it as another processed link + // Add the vertex at the far end to the list of vertices to process. + if( !processedLink[linkIndex] ) + { + linksForWavefront.push_back( linkIndex ); + linksProcessed++; + processedLink[linkIndex] = true; + int v0 = linkData.getVertexPair(linkIndex).vertex0; + int v1 = linkData.getVertexPair(linkIndex).vertex1; + if( v0 == currentVertex ) + verticesToProcess.push_back( v1 ); + else + verticesToProcess.push_back( v0 ); + } + } + if( verticesToProcess.size() > 0 ) + { + // Get the element on the front of the queue and remove it + currentVertex = verticesToProcess[0]; + removeFromVector( verticesToProcess, 0 ); + } else { + // If we've not yet processed all the links, find the first unprocessed one + // and select one of its vertices as the current vertex + if( linksProcessed < linkData.getNumLinks() ) + { + int searchLink = 0; + while( processedLink[searchLink] ) + searchLink++; + currentVertex = linkData.getVertexPair(searchLink).vertex0; + } + } + } + + // We have either finished or filled a wavefront + for( int link = 0; link < linksForWavefront.size(); ++link ) + { + int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0; + int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1; + insertUniqueAndOrderedIntoVector( vertexSet, v0 ); + insertUniqueAndOrderedIntoVector( vertexSet, v1 ); + } + // Iterate over links mapped to the wave and batch those + // We can run a batch on each cycle trivially + + batchesWithinWaves.resize( batchesWithinWaves.size() + 1 ); + btAlignedObjectArray < btAlignedObjectArray > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] ); + + + for( int link = 0; link < linksForWavefront.size(); ++link ) + { + int 
linkIndex = linksForWavefront[link]; + btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex ); + + int batch = 0; + bool placed = false; + while( batch < batchesWithinWave.size() && !placed ) + { + bool foundSharedVertex = false; + if( batchesWithinWave[batch].size() >= wavefrontSize ) + { + // If we have already filled this batch, move on to another + foundSharedVertex = true; + } else { + for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 ) + { + btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] ); + + if( vertices.vertex0 == vertices2.vertex0 || + vertices.vertex1 == vertices2.vertex0 || + vertices.vertex0 == vertices2.vertex1 || + vertices.vertex1 == vertices2.vertex1 ) + { + foundSharedVertex = true; + break; + } + } + } + if( !foundSharedVertex ) + { + batchesWithinWave[batch].push_back( linkIndex ); + placed = true; + } else { + ++batch; + } + } + if( batch == batchesWithinWave.size() && !placed ) + { + batchesWithinWave.resize( batch + 1 ); + batchesWithinWave[batch].push_back( linkIndex ); + } + } + + } + +} + +void btSoftBodyLinkDataDX11SIMDAware::generateBatches() +{ + btAlignedObjectArray < btAlignedObjectArray > linksForWavefronts; + btAlignedObjectArray < btAlignedObjectArray > wavefrontBatches; + btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray > > batchesWithinWaves; + btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set + + // Group the links into wavefronts + computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts ); + + + // Batch the wavefronts + generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches ); + + m_numWavefronts = linksForWavefronts.size(); + + // At this point we have a description of which links we need to 
process in each wavefront + + // First correctly fill the batch ranges vector + int numBatches = wavefrontBatches.size(); + m_wavefrontBatchStartLengths.resize(0); + int prefixSum = 0; + for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex ) + { + int wavesInBatch = wavefrontBatches[batchIndex].size(); + int nextPrefixSum = prefixSum + wavesInBatch; + m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) ); + + prefixSum += wavesInBatch; + } + + // Also find max number of batches within a wave + m_maxBatchesWithinWave = 0; + m_maxVerticesWithinWave = 0; + m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts ); + for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex ) + { + // See if the number of batches in this wave is greater than the current maxium + int batchesInCurrentWave = batchesWithinWaves[waveIndex].size(); + int verticesInCurrentWave = verticesForWavefronts[waveIndex].size(); + m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave ); + m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave ); + } + + // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around + m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2); + + // Now we know the maximum number of vertices per-wave we can resize the global vertices array + m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts ); + + // Grab backup copies of all the link data arrays for the sorting process + btAlignedObjectArray m_links_Backup(m_links); + btAlignedObjectArray m_linkStrength_Backup(m_linkStrength); + btAlignedObjectArray m_linksMassLSC_Backup(m_linksMassLSC); + btAlignedObjectArray m_linksRestLengthSquared_Backup(m_linksRestLengthSquared); + //btAlignedObjectArray m_linksCLength_Backup(m_linksCLength); + //btAlignedObjectArray m_linksLengthRatio_Backup(m_linksLengthRatio); + btAlignedObjectArray 
m_linksRestLength_Backup(m_linksRestLength); + btAlignedObjectArray m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient); + + // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses. + m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); + + // Then re-order links into wavefront blocks + + // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts. 
+ int wavefrontCount = 0; + + // Iterate over batches of wavefronts, then wavefronts in the batch + for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex ) + { + btAlignedObjectArray &batch( wavefrontBatches[batchIndex] ); + int wavefrontsInBatch = batch.size(); + + + for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex ) + { + + int originalWavefrontIndex = batch[wavefrontIndex]; + btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] ); + int verticesUsedByWavefront = wavefrontVertices.size(); + + // Copy the set of vertices into the correctly structured array for use on the device + // Fill the non-vertices with -1s + // so we can mask out those reads + for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex ) + { + m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex]; + } + for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex ) + { + m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1; + } + + // Obtain the set of batches within the current wavefront + btAlignedObjectArray < btAlignedObjectArray > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] ); + // Set the size of the batches for use in the solver, correctly ordered + NumBatchesVerticesPair batchesAndVertices; + batchesAndVertices.numBatches = batchesWithinWavefront.size(); + batchesAndVertices.numVertices = verticesUsedByWavefront; + m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices; + + + // Now iterate over batches within the wavefront to structure the links correctly + for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch ) + { + btAlignedObjectArray &linksInBatch( batchesWithinWavefront[wavefrontBatch] ); + int wavefrontBatchSize = linksInBatch.size(); + + int batchAddressInTarget = m_maxBatchesWithinWave * 
m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch; + + for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex ) + { + int originalLinkAddress = linksInBatch[linkIndex]; + // Reorder simple arrays trivially + m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress]; + m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress]; + m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress]; + m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress]; + m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress]; + m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress]; + + // The local address is more complicated. We need to work out where a given vertex will end up + // by searching the set of vertices for this link and using the index as the local address + btSoftBodyLinkData::LinkNodePair localPair; + btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex]; + localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 ); + localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 ); + m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair; + } + for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex ) + { + // Put 0s into these arrays for padding for cleanliness + m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0); + m_linkStrength[batchAddressInTarget + linkIndex] = 0.f; + m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f; + m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f; + m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f; + m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget 
+ linkIndex] = 0.f; + + + // For local addresses of junk data choose a set of addresses just above the range of valid ones + // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses + // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work + // off the end of that range so we need no control + btSoftBodyLinkData::LinkNodePair localPair; + localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16); + localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16); + m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair; + } + + } + + + wavefrontCount++; + } + + + } + +} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches() diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h new file mode 100644 index 000000000..ceac535e2 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h @@ -0,0 +1,432 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#include "vectormath/vmInclude.h" +#include "BulletSoftBody/btSoftBodySolvers.h" +#include "btSoftBodySolverVertexBuffer_DX11.h" +#include "btSoftBodySolverLinkData_DX11SIMDAware.h" +#include "btSoftBodySolverVertexData_DX11.h" +#include "btSoftBodySolverTriangleData_DX11.h" + + +#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H +#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H + +class btDX11SIMDAwareSoftBodySolver : public btSoftBodySolver +{ +public: + + /** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. + */ + class btAcceleratedSoftBodyInterface + { + protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + + public: + btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + 
int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { + return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + + void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + + }; + + + class KernelDesc + { + protected: + + + public: + ID3D11ComputeShader* kernel; + ID3D11Buffer* constBuffer; + + KernelDesc() + { + kernel = 0; + constBuffer = 0; + } + + virtual ~KernelDesc() + { + // TODO: this should probably destroy its kernel but we need to be careful + // in case KernelDescs are copied + } + }; + + struct SolvePositionsFromLinksKernelCB + { + int startWave; + int numWaves; + float kst; + float ti; + }; + + struct IntegrateCB + { + int numNodes; + float solverdt; + int padding1; + int padding2; + }; + + struct UpdatePositionsFromVelocitiesCB + { + int numNodes; + float solverSDT; + int padding1; + int padding2; + }; + + struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB + { + int numNodes; + float isolverdt; + int padding1; + int padding2; + }; + + struct 
UpdateVelocitiesFromPositionsWithVelocitiesCB + { + int numNodes; + float isolverdt; + int padding1; + int padding2; + }; + + struct UpdateSoftBodiesCB + { + int numNodes; + int startFace; + int numFaces; + float epsilon; + }; + + + struct OutputToVertexArrayCB + { + int startNode; + int numNodes; + int positionOffset; + int positionStride; + + int normalOffset; + int normalStride; + int padding1; + int padding2; + }; + + + struct ApplyForcesCB + { + unsigned int numNodes; + float solverdt; + float epsilon; + int padding3; + }; + + struct AddVelocityCB + { + int startNode; + int lastNode; + float velocityX; + float velocityY; + float velocityZ; + int padding1; + int padding2; + int padding3; + }; + + +private: + ID3D11Device * m_dx11Device; + ID3D11DeviceContext* m_dx11Context; + + + /** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */ + btSoftBodyLinkDataDX11SIMDAware m_linkData; + btSoftBodyVertexDataDX11 m_vertexData; + btSoftBodyTriangleDataDX11 m_triangleData; + + /** Variable to define whether we need to update solver constants on the next iteration */ + bool m_updateSolverConstants; + + bool m_shadersInitialized; + + /** + * Cloths owned by this solver. + * Only our cloths are in this array. + */ + btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + + /** Acceleration value to be applied to all non-static vertices in the solver. + * Index n is cloth n, array sized by number of cloths in the world not the solver. + */ + btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration; + btDX11Buffer m_dx11PerClothAcceleration; + + /** Wind velocity to be applied normal to all non-static vertices in the solver. + * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+ */ + btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity; + btDX11Buffer m_dx11PerClothWindVelocity; + + /** Velocity damping factor */ + btAlignedObjectArray< float > m_perClothDampingFactor; + btDX11Buffer m_dx11PerClothDampingFactor; + + /** Velocity correction coefficient */ + btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient; + btDX11Buffer m_dx11PerClothVelocityCorrectionCoefficient; + + /** Lift parameter for wind effect on cloth. */ + btAlignedObjectArray< float > m_perClothLiftFactor; + btDX11Buffer m_dx11PerClothLiftFactor; + + /** Drag parameter for wind effect on cloth. */ + btAlignedObjectArray< float > m_perClothDragFactor; + btDX11Buffer m_dx11PerClothDragFactor; + + /** Density of the medium in which each cloth sits */ + btAlignedObjectArray< float > m_perClothMediumDensity; + btDX11Buffer m_dx11PerClothMediumDensity; + + KernelDesc solvePositionsFromLinksKernel; + KernelDesc integrateKernel; + KernelDesc addVelocityKernel; + KernelDesc updatePositionsFromVelocitiesKernel; + KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel; + KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel; + KernelDesc resetNormalsAndAreasKernel; + KernelDesc normalizeNormalsAndAreasKernel; + KernelDesc updateSoftBodiesKernel; + KernelDesc outputToVertexArrayWithNormalsKernel; + KernelDesc outputToVertexArrayWithoutNormalsKernel; + + KernelDesc outputToVertexArrayKernel; + KernelDesc applyForcesKernel; + KernelDesc collideSphereKernel; + KernelDesc collideCylinderKernel; + + + + /** + * Integrate motion on the solver. + */ + virtual void integrate( float solverdt ); + float computeTriangleArea( + const Vectormath::Aos::Point3 &vertex0, + const Vectormath::Aos::Point3 &vertex1, + const Vectormath::Aos::Point3 &vertex2 ); + + + /** + * Compile a compute shader kernel from a string and return the appropriate KernelDesc object. 
+ */ + KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 ); + + bool buildShaders(); + + void resetNormalsAndAreas( int numVertices ); + + void normalizeNormalsAndAreas( int numVertices ); + + void executeUpdateSoftBodies( int firstTriangle, int numTriangles ); + + Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ); + + void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); + + virtual void applyForces( float solverdt ); + + void updateConstants( float timeStep ); + + btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + + ////////////////////////////////////// + // Kernel dispatches + void prepareLinks(); + + void updatePositionsFromVelocities( float solverdt ); + void solveLinksForPosition( int startLink, int numLinks, float kst, float ti ); + void solveLinksForVelocity( int startLink, int numLinks, float kst ); + + void updateVelocitiesFromPositionsWithVelocities( float isolverdt ); + void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ); + + // End kernel dispatches + ///////////////////////////////////// + + void releaseKernels(); + + +public: + btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context); + + virtual ~btDX11SIMDAwareSoftBodySolver(); + + + + virtual btSoftBodyLinkData &getLinkData(); + + virtual btSoftBodyVertexData &getVertexData(); + + virtual btSoftBodyTriangleData &getTriangleData(); + + + + virtual bool checkInitialized(); + + virtual void updateSoftBodies( ); + + virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies ); + + virtual void solveConstraints( float solverdt ); + + virtual void predictMotion( float solverdt ); + + virtual void 
copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer ); +}; + +#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt new file mode 100644 index 000000000..d2ef78f69 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt @@ -0,0 +1,82 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + +ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) + + +IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) +ELSE() + INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include ) +ENDIF() + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers
+# which are needed by some of the sources
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl")
+endforeach(f)
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_AMD
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	# The first argument must be the library created above; BulletSoftBody and BulletDynamics are what it links against.
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt new file mode 100644 index 000000000..65bbea43a --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt @@ -0,0 +1,73 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers
+# which are needed by some of the sources
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f)
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	# The first argument must be the library created above; BulletSoftBody and BulletDynamics are what it links against.
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION .)
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt index 0c63b945a..36b173cf8 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt @@ -1,71 +1,16 @@ -INCLUDE_DIRECTORIES( -${BULLET_PHYSICS_SOURCE_DIR}/src -) +IF(BUILD_MINICL_OPENCL_DEMOS) + SUBDIRS( MiniCL ) +ENDIF() +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() -SET(OPENCL_DIR $ENV{ATISTREAMSDKROOT}) -SET(OPENCL_INCLUDE_PATH "${ATISTREAMSDKROOT}/include" CACHE DOCSTRING "OpenCL SDK include path") +IF(BUILD_NVIDIA_OPENCL_DEMOS) + SUBDIRS(NVidia) +ENDIF() -INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_PATH} "../cpu/") - -SET(BulletSoftBodyOpenCLSolvers_SRCS - btSoftBodySolver_OpenCL.cpp -) - -SET(BulletSoftBodyOpenCLSolvers_HDRS - btSoftBodySolver_OpenCL.h - ../cpu/btSoftBodySolverData.h - btSoftBodySolverVertexData_OpenCL.h - btSoftBodySolverTriangleData_OpenCL.h - btSoftBodySolverLinkData_OpenCL.h - btSoftBodySolverBuffer_OpenCL.h -) - -# OpenCL and HLSL Shaders. 
-# Build rules generated to stringify these into headers -# which are needed by some of the sources -SET(BulletSoftBodyOpenCLSolvers_Shaders -# OutputToVertexArray - UpdateNormals - Integrate - UpdatePositions - UpdateNodes - SolvePositions - UpdatePositionsFromVelocities - ApplyForces - PrepareLinks - VSolveLinks -) - -foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders}) - LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "OpenCLC/${f}.cl") -endforeach(f) - - - -ADD_LIBRARY(BulletSoftBodySolvers_OpenCL ${BulletSoftBodyOpenCLSolvers_SRCS} ${BulletSoftBodyOpenCLSolvers_HDRS} ${BulletSoftBodyOpenCLSolvers_OpenCLC}) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES VERSION ${BULLET_VERSION}) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES SOVERSION ${BULLET_VERSION}) -IF (BUILD_SHARED_LIBS) - TARGET_LINK_LIBRARIES(BulletSoftBody BulletDynamics) -ENDIF (BUILD_SHARED_LIBS) - - -IF (INSTALL_LIBS) - IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) - IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION .) 
- ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_OpenCL DESTINATION lib${LIB_SUFFIX}) - INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES FRAMEWORK true) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) -ENDIF (INSTALL_LIBS) +IF(APPLE) + SUBDIRS(Apple) +ENDIF() diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt new file mode 100644 index 000000000..e9f86c2c9 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt @@ -0,0 +1,75 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + +ADD_DEFINITIONS(-DUSE_MINICL) + + + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers
+# which are needed by some of the sources
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f)
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Mini
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	# The first argument must be the library created above; BulletSoftBody and BulletDynamics are what it links against.
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Mini BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
+ ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX}) + INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) + + IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true) + SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}") + ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) +ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp new file mode 100644 index 000000000..79b0ac234 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp @@ -0,0 +1,40 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. 
This notice may not be removed or altered from any source distribution. +*/ + +#include + +#define MSTRINGIFY(A) A +#include "../OpenCLC10/ApplyForces.cl" +#include "../OpenCLC10/Integrate.cl" +#include "../OpenCLC10/PrepareLinks.cl" +#include "../OpenCLC10/SolvePositions.cl" +#include "../OpenCLC10/UpdateNodes.cl" +#include "../OpenCLC10/UpdateNormals.cl" +#include "../OpenCLC10/UpdatePositions.cl" +#include "../OpenCLC10/UpdatePositionsFromVelocities.cl" +//#include "../OpenCLC10/VSolveLinks.cl" + +MINICL_REGISTER(PrepareLinksKernel) +MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel) +MINICL_REGISTER(SolvePositionsFromLinksKernel) +MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel) +MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel) +MINICL_REGISTER(IntegrateKernel) +MINICL_REGISTER(ApplyForcesKernel) +MINICL_REGISTER(ResetNormalsAndAreasKernel) +MINICL_REGISTER(NormalizeNormalsAndAreasKernel) +MINICL_REGISTER(UpdateSoftBodiesKernel) + + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt new file mode 100644 index 000000000..7608492b7 --- /dev/null +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt @@ -0,0 +1,79 @@ + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +) + + + +IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc ) +ELSE() + INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc ) +ENDIF() + + +SET(BulletSoftBodyOpenCLSolvers_SRCS + ../btSoftBodySolver_OpenCL.cpp +) + +SET(BulletSoftBodyOpenCLSolvers_HDRS + ../btSoftBodySolver_OpenCL.h + ../../CPU/btSoftBodySolverData.h + ../btSoftBodySolverVertexData_OpenCL.h + ../btSoftBodySolverTriangleData_OpenCL.h + ../btSoftBodySolverLinkData_OpenCL.h + ../btSoftBodySolverBuffer_OpenCL.h +) + +# OpenCL and HLSL Shaders. 
+# Build rules generated to stringify these into headers
+# which are needed by some of the sources
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC/${f}.cl")
+endforeach(f)
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	# The first argument must be the library created above; BulletSoftBody and BulletDynamics are what it links against.
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
new file mode 100644
index 000000000..555d07a1d
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
@@ -0,0 +1,91 @@
+MSTRINGIFY(
+
+// Dot product over the x y z components only; the w component is ignored
+float adot3(float4 a, float4 b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+// Returns axis a scaled by the 3-component dot product of v and a
+float4 projectOnAxis( float4 v, float4 a )
+{
+	return (a*adot3(v, a));
+}
+// One work-item per node: adds the per-cloth acceleration to the node velocity and accumulates wind lift and drag into the node force
+__kernel void 
+ApplyForcesKernel( 
+	const uint numNodes,
+	const float solverdt,
+	const float epsilon,
+	__global int * g_vertexClothIdentifier,
+	__global float4 * g_vertexNormal,
+	__global float * g_vertexArea,
+	__global float * g_vertexInverseMass,
+	__global float * g_clothLiftFactor,
+	__global float * g_clothDragFactor,
+	__global float4 * g_clothWindVelocity,
+	__global float4 * g_clothAcceleration,
+	__global float * g_clothMediumDensity,
+	__global float4 * g_vertexForceAccumulator,
+	__global float4 * g_vertexVelocity GUID_ARG)
+{
+	unsigned int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{		
+		int clothId = g_vertexClothIdentifier[nodeID];
+		float nodeIM = g_vertexInverseMass[nodeID];
+		// Only nodes with a positive inverse mass are processed (presumably the others are pinned; confirm with the host code)
+		if( nodeIM > 0.0f )
+		{
+			float4 nodeV = g_vertexVelocity[nodeID];
+			float4 normal = g_vertexNormal[nodeID];
+			float area = g_vertexArea[nodeID];
+			float4 nodeF = g_vertexForceAccumulator[nodeID];
+			
+			// Read per-cloth values
+			float4 clothAcceleration = g_clothAcceleration[clothId];
+			float4 clothWindVelocity = g_clothWindVelocity[clothId];
+			float liftFactor = g_clothLiftFactor[clothId];
+			float dragFactor = g_clothDragFactor[clothId];
+			float mediumDensity = g_clothMediumDensity[clothId];
+		
+			// Apply the acceleration to the cloth rather than do this via a force
+			nodeV += (clothAcceleration*solverdt);
+			// The accelerated velocity is stored before any wind force is evaluated
+			g_vertexVelocity[nodeID] = nodeV;
+			
+			float4 relativeWindVelocity = nodeV - clothWindVelocity;
+			float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
+			// Wind forces are skipped unless the squared relative speed exceeds epsilon
+			if( relativeSpeedSquared > epsilon )
+			{
+				// Correct direction of normal relative to wind direction and get dot product
+				normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
+				float dvNormal = dot(normal, relativeWindVelocity);
+				if( dvNormal > 0 )
+				{
+					float4 force = (float4)(0.f, 0.f, 0.f, 0.f);
+					float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
+					float c1 = c0 * mediumDensity;
+					force += normal * (-c1 * liftFactor);
+					force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
+					
+					float dtim = solverdt * nodeIM;
+					float4 forceDTIM = force * dtim;
+					
+					float4 nodeFPlusForce = nodeF + force;
+					
+					// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
+					float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
+					// nodeFMinus replaces the plain sum when force*dtim is larger in magnitude than the node velocity
+					nodeF = nodeFPlusForce;
+					if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
+						nodeF = nodeFMinus;
+					
+					g_vertexForceAccumulator[nodeID] = nodeF;
+				}
+			}
+		}
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
new file mode 100644
index 000000000..fb65330d9
--- /dev/null
+++ 
b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
@@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// One work-item per node: saves the current position as the previous position
+// then advances the velocity by force * inverse mass * dt and the position by
+// the new velocity * dt. The force accumulator of the node is reset to zero.
+
+__kernel void 
+IntegrateKernel( 
+	const int numNodes,
+	const float solverdt,
+	__global float * g_vertexInverseMasses,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexVelocity,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexForceAccumulator GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	// Work-items beyond the node count have nothing to do
+	if( nodeID >= numNodes )
+		return;
+
+	float invMass = g_vertexInverseMasses[nodeID];
+	float4 accumulatedForce = g_vertexForceAccumulator[nodeID];
+	float4 oldPosition = g_vertexPositions[nodeID];
+	float4 newVelocity = g_vertexVelocity[nodeID];
+
+	newVelocity += accumulatedForce * invMass * solverdt;
+	float4 newPosition = oldPosition + newVelocity * solverdt;
+
+	g_vertexPreviousPositions[nodeID] = oldPosition;
+	g_vertexPositions[nodeID] = newPosition;
+	g_vertexVelocity[nodeID] = newVelocity;
+	g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
+}
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
new file mode 100644
index 000000000..ba3277667
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
@@ -0,0 +1,41 @@
+MSTRINGIFY(
+
+float dot3(float4 a, float4 b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+
+__kernel void 
+PrepareLinksKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float4 * g_nodesPreviousPosition,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength GUID_ARG)
+{
+	int linkID = get_global_id(0);
+	if( linkID < numLinks )
+	{	
+		
+		int2 nodeIndices = g_linksVertexIndices[linkID];
+		int node0 = nodeIndices.x;
+		int node1 = nodeIndices.y;
+		
+		float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
+		float4 nodePreviousPosition1 = 
g_nodesPreviousPosition[node1];
+
+		float massLSC = g_linksMassLSC[linkID];
+		
+		float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
+		// Reciprocal of (squared link vector length * massLSC); there is no guard against a zero product
+		float linkLengthRatio = dot3(linkCurrentLength, linkCurrentLength)*massLSC;
+		linkLengthRatio = 1.0f/linkLengthRatio;
+		
+		g_linksCurrentLength[linkID] = linkCurrentLength;
+		g_linksLengthRatio[linkID] = linkLengthRatio;		
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
new file mode 100644
index 000000000..fe7aec66e
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
@@ -0,0 +1,57 @@
+
+
+
+MSTRINGIFY(
+
+// Dot product over the x y z components only; the w component is ignored
+float mydot3(float4 a, float4 b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+// One work-item per link in the range starting at startLink: shifts both end positions along the link vector. The ti argument is not referenced.
+__kernel void 
+SolvePositionsFromLinksKernel( 
+	const int startLink,
+	const int numLinks,
+	const float kst,
+	const float ti,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_verticesInverseMass,
+	__global float4 * g_vertexPositions GUID_ARG)
+	
+{
+	int linkID = get_global_id(0) + startLink;
+	if( get_global_id(0) < numLinks )
+	{	
+		float massLSC = g_linksMassLSC[linkID];
+		float restLengthSquared = g_linksRestLengthSquared[linkID];
+		// Links whose massLSC is not positive are left untouched
+		if( massLSC > 0.0f )
+		{
+			int2 nodeIndices = g_linksVertexIndices[linkID];
+			int node0 = nodeIndices.x;
+			int node1 = nodeIndices.y;
+
+			float4 position0 = g_vertexPositions[node0];
+			float4 position1 = g_vertexPositions[node1];
+
+			float inverseMass0 = g_verticesInverseMass[node0];
+			float inverseMass1 = g_verticesInverseMass[node1]; 
+			// len holds the squared length of the link vector; no square root is taken
+			float4 del = position1 - position0;
+			float len = mydot3(del, del);
+			float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+			position0 = position0 - del*(k*inverseMass0);
+			position1 = position1 + del*(k*inverseMass1);
+
+			
g_vertexPositions[node0] = position0;
+			g_vertexPositions[node1] = position1;
+
+		}
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
new file mode 100644
index 000000000..488a58479
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+// OpenCL C 1.0 has no float3 type so positions are kept as float4 and only
+// the first three components enter the squared length
+float updateConstantsDot3(float4 a, float4 b)
+{
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+// One work-item per link: derives the rest length and the mass/stiffness constant of the link from the vertex data
+__kernel void 
+UpdateConstantsKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float4 * g_vertexPositions,
+	__global float * g_vertexInverseMasses,
+	__global float * g_linksMaterialLSC,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_linksRestLengths GUID_ARG)
+{
+	int linkID = get_global_id(0);
+	if( linkID < numLinks )
+	{	
+		int2 nodeIndices = g_linksVertexIndices[linkID];
+		int node0 = nodeIndices.x;
+		int node1 = nodeIndices.y;
+		float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
+		// Full float4 loads; the w component never takes part in the length
+		float4 position0 = g_vertexPositions[node0];
+		float4 position1 = g_vertexPositions[node1];
+		float inverseMass0 = g_vertexInverseMasses[node0];
+		float inverseMass1 = g_vertexInverseMasses[node1];
+		// Current distance between the two end nodes becomes the rest length
+		float4 difference = position0 - position1;
+		float length2 = updateConstantsDot3(difference, difference);
+		float length = sqrt(length2);
+		// No guard here against a zero stiffness coefficient
+		g_linksRestLengths[linkID] = length;
+		g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
+		g_linksRestLengthSquared[linkID] = length*length;
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
new file mode 100644
index 000000000..9ad227b45
--- /dev/null
+++ 
b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
@@ -0,0 +1,39 @@
+MSTRINGIFY(
+
+
+__kernel void 
+updateVelocitiesFromPositionsWithVelocitiesKernel( 
+	int numNodes,
+	float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float *g_clothVelocityCorrectionCoefficients,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{
+		float4 position = g_vertexPositions[nodeID];
+		float4 previousPosition = g_vertexPreviousPositions[nodeID];
+		float4 velocity = g_vertexVelocities[nodeID];
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		float velocityCoefficient = (1.f - dampingFactor);
+
+		float4 difference = position - previousPosition;
+
+		velocity += difference*velocityCorrectionCoefficient*isolverdt;
+
+		// Damp the velocity
+		velocity *= velocityCoefficient;
+
+		g_vertexVelocities[nodeID] = velocity;
+		g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
new file mode 100644
index 000000000..7bb233413
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
@@ -0,0 +1,125 @@
+MSTRINGIFY(
+
+/* Length of the xyz part of a; the w component is ignored. */
+float length3(float4 a)
+{
+	a.w = 0;
+	return length(a);
+}
+
+/* a with its w component cleared, passed through normalize(). */
+float4 normalize3(float4 a)
+{
+	a.w = 0;
+	return normalize(a);
+}
+
+/* Clears the per-vertex normal and area accumulators. */
+__kernel void 
+ResetNormalsAndAreasKernel(
+	const unsigned int numNodes,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	if( get_global_id(0) < numNodes )
+	{
+		g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+		g_vertexArea[get_global_id(0)] = 0.0f;
+	}
+}
+
+
+/*
+ * For every face of the batch, adds the cross product of two face edges to
+ * the normal accumulator of its three vertices and the length of that cross
+ * product to their area accumulator; the per-face normal (normalised) and
+ * length are stored as well. The vertex updates are plain read-modify-write.
+ */
+__kernel void 
+UpdateSoftBodiesKernel(
+	const unsigned int startFace,
+	const unsigned int numFaces,
+	__global int4 * g_triangleVertexIndexSet,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea,
+	__global float4 * g_triangleNormals,
+	__global float * g_triangleArea GUID_ARG)
+{
+	int faceID = get_global_id(0) + startFace;
+	if( get_global_id(0) < numFaces )
+	{
+		int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
+		int nodeIndex0 = triangleIndexSet.x;
+		int nodeIndex1 = triangleIndexSet.y;
+		int nodeIndex2 = triangleIndexSet.z;
+
+		float4 node0 = g_vertexPositions[nodeIndex0];
+		float4 node1 = g_vertexPositions[nodeIndex1];
+		float4 node2 = g_vertexPositions[nodeIndex2];
+		float4 nodeNormal0 = g_vertexNormals[nodeIndex0];
+		float4 nodeNormal1 = g_vertexNormals[nodeIndex1];
+		float4 nodeNormal2 = g_vertexNormals[nodeIndex2];
+		float vertexArea0 = g_vertexArea[nodeIndex0];
+		float vertexArea1 = g_vertexArea[nodeIndex1];
+		float vertexArea2 = g_vertexArea[nodeIndex2];
+
+		float4 vector0 = node1 - node0;
+		float4 vector1 = node2 - node0;
+
+		float4 faceNormal = cross(vector0, vector1);
+		float triangleArea = length(faceNormal);
+
+		nodeNormal0 = nodeNormal0 + faceNormal;
+		nodeNormal1 = nodeNormal1 + faceNormal;
+		nodeNormal2 = nodeNormal2 + faceNormal;
+		vertexArea0 = vertexArea0 + triangleArea;
+		vertexArea1 = vertexArea1 + triangleArea;
+		vertexArea2 = vertexArea2 + triangleArea;
+
+		g_triangleNormals[faceID] = normalize3(faceNormal);
+		g_vertexNormals[nodeIndex0] = nodeNormal0;
+		g_vertexNormals[nodeIndex1] = nodeNormal1;
+		g_vertexNormals[nodeIndex2] = nodeNormal2;
+		g_triangleArea[faceID] = triangleArea;
+		g_vertexArea[nodeIndex0] = vertexArea0;
+		g_vertexArea[nodeIndex1] = vertexArea1;
+		g_vertexArea[nodeIndex2] = vertexArea2;
+	}
+}
+
+/*
+ * Turns the per-vertex accumulators into final values: the summed normal is
+ * normalised and the summed area is divided by the number of triangles that
+ * reference the vertex. Vertices referenced by no triangle get an area of 0
+ * rather than the result of a division by zero.
+ */
+__kernel void 
+NormalizeNormalsAndAreasKernel(
+	const unsigned int numNodes,
+	__global int * g_vertexTriangleCount,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	if( get_global_id(0) < numNodes )
+	{
+		float4 normal = g_vertexNormals[get_global_id(0)];
+		float area = g_vertexArea[get_global_id(0)];
+		int numTriangles = g_vertexTriangleCount[get_global_id(0)];
+
+		g_vertexNormals[get_global_id(0)] = normalize3(normal);
+
+		// Average the accumulated area over the adjacent triangles. A vertex that
+		// belongs to no triangle would otherwise divide by zero, so it gets area 0.
+		if( numTriangles > 0 )
+		{
+			g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
+		}
+		else
+		{
+			g_vertexArea[get_global_id(0)] = 0.0f;
+		}
+	}
+}
+
+);
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
new file mode 100644
index 000000000..3155a04e4
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
@@ -0,0 +1,34 @@
+MSTRINGIFY(
+
+__kernel void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+
+{
+	int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{
+		float4 position = g_vertexPositions[nodeID];
+		float4 previousPosition = g_vertexPreviousPositions[nodeID];
+		float4 velocity = g_vertexVelocities[nodeID];
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		float velocityCoefficient = (1.f - dampingFactor);
+
+		float4 difference = position - previousPosition;
+
+		velocity = difference*velocityCoefficient*isolverdt;
+
+		g_vertexVelocities[nodeID] = velocity;
+		g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
+	}
+}
+
+);
\ No newline at end of file
diff --git
a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
new file mode 100644
index 000000000..97e708bc3
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
@@ -0,0 +1,28 @@
+
+MSTRINGIFY(
+
+
+
+
+__kernel void 
+UpdatePositionsFromVelocitiesKernel( 
+	const int numNodes,
+	const float solverSDT,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexCurrentPosition GUID_ARG)
+{
+	int vertexID = get_global_id(0);
+	if( vertexID < numNodes )
+	{
+		float4 previousPosition = g_vertexPreviousPositions[vertexID];
+		float4 velocity = g_vertexVelocities[vertexID];
+
+		float4 newPosition = previousPosition + velocity*solverSDT;
+
+		g_vertexCurrentPosition[vertexID] = newPosition;
+		g_vertexPreviousPositions[vertexID] = newPosition;
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
new file mode 100644
index 000000000..a618d69cc
--- /dev/null
+++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
@@ -0,0 +1,60 @@
+MSTRINGIFY(
+
+/*
+ * Velocity-level link solve for one batch of links. Each link applies an
+ * impulse along its stored link vector, proportional to the relative velocity
+ * of its two end vertices projected on that vector and scaled by
+ * linkLengthRatio and kst, weighted per vertex by its inverse mass.
+ *
+ * OpenCL 1.0 flavour: float3 and vector constructors taking a float3 only
+ * exist from OpenCL 1.1 on, so everything is kept in float4 and the
+ * 3-component dot product is spelled out by hand.
+ */
+__kernel void 
+VSolveLinksKernel( 
+	int startLink,
+	int numLinks,
+	float kst,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength,
+	__global float * g_vertexInverseMass,
+	__global float4 * g_vertexVelocity GUID_ARG)
+{
+	int linkID = get_global_id(0) + startLink;
+	if( get_global_id(0) < numLinks )
+	{
+		int2 nodeIndices = g_linksVertexIndices[linkID];
+		int node0 = nodeIndices.x;
+		int node1 = nodeIndices.y;
+
+		float linkLengthRatio = g_linksLengthRatio[linkID];
+		float4 linkCurrentLength = g_linksCurrentLength[linkID];
+
+		float4 vertexVelocity0 = g_vertexVelocity[node0];
+		float4 vertexVelocity1 = g_vertexVelocity[node1];
+
+		float vertexInverseMass0 = g_vertexInverseMass[node0];
+		float vertexInverseMass1 = g_vertexInverseMass[node1];
+
+		float4 nodeDifference = vertexVelocity0 - vertexVelocity1;
+		// 3-component dot product; the w lanes do not take part
+		float dotResult = linkCurrentLength.x*nodeDifference.x + linkCurrentLength.y*nodeDifference.y + linkCurrentLength.z*nodeDifference.z;
+		float j = -dotResult*linkLengthRatio*kst;
+
+		float4 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
+		float4 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
+
+		vertexVelocity0 += velocityChange0;
+		vertexVelocity1 -= velocityChange1;
+
+		// Velocities are stored with a zero w component
+		vertexVelocity0.w = 0.f;
+		vertexVelocity1.w = 0.f;
+
+		g_vertexVelocity[node0] = vertexVelocity0;
+		g_vertexVelocity[node1] = vertexVelocity1;
+	}
+}
+
+);
\ No newline at end of file
diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h index e71ae8778..8fa58cd16 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h @@ -17,7 +17,16 @@ subject to the following restrictions: #define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H // OpenCL support -#include + +#ifdef USE_MINICL + #include "MiniCL/cl.h" +#else //USE_MINICL + #ifdef __APPLE__ + #include + #else + #include + #endif //__APPLE__ +#endif//USE_MINICL #ifndef SAFE_RELEASE #define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } @@ -25,22 +34,25 @@ subject to the following restrictions: template class btOpenCLBuffer { -protected: - cl::CommandQueue m_queue; - btAlignedObjectArray< ElementType > * m_CPUBuffer; - cl::Buffer m_buffer; +public: + cl_command_queue m_cqCommandQue; + cl_context m_clContext; + cl_mem m_buffer; + + + + btAlignedObjectArray< ElementType > * m_CPUBuffer; + int m_gpuSize; bool m_onGPU; - bool m_readOnlyOnGPU; - bool m_allocated; - // TODO: Remove this once C++ bindings are fixed -
cl::Context context; - bool createBuffer( cl::Buffer *preexistingBuffer = 0) + + bool createBuffer( cl_mem* preexistingBuffer = 0) { + cl_int err; @@ -49,12 +61,11 @@ protected: m_buffer = *preexistingBuffer; } else { - m_buffer = cl::Buffer( - context, - m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE, - m_CPUBuffer->size() * sizeof(ElementType), - 0, - &err); + + cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE; + + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err); if( err != CL_SUCCESS ) { btAssert( "Buffer::Buffer(m_buffer)"); @@ -62,35 +73,31 @@ protected: } m_gpuSize = m_CPUBuffer->size(); + return true; } public: - btOpenCLBuffer( - cl::CommandQueue queue, - btAlignedObjectArray< ElementType > *CPUBuffer, - bool readOnly) : - m_queue(queue), + btOpenCLBuffer( cl_command_queue commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly) + :m_cqCommandQue(commandQue), + m_clContext(ctx), m_CPUBuffer(CPUBuffer), m_gpuSize(0), m_onGPU(false), m_readOnlyOnGPU(readOnly), m_allocated(false) { - context = m_queue.getInfo(); } ~btOpenCLBuffer() { } - cl::Buffer getBuffer() - { - return m_buffer; - } bool moveToGPU() { + + cl_int err; if( (m_CPUBuffer->size() != m_gpuSize) ) @@ -107,12 +114,12 @@ public: m_allocated = true; } - err = m_queue.enqueueWriteBuffer( - m_buffer, + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer, CL_FALSE, 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { btAssert( "CommandQueue::enqueueWriteBuffer(m_buffer)" ); @@ -122,20 +129,23 @@ public: } return true; + } bool moveFromGPU() { + cl_int err; if (m_CPUBuffer->size() > 0) { if (m_onGPU && !m_readOnlyOnGPU) { - err = m_queue.enqueueReadBuffer( + size_t size = m_CPUBuffer->size() * sizeof(ElementType); + err = 
clEnqueueReadBuffer(m_cqCommandQue, m_buffer, CL_TRUE, 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { @@ -151,16 +161,17 @@ public: bool copyFromGPU() { + cl_int err; + size_t size = m_CPUBuffer->size() * sizeof(ElementType); if (m_CPUBuffer->size() > 0) { if (m_onGPU && !m_readOnlyOnGPU) { - err = m_queue.enqueueReadBuffer( + err = clEnqueueReadBuffer(m_cqCommandQue, m_buffer, CL_TRUE, - 0, - m_CPUBuffer->size() * sizeof(ElementType), - &((*m_CPUBuffer)[0])); + 0,size, + &((*m_CPUBuffer)[0]),0,0,0); if( err != CL_SUCCESS ) { diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h index 6c270c5b5..cef924f6f 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h @@ -13,8 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. 
*/ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H @@ -25,7 +25,9 @@ class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData { public: bool m_onGPU; - cl::CommandQueue m_queue; + + cl_command_queue m_cqCommandQue; + btOpenCLBuffer m_clLinks; btOpenCLBuffer m_clLinkStrength; @@ -36,6 +38,24 @@ public: btOpenCLBuffer m_clLinksRestLength; btOpenCLBuffer m_clLinksMaterialLinearStiffnessCoefficient; + struct BatchPair + { + int start; + int length; + + BatchPair() : + start(0), + length(0) + { + } + + BatchPair( int s, int l ) : + start( s ), + length( l ) + { + } + }; + /** * Link addressing information for each cloth. * Allows link locations to be computed independently of data batching. @@ -45,9 +65,9 @@ public: /** * Start and length values for computation batches over link data. 
*/ - btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; + btAlignedObjectArray< BatchPair > m_batchStartLengths; - btSoftBodyLinkDataOpenCL(cl::CommandQueue queue); + btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx); virtual ~btSoftBodyLinkDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h index 4bc9215ea..e1094e38a 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h @@ -14,8 +14,8 @@ subject to the following restrictions: */ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H @@ -26,7 +26,7 @@ class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData { public: bool m_onGPU; - cl::CommandQueue m_queue; + cl_command_queue m_queue; btOpenCLBuffer m_clVertexIndices; btOpenCLBuffer m_clArea; @@ -41,10 +41,20 @@ public: /** * Start and length values for computation batches over link data. 
*/ - btAlignedObjectArray< std::pair< int, int > > m_batchStartLengths; + struct btSomePair + { + btSomePair() {} + btSomePair(int f,int s) + :first(f),second(s) + { + } + int first; + int second; + }; + btAlignedObjectArray< btSomePair > m_batchStartLengths; public: - btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ); + btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx ); virtual ~btSoftBodyTriangleDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h index 8f65c9de4..24997e726 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h @@ -13,8 +13,8 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletSoftBody/Solvers/CPU/btSoftBodySolverData.h" -#include "BulletSoftBody/Solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H #define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H @@ -24,7 +24,7 @@ class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData { protected: bool m_onGPU; - cl::CommandQueue m_queue; + cl_command_queue m_queue; public: btOpenCLBuffer m_clClothIdentifier; @@ -37,7 +37,7 @@ public: btOpenCLBuffer m_clVertexArea; btOpenCLBuffer m_clVertexTriangleCount; public: - btSoftBodyVertexDataOpenCL( cl::CommandQueue queue); + btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx); virtual ~btSoftBodyVertexDataOpenCL(); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp index 
31b52f679..8198a12d3 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp @@ -16,10 +16,18 @@ subject to the following restrictions: #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" #include "vectormath/vmInclude.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolver_OpenCL.h" -#include "BulletSoftBody/VertexBuffers/btSoftBodySolverVertexBuffer.h" +#include //@todo: remove the debugging printf at some stage +#include "btSoftBodySolver_OpenCL.h" +#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" #include "BulletSoftBody/btSoftBody.h" + static const size_t workGroupSize = 128; + + +//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it + +#if (0)//CL_VERSION_1_1 == 1) + //OpenCL 1.1 kernels use float3 #define MSTRINGIFY(A) #A static char* PrepareLinksCLString = #include "OpenCLC/PrepareLinks.cl" @@ -41,19 +49,43 @@ static char* UpdateNormalsCLString = #include "OpenCLC/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC/VSolveLinks.cl" +#else +////OpenCL 1.0 kernels don't use float3 +#define MSTRINGIFY(A) #A +static char* PrepareLinksCLString = +#include "OpenCLC10/PrepareLinks.cl" +static char* UpdatePositionsFromVelocitiesCLString = +#include "OpenCLC10/UpdatePositionsFromVelocities.cl" +static char* SolvePositionsCLString = +#include "OpenCLC10/SolvePositions.cl" +static char* UpdateNodesCLString = +#include "OpenCLC10/UpdateNodes.cl" +static char* UpdatePositionsCLString = +#include "OpenCLC10/UpdatePositions.cl" +static char* UpdateConstantsCLString = +#include "OpenCLC10/UpdateConstants.cl" +static char* IntegrateCLString = +#include "OpenCLC10/Integrate.cl" +static char* ApplyForcesCLString = +#include "OpenCLC10/ApplyForces.cl" +static char* UpdateNormalsCLString = +#include "OpenCLC10/UpdateNormals.cl" +static char* VSolveLinksCLString = +#include "OpenCLC10/VSolveLinks.cl" 
+#endif //CL_VERSION_1_1 -btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl::CommandQueue queue) : +btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) : m_queue(queue), - m_clClothIdentifier( queue, &m_clothIdentifier, false ), - m_clVertexPosition( queue, &m_vertexPosition, false ), - m_clVertexPreviousPosition( queue, &m_vertexPreviousPosition, false ), - m_clVertexVelocity( queue, &m_vertexVelocity, false ), - m_clVertexForceAccumulator( queue, &m_vertexForceAccumulator, false ), - m_clVertexNormal( queue, &m_vertexNormal, false ), - m_clVertexInverseMass( queue, &m_vertexInverseMass, false ), - m_clVertexArea( queue, &m_vertexArea, false ), - m_clVertexTriangleCount( queue, &m_vertexTriangleCount, false ) + m_clClothIdentifier( queue, ctx, &m_clothIdentifier, false ), + m_clVertexPosition( queue, ctx, &m_vertexPosition, false ), + m_clVertexPreviousPosition( queue, ctx, &m_vertexPreviousPosition, false ), + m_clVertexVelocity( queue, ctx, &m_vertexVelocity, false ), + m_clVertexForceAccumulator( queue, ctx, &m_vertexForceAccumulator, false ), + m_clVertexNormal( queue, ctx, &m_vertexNormal, false ), + m_clVertexInverseMass( queue, ctx, &m_vertexInverseMass, false ), + m_clVertexArea( queue, ctx, &m_vertexArea, false ), + m_clVertexTriangleCount( queue, ctx, &m_vertexTriangleCount, false ) { } @@ -108,16 +140,16 @@ bool btSoftBodyVertexDataOpenCL::moveFromAccelerator() -btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl::CommandQueue queue) : - m_queue(queue), - m_clLinks( queue, &m_links, false ), - m_clLinkStrength( queue, &m_linkStrength, false ), - m_clLinksMassLSC( queue, &m_linksMassLSC, false ), - m_clLinksRestLengthSquared( queue, &m_linksRestLengthSquared, false ), - m_clLinksCLength( queue, &m_linksCLength, false ), - m_clLinksLengthRatio( queue, &m_linksLengthRatio, false ), - m_clLinksRestLength( queue, &m_linksRestLength, false ), - m_clLinksMaterialLinearStiffnessCoefficient( queue, 
&m_linksMaterialLinearStiffnessCoefficient, false ) +btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx) +:m_cqCommandQue(queue), + m_clLinks( queue, ctx, &m_links, false ), + m_clLinkStrength( queue, ctx, &m_linkStrength, false ), + m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ), + m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ), + m_clLinksCLength( queue, ctx, &m_linksCLength, false ), + m_clLinksLengthRatio( queue, ctx, &m_linksLengthRatio, false ), + m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ), + m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false ) { } @@ -272,13 +304,13 @@ void btSoftBodyLinkDataOpenCL::generateBatches() if( m_batchStartLengths.size() > 0 ) { m_batchStartLengths.resize(batchCounts.size()); - m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); + m_batchStartLengths[0] = BatchPair(0, 0); int sum = 0; for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex ) { - m_batchStartLengths[batchIndex].first = sum; - m_batchStartLengths[batchIndex].second = batchCounts[batchIndex]; + m_batchStartLengths[batchIndex].start = sum; + m_batchStartLengths[batchIndex].length = batchCounts[batchIndex]; sum += batchCounts[batchIndex]; } } @@ -313,7 +345,7 @@ void btSoftBodyLinkDataOpenCL::generateBatches() // next element in that batch, incrementing the batch counter // afterwards int batch = batchValues[linkIndex]; - int newLocation = m_batchStartLengths[batch].first + batchCounts[batch]; + int newLocation = m_batchStartLengths[batch].start + batchCounts[batch]; batchCounts[batch] = batchCounts[batch] + 1; m_links[newLocation] = m_links_Backup[linkLocation]; @@ -336,11 +368,11 @@ void btSoftBodyLinkDataOpenCL::generateBatches() -btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl::CommandQueue queue ) : +btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl_command_queue queue , 
cl_context ctx) : m_queue( queue ), - m_clVertexIndices( queue, &m_vertexIndices, false ), - m_clArea( queue, &m_area, false ), - m_clNormal( queue, &m_normal, false ) + m_clVertexIndices( queue, ctx, &m_vertexIndices, false ), + m_clArea( queue, ctx, &m_area, false ), + m_clNormal( queue, ctx, &m_normal, false ) { } @@ -493,7 +525,7 @@ void btSoftBodyTriangleDataOpenCL::generateBatches() m_batchStartLengths.resize(batchCounts.size()); - m_batchStartLengths[0] = std::pair< int, int >( 0, 0 ); + m_batchStartLengths[0] = btSomePair(0,0); int sum = 0; @@ -547,18 +579,19 @@ void btSoftBodyTriangleDataOpenCL::generateBatches() -btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(const cl::CommandQueue &queue) : - m_linkData(queue), - m_vertexData(queue), - m_triangleData(queue), - m_clPerClothAcceleration(queue, &m_perClothAcceleration, true ), - m_clPerClothWindVelocity(queue, &m_perClothWindVelocity, true ), - m_clPerClothDampingFactor(queue, &m_perClothDampingFactor, true ), - m_clPerClothVelocityCorrectionCoefficient(queue, &m_perClothVelocityCorrectionCoefficient, true ), - m_clPerClothLiftFactor(queue, &m_perClothLiftFactor, true ), - m_clPerClothDragFactor(queue, &m_perClothDragFactor, true ), - m_clPerClothMediumDensity(queue, &m_perClothMediumDensity, true ), - m_queue( queue ) +btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx) : + m_linkData(queue, ctx), + m_vertexData(queue, ctx), + m_triangleData(queue, ctx), + m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ), + m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ), + m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ), + m_clPerClothVelocityCorrectionCoefficient(queue, ctx,&m_perClothVelocityCorrectionCoefficient, true ), + m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ), + m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ), + m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true 
), + m_cqCommandQue( queue ), + m_cxMainContext(ctx) { // Initial we will clearly need to update solver constants // For now this is global for the cloths linked with this solver - we should probably make this body specific @@ -590,7 +623,7 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof using Vectormath::Aos::Point3; // Create SoftBody that will store the information within the solver - btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); + btOpenCLAcceleratedSoftBodyInterface *newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody ); m_softBodySet.push_back( newSoftBody ); m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); @@ -712,51 +745,58 @@ bool btOpenCLSoftBodySolver::checkInitialized() void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices ) { - resetNormalsAndAreasKernel.kernel.setArg(0, numVertices); - resetNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexNormal.getBuffer()); - resetNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexArea.getBuffer()); + cl_int ciErrNum; + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 0, sizeof(numVertices), (void*)&numVertices); //oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 1, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexNormal.m_buffer);//oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clSetKernelArg(resetNormalsAndAreasKernel, 2, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexArea.m_buffer); //oclCHECKERROR(ciErrNum, CL_SUCCESS); + size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, resetNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 ); - int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); - cl_int err = 
m_queue.enqueueNDRangeKernel(resetNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" ); + btAssert( 0 && "enqueueNDRangeKernel(resetNormalsAndAreasKernel)" ); } + } void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices ) { - normalizeNormalsAndAreasKernel.kernel.setArg(0, numVertices); - normalizeNormalsAndAreasKernel.kernel.setArg(1, m_vertexData.m_clVertexTriangleCount.getBuffer()); - normalizeNormalsAndAreasKernel.kernel.setArg(2, m_vertexData.m_clVertexNormal.getBuffer()); - normalizeNormalsAndAreasKernel.kernel.setArg(3, m_vertexData.m_clVertexArea.getBuffer()); - int numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(normalizeNormalsAndAreasKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexTriangleCount.m_buffer); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + size_t numWorkItems = workGroupSize*((numVertices + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, normalizeNormalsAndAreasKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); } + } void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int 
numTriangles ) { - updateSoftBodiesKernel.kernel.setArg(0, firstTriangle); - updateSoftBodiesKernel.kernel.setArg(1, numTriangles); - updateSoftBodiesKernel.kernel.setArg(2, m_triangleData.m_clVertexIndices.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(6, m_triangleData.m_clNormal.getBuffer()); - updateSoftBodiesKernel.kernel.setArg(7, m_triangleData.m_clArea.getBuffer()); + cl_int ciErrNum; + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 1, sizeof(int), &numTriangles); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 2, sizeof(cl_mem), &m_triangleData.m_clVertexIndices.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 6, sizeof(cl_mem), &m_triangleData.m_clNormal.m_buffer); + ciErrNum = clSetKernelArg(updateSoftBodiesKernel, 7, sizeof(cl_mem), &m_triangleData.m_clArea.m_buffer); - int numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateSoftBodiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + size_t numWorkItems = workGroupSize*((numTriangles + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, updateSoftBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum 
!= CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(normalizeNormalsAndAreasKernel)"); } + } void btOpenCLSoftBodySolver::updateSoftBodies() @@ -807,6 +847,7 @@ void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath void btOpenCLSoftBodySolver::applyForces( float solverdt ) { + // Ensure data is on accelerator m_vertexData.moveToAccelerator(); m_clPerClothAcceleration.moveToGPU(); @@ -815,85 +856,30 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) m_clPerClothMediumDensity.moveToGPU(); m_clPerClothWindVelocity.moveToGPU(); - cl_int err; - err = applyForcesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - if( err != CL_SUCCESS ) + cl_int ciErrNum ; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(applyForcesKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(applyForcesKernel, 1, sizeof(float), &solverdt); + float fl = FLT_EPSILON; + ciErrNum = clSetKernelArg(applyForcesKernel, 2, sizeof(float), &fl); + ciErrNum = clSetKernelArg(applyForcesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 7, sizeof(cl_mem), &m_clPerClothLiftFactor.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 8 ,sizeof(cl_mem), &m_clPerClothDragFactor.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel, 9, sizeof(cl_mem), &m_clPerClothWindVelocity.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,10, sizeof(cl_mem), &m_clPerClothAcceleration.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,11, sizeof(cl_mem), 
&m_clPerClothMediumDensity.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); + ciErrNum = clSetKernelArg(applyForcesKernel,13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,applyForcesKernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(1, solverdt); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(2, FLT_EPSILON); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(3, m_vertexData.m_clClothIdentifier.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(4, m_vertexData.m_clVertexNormal.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(5, m_vertexData.m_clVertexArea.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(6, m_vertexData.m_clVertexInverseMass.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(7, m_clPerClothLiftFactor.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(8, m_clPerClothDragFactor.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(9, 
m_clPerClothWindVelocity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(10, m_clPerClothAcceleration.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(11, m_clPerClothMediumDensity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(12, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } - err = applyForcesKernel.kernel.setArg(13, m_vertexData.m_clVertexVelocity.getBuffer()); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(applyForcesKernel)"); } - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - - err = m_queue.enqueueNDRangeKernel(applyForcesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) - { - btAssert( "enqueueNDRangeKernel(applyForcesKernel)"); - } } /** @@ -901,22 +887,26 @@ void btOpenCLSoftBodySolver::applyForces( float solverdt ) */ void btOpenCLSoftBodySolver::integrate( float solverdt ) { + + // Ensure data is on accelerator m_vertexData.moveToAccelerator(); - integrateKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - integrateKernel.kernel.setArg(1, solverdt); - integrateKernel.kernel.setArg(2, m_vertexData.m_clVertexInverseMass.getBuffer()); - integrateKernel.kernel.setArg(3, m_vertexData.m_clVertexPosition.getBuffer()); - integrateKernel.kernel.setArg(4, m_vertexData.m_clVertexVelocity.getBuffer()); - integrateKernel.kernel.setArg(5, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - integrateKernel.kernel.setArg(6, m_vertexData.m_clVertexForceAccumulator.getBuffer()); + 
cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(integrateKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(integrateKernel, 1, sizeof(float), &solverdt); + ciErrNum = clSetKernelArg(integrateKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(integrateKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,integrateKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(integrateKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(integrateKernel)"); } } @@ -935,6 +925,7 @@ float btOpenCLSoftBodySolver::computeTriangleArea( void btOpenCLSoftBodySolver::updateConstants( float timeStep ) { + using namespace Vectormath::Aos; if( m_updateSolverConstants ) @@ -959,10 +950,12 @@ void btOpenCLSoftBodySolver::updateConstants( float timeStep ) m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; } } + } void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) { + using Vectormath::Aos::Vector3; using Vectormath::Aos::Point3; using Vectormath::Aos::lengthSqr; @@ -988,33 +981,34 @@ void 
btOpenCLSoftBodySolver::solveConstraints( float solverdt ) - // Prepare anchors - /*for(i=0,ni=m_anchors.size();igetWorldTransform().getBasis()*a.m_local; - a.m_c0 = ImpulseMatrix( m_sst.sdt, - a.m_node->m_im, - a.m_body->getInvMass(), - a.m_body->getInvInertiaTensorWorld(), - ra); - a.m_c1 = ra; - a.m_c2 = m_sst.sdt*a.m_node->m_im; - a.m_body->activate(); - }*/ + for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) + { + int startLink = m_linkData.m_batchStartLengths[i].start; + int numLinks = m_linkData.m_batchStartLengths[i].length; - // Really want to combine these into a single loop, don't we? No update in the middle? - - // TODO: Double check what kst is meant to mean - passed in as 1 in the bullet code + solveLinksForVelocity( startLink, numLinks, kst ); + } + } + // Compute new positions from velocity + // Also update the previous position so that our position computation is now based on the new position from the velocity solution + // rather than based directly on the original positions + if( m_numberOfVelocityIterations > 0 ) + { + updateVelocitiesFromPositionsWithVelocities( 1.f/solverdt ); + } else { + updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + } // Solve drift for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) { for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i ) { - int startLink = m_linkData.m_batchStartLengths[i].first; - int numLinks = m_linkData.m_batchStartLengths[i].second; + int startLink = m_linkData.m_batchStartLengths[i].start; + int numLinks = m_linkData.m_batchStartLengths[i].length; solveLinksForPosition( startLink, numLinks, kst, ti ); } @@ -1023,6 +1017,7 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) updateVelocitiesFromPositionsWithoutVelocities( 1.f/solverdt ); + } @@ -1030,96 +1025,136 @@ void btOpenCLSoftBodySolver::solveConstraints( float solverdt ) // Kernel dispatches void btOpenCLSoftBodySolver::prepareLinks() { - 
prepareLinksKernel.kernel.setArg(0, m_linkData.getNumLinks()); - prepareLinksKernel.kernel.setArg(1, m_linkData.m_clLinks.getBuffer()); - prepareLinksKernel.kernel.setArg(2, m_linkData.m_clLinksMassLSC.getBuffer()); - prepareLinksKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - prepareLinksKernel.kernel.setArg(4, m_linkData.m_clLinksLengthRatio.getBuffer()); - prepareLinksKernel.kernel.setArg(5, m_linkData.m_clLinksCLength.getBuffer()); - int numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(prepareLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numLinks = m_linkData.getNumLinks(); + ciErrNum = clSetKernelArg(prepareLinksKernel,0, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(prepareLinksKernel,1, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,2, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer); + ciErrNum = clSetKernelArg(prepareLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer); + + size_t numWorkItems = workGroupSize*((m_linkData.getNumLinks() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,prepareLinksKernel, 1 , NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(prepareLinksKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(prepareLinksKernel)"); } + } void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt ) { - updatePositionsFromVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - 
updatePositionsFromVelocitiesKernel.kernel.setArg(1, solverdt); - updatePositionsFromVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexVelocity.getBuffer()); - updatePositionsFromVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - updatePositionsFromVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clVertexPosition.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,1, sizeof(float), &solverdt); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updatePositionsFromVelocitiesKernel,4, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updatePositionsFromVelocitiesKernel, 1, NULL, &numWorkItems,&workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updatePositionsFromVelocitiesKernel)"); } + } void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti ) { - solvePositionsFromLinksKernel.kernel.setArg(0, startLink); - solvePositionsFromLinksKernel.kernel.setArg(1, numLinks); - 
solvePositionsFromLinksKernel.kernel.setArg(2, kst); - solvePositionsFromLinksKernel.kernel.setArg(3, ti); - solvePositionsFromLinksKernel.kernel.setArg(4, m_linkData.m_clLinks.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(5, m_linkData.m_clLinksMassLSC.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(6, m_linkData.m_clLinksRestLengthSquared.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(7, m_vertexData.m_clVertexInverseMass.getBuffer()); - solvePositionsFromLinksKernel.kernel.setArg(8, m_vertexData.m_clVertexPosition.getBuffer()); - int numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(solvePositionsFromLinksKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,0, sizeof(int), &startLink); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,1, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,2, sizeof(float), &kst); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,3, sizeof(float), &ti); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + + size_t numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); + ciErrNum = 
clEnqueueNDRangeKernel(m_cqCommandQue,solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0); + if( ciErrNum!= CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(solvePositionsFromLinksKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(solvePositionsFromLinksKernel)"); } + } // solveLinksForPosition +void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst ) +{ + + cl_int ciErrNum; + ciErrNum = clSetKernelArg(vSolveLinksKernel, 0, sizeof(int), &startLink); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 1, sizeof(int), &numLinks); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 2, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 3, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 4, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer); + ciErrNum = clSetKernelArg(vSolveLinksKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer); + + size_t numWorkItems = workGroupSize*((numLinks + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,vSolveLinksKernel,1,NULL,&numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) + { + btAssert( 0 && "enqueueNDRangeKernel(vSolveLinksKernel)"); + } + +} + void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt ) { - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(1, isolverdt); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - 
updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(5, m_clPerClothVelocityCorrectionCoefficient.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(6, m_clPerClothDampingFactor.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexVelocity.getBuffer()); - updateVelocitiesFromPositionsWithVelocitiesKernel.kernel.setArg(8, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 1, sizeof(float), &isolverdt); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 5, sizeof(cl_mem), &m_clPerClothVelocityCorrectionCoefficient.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 6, sizeof(cl_mem), &m_clPerClothDampingFactor.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 7, sizeof(cl_mem), 
&m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithVelocitiesKernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithVelocitiesKernel)"); } + } // updateVelocitiesFromPositionsWithVelocities void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ) { - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(0, m_vertexData.getNumVertices()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(1, isolverdt); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(2, m_vertexData.m_clVertexPosition.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(3, m_vertexData.m_clVertexPreviousPosition.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(4, m_vertexData.m_clClothIdentifier.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(5, m_clPerClothDampingFactor.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(6, m_vertexData.m_clVertexVelocity.getBuffer()); - updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel.setArg(7, m_vertexData.m_clVertexForceAccumulator.getBuffer()); - int numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); - cl_int err = m_queue.enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, cl::NullRange, cl::NDRange(numWorkItems), 
cl::NDRange(workGroupSize)); - if( err != CL_SUCCESS ) + cl_int ciErrNum; + int numVerts = m_vertexData.getNumVertices(); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, sizeof(float), &isolverdt); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 4, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 6, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer); + ciErrNum = clSetKernelArg(updateVelocitiesFromPositionsWithoutVelocitiesKernel, 7, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer); + + size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize); + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0); + if( ciErrNum != CL_SUCCESS ) { - btAssert( "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)"); + btAssert( 0 && "enqueueNDRangeKernel(updateVelocitiesFromPositionsWithoutVelocitiesKernel)"); } + } // updateVelocitiesFromPositionsWithoutVelocities // End kernel dispatches @@ -1133,15 +1168,20 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons // and use them together on a single kernel call if possible by setting up a // per-cloth target buffer array 
for the copy kernel. - btAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + btOpenCLAcceleratedSoftBodyInterface *currentCloth = findSoftBodyInterface( softBody ); + + const int firstVertex = currentCloth->getFirstVertex(); + const int lastVertex = firstVertex + currentCloth->getNumVertices(); if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) { - const int firstVertex = currentCloth->getFirstVertex(); - const int lastVertex = firstVertex + currentCloth->getNumVertices(); const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); float *basePointer = cpuVertexBuffer->getBasePointer(); + m_vertexData.m_clVertexPosition.copyFromGPU(); + m_vertexData.m_clVertexNormal.copyFromGPU(); + if( vertexBuffer->hasVertexPositions() ) { const int vertexOffset = cpuVertexBuffer->getVertexOffset(); @@ -1173,43 +1213,46 @@ void btOpenCLSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody * cons } } } + } // btCPUSoftBodySolver::outputToVertexBuffers -btOpenCLSoftBodySolver::KernelDesc btOpenCLSoftBodySolver::compileCLKernelFromString( const char *shaderString, const char *shaderName ) +cl_kernel btOpenCLSoftBodySolver::compileCLKernelFromString( const char* kernelSource, const char* kernelName ) { - cl_int err; + printf("compiling kernalName: %s ",kernelName); + cl_kernel kernel; + cl_int ciErrNum; + size_t program_length = strlen(kernelSource); - context = m_queue.getInfo(); - device = m_queue.getInfo(); - std::vector< cl::Device > devices; - devices.push_back( device ); + cl_program m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&kernelSource, &program_length, &ciErrNum); +// oclCHECKERROR(ciErrNum, CL_SUCCESS); + + // Build the program with 'mad' Optimization option +#ifdef MAC + char* flags = "-cl-mad-enable -DMAC -DGUID_ARG"; +#else + const char* flags = "-DGUID_ARG="; +#endif + ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, flags, 
NULL, NULL); + if (ciErrNum != CL_SUCCESS) + { + printf("Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__); + btAssert(0); + exit(0); + } + + // Create the kernel + kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum); + if (ciErrNum != CL_SUCCESS) + { + printf("Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__); + btAssert(0); + exit(0); + } - cl::Program::Sources source(1, std::make_pair(shaderString, strlen(shaderString) + 1)); - cl::Program program(context, source, &err); - if( err != CL_SUCCESS ) - { - btAssert( "program" ); - } - err = program.build(devices); - if (err != CL_SUCCESS) { - //std::string str; - //str = program.getBuildInfo(devices[0]); - //std::cout << "Program Info: " << str; - if( err != CL_SUCCESS ) - { - btAssert( "Program::build()" ); - } - } - cl::Kernel kernel(program, shaderName, &err); - if( err != CL_SUCCESS ) - { - btAssert( "kernel" ); - } + printf("ready. \n"); + return kernel; - KernelDesc descriptor; - descriptor.kernel = kernel; - return descriptor; } void btOpenCLSoftBodySolver::predictMotion( float timeStep ) @@ -1234,11 +1277,11 @@ void btOpenCLSoftBodySolver::predictMotion( float timeStep ) -btOpenCLSoftBodySolver::btAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) +btOpenCLAcceleratedSoftBodyInterface *btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) { for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; + btOpenCLAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; if( softBodyInterface->getSoftBody() == softBody ) return softBodyInterface; } @@ -1273,4 +1316,4 @@ bool btOpenCLSoftBodySolver::buildShaders() m_shadersInitialized = true; return returnVal; -} \ No newline at end of file +} diff --git 
a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h index 84d208488..b023d475c 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h @@ -16,204 +16,165 @@ subject to the following restrictions: #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H #define BT_SOFT_BODY_SOLVER_OPENCL_H +#include "stddef.h" //for size_t #include "vectormath/vmInclude.h" + #include "BulletSoftBody/btSoftBodySolvers.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h" -#include "BulletSoftBody/solvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h" +#include "btSoftBodySolverBuffer_OpenCL.h" +#include "btSoftBodySolverLinkData_OpenCL.h" +#include "btSoftBodySolverVertexData_OpenCL.h" +#include "btSoftBodySolverTriangleData_OpenCL.h" + + +/** + * SoftBody class to maintain information about a soft body instance + * within a solver. + * This data addresses the main solver arrays. 
+ */ +class btOpenCLAcceleratedSoftBodyInterface +{ +protected: + /** Current number of vertices that are part of this cloth */ + int m_numVertices; + /** Maximum number of vertices allocated to be part of this cloth */ + int m_maxVertices; + /** Current number of triangles that are part of this cloth */ + int m_numTriangles; + /** Maximum number of triangles allocated to be part of this cloth */ + int m_maxTriangles; + /** Index of first vertex in the world allocated to this cloth */ + int m_firstVertex; + /** Index of first triangle in the world allocated to this cloth */ + int m_firstTriangle; + /** Index of first link in the world allocated to this cloth */ + int m_firstLink; + /** Maximum number of links allocated to this cloth */ + int m_maxLinks; + /** Current number of links allocated to this cloth */ + int m_numLinks; + + /** The actual soft body this data represents */ + btSoftBody *m_softBody; + + +public: + btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) : + m_softBody( softBody ) + { + m_numVertices = 0; + m_maxVertices = 0; + m_numTriangles = 0; + m_maxTriangles = 0; + m_firstVertex = 0; + m_firstTriangle = 0; + m_firstLink = 0; + m_maxLinks = 0; + m_numLinks = 0; + } + int getNumVertices() + { + return m_numVertices; + } + + int getNumTriangles() + { + return m_numTriangles; + } + + int getMaxVertices() + { + return m_maxVertices; + } + + int getMaxTriangles() + { + return m_maxTriangles; + } + + int getFirstVertex() + { + return m_firstVertex; + } + + int getFirstTriangle() + { + return m_firstTriangle; + } + + // TODO: All of these set functions will have to do checks and + // update the world because restructuring of the arrays will be necessary + // Reasonable use of "friend"? 
+ void setNumVertices( int numVertices ) + { + m_numVertices = numVertices; + } + + void setNumTriangles( int numTriangles ) + { + m_numTriangles = numTriangles; + } + + void setMaxVertices( int maxVertices ) + { + m_maxVertices = maxVertices; + } + + void setMaxTriangles( int maxTriangles ) + { + m_maxTriangles = maxTriangles; + } + + void setFirstVertex( int firstVertex ) + { + m_firstVertex = firstVertex; + } + + void setFirstTriangle( int firstTriangle ) + { + m_firstTriangle = firstTriangle; + } + + void setMaxLinks( int maxLinks ) + { + m_maxLinks = maxLinks; + } + + void setNumLinks( int numLinks ) + { + m_numLinks = numLinks; + } + + void setFirstLink( int firstLink ) + { + m_firstLink = firstLink; + } + + int getMaxLinks() + { + return m_maxLinks; + } + + int getNumLinks() + { + return m_numLinks; + } + + int getFirstLink() + { + return m_firstLink; + } + + btSoftBody* getSoftBody() + { + return m_softBody; + } + +}; class btOpenCLSoftBodySolver : public btSoftBodySolver { private: - /** - * SoftBody class to maintain information about a soft body instance - * within a solver. - * This data addresses the main solver arrays. 
- */ - class btAcceleratedSoftBodyInterface - { - protected: - /** Current number of vertices that are part of this cloth */ - int m_numVertices; - /** Maximum number of vertices allocated to be part of this cloth */ - int m_maxVertices; - /** Current number of triangles that are part of this cloth */ - int m_numTriangles; - /** Maximum number of triangles allocated to be part of this cloth */ - int m_maxTriangles; - /** Index of first vertex in the world allocated to this cloth */ - int m_firstVertex; - /** Index of first triangle in the world allocated to this cloth */ - int m_firstTriangle; - /** Index of first link in the world allocated to this cloth */ - int m_firstLink; - /** Maximum number of links allocated to this cloth */ - int m_maxLinks; - /** Current number of links allocated to this cloth */ - int m_numLinks; - - /** The actual soft body this data represents */ - btSoftBody *m_softBody; - - - public: - btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : - m_softBody( softBody ) - { - m_numVertices = 0; - m_maxVertices = 0; - m_numTriangles = 0; - m_maxTriangles = 0; - m_firstVertex = 0; - m_firstTriangle = 0; - m_firstLink = 0; - m_maxLinks = 0; - m_numLinks = 0; - } - int getNumVertices() - { - return m_numVertices; - } - - int getNumTriangles() - { - return m_numTriangles; - } - - int getMaxVertices() - { - return m_maxVertices; - } - - int getMaxTriangles() - { - return m_maxTriangles; - } - - int getFirstVertex() - { - return m_firstVertex; - } - - int getFirstTriangle() - { - return m_firstTriangle; - } - - // TODO: All of these set functions will have to do checks and - // update the world because restructuring of the arrays will be necessary - // Reasonable use of "friend"? 
- void setNumVertices( int numVertices ) - { - m_numVertices = numVertices; - } - - void setNumTriangles( int numTriangles ) - { - m_numTriangles = numTriangles; - } - - void setMaxVertices( int maxVertices ) - { - m_maxVertices = maxVertices; - } - - void setMaxTriangles( int maxTriangles ) - { - m_maxTriangles = maxTriangles; - } - - void setFirstVertex( int firstVertex ) - { - m_firstVertex = firstVertex; - } - - void setFirstTriangle( int firstTriangle ) - { - m_firstTriangle = firstTriangle; - } - - void setMaxLinks( int maxLinks ) - { - m_maxLinks = maxLinks; - } - - void setNumLinks( int numLinks ) - { - m_numLinks = numLinks; - } - - void setFirstLink( int firstLink ) - { - m_firstLink = firstLink; - } - - int getMaxLinks() - { - return m_maxLinks; - } - - int getNumLinks() - { - return m_numLinks; - } - - int getFirstLink() - { - return m_firstLink; - } - - btSoftBody* getSoftBody() - { - return m_softBody; - } - - #if 0 - void setAcceleration( Vectormath::Aos::Vector3 acceleration ) - { - m_currentSolver->setPerClothAcceleration( m_clothIdentifier, acceleration ); - } - - void setWindVelocity( Vectormath::Aos::Vector3 windVelocity ) - { - m_currentSolver->setPerClothWindVelocity( m_clothIdentifier, windVelocity ); - } - - /** - * Set the density of the air in which the cloth is situated. - */ - void setAirDensity( btScalar density ) - { - m_currentSolver->setPerClothMediumDensity( m_clothIdentifier, static_cast(density) ); - } - - /** - * Add a collision object to this soft body. - */ - void addCollisionObject( btCollisionObject *collisionObject ) - { - m_currentSolver->addCollisionObjectForSoftBody( m_clothIdentifier, collisionObject ); - } - #endif - }; - - - class KernelDesc - { - protected: - public: - cl::Kernel kernel; - - KernelDesc() - { - } - - virtual ~KernelDesc() - { - } - }; btSoftBodyLinkDataOpenCL m_linkData; btSoftBodyVertexDataOpenCL m_vertexData; @@ -228,7 +189,7 @@ private: * Cloths owned by this solver. 
* Only our cloths are in this array. */ - btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; + btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet; /** Acceleration value to be applied to all non-static vertices in the solver. * Index n is cloth n, array sized by number of cloths in the world not the solver. @@ -262,37 +223,34 @@ private: btAlignedObjectArray< float > m_perClothMediumDensity; btOpenCLBuffer m_clPerClothMediumDensity; - KernelDesc prepareLinksKernel; - KernelDesc solvePositionsFromLinksKernel; - KernelDesc updateConstantsKernel; - KernelDesc integrateKernel; - KernelDesc addVelocityKernel; - KernelDesc updatePositionsFromVelocitiesKernel; - KernelDesc updateVelocitiesFromPositionsWithoutVelocitiesKernel; - KernelDesc updateVelocitiesFromPositionsWithVelocitiesKernel; - KernelDesc vSolveLinksKernel; - KernelDesc resetNormalsAndAreasKernel; - KernelDesc normalizeNormalsAndAreasKernel; - KernelDesc updateSoftBodiesKernel; - KernelDesc outputToVertexArrayWithNormalsKernel; - KernelDesc outputToVertexArrayWithoutNormalsKernel; + cl_kernel prepareLinksKernel; + cl_kernel solvePositionsFromLinksKernel; + cl_kernel updateConstantsKernel; + cl_kernel integrateKernel; + cl_kernel addVelocityKernel; + cl_kernel updatePositionsFromVelocitiesKernel; + cl_kernel updateVelocitiesFromPositionsWithoutVelocitiesKernel; + cl_kernel updateVelocitiesFromPositionsWithVelocitiesKernel; + cl_kernel vSolveLinksKernel; + cl_kernel resetNormalsAndAreasKernel; + cl_kernel normalizeNormalsAndAreasKernel; + cl_kernel updateSoftBodiesKernel; + cl_kernel outputToVertexArrayWithNormalsKernel; + cl_kernel outputToVertexArrayWithoutNormalsKernel; - KernelDesc outputToVertexArrayKernel; - KernelDesc applyForcesKernel; - KernelDesc collideSphereKernel; - KernelDesc collideCylinderKernel; + cl_kernel outputToVertexArrayKernel; + cl_kernel applyForcesKernel; + cl_kernel collideSphereKernel; + cl_kernel collideCylinderKernel; - static const 
int workGroupSize = 128; - - cl::CommandQueue m_queue; - cl::Context context; - cl::Device device; + cl_command_queue m_cqCommandQue; + cl_context m_cxMainContext; /** - * Compile a compute shader kernel from a string and return the appropriate KernelDesc object. + * Compile a compute shader kernel from a string and return the appropriate cl_kernel object. */ - KernelDesc compileCLKernelFromString( const char *shaderString, const char *shaderName ); + cl_kernel compileCLKernelFromString( const char *shaderString, const char *shaderName ); bool buildShaders(); @@ -306,7 +264,7 @@ private: void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); - btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); + btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); virtual void applyForces( float solverdt ); @@ -342,7 +300,7 @@ private: public: - btOpenCLSoftBodySolver(const cl::CommandQueue &queue); + btOpenCLSoftBodySolver(cl_command_queue queue,cl_context ctx); virtual ~btOpenCLSoftBodySolver(); @@ -371,4 +329,4 @@ public: virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer ); }; // btOpenCLSoftBodySolver -#endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H \ No newline at end of file +#endif #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H diff --git a/src/BulletSoftBody/btSoftBodySolvers.h b/src/BulletSoftBody/btSoftBodySolvers.h index 24a742e49..79a55f706 100644 --- a/src/BulletSoftBody/btSoftBodySolvers.h +++ b/src/BulletSoftBody/btSoftBodySolvers.h @@ -23,7 +23,6 @@ class btSoftBodyTriangleData; class btSoftBodyLinkData; class btSoftBodyVertexData; class btVertexBufferDescriptor; -class btAcceleratedSoftBodyInterface; class btCollisionObject; class btSoftBody; diff --git a/src/MiniCL/MiniCL.cpp b/src/MiniCL/MiniCL.cpp 
index 9cb1ca331..1e0823a1c 100644 --- a/src/MiniCL/MiniCL.cpp +++ b/src/MiniCL/MiniCL.cpp @@ -30,6 +30,7 @@ subject to the following restrictions: //#define DEBUG_MINICL_KERNELS 1 static char* spPlatformID = "MiniCL, SCEA"; +static char* spDriverVersion= "1.0"; CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs( cl_uint num_entries, @@ -91,23 +92,24 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( cl_device_info param_name , size_t param_value_size , void * param_value , - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0 + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { switch (param_name) { case CL_DEVICE_NAME: { - char deviceName[] = "CPU"; + char deviceName[] = "MiniCL CPU"; unsigned int nameLen = strlen(deviceName)+1; btAssert(param_value_size>strlen(deviceName)); if (nameLen < param_value_size) { - const char* cpuName = "CPU"; + const char* cpuName = "MiniCL CPU"; sprintf((char*)param_value,"%s",cpuName); } else { printf("error: param_value_size should be at least %d, but it is %d\n",nameLen,param_value_size); + return CL_INVALID_VALUE; } break; } @@ -120,6 +122,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_device_type)); + return CL_INVALID_VALUE; } break; } @@ -132,6 +135,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + return CL_INVALID_VALUE; } break; @@ -149,6 +153,7 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( } else { printf("error: param_value_size should be at least %d\n",sizeof(cl_uint)); + return CL_INVALID_VALUE; } break; } @@ -158,6 +163,142 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( *clock_frequency = 3*1024; break; } + + case CL_DEVICE_VENDOR : + { + if(param_value_size < (strlen(spPlatformID) + 1)) + { + return CL_INVALID_VALUE; + } + strcpy((char*)param_value, spPlatformID); + if(param_value_size_ret != NULL) + { + 
*param_value_size_ret = strlen(spPlatformID) + 1; + } + break; + } + case CL_DRIVER_VERSION: + { + if(param_value_size < (strlen(spDriverVersion) + 1)) + { + return CL_INVALID_VALUE; + } + strcpy((char*)param_value, spDriverVersion); + if(param_value_size_ret != NULL) + { + *param_value_size_ret = strlen(spDriverVersion) + 1; + } + + break; + } + case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: + { + cl_uint* maxDimensions = (cl_uint*)param_value; + *maxDimensions = 1; + break; + } + case CL_DEVICE_MAX_WORK_GROUP_SIZE: + { + size_t* maxWorkGroupSize = (size_t*)param_value; //this query is defined to return a size_t, not a cl_uint + *maxWorkGroupSize = 128;//1; + break; + } + case CL_DEVICE_ADDRESS_BITS: + { + cl_uint* addressBits = (cl_uint*)param_value; + *addressBits= 32; //@todo: should this be 64 for 64bit builds? + break; + } + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: + { + cl_ulong* maxMemAlloc = (cl_ulong*)param_value; + *maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ? + break; + } + case CL_DEVICE_GLOBAL_MEM_SIZE: + { + cl_ulong* maxMemAlloc = (cl_ulong*)param_value; + *maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ? 
+ break; + } + + case CL_DEVICE_ERROR_CORRECTION_SUPPORT: + { + cl_bool* error_correction_support = (cl_bool*)param_value; + *error_correction_support = CL_FALSE; + break; + } + + case CL_DEVICE_LOCAL_MEM_TYPE: + { + cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value; + *local_mem_type = CL_GLOBAL; + break; + } + case CL_DEVICE_LOCAL_MEM_SIZE: + { + cl_ulong* localmem = (cl_ulong*) param_value; + *localmem = 32*1024; + break; + } + + case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: + { + cl_ulong* localmem = (cl_ulong*) param_value; + *localmem = 64*1024; + break; + } + case CL_DEVICE_QUEUE_PROPERTIES: + { + cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value; + memset(queueProp,0,param_value_size); + + break; + } + case CL_DEVICE_IMAGE_SUPPORT: + { + cl_bool* imageSupport = (cl_bool*) param_value; + *imageSupport = CL_FALSE; + break; + } + + case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: + case CL_DEVICE_MAX_READ_IMAGE_ARGS: + { + cl_uint* imageArgs = (cl_uint*) param_value; + *imageArgs = 0; + break; + } + case CL_DEVICE_IMAGE3D_MAX_DEPTH: + case CL_DEVICE_IMAGE3D_MAX_HEIGHT: + case CL_DEVICE_IMAGE2D_MAX_HEIGHT: + case CL_DEVICE_IMAGE3D_MAX_WIDTH: + case CL_DEVICE_IMAGE2D_MAX_WIDTH: + { + size_t* maxSize = (size_t*) param_value; + *maxSize = 0; + break; + } + + case CL_DEVICE_EXTENSIONS: + { + char* extensions = (char*) param_value; + *extensions = 0; + break; + } + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: + { + cl_uint* width = (cl_uint*) param_value; + *width = 1; + break; + } + default: { printf("error: unsupported param_name:%d\n",param_name); @@ -486,7 +627,7 @@ extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* co } CL_API_ENTRY cl_context CL_API_CALL 
clCreateContextFromType(cl_context_properties * /* properties */, - cl_device_type /* device_type */, + cl_device_type device_type , void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, void * /* user_data */, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 @@ -502,14 +643,18 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie "MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" }; -#ifdef DEBUG_MINICL_KERNELS - SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); - SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); -#else + btThreadSupportInterface* threadSupport = 0; + + if (device_type==CL_DEVICE_TYPE_DEBUG) + { + SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); + threadSupport = new SequentialThreadSupport(stc); + } else + { #if _WIN32 btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports); - Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( + threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo( // "MiniCL", sUniqueThreadSupportName[sUniqueThreadSupportIndex++], processMiniCLTask, //processCollisionTask, @@ -518,10 +663,10 @@ CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(cl_context_propertie #else ///todo: add posix thread support for other platforms SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory); - SequentialThreadSupport* threadSupport = new SequentialThreadSupport(stc); + threadSupport = new SequentialThreadSupport(stc); #endif -#endif //DEBUG_MINICL_KERNELS + } MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks); diff --git a/src/MiniCL/cl.h b/src/MiniCL/cl.h index 
b0cda4237..053491ee2 100644 --- a/src/MiniCL/cl.h +++ b/src/MiniCL/cl.h @@ -155,8 +155,10 @@ typedef struct _cl_image_format { #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_DEBUG (1 << 4) #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + // cl_device_info #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 diff --git a/src/MiniCL/cl_MiniCL_Defs.h b/src/MiniCL/cl_MiniCL_Defs.h index ffdac1026..4a7a84526 100644 --- a/src/MiniCL/cl_MiniCL_Defs.h +++ b/src/MiniCL/cl_MiniCL_Defs.h @@ -140,6 +140,8 @@ static float4 operator+(const float4& a,const float4& b) return tmp; } + + static float4 operator-(const float4& a,const float4& b) { float4 tmp; @@ -159,6 +161,17 @@ static float4 operator*(float a,const float4& b) return tmp; } +static float4 operator/(const float4& b,float a) +{ + float4 tmp; + tmp.x = b.x/a; + tmp.y = b.y/a; + tmp.z = b.z/a; + tmp.w = b.w/a; + return tmp; +} + + static float dot(const float4&a ,const float4& b) { @@ -170,6 +183,22 @@ static float dot(const float4&a ,const float4& b) return tmp.x+tmp.y+tmp.z+tmp.w; } +static float length(const float4&a) +{ + float l = sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + return l; +} + +static float4 normalize(const float4&a) +{ + float4 tmp = a; //a zero-length input is handed back unchanged + float l = length(a); + if (l > 0.f) tmp = 1.f/l*a; //only scale when the xyz length is non-zero, avoiding a division by zero + return tmp; +} + + + static float4 cross(const float4&a ,const float4& b) { float4 tmp;