diff --git a/CMakeLists.txt b/CMakeLists.txt index bcf674d9d..028d9708c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ IF (NOT CMAKE_BUILD_TYPE) ENDIF (NOT CMAKE_BUILD_TYPE) + OPTION(USE_DOUBLE_PRECISION "Use double precision" OFF) OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON) OPTION(USE_MULTITHREADED_BENCHMARK "Use Multithreaded Benchmark" OFF) @@ -94,7 +95,27 @@ ENDIF() OPTION(INTERNAL_CREATE_MSVC_RELATIVE_PATH_PROJECTFILES "Create MSVC projectfiles with relative paths" OFF) OPTION(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES "Add MSVC postfix for executable names (_Debug)" OFF) +IF(WIN32) + FIND_PATH(AMD_OPENCL_BASE_DIR include/CL/cl.h PATH $ENV{ATISTREAMSDKROOT} ) + IF(AMD_OPENCL_BASE_DIR) + OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)" ON) + ELSE() + OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)" OFF) + ENDIF() + FIND_PATH(NVIDIA_OPENCL_BASE_DIR OpenCL/common/inc/CL/cl.h PATH $ENV{NVSDKCOMPUTE_ROOT} ) + IF(NVIDIA_OPENCL_BASE_DIR) + OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)" ON) + ELSE() + OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)" OFF) + ENDIF() +ENDIF() + +OPTION(BUILD_MINICL_OPENCL_DEMOS "Build OpenCL demos for MiniCL (Generic CPU)" OFF) + +OPTION(BUILD_CPU_DEMOS "Build original Bullet CPU demos" ON) + + IF (INTERNAL_CREATE_MSVC_RELATIVE_PATH_PROJECTFILES) SET(CMAKE_SUPPRESS_REGENERATION 1) SET(CMAKE_USE_RELATIVE_PATHS 1) diff --git a/Demos/CMakeLists.txt b/Demos/CMakeLists.txt index 7593fb319..5675d9e54 100644 --- a/Demos/CMakeLists.txt +++ b/Demos/CMakeLists.txt @@ -1,26 +1,33 @@ IF (USE_GLUT) -SET(SharedDemoSubdirs - OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld - CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer - RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo - UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo - CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo - DoublePrecisionDemo ConcaveDemo CollisionDemo - ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo - MultiMaterialDemo SerializeDemo InternalEdgeDemo -) - +IF(BUILD_CPU_DEMOS) + SET(SharedDemoSubdirs + OpenGL AllBulletDemos ConvexDecompositionDemo Benchmarks HelloWorld + CcdPhysicsDemo ConstraintDemo SliderConstraintDemo GenericJointDemo Raytracer + RagdollDemo ForkLiftDemo BasicDemo Box2dDemo BspDemo MovingConcaveDemo VehicleDemo + UserCollisionAlgorithm CharacterDemo SoftDemo HeightFieldFluidDemo + CollisionInterfaceDemo ConcaveConvexcastDemo SimplexDemo DynamicControlDemo + DoublePrecisionDemo ConcaveDemo CollisionDemo + ContinuousConvexCollision ConcaveRaycastDemo GjkConvexCastDemo + MultiMaterialDemo SerializeDemo InternalEdgeDemo + ) +ELSE() + SET(SharedDemoSubdirs + OpenGL + ) +ENDIF() if (CMAKE_SIZEOF_VOID_P MATCHES "8") SUBDIRS( ${SharedDemoSubdirs} ) else (CMAKE_SIZEOF_VOID_P MATCHES "8") SUBDIRS( ${SharedDemoSubdirs} + ThreadingDemo MultiThreadedDemo MiniCL_VectorAdd - ) + ParticlesOpenCL + ) endif (CMAKE_SIZEOF_VOID_P MATCHES "8") ELSE (USE_GLUT) diff --git a/Demos/OpenGL/CMakeLists.txt b/Demos/OpenGL/CMakeLists.txt index 95d69da76..dbff17c17 100644 --- a/Demos/OpenGL/CMakeLists.txt +++ b/Demos/OpenGL/CMakeLists.txt @@ -12,6 +12,7 @@ + INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src ${BULLET_PHYSICS_SOURCE_DIR}/Extras/ConvexHull ) diff --git a/Demos/OpenGL/DemoApplication.cpp b/Demos/OpenGL/DemoApplication.cpp index 9fe47febe..121381ca7 100644 --- a/Demos/OpenGL/DemoApplication.cpp +++ b/Demos/OpenGL/DemoApplication.cpp @@ -85,7 +85,9 @@ m_idle(false), m_enableshadows(false), m_sundirection(btVector3(1,-2,1)*1000), -m_defaultContactProcessingThreshold(BT_LARGE_FLOAT) +m_defaultContactProcessingThreshold(BT_LARGE_FLOAT), +m_frustumZNear(1.f), +m_frustumZFar(10000.f) { #ifndef BT_NO_PROFILE m_profileIterator = CProfileManager::Get_Iterator(); @@ -245,10 +247,12 @@ void DemoApplication::updateCamera() { { if (m_glutScreenWidth > m_glutScreenHeight) { - glFrustum (-aspect, aspect, -1.0, 1.0, 1.0, 10000.0); +// glFrustum (-aspect, aspect, -1.0, 1.0, 1.0, 10000.0); + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); } else { - glFrustum (-1.0, 1.0, -aspect, aspect, 1.0, 10000.0); +// glFrustum (-1.0, 1.0, -aspect, aspect, 1.0, 10000.0); + glFrustum (-aspect * m_frustumZNear, aspect * m_frustumZNear, -m_frustumZNear, m_frustumZNear, m_frustumZNear, m_frustumZFar); } glMatrixMode(GL_MODELVIEW); glLoadIdentity(); diff --git a/Demos/OpenGL/DemoApplication.h b/Demos/OpenGL/DemoApplication.h index cfd23ebc8..cc6b0d6f6 100644 --- a/Demos/OpenGL/DemoApplication.h +++ b/Demos/OpenGL/DemoApplication.h @@ -80,6 +80,9 @@ protected: int m_glutScreenWidth; int m_glutScreenHeight; + float m_frustumZNear; + float m_frustumZFar; + int m_ortho; float m_ShootBoxInitialSpeed; @@ -178,6 +181,11 @@ public: return btScalar(16666.); #endif } + void setFrustumZPlanes(float zNear, float zFar) + { + m_frustumZNear = zNear; + m_frustumZFar = zFar; + } ///glut callbacks diff --git a/Demos/OpenGL/GL_DialogDynamicsWorld.cpp b/Demos/OpenGL/GL_DialogDynamicsWorld.cpp index 63501808d..3f049fc43 100644 --- a/Demos/OpenGL/GL_DialogDynamicsWorld.cpp +++ b/Demos/OpenGL/GL_DialogDynamicsWorld.cpp @@ -195,7 +195,8 @@ GL_DialogWindow* GL_DialogDynamicsWorld::createDialog(int horPos,int vertPos,int btRigidBody* body = new btRigidBody(rbInfo); btTransform trans; trans.setIdentity(); - trans.setOrigin(btVector3(btScalar(horPos-m_screenWidth/2+dialogWidth/2), btScalar(vertPos+m_screenHeight/2.+dialogHeight/2),btScalar(0.))); +// trans.setOrigin(btVector3(btScalar(horPos-m_screenWidth/2+dialogWidth/2), btScalar(vertPos+m_screenHeight/2.+dialogHeight/2),btScalar(0.))); + trans.setOrigin(btVector3(btScalar(horPos-m_screenWidth/2+dialogWidth/2), btScalar(vertPos-m_screenHeight/2.+dialogHeight/2),btScalar(0.))); @@ -215,7 +216,7 @@ GL_DialogWindow* GL_DialogDynamicsWorld::createDialog(int horPos,int vertPos,int } -GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, const char* sliderText) +GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, const char* sliderText, btScalar initialFraction) { btBox2dShape* boxShape = new btBox2dShape(btVector3(6.f,6.f,0.4f)); btScalar mass = .1f; @@ -225,7 +226,10 @@ GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, btRigidBody* body = new btRigidBody(rbInfo); btTransform trans; trans.setIdentity(); - trans.setOrigin(btVector3(dialog->getDialogHorPos()-m_screenWidth/2.f+dialog->getDialogWidth()/2.f, dialog->getDialogVertPos()+m_screenHeight/2.f+dialog->getDialogHeight()/2+dialog->getNumControls()*20.f,-0.2f)); + int sliderX = dialog->getDialogHorPos() - m_screenWidth/2 + dialog->getDialogWidth()/2; +// int sliderY = dialog->getDialogVertPos() + m_screenHeight/2 + dialog->getDialogHeight()/2 + dialog->getNumControls()*20; + int sliderY = dialog->getDialogVertPos() - m_screenHeight/2 + dialog->getDialogHeight()/2 + dialog->getNumControls()*20; + trans.setOrigin(btVector3(sliderX, sliderY,-0.2f)); body->setWorldTransform(trans); //body->setDamping(0.999,0.99); @@ -245,7 +249,9 @@ GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, btTransform frameInA; frameInA.setIdentity(); - btVector3 offset(btVector3(-dialog->getDialogWidth()/2.f+16.f,-dialog->getDialogHeight()/2.f+dialog->getNumControls()*20.f+36.f,0.2f)); + int offsX = -dialog->getDialogWidth()/2 + 16; + int offsY = -dialog->getDialogHeight()/2 + dialog->getNumControls()*20 + 36; + btVector3 offset(btVector3(offsX, offsY, 0.2f)); frameInA.setOrigin(offset); @@ -253,8 +259,13 @@ GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, frameInB.setIdentity(); //frameInB.setOrigin(-offset/2); - btScalar lowerLimit = 80.f; - btScalar upperLimit = 170.f; +// btScalar lowerLimit = 80.f; +// btScalar upperLimit = 170.f; + btScalar lowerLimit = 141.f; + btScalar upperLimit = 227.f; + + btScalar actualLimit = lowerLimit+initialFraction*(upperLimit-lowerLimit); + #if 0 bool useFrameA = false; @@ -264,19 +275,24 @@ GL_SliderControl* GL_DialogDynamicsWorld::createSlider(GL_DialogWindow* dialog, constraint->setLimit(0,lowerLimit,upperLimit); #else btSliderConstraint* sliderConstraint = new btSliderConstraint(*dialogBody,*body,frameInA,frameInB,true);//useFrameA); - sliderConstraint->setLowerLinLimit(lowerLimit); - sliderConstraint->setUpperLinLimit(upperLimit); + sliderConstraint->setLowerLinLimit(actualLimit); + sliderConstraint->setUpperLinLimit(actualLimit); m_dynamicsWorld->addConstraint(sliderConstraint,true); #endif + GL_SliderControl* slider = new GL_SliderControl(sliderText, body,dialog,lowerLimit,upperLimit, sliderConstraint); body->setUserPointer(slider); dialog->addControl(slider); + + slider->m_fraction = initialFraction; + return slider; } + GL_ToggleControl* GL_DialogDynamicsWorld::createToggle(GL_DialogWindow* dialog, const char* toggleText) { @@ -289,7 +305,11 @@ GL_ToggleControl* GL_DialogDynamicsWorld::createToggle(GL_DialogWindow* dialog, btRigidBody* body = new btRigidBody(rbInfo); btTransform trans; trans.setIdentity(); - trans.setOrigin(btVector3(dialog->getDialogHorPos()-m_screenWidth/2.f+dialog->getDialogWidth()/2.f, dialog->getDialogVertPos()+m_screenHeight/2.f+dialog->getDialogHeight()/2+dialog->getNumControls()*20.f,-0.2f)); + + int toggleX = dialog->getDialogHorPos() - m_screenWidth/2 + dialog->getDialogWidth()/2; +// int toggleY = dialog->getDialogVertPos() + m_screenHeight/2 + dialog->getDialogHeight()/2 + dialog->getNumControls()*20; + int toggleY = dialog->getDialogVertPos() - m_screenHeight/2 + dialog->getDialogHeight()/2 + dialog->getNumControls()*20; + trans.setOrigin(btVector3(toggleX, toggleY, -0.2f)); body->setWorldTransform(trans); body->setDamping(0.999f,0.99f); @@ -737,3 +757,5 @@ void GL_DialogDynamicsWorld::mouseMotionFunc(int x,int y) } + + diff --git a/Demos/OpenGL/GL_DialogDynamicsWorld.h b/Demos/OpenGL/GL_DialogDynamicsWorld.h index 24854de3f..8292ae48a 100644 --- a/Demos/OpenGL/GL_DialogDynamicsWorld.h +++ b/Demos/OpenGL/GL_DialogDynamicsWorld.h @@ -78,7 +78,7 @@ public: GL_ToggleControl* createToggle(GL_DialogWindow* dialog, const char* toggleText); - GL_SliderControl* createSlider(GL_DialogWindow* dialog, const char* sliderText); + GL_SliderControl* createSlider(GL_DialogWindow* dialog, const char* sliderText, btScalar initialFraction = btScalar(0.5f)); virtual void draw(btScalar timeStep); diff --git a/Demos/OpenGL/GL_DialogWindow.cpp b/Demos/OpenGL/GL_DialogWindow.cpp index 70477d5a7..d47f33109 100644 --- a/Demos/OpenGL/GL_DialogWindow.cpp +++ b/Demos/OpenGL/GL_DialogWindow.cpp @@ -20,6 +20,8 @@ subject to the following restrictions: #include "GLDebugFont.h" #include "btBulletDynamicsCommon.h" +#include // for sprintf() + #define USE_ARRAYS 1 @@ -305,7 +307,6 @@ void GL_ToggleControl::draw(int& parentHorPos2,int& parentVertPos2,btScalar delt } - void GL_SliderControl::draw(int& parentHorPos2,int& parentVertPos2,btScalar deltaTime) { @@ -318,7 +319,17 @@ void GL_SliderControl::draw(int& parentHorPos2,int& parentVertPos2,btScalar delt unsigned int grey = 0xff6f6f6f; int borderSize = 2; unsigned int white = 0xffefefef; - drawRect(parentHorPos2+80+borderSize, parentVertPos2+borderSize, parentHorPos2+m_parentWindow->getDialogWidth()-16-borderSize, parentVertPos2+2-borderSize, white,white,white,white); + int sliderPosS = parentHorPos2+150+borderSize; + int sliderPosE = parentHorPos2+m_parentWindow->getDialogWidth()-40-borderSize; + int sliderPos = controlHorPos; + if(sliderPos < sliderPosS) sliderPos = sliderPosS; + if(sliderPos > sliderPosE) sliderPos = sliderPosE; +// drawRect(parentHorPos2+80+borderSize, parentVertPos2+borderSize, parentHorPos2+m_parentWindow->getDialogWidth()-16-borderSize, parentVertPos2+2-borderSize, white,white,white,white); + drawRect( sliderPosS, + parentVertPos2+borderSize, + sliderPosE, + parentVertPos2+2-borderSize, + white,white,white,white); drawRect(parentHorPos, parentVertPos, parentHorPos+16, parentVertPos+16, grey, grey, grey, grey); @@ -329,8 +340,19 @@ void GL_SliderControl::draw(int& parentHorPos2,int& parentVertPos2,btScalar delt btVector3 rgb(1,1,1); - - GLDebugDrawStringInternal(parentHorPos2,parentVertPos2+8,m_sliderText,rgb); + + btSliderConstraint* pSlider = (btSliderConstraint*)m_constraint; + btScalar currPos = pSlider->getLinearPos(); +// if(currPos < pSlider->getLowerLinLimit()) currPos = pSlider->getLowerLinLimit(); +// if(currPos > pSlider->getUpperLinLimit()) currPos = pSlider->getUpperLinLimit(); +// m_fraction = (currPos - pSlider->getLowerLinLimit()) / (pSlider->getUpperLinLimit() - pSlider->getLowerLinLimit()); + m_fraction = (btScalar)(sliderPos - sliderPosS) / (btScalar)(sliderPosE - sliderPosS); + + char tmpBuf[256]; + sprintf(tmpBuf, "%s %3d%%", m_sliderText, (int)(m_fraction * 100.f)); + +// GLDebugDrawStringInternal(parentHorPos2,parentVertPos2+8,m_sliderText,rgb); + GLDebugDrawStringInternal(parentHorPos2,parentVertPos2+8, tmpBuf, rgb); parentVertPos2+=20; } diff --git a/Demos/OpenGL/GL_DialogWindow.h b/Demos/OpenGL/GL_DialogWindow.h index 25a90cf00..fb18ffb9f 100644 --- a/Demos/OpenGL/GL_DialogWindow.h +++ b/Demos/OpenGL/GL_DialogWindow.h @@ -123,6 +123,7 @@ struct GL_SliderControl : public GL_DialogControl btScalar m_lowerLimit; btScalar m_upperLimit; btTypedConstraint* m_constraint; + btScalar m_fraction; const char* m_sliderText; public: @@ -140,7 +141,7 @@ public: virtual void draw(int& parentHorPos,int& parentVertPos,btScalar deltaTime); - btScalar btGetFraction(); + btScalar btGetFraction() { return m_fraction; } btScalar getLowerLimit() { diff --git a/Demos/ParticlesOpenCL/AMD/CMakeLists.txt b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt new file mode 100644 index 000000000..8f718bedf --- /dev/null +++ b/Demos/ParticlesOpenCL/AMD/CMakeLists.txt @@ -0,0 +1,93 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==ATISTREAMSDKROOT=}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{==ATISTREAMSDKROOT=}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{ATISTREAMSDKROOT}/include ) + IF (CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86_64 ) + ELSE(CMAKE_CL_64) + SET(CMAK_ATISTREAMSDK_LIBPATH $ENV{ATISTREAMSDKROOT}/lib/x86 ) + ENDIF(CMAKE_CL_64) +ENDIF() + + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletDynamics + BulletCollision + BulletMultiThreaded + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_ATISTREAMSDK_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppParticlesOCL_AMD + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} ) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_AMD POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppParticlesOCL_AMD pthread) +ENDIF(UNIX) + diff --git a/Demos/ParticlesOpenCL/Apple/CMakeLists.txt b/Demos/ParticlesOpenCL/Apple/CMakeLists.txt new file mode 100644 index 000000000..4caf51eb0 --- /dev/null +++ b/Demos/ParticlesOpenCL/Apple/CMakeLists.txt @@ -0,0 +1,85 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +IF (APPLE) + FIND_LIBRARY(OPENCL_LIBRARY OpenCL DOC "OpenCL lib for OSX") + FIND_PATH(OPENCL_INCLUDE_DIR OpenCL/cl.h DOC "Include for OpenCL on OSX") +ENDIF (APPLE) + + +#ADD_DEFINITIONS(-DUSE_MINICL) + +IF(WIN32) +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletDynamics + BulletCollision + LinearMath + ${OPENCL_LIBRARY} + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ) + + + ADD_EXECUTABLE(AppParticlesOCL_Apple + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} ) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Apple POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppParticlesOCL_Apple pthread) +ENDIF(UNIX) + diff --git a/Demos/ParticlesOpenCL/CMakeLists.txt b/Demos/ParticlesOpenCL/CMakeLists.txt new file mode 100644 index 000000000..a16ecfca7 --- /dev/null +++ b/Demos/ParticlesOpenCL/CMakeLists.txt @@ -0,0 +1,15 @@ +IF(BUILD_MINICL_OPENCL_DEMOS) + SUBDIRS( MiniCL ) +ENDIF() + +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() + +IF(BUILD_NVIDIA_OPENCL_DEMOS) + SUBDIRS(NVidia) +ENDIF() + +IF(APPLE) + SUBDIRS(Apple) +ENDIF() diff --git a/Demos/ParticlesOpenCL/MiniCL/CMakeLists.txt b/Demos/ParticlesOpenCL/MiniCL/CMakeLists.txt new file mode 100644 index 000000000..600ded733 --- /dev/null +++ b/Demos/ParticlesOpenCL/MiniCL/CMakeLists.txt @@ -0,0 +1,83 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + +ADD_DEFINITIONS(-DUSE_MINICL) + +IF(WIN32) +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletDynamics + BulletCollision + BulletMultiThreaded + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ) + + + ADD_EXECUTABLE(AppParticlesOCL_Mini + + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/MiniCL/MiniCLTaskWrap.cpp + + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} ) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Mini POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppParticlesOCL_Mini pthread) +ENDIF(UNIX) + diff --git a/Demos/ParticlesOpenCL/MiniCL/MiniCLTaskWrap.cpp b/Demos/ParticlesOpenCL/MiniCL/MiniCLTaskWrap.cpp new file mode 100644 index 000000000..b6b135e57 --- /dev/null +++ b/Demos/ParticlesOpenCL/MiniCL/MiniCLTaskWrap.cpp @@ -0,0 +1,30 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include + +#include "../ParticlesOCL.cl" + +MINICL_REGISTER(kComputeCellId) +MINICL_REGISTER(kClearCellStart) +MINICL_REGISTER(kFindCellStart) +MINICL_REGISTER(kIntegrateMotion) +MINICL_REGISTER(kCollideParticles) +MINICL_REGISTER(kBitonicSortCellIdLocal) +MINICL_REGISTER(kBitonicSortCellIdLocal1) +MINICL_REGISTER(kBitonicSortCellIdMergeGlobal) +MINICL_REGISTER(kBitonicSortCellIdMergeLocal) + diff --git a/Demos/ParticlesOpenCL/NVidia/CMakeLists.txt b/Demos/ParticlesOpenCL/NVidia/CMakeLists.txt new file mode 100644 index 000000000..56c7f31c0 --- /dev/null +++ b/Demos/ParticlesOpenCL/NVidia/CMakeLists.txt @@ -0,0 +1,92 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +) + + +IF(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + INCLUDE_DIRECTORIES( $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{==NVSDKCOMPUTE_ROOT=}/OpenCL/common/lib/x64 ) + ENDIF(CMAKE_CL_64) +ELSE() + INCLUDE_DIRECTORIES( $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/inc ) + IF (CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH ) + ELSE(CMAKE_CL_64) + SET(CMAK_NVSDKCOMPUTE_LIBPATH $ENV{NVSDKCOMPUTE_ROOT}/OpenCL/common/lib/Win32 ) + ENDIF(CMAKE_CL_64) +ENDIF() + +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletDynamics + BulletCollision + BulletMultiThreaded + LinearMath + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAK_NVSDKCOMPUTE_LIBPATH}/OpenCL.lib + ) + + + ADD_EXECUTABLE(AppParticlesOCL_Nv + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedDefs.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesSharedTypes.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.h + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/main.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesDemo.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/shaders.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Extras/OpenCL/Shared/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl + ) +ELSE (USE_GLUT) +ENDIF (USE_GLUT) + +IF(WIN32) +IF (CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} ) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW64.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ELSE(CMAKE_CL_64) + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLEW32.DLL ${CMAKE_CURRENT_BINARY_DIR}) + ADD_CUSTOM_COMMAND( TARGET AppParticlesOCL_Nv POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/ParticlesOpenCL/ParticlesOCL.cl ${CMAKE_CURRENT_BINARY_DIR}) + ENDIF() +ENDIF(CMAKE_CL_64) +ENDIF(WIN32) + +IF (UNIX) + TARGET_LINK_LIBRARIES(AppParticlesOCL_Nv pthread) +ENDIF(UNIX) + diff --git a/Demos/ParticlesOpenCL/ParticlesDemo.cpp b/Demos/ParticlesOpenCL/ParticlesDemo.cpp new file mode 100644 index 000000000..6a789a2d3 --- /dev/null +++ b/Demos/ParticlesOpenCL/ParticlesDemo.cpp @@ -0,0 +1,632 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#define START_POS_X btScalar(0.f) +#define START_POS_Y btScalar(0.f) +#define START_POS_Z btScalar(0.f) +//#define START_POS_Y btScalar(40.f) +//#define START_POS_Z btScalar(40.f) +//#define START_POS_Y btScalar(0.4f) +//#define START_POS_Z btScalar(0.4f) +#define ARRAY_SIZE_X 32 +#define ARRAY_SIZE_Y 32 +//#define ARRAY_SIZE_Y 5 +#define ARRAY_SIZE_Z 16 +//#define ARRAY_SIZE_Z 1 +//#define DIST btScalar(2.f) +#define DIST (DEF_PARTICLE_RADIUS * 2.f) + +#define STRESS_X 20 +//#define STRESS_Y 200 +#define STRESS_Y 640 + + + + + +///The 3 following lines include the CPU implementation of the kernels, keep them in this order. +#include "BulletMultiThreaded/btGpuDefines.h" +#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h" +#include "BulletMultiThreaded/btGpuUtilsSharedCode.h" +#ifndef __APPLE__ +#include +#endif + + +#include "GL_DialogDynamicsWorld.h" +#include "GL_DialogWindow.h" + + + +#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h" +#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h" +#include "GLDebugFont.h" +#include "GlutStuff.h" +///btBulletDynamicsCommon.h is the main Bullet include file, contains most common include files. +#include "btBulletDynamicsCommon.h" +#include //printf debugging +#include "shaders.h" + +#include "ParticlesDemo.h" + + + + + +btScalar gTimeStep = btScalar(1./60.); + +#define SCALING btScalar(1.f) + +void ParticlesDemo::clientMoveAndDisplay() +{ + + + updateCamera(); + glDisable(GL_LIGHTING); + glColor3f(1.f, 1.f, 1.f); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glDisable(GL_TEXTURE_2D); // we always draw wireframe in this demo + + //simple dynamics world doesn't handle fixed-time-stepping + float ms = getDeltaTimeMicroseconds(); + + renderme(); + + if (m_dialogDynamicsWorld) + m_dialogDynamicsWorld->draw(gTimeStep); + + ///step the simulation + if (m_dynamicsWorld) + { + m_dynamicsWorld->stepSimulation(gTimeStep,0);//ms / 1000000.f); + //optional but useful: debug drawing + m_dynamicsWorld->debugDrawWorld(); + } + + + + ms = getDeltaTimeMicroseconds(); + + glFlush(); + + glutSwapBuffers(); + +} + + + +void ParticlesDemo::displayCallback(void) { + + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + renderme(); + + //optional but useful: debug drawing to detect problems + if (m_dynamicsWorld) + m_dynamicsWorld->debugDrawWorld(); + + //if (m_dialogDynamicsWorld) + // m_dialogDynamicsWorld->draw(gTimeStep); + + glFlush(); + glutSwapBuffers(); +} + +class btNullBroadphase : public btBroadphaseInterface +{ +public: + btNullBroadphase() + { + } + virtual ~btNullBroadphase() + { + } + virtual btBroadphaseProxy* createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr, short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) + { + return NULL; + } + virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher) + { + } + virtual void setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher) + { + } + virtual void getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const + { + } + virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0)) + { + } + virtual void calculateOverlappingPairs(btDispatcher* dispatcher) + { + } + virtual btOverlappingPairCache* getOverlappingPairCache() + { + return NULL; + } + virtual const btOverlappingPairCache* getOverlappingPairCache() const + { + return NULL; + } + virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const + { + } + virtual void resetPool(btDispatcher* dispatcher) + { + } + virtual void printStats() + { + } + virtual void aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback) + { + } +}; + + + +void ParticlesDemo::initPhysics() +{ + + setTexturing(false); + setShadows(false); + +// setCameraDistance(80.f); + setCameraDistance(3.0f); +// m_cameraTargetPosition.setValue(50, 10, 0); + m_cameraTargetPosition.setValue(0, 0, 0); +// m_azi = btScalar(0.f); +// m_ele = btScalar(0.f); + m_azi = btScalar(45.f); + m_ele = btScalar(30.f); + setFrustumZPlanes(0.1f, 10.f); + + ///collision configuration contains default setup for memory, collision setup + + btDefaultCollisionConstructionInfo dci; + dci.m_defaultMaxPersistentManifoldPoolSize=50000; + dci.m_defaultMaxCollisionAlgorithmPoolSize=50000; + + m_collisionConfiguration = new btDefaultCollisionConfiguration(dci); + + ///use the default collision dispatcher. For parallel processing you can use a diffent dispatcher (see Extras/BulletMultiThreaded) + m_dispatcher = new btCollisionDispatcher(m_collisionConfiguration); + + m_pairCache = new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16))btHashedOverlappingPairCache(); + + +// m_broadphase = new btDbvtBroadphase(m_pairCache); + m_broadphase = new btNullBroadphase(); + + ///the default constraint solver + m_solver = new btSequentialImpulseConstraintSolver(); + + m_pWorld = new btParticlesDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration, 65536); + + m_dialogDynamicsWorld = new GL_DialogDynamicsWorld(); + GL_DialogWindow* settings = m_dialogDynamicsWorld->createDialog(50,0,280,280,"CPU fallback"); + + m_pWorld->m_useCpuControls[0] = 0; + GL_ToggleControl* ctrl = 0; + m_pWorld->m_useCpuControls[SIMSTAGE_INTEGRATE_MOTION] = m_dialogDynamicsWorld->createToggle(settings,"Integrate Motion"); + m_pWorld->m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID] = m_dialogDynamicsWorld->createToggle(settings,"Compute Cell ID"); + m_pWorld->m_useCpuControls[SIMSTAGE_SORT_CELL_ID] = m_dialogDynamicsWorld->createToggle(settings,"Sort Cell ID"); + m_pWorld->m_useCpuControls[SIMSTAGE_FIND_CELL_START] = m_dialogDynamicsWorld->createToggle(settings,"Find Cell Start"); + m_pWorld->m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES] = m_dialogDynamicsWorld->createToggle(settings,"Collide Particles"); + + + for(int i = 1; i < SIMSTAGE_TOTAL; i++) + { + m_pWorld->m_useCpuControls[i]->m_active = false; + } +#if defined(CL_PLATFORM_MINI_CL) + // these kernels use barrier() + m_pWorld->m_useCpuControls[SIMSTAGE_SORT_CELL_ID]->m_active = true; + m_pWorld->m_useCpuControls[SIMSTAGE_FIND_CELL_START]->m_active = true; +#endif + +#if defined(CL_PLATFORM_AMD) + // these kernels use barrier() + m_pWorld->m_useCpuControls[SIMSTAGE_SORT_CELL_ID]->m_active = true; + m_pWorld->m_useCpuControls[SIMSTAGE_FIND_CELL_START]->m_active = true; +#endif + + + m_dynamicsWorld = m_pWorld; + + m_pWorld->getSimulationIslandManager()->setSplitIslands(true); + m_pWorld->setGravity(btVector3(0,-10.,0)); + m_pWorld->getSolverInfo().m_numIterations = 4; + + { +// btCollisionShape* colShape = new btSphereShape(btScalar(1.0f)); + btCollisionShape* colShape = new btSphereShape(DEF_PARTICLE_RADIUS); + m_collisionShapes.push_back(colShape); + btTransform startTransform; + startTransform.setIdentity(); + btScalar mass(1.f); + btVector3 localInertia(0,0,0); + colShape->calculateLocalInertia(mass,localInertia); + float start_x = START_POS_X - ARRAY_SIZE_X * DIST * btScalar(0.5f); + float start_y = START_POS_Y - ARRAY_SIZE_Y * DIST * btScalar(0.5f); + float start_z = START_POS_Z - ARRAY_SIZE_Z * DIST * btScalar(0.5f); + startTransform.setOrigin(btVector3(start_x, start_y, start_z)); + btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,0,colShape,localInertia); + rbInfo.m_startWorldTransform = startTransform; + btRigidBody* body = new btRigidBody(rbInfo); + m_pWorld->addRigidBody(body); + init_scene_directly(); + } + clientResetScene(); + m_pWorld->initDeviceData(); +} + +static float frand(void) { return 2.0f * (float)rand()/(float)RAND_MAX - 1.0f; } + +void ParticlesDemo::init_scene_directly() +{ + float start_x = START_POS_X - ARRAY_SIZE_X * DIST * btScalar(0.5f); + float start_y = START_POS_Y - ARRAY_SIZE_Y * DIST * btScalar(0.5f); + float start_z = START_POS_Z - ARRAY_SIZE_Z * DIST * btScalar(0.5f); + int total = ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z; + m_pWorld->m_hPos.resize(total); + m_pWorld->m_hVel.resize(total); + total = 0; + for (int k=0;km_hVel[total] = btVector3(0., 0., 0.); + btVector3 jitter = 0.01f * 0.03f * btVector3(frand(), frand(), frand()); + m_pWorld->m_hPos[total] = btVector3(DIST*i + start_x, DIST*k + start_y, DIST*j + start_z) + jitter; + total++; + } + } + } + m_pWorld->m_numParticles = total; +} + + +void ParticlesDemo::clientResetScene() +{ + static bool bFirstCall = true; + DemoApplication::clientResetScene(); + init_scene_directly(); + if(bFirstCall) + { + bFirstCall = false; + } + else + { + m_pWorld->grabSimulationData(); + } +} + + +void ParticlesDemo::exitPhysics() +{ + delete m_dialogDynamicsWorld; + m_dialogDynamicsWorld = 0; + + //cleanup in the reverse order of creation/initialization + int i; + + //remove the rigidbodies from the dynamics world and delete them + for (i=m_pWorld->getNumCollisionObjects()-1; i>=0 ;i--) + { + btCollisionObject* obj = m_pWorld->getCollisionObjectArray()[i]; + btRigidBody* body = btRigidBody::upcast(obj); + if (body && body->getMotionState()) + { + delete body->getMotionState(); + } + m_pWorld->removeCollisionObject( obj ); + delete obj; + } + //delete collision shapes + for (int j=0;j m_glutScreenHeight) ? m_glutScreenHeight : m_glutScreenWidth; + glUniform1f( glGetUniformLocation(m_shaderProgram, "pointScale"), dist ); +// glUniform1f( glGetUniformLocation(m_shaderProgram, "pointRadius"), 0.5f ); + int numParticles = m_pWorld->getNumParticles(); + int col_vbo = m_pWorld->m_colVbo; + int curr_vbo = m_pWorld->m_vbo; + float sphere_rad = m_pWorld->m_particleRad; + + glUniform1f( glGetUniformLocation(m_shaderProgram, "pointRadius"), sphere_rad ); + glColor3f(1, 1, 1); + + // render from the vbo + glBindBuffer(GL_ARRAY_BUFFER, curr_vbo); + glVertexPointer(4, GL_FLOAT, 0, 0); + glEnableClientState(GL_VERTEX_ARRAY); + if(col_vbo) + { + glBindBufferARB(GL_ARRAY_BUFFER_ARB, col_vbo); + glColorPointer(4, GL_FLOAT, 0, 0); + glEnableClientState(GL_COLOR_ARRAY); + } + glDrawArrays(GL_POINTS, 0, numParticles); + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState(GL_COLOR_ARRAY); + glUseProgram(0); + glDisable(GL_POINT_SPRITE_ARB); + glBindBufferARB(GL_ARRAY_BUFFER,0); + if(m_drawGridMode) + { + btVector3& wmin = m_pWorld->m_worldMin; + btVector3& wmax = m_pWorld->m_worldMax; + glBegin(GL_LINE_LOOP); + glVertex3f(wmin[0], wmin[1], wmin[2]); + glVertex3f(wmin[0], wmax[1], wmin[2]); + glVertex3f(wmax[0], wmax[1], wmin[2]); + glVertex3f(wmax[0], wmin[1], wmin[2]); + glVertex3f(wmax[0], wmin[1], wmax[2]); + glVertex3f(wmax[0], wmax[1], wmax[2]); + glVertex3f(wmin[0], wmax[1], wmax[2]); + glVertex3f(wmin[0], wmin[1], wmax[2]); + glEnd(); + glBegin(GL_LINES); + glVertex3f(wmin[0], wmin[1], wmin[2]); + glVertex3f(wmax[0], wmin[1], wmin[2]); + glVertex3f(wmin[0], wmin[1], wmax[2]); + glVertex3f(wmax[0], wmin[1], wmax[2]); + glVertex3f(wmin[0], wmax[1], wmin[2]); + glVertex3f(wmin[0], wmax[1], wmax[2]); + glVertex3f(wmax[0], wmax[1], wmin[2]); + glVertex3f(wmax[0], wmax[1], wmax[2]); + glEnd(); + if(m_drawGridMode == 2) + { + int szx = m_pWorld->m_simParams.m_gridSize[0]; + int szy = m_pWorld->m_simParams.m_gridSize[1]; + glBegin(GL_LINES); + for(int i = 1; i < (szx-1); i++) + { + float wgt = (float)i / (float)(szx-1); + btVector3 vtx = wmax * wgt + wmin * (1.0f - wgt); + glVertex3f(vtx[0], wmin[1], wmin[2]); + glVertex3f(vtx[0], wmax[1], wmin[2]); + } + for(int i = 1; i < (szy-1); i++) + { + float wgt = (float)i / (float)(szy-1); + btVector3 vtx = wmax * wgt + wmin * (1.0f - wgt); + glVertex3f(wmin[0], vtx[1], wmin[2]); + glVertex3f(wmax[0], vtx[1], wmin[2]); + } + glEnd(); + } + } + + if ((m_debugMode & btIDebugDraw::DBG_NoHelpText)==0) + { + setOrthographicProjection(); + int xOffset = 10.f; + int yStart = 20.f; + int yIncr = 20.f; + showProfileInfo(xOffset, yStart, yIncr); + outputDebugInfo(xOffset, yStart, yIncr); + resetPerspectiveProjection(); + } +} + + + +void ParticlesDemo::outputDebugInfo(int & xOffset,int & yStart, int yIncr) +{ + char buf[124]; + glDisable(GL_LIGHTING); + glColor3f(0, 0, 0); + + sprintf(buf,"mouse move+buttons to interact"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"space to reset"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"cursor keys and z,x to navigate"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"i to toggle simulation, s single step"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"q to quit"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"h to toggle help text"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + + sprintf(buf,"p to toggle profiling (+results to file)"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + sprintf(buf,"j to toggle between demos (integration/OECake2D/OECake3D)"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + { + sprintf(buf,"G to draw broadphase grid"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + sprintf(buf,"D and U to toggle between GPU and CPU"); + GLDebugDrawString(xOffset,yStart,buf); + yStart += yIncr; + + } + +} + + +GLuint _compileProgram(const char *vsource, const char *fsource) +{ + GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER); + GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER); + + glShaderSource(vertexShader, 1, &vsource, 0); + glShaderSource(fragmentShader, 1, &fsource, 0); + + glCompileShader(vertexShader); + glCompileShader(fragmentShader); + + GLuint program = glCreateProgram(); + + glAttachShader(program, vertexShader); + glAttachShader(program, fragmentShader); + + glLinkProgram(program); + + // check if program linked + GLint success = 0; + glGetProgramiv(program, GL_LINK_STATUS, &success); + + if (!success) { + char temp[256]; + glGetProgramInfoLog(program, 256, 0, temp); + printf("Failed to link program:\n%s\n", temp); + glDeleteProgram(program); + program = 0; + } + return program; +} + + +void ParticlesDemo::myinit() +{ + DemoApplication::myinit(); +#ifndef __APPLE__ + glewInit(); + if (!glewIsSupported("GL_VERSION_2_0 GL_VERSION_1_5 GL_ARB_multitexture GL_ARB_vertex_buffer_object")) { + fprintf(stderr, "Required OpenGL extensions missing."); + exit(-1); + } +#endif //__APPLE__ + + m_shaderProgram = _compileProgram(vertexShader, spherePixelShader); + m_pWorld->initCLKernels(m_argc, m_argv); +} + + + + + + +void ParticlesDemo::mouseFunc(int button, int state, int x, int y) +{ + + if (!m_dialogDynamicsWorld->mouseFunc(button,state,x,y)) + { + DemoApplication::mouseFunc(button,state,x,y); + } +} + +void ParticlesDemo::mouseMotionFunc(int x,int y) +{ + m_dialogDynamicsWorld->mouseMotionFunc(x,y); + DemoApplication::mouseMotionFunc(x,y); +} + + + +void ParticlesDemo::reshape(int w, int h) +{ + if (m_dialogDynamicsWorld) + m_dialogDynamicsWorld->setScreenSize(w,h); + GlutDemoApplication::reshape(w,h); +} + diff --git a/Demos/ParticlesOpenCL/ParticlesDemo.h b/Demos/ParticlesOpenCL/ParticlesDemo.h new file mode 100644 index 000000000..76c165b83 --- /dev/null +++ b/Demos/ParticlesOpenCL/ParticlesDemo.h @@ -0,0 +1,128 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef SPHERES_GRID_DEMO_H +#define SPHERES_GRID_DEMO_H + +#define USE_BULLET_BODIES 0 + +///enable one or both options (NVidia profiler doesn't support multi-OpenCL context) +#define INTEGRATION_DEMO 1 +#define SPHERES_DEMO 1 + +#include "DemoApplication.h" +#include "LinearMath/btAlignedObjectArray.h" +#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h" + +#include "btParticlesDynamicsWorld.h" + +class btBroadphaseInterface; +class btCollisionShape; +class btOverlappingPairCache; +class btCollisionDispatcher; +class btConstraintSolver; +struct btCollisionAlgorithmCreateFunc; +class btDefaultCollisionConfiguration; +#include "../OpenGL/GlutDemoApplication.h" + + +///BasicDemo is good starting point for learning the code base and porting. +class ParticlesDemo : public GlutDemoApplication +{ + + class GL_DialogDynamicsWorld* m_dialogDynamicsWorld; + //keep the collision shapes, for deletion/cleanup + btAlignedObjectArray m_collisionShapes; + + btBroadphaseInterface* m_broadphase; + + btCollisionDispatcher* m_dispatcher; + + btConstraintSolver* m_solver; + + btDefaultCollisionConfiguration* m_collisionConfiguration; + + btOverlappingPairCache* m_pairCache; + + int m_mouseButtons; + int m_mouseOldX; + int m_mouseOldY; + + int m_argc; + char** m_argv; + + public: + + int m_drawGridMode; + int m_GpuCpuTogglePtr; + + btParticlesDynamicsWorld* m_pWorld; + + + // shader + GLuint m_shaderProgram; + + ParticlesDemo(int argc, char** argv) + { + m_argc = argc; + m_argv = argv; + m_drawGridMode = 0; + m_GpuCpuTogglePtr = SIMSTAGE_NONE; + m_dialogDynamicsWorld = 0; + } + virtual ~ParticlesDemo() + { + exitPhysics(); + } + void initPhysics(); + + void exitPhysics(); + + virtual void clientMoveAndDisplay(); + + virtual void displayCallback(); + + virtual void keyboardCallback(unsigned char key, int x, int y); + + virtual void clientResetScene(); + + virtual void mouseFunc(int button, int state, int x, int y); + virtual void mouseMotionFunc(int x,int y); + virtual void reshape(int w, int h); +/* + static DemoApplication* Create() + { + SpheresGridDemo* demo = new SpheresGridDemo; + demo->myinit(); + demo->initPhysics(); + demo->m_mouseButtons = 0; + demo->m_mouseOldX = 0; + demo->m_mouseOldY = 0; + return demo; + } +*/ + + void outputDebugInfo(int & xOffset,int & yStart, int yIncr); + + + virtual void renderme(); + virtual void myinit(); + void init_scene_directly(); + +}; + + +#endif // PARTICLES_DEMO_H + diff --git a/Demos/ParticlesOpenCL/ParticlesOCL.cl b/Demos/ParticlesOpenCL/ParticlesOCL.cl new file mode 100644 index 000000000..16407cedc --- /dev/null +++ b/Demos/ParticlesOpenCL/ParticlesOCL.cl @@ -0,0 +1,468 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#if defined(GUID_ARG) + extern int gMiniCLNumOutstandingTasks; +#else + #define GUID_ARG + #define GUID_ARG_VAL +#endif + + +int4 getGridPos(float4 worldPos, __global float4* pParams) +{ + int4 gridPos; + gridPos.x = (int)floor((worldPos.x - pParams[1].x) / pParams[3].x); + gridPos.y = (int)floor((worldPos.y - pParams[1].y) / pParams[3].y); + gridPos.z = (int)floor((worldPos.z - pParams[1].z) / pParams[3].z); + return gridPos; +} + +unsigned int getPosHash(int4 gridPos, __global float4* pParams) +{ + int4 gridDim = *((__global int4*)(pParams + 4)); + if(gridPos.x < 0) gridPos.x = 0; + if(gridPos.x >= gridDim.x) gridPos.x = gridDim.x - 1; + if(gridPos.y < 0) gridPos.y = 0; + if(gridPos.y >= gridDim.y) gridPos.y = gridDim.y - 1; + if(gridPos.z < 0) gridPos.z = 0; + if(gridPos.z >= gridDim.z) gridPos.z = gridDim.z - 1; + unsigned int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; + return hash; +} + + +__kernel void kComputeCellId( int numParticles, + __global float4* pPos, + __global int2* pPosHash, + __global float4* pParams GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numParticles) + { + return; + } + float4 pos = pPos[index]; + int4 gridPos = getGridPos(pos, pParams); + unsigned int hash = getPosHash(gridPos, pParams); + pPosHash[index].x = hash; + pPosHash[index].y = index; +} + +__kernel void kClearCellStart( int numCells, + __global int* pCellStart GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numCells) + { + return; + } + pCellStart[index] = -1; +} + +__kernel void kFindCellStart( int numParticles, + __global int2* pHash, + __global int* cellStart, + __global float4* pPos, + __global float4* pVel, + __global float4* pSortedPos, + __global float4* pSortedVel GUID_ARG) +{ + int index = get_global_id(0); + __local int sharedHash[513]; + int2 sortedData; + + if(index < numParticles) + { + + sortedData = pHash[index]; + // Load hash data into shared memory so that we can look + // at neighboring body's hash value without loading + // two hash values per thread + sharedHash[get_local_id(0) + 1] = sortedData.x; + if((index > 0) && (get_local_id(0) == 0)) + { + // first thread in block must load neighbor body hash + sharedHash[0] = pHash[index-1].x; + } + + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(index < numParticles) + { + if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) + { + cellStart[sortedData.x] = index; + } + int unsortedIndex = sortedData.y; + float4 pos = pPos[unsortedIndex]; + float4 vel = pVel[unsortedIndex]; + pSortedPos[index] = pos; + pSortedVel[index] = vel; + } +} + +__kernel void kIntegrateMotion( int numParticles, + __global float4* pPos, + __global float4* pVel, + __global float4* pParams, + float timeStep GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numParticles) + { + return; + } + float4 pos = pPos[index]; + float4 vel = pVel[index]; + pos.w = 1.0f; + vel.w = 0.0f; + // apply gravity + float4 gravity = *((__global float4*)(pParams + 0)); + float particleRad = pParams[5].x; + float globalDamping = pParams[5].y; + float boundaryDamping = pParams[5].z; + vel += gravity * timeStep; + vel *= globalDamping; + // integrate position + pos += vel * timeStep; + // collide with world boundaries + float4 worldMin = *((__global float4*)(pParams + 1)); + float4 worldMax = *((__global float4*)(pParams + 2)); + if(pos.x < (worldMin.x + particleRad)) + { + pos.x = worldMin.x + particleRad; + vel.x *= boundaryDamping; + } + if(pos.x > (worldMax.x - particleRad)) + { + pos.x = worldMax.x - particleRad; + vel.x *= boundaryDamping; + } + if(pos.y < (worldMin.y + particleRad)) + { + pos.y = worldMin.y + particleRad; + vel.y *= boundaryDamping; + } + if(pos.y > (worldMax.y - particleRad)) + { + pos.y = worldMax.y - particleRad; + vel.y *= boundaryDamping; + } + if(pos.z < (worldMin.z + particleRad)) + { + pos.z = worldMin.z + particleRad; + vel.z *= boundaryDamping; + } + if(pos.z > (worldMax.z - particleRad)) + { + pos.z = worldMax.z - particleRad; + vel.z *= boundaryDamping; + } + // write back position and velocity + pPos[index] = pos; + pVel[index] = vel; +} + + +float4 collideTwoParticles( + float4 posA, + float4 posB, + float4 velA, + float4 velB, + float radiusA, + float radiusB, + float spring, + float damping, + float shear, + float attraction +) +{ + //Calculate relative position + float4 relPos = posB - posA; relPos.w = 0.f; + float dist = sqrt(relPos.x * relPos.x + relPos.y * relPos.y + relPos.z * relPos.z); + float collideDist = radiusA + radiusB; + + float4 force = (float4)0.f; + if(dist < collideDist){ + float4 norm = relPos * (1.f / dist); norm.w = 0.f; + + //Relative velocity + float4 relVel = velB - velA; relVel.w = 0.f; + + //Relative tangential velocity + float relVelDotNorm = relVel.x * norm.x + relVel.y * norm.y + relVel.z * norm.z; + float4 tanVel = relVel - norm * relVelDotNorm; tanVel.w = 0.f; + + //Spring force (potential) + float springFactor = -spring * (collideDist - dist); + force = springFactor * norm + damping * relVel + shear * tanVel + attraction * relPos; + force.w = 0.f; + } + return force; +} + + + +__kernel void kCollideParticles(int numParticles, + __global float4* pVel, //output: new velocity + __global const float4* pSortedPos, //input: reordered positions + __global const float4* pSortedVel, //input: reordered velocities + __global const int2 *pPosHash, //input: reordered particle indices + __global const int *pCellStart, //input: cell boundaries + __global float4* pParams GUID_ARG) +{ + int index = get_global_id(0); + if(index >= numParticles) + { + return; + } + + float4 posA = pSortedPos[index]; + float4 velA = pSortedVel[index]; + float4 force = (float4)0.f; + float particleRad = pParams[5].x; + float collisionDamping = pParams[5].w; + float spring = pParams[6].x; + float shear = pParams[6].y; + float attraction = pParams[6].z; + int unsortedIndex = pPosHash[index].y; + + //Get address in grid + int4 gridPosA = getGridPos(posA, pParams); + + //Accumulate surrounding cells + int4 gridPosB; + for(int z = -1; z <= 1; z++) + { + gridPosB.z = gridPosA.z + z; + for(int y = -1; y <= 1; y++) + { + gridPosB.y = gridPosA.y + y; + for(int x = -1; x <= 1; x++) + { + gridPosB.x = gridPosA.x + x; + //Get start particle index for this cell + uint hashB = getPosHash(gridPosB, pParams); + int startI = pCellStart[hashB]; + //Skip empty cell + if(startI < 0) + { + continue; + } + //Iterate over particles in this cell + int endI = startI + 8; + if(endI >= numParticles) endI = numParticles - 1; + for(int j = startI; j < endI; j++) + { + uint hashC = pPosHash[j].x; + if(hashC != hashB) + { + break; + } + if(j == index) + { + continue; + } + float4 posB = pSortedPos[j]; + float4 velB = pSortedVel[j]; + //Collide two spheres + force += collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, + spring, collisionDamping, shear, attraction); + } + } + } + } + //Write new velocity back to original unsorted location + pVel[unsortedIndex] = velA + force; +} + + + + + + +/* + * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. + * Any use, reproduction, disclosure, or distribution of this software + * and related documentation without an express license agreement from + * NVIDIA Corporation is strictly prohibited. + * + * Please refer to the applicable NVIDIA end user license agreement (EULA) + * associated with this source code for terms and conditions that govern + * your use of this NVIDIA software. + * + */ + +//#define LOCAL_SIZE_LIMIT 1024U +#define LOCAL_SIZE_MAX 1024U + +inline void ComparatorPrivate(int2* keyA, int2* keyB, uint dir) +{ + if((keyA[0].x > keyB[0].x) == dir) + { + int2 tmp = *keyA; + *keyA = *keyB; + *keyB = tmp; + } +} + +inline void ComparatorLocal(__local int2* keyA, __local int2* keyB, uint dir) +{ + if((keyA[0].x > keyB[0].x) == dir) + { + int2 tmp = *keyA; + *keyA = *keyB; + *keyB = tmp; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Monolithic bitonic sort kernel for short arrays fitting into local memory +//////////////////////////////////////////////////////////////////////////////// +__kernel void kBitonicSortCellIdLocal(__global int2* pKey, uint arrayLength, uint dir GUID_ARG) +{ + __local int2 l_key[LOCAL_SIZE_MAX]; + int localSizeLimit = get_local_size(0) * 2; + + //Offset to the beginning of subbatch and load data + pKey += get_group_id(0) * localSizeLimit + get_local_id(0); + l_key[get_local_id(0) + 0] = pKey[ 0]; + l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; + + for(uint size = 2; size < arrayLength; size <<= 1) + { + //Bitonic merge + uint ddd = dir ^ ( (get_local_id(0) & (size / 2)) != 0 ); + for(uint stride = size / 2; stride > 0; stride >>= 1) + { + barrier(CLK_LOCAL_MEM_FENCE); + uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); + ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); + } + } + + //ddd == dir for the last bitonic merge step + { + for(uint stride = arrayLength / 2; stride > 0; stride >>= 1) + { + barrier(CLK_LOCAL_MEM_FENCE); + uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); + ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], dir); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + pKey[ 0] = l_key[get_local_id(0) + 0]; + pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; +} + +//////////////////////////////////////////////////////////////////////////////// +// Bitonic sort kernel for large arrays (not fitting into local memory) +//////////////////////////////////////////////////////////////////////////////// +//Bottom-level bitonic sort +//Almost the same as bitonicSortLocal with the only exception +//of even / odd subarrays (of LOCAL_SIZE_LIMIT points) being +//sorted in opposite directions +__kernel void kBitonicSortCellIdLocal1(__global int2* pKey GUID_ARG) +{ + __local int2 l_key[LOCAL_SIZE_MAX]; + uint localSizeLimit = get_local_size(0) * 2; + + //Offset to the beginning of subarray and load data + pKey += get_group_id(0) * localSizeLimit + get_local_id(0); + l_key[get_local_id(0) + 0] = pKey[ 0]; + l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; + + uint comparatorI = get_global_id(0) & ((localSizeLimit / 2) - 1); + + for(uint size = 2; size < localSizeLimit; size <<= 1) + { + //Bitonic merge + uint ddd = (comparatorI & (size / 2)) != 0; + for(uint stride = size / 2; stride > 0; stride >>= 1) + { + barrier(CLK_LOCAL_MEM_FENCE); + uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); + ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); + } + } + + //Odd / even arrays of localSizeLimit elements + //sorted in opposite directions + { + uint ddd = (get_group_id(0) & 1); + for(uint stride = localSizeLimit / 2; stride > 0; stride >>= 1) + { + barrier(CLK_LOCAL_MEM_FENCE); + uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); + ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + pKey[ 0] = l_key[get_local_id(0) + 0]; + pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; +} + +//Bitonic merge iteration for 'stride' >= LOCAL_SIZE_LIMIT +__kernel void kBitonicSortCellIdMergeGlobal(__global int2* pKey, uint arrayLength, uint size, uint stride, uint dir GUID_ARG) +{ + uint global_comparatorI = get_global_id(0); + uint comparatorI = global_comparatorI & (arrayLength / 2 - 1); + + //Bitonic merge + uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 ); + uint pos = 2 * global_comparatorI - (global_comparatorI & (stride - 1)); + + int2 keyA = pKey[pos + 0]; + int2 keyB = pKey[pos + stride]; + + ComparatorPrivate(&keyA, &keyB, ddd); + + pKey[pos + 0] = keyA; + pKey[pos + stride] = keyB; +} + +//Combined bitonic merge steps for +//'size' > LOCAL_SIZE_LIMIT and 'stride' = [1 .. LOCAL_SIZE_LIMIT / 2] +__kernel void kBitonicSortCellIdMergeLocal(__global int2* pKey, uint arrayLength, uint stride, uint size, uint dir GUID_ARG) +{ + __local int2 l_key[LOCAL_SIZE_MAX]; + int localSizeLimit = get_local_size(0) * 2; + + pKey += get_group_id(0) * localSizeLimit + get_local_id(0); + l_key[get_local_id(0) + 0] = pKey[ 0]; + l_key[get_local_id(0) + (localSizeLimit / 2)] = pKey[(localSizeLimit / 2)]; + + //Bitonic merge + uint comparatorI = get_global_id(0) & ((arrayLength / 2) - 1); + uint ddd = dir ^ ( (comparatorI & (size / 2)) != 0 ); + for(; stride > 0; stride >>= 1) + { + barrier(CLK_LOCAL_MEM_FENCE); + uint pos = 2 * get_local_id(0) - (get_local_id(0) & (stride - 1)); + ComparatorLocal(&l_key[pos + 0], &l_key[pos + stride], ddd); + } + + barrier(CLK_LOCAL_MEM_FENCE); + pKey[ 0] = l_key[get_local_id(0) + 0]; + pKey[(localSizeLimit / 2)] = l_key[get_local_id(0) + (localSizeLimit / 2)]; +} + diff --git a/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp new file mode 100644 index 000000000..766e54a9a --- /dev/null +++ b/Demos/ParticlesOpenCL/btParticlesDemoDynamicsWorld.cpp @@ -0,0 +1,1028 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include +#ifdef __APPLE__ +//CL_PLATFORM_MINI_CL could be defined in build system +#else +#include +#ifdef USE_MINICL + +#include //for CL_PLATFORM_MINI_CL definition +#else +#include //for CL_PLATFORM_MINI_CL definition +#endif +#endif //__APPLE__ + + +#include "btOclCommon.h" +#include "btOclUtils.h" + +#include "btBulletDynamicsCommon.h" +#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h" +#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h" +#include "BulletCollision/CollisionShapes/btCollisionShape.h" +#include "BulletDynamics/Dynamics/btRigidBody.h" +#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h" +#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h" +#include "LinearMath/btQuickprof.h" +#include "GlutStuff.h" +#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h" +#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h" + +#include "btParticlesDynamicsWorld.h" +#include "GL_DialogWindow.h" + +btParticlesDynamicsWorld::~btParticlesDynamicsWorld() +{ +} + +static int gStepNum = 0; + +int btParticlesDynamicsWorld::stepSimulation( btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep) +{ + startProfiling(timeStep); + m_timeStep = timeStep; + BT_PROFILE("stepSimulation"); +// printf("Step : %d\n", gStepNum); + { + BT_PROFILE("IntegrateMotion"); + runIntegrateMotionKernel(); + } + { + runComputeCellIdKernel(); + } + { + BT_PROFILE("SortHash"); + runSortHashKernel(); + } + { + BT_PROFILE("FindCellStart"); + runFindCellStartKernel(); + } + { + BT_PROFILE("CollideParticles"); + runCollideParticlesKernel(); + } + gStepNum++; + +#ifndef BT_NO_PROFILE + CProfileManager::Increment_Frame_Counter(); +#endif //BT_NO_PROFILE + return 1; +} + +static unsigned int getMaxPowOf2(unsigned int num) +{ + unsigned int maxPowOf2 = 1; + for(int bit = 1; bit < 32; bit++) + { + if(maxPowOf2 >= num) + { + break; + } + maxPowOf2 <<= 1; + } + return maxPowOf2; +} + + +void btParticlesDynamicsWorld::initDeviceData() +{ + getShapeData(); +} + + + +void btParticlesDynamicsWorld::postInitDeviceData() +{ + m_hashSize = getMaxPowOf2(m_numParticles); + createVBO(); + allocateBuffers(); + adjustGrid(); + grabSimulationData(); +} + + +void btParticlesDynamicsWorld::getShapeData() +{ + int numObjects = getNumCollisionObjects(); + btCollisionObjectArray& collisionObjects = getCollisionObjectArray(); + for(int i = 0; i < numObjects; i++) + { + btCollisionObject* colObj = collisionObjects[i]; + btCollisionShape* pShape = colObj->getCollisionShape(); + int shapeType = pShape->getShapeType(); + if(shapeType == SPHERE_SHAPE_PROXYTYPE) + { + btSphereShape* pSph = (btSphereShape*)pShape; + btScalar sphRad = pSph->getRadius(); + if(!i) + { + m_particleRad = sphRad; + } + else + { + btAssert(m_particleRad == sphRad); + } + } + else + { + btAssert(0); + } + } + printf("Total number of particles : %d\n", m_numParticles); +} + +void btParticlesDynamicsWorld::allocateBuffers() +{ + cl_int ciErrNum; + // positions of spheres + m_hPos.resize(m_numParticles); + m_hVel.resize(m_numParticles); + m_hSortedPos.resize(m_numParticles); + m_hSortedVel.resize(m_numParticles); + m_hPosHash.resize(m_hashSize); + for(int i = 0; i < m_hashSize; i++) { m_hPosHash[i].x = 0x7FFFFFFF; m_hPosHash[i].y = 0; } + unsigned int memSize = sizeof(btVector3) * m_numParticles; + m_dPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + m_dVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + m_dSortedPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + m_dSortedVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = m_hashSize * sizeof(btInt2); + m_dPosHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + // global simulation parameters + memSize = sizeof(btSimParams); + m_dSimParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + +void btParticlesDynamicsWorld::adjustGrid() +{ + //btVector3 wmin( BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT); + //btVector3 wmax(-BT_LARGE_FLOAT, -BT_LARGE_FLOAT, -BT_LARGE_FLOAT); + + btVector3 wmin( BT_LARGE_FLOAT, BT_LARGE_FLOAT, BT_LARGE_FLOAT); + btVector3 wmax(-BT_LARGE_FLOAT, -BT_LARGE_FLOAT, -BT_LARGE_FLOAT); + btVector3 boxDiag(m_particleRad, m_particleRad, m_particleRad); + for(int i = 0; i < m_numParticles; i++) + { + btVector3 pos = m_hPos[i]; + btVector3 boxMin = pos - boxDiag; + btVector3 boxMax = pos + boxDiag; + wmin.setMin(boxMin); + wmax.setMax(boxMax); + } + m_worldMin = wmin; + m_worldMax = wmax; + btVector3 wsize = m_worldMax - m_worldMin; + wsize[3] = 1.0f; + + glBindBufferARB(GL_ARRAY_BUFFER, m_colVbo); + btVector3* color = (btVector3*)glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + for(int i = 0; i < m_numParticles; i++, color++) + { + *color = (m_hPos[i] - m_worldMin) / wsize; + (*color)[3] = 1.f; + } + glUnmapBufferARB(GL_ARRAY_BUFFER); + +/* + wsize[0] *= 0.5f; + wsize[1] *= 0.1f; + wsize[2] *= 0.5f; + m_worldMin -= wsize; + m_worldMax += wsize; +*/ + m_worldMin.setValue(-1.f, -1.f, -1.f); + m_worldMax.setValue( 1.f, 1.f, 1.f); + wsize = m_worldMax - m_worldMin; + + m_cellSize[0] = m_cellSize[1] = m_cellSize[2] = m_particleRad * btScalar(2.f); + + m_simParams.m_worldMin[0] = m_worldMin[0]; + m_simParams.m_worldMin[1] = m_worldMin[1]; + m_simParams.m_worldMin[2] = m_worldMin[2]; + + m_simParams.m_worldMax[0] = m_worldMax[0]; + m_simParams.m_worldMax[1] = m_worldMax[1]; + m_simParams.m_worldMax[2] = m_worldMax[2]; + + m_simParams.m_cellSize[0] = m_cellSize[0]; + m_simParams.m_cellSize[1] = m_cellSize[1]; + m_simParams.m_cellSize[2] = m_cellSize[2]; + + m_simParams.m_gridSize[0] = (int)(wsize[0] / m_cellSize[0]); + m_simParams.m_gridSize[1] = (int)(wsize[1] / m_cellSize[1]); + m_simParams.m_gridSize[2] = (int)(wsize[2] / m_cellSize[2]); + m_numGridCells = m_simParams.m_gridSize[0] * m_simParams.m_gridSize[1] * m_simParams.m_gridSize[2]; + m_hCellStart.resize(m_numGridCells); + unsigned int memSize = sizeof(int) * m_numGridCells; + cl_int ciErrNum; + m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + +} + + +void btParticlesDynamicsWorld::grabSimulationData() +{ +// const btVector3& gravity = getGravity(); + btVector3 gravity(0., -0.06, 0.); + m_simParams.m_gravity[0] = gravity[0]; + m_simParams.m_gravity[1] = gravity[1]; + m_simParams.m_gravity[2] = gravity[2]; + m_simParams.m_particleRad = m_particleRad; + m_simParams.m_globalDamping = 1.0f; + m_simParams.m_boundaryDamping = -0.5f; + +// m_simParams.m_collisionDamping = 0.02f; +// m_simParams.m_spring = 0.5f; +// m_simParams.m_shear = 0.1f; +// m_simParams.m_attraction = 0.0f; + m_simParams.m_collisionDamping = 0.02f; + m_simParams.m_spring = 0.5f; + m_simParams.m_shear = 0.1f; + m_simParams.m_attraction = 0.0f; + + + + // copy data to GPU + cl_int ciErrNum; + unsigned int memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = sizeof(btSimParams); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dSimParams, CL_TRUE, 0, memSize, &m_simParams, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = m_hashSize * sizeof(btInt2); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + + +void btParticlesDynamicsWorld::createVBO() +{ + // create buffer object + glGenBuffers(1, &m_vbo); + glBindBuffer(GL_ARRAY_BUFFER, m_vbo); + // positions of spheres + unsigned int memSize = sizeof(btVector3) * m_numParticles; + glBufferData(GL_ARRAY_BUFFER, memSize, 0, GL_DYNAMIC_DRAW); + // colors + GLuint vbo; + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, memSize, 0, GL_DYNAMIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + m_colVbo = vbo; + // fill color buffer + glBindBufferARB(GL_ARRAY_BUFFER, m_colVbo); + float *data = (float*)glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + float *ptr = data; + for(int i = 0; i < m_numParticles; i++) + { + float t = i / (float)m_numParticles; + ptr[0] = 0.f; + ptr[1] = 1.f; + ptr[2] = 0.f; + ptr+=3; + *ptr++ = 1.0f; + } + glUnmapBufferARB(GL_ARRAY_BUFFER); + glBindBufferARB(GL_ARRAY_BUFFER, 0); +} + + + +void btParticlesDynamicsWorld::initCLKernels(int argc, char** argv) +{ + cl_int ciErrNum; + + if (!m_cxMainContext) + { +// m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); + m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext); + + btOclPrintDevInfo(m_cdDevice); + + // create a command-queue + m_cqCommandQue = clCreateCommandQueue(m_cxMainContext, m_cdDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + // Program Setup + size_t program_length; + char* fileName = "ParticlesOCL.cl"; + FILE * fp = fopen(fileName, "rb"); + char newFileName[512]; + + if (fp == NULL) + { + sprintf(newFileName,"..//%s",fileName); + fp = fopen(newFileName, "rb"); + if (fp) + fileName = newFileName; + } + + if (fp == NULL) + { + sprintf(newFileName,"Demos//ParticlesOpenCL//%s",fileName); + fp = fopen(newFileName, "rb"); + if (fp) + fileName = newFileName; + } + + if (fp == NULL) + { + sprintf(newFileName,"..//..//..//..//..//Demos//ParticlesOpenCL//%s",fileName); + fp = fopen(newFileName, "rb"); + if (fp) + fileName = newFileName; + else + { + printf("cannot find %s\n",newFileName); + exit(0); + } + } + +// char *source = oclLoadProgSource(".//Demos//SpheresGrid//SpheresGrid.cl", "", &program_length); + //char *source = btOclLoadProgSource(".//Demos//SpheresOpenCL//Shared//SpheresGrid.cl", "", &program_length); + + char *source = btOclLoadProgSource(fileName, "", &program_length); + if(source == NULL) + { + printf("ERROR : OpenCL can't load file %s\n", fileName); + } +// oclCHECKERROR (source == NULL, oclFALSE); + btAssert(source != NULL); + + // create the program + printf("OpenCL compiles %s ...", fileName); + m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&source, &program_length, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + free(source); + + // build the program + ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, "-I .", NULL, NULL); + if(ciErrNum != CL_SUCCESS) + { + // write out standard error +// oclLog(LOGBOTH | ERRORMSG, (double)ciErrNum, STDERROR); + // write out the build log and ptx, then exit + char cBuildLog[10240]; +// char* cPtx; +// size_t szPtxLength; + clGetProgramBuildInfo(m_cpProgram, btOclGetFirstDev(m_cxMainContext), CL_PROGRAM_BUILD_LOG, + sizeof(cBuildLog), cBuildLog, NULL ); +// oclGetProgBinary(m_cpProgram, oclGetFirstDev(m_cxMainContext), &cPtx, &szPtxLength); +// oclLog(LOGBOTH | CLOSELOG, 0.0, "\n\nLog:\n%s\n\n\n\n\nPtx:\n%s\n\n\n", cBuildLog, cPtx); + printf("\n\n%s\n\n\n", cBuildLog); + printf("Press ENTER key to terminate the program\n"); + getchar(); + exit(-1); + } + printf("OK\n"); + + // create the kernels + + postInitDeviceData(); + + initKernel(PARTICLES_KERNEL_COMPUTE_CELL_ID, "kComputeCellId"); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 1, sizeof(cl_mem), (void*) &m_dPos); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 2, sizeof(cl_mem), (void*) &m_dPosHash); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 3, sizeof(cl_mem), (void*) &m_dSimParams); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + initKernel(PARTICLES_KERNEL_INTEGRATE_MOTION, "kIntegrateMotion"); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 1, sizeof(cl_mem), (void *) &m_dPos); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 2, sizeof(cl_mem), (void *) &m_dVel); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 3, sizeof(cl_mem), (void *) &m_dSimParams); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + + initKernel(PARTICLES_KERNEL_CLEAR_CELL_START, "kClearCellStart"); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_CLEAR_CELL_START].m_kernel, 0, sizeof(int), (void *) &m_numGridCells); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_CLEAR_CELL_START].m_kernel, 1, sizeof(cl_mem), (void*) &m_dCellStart); + + initKernel(PARTICLES_KERNEL_FIND_CELL_START, "kFindCellStart"); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 1, sizeof(cl_mem), (void*) &m_dPosHash); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 2, sizeof(cl_mem), (void*) &m_dCellStart); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 3, sizeof(cl_mem), (void*) &m_dPos); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 4, sizeof(cl_mem), (void*) &m_dVel); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 5, sizeof(cl_mem), (void*) &m_dSortedPos); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 6, sizeof(cl_mem), (void*) &m_dSortedVel); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + initKernel(PARTICLES_KERNEL_COLLIDE_PARTICLES, "kCollideParticles"); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 1, sizeof(cl_mem), (void*) &m_dVel); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 2, sizeof(cl_mem), (void*) &m_dSortedPos); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 3, sizeof(cl_mem), (void*) &m_dSortedVel); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 4, sizeof(cl_mem), (void*) &m_dPosHash); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 5, sizeof(cl_mem), (void*) &m_dCellStart); + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 6, sizeof(cl_mem), (void*) &m_dSimParams); + + initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL, "kBitonicSortCellIdLocal"); + initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL_1, "kBitonicSortCellIdLocal1"); + initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL, "kBitonicSortCellIdMergeGlobal"); + initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL, "kBitonicSortCellIdMergeLocal"); +} + +static btInt4 cpu_getGridPos(btVector3& worldPos, btSimParams* pParams) +{ + btInt4 gridPos; + gridPos.x = (int)floor((worldPos[0] - pParams->m_worldMin[0]) / pParams->m_cellSize[0]); + gridPos.y = (int)floor((worldPos[1] - pParams->m_worldMin[1]) / pParams->m_cellSize[1]); + gridPos.z = (int)floor((worldPos[2] - pParams->m_worldMin[2]) / pParams->m_cellSize[2]); + return gridPos; +} + +static unsigned int cpu_getPosHash(btInt4& gridPos, btSimParams* pParams) +{ + btInt4 gridDim = *((btInt4*)(pParams->m_gridSize)); + if(gridPos.x < 0) gridPos.x = 0; + if(gridPos.x >= gridDim.x) gridPos.x = gridDim.x - 1; + if(gridPos.y < 0) gridPos.y = 0; + if(gridPos.y >= gridDim.y) gridPos.y = gridDim.y - 1; + if(gridPos.z < 0) gridPos.z = 0; + if(gridPos.z >= gridDim.z) gridPos.z = gridDim.z - 1; + unsigned int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; + return hash; +} + + + + +void btParticlesDynamicsWorld::runComputeCellIdKernel() +{ + cl_int ciErrNum; +#if 0 + if(m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID]->m_active) + { // CPU version + unsigned int memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + for(int index = 0; index < m_numParticles; index++) + { + btVector3 pos = m_hPos[index]; + btInt4 gridPos = cpu_getGridPos(pos, &m_simParams); + unsigned int hash = cpu_getPosHash(gridPos, &m_simParams); + m_hPosHash[index].x = hash; + m_hPosHash[index].y = index; + } + memSize = sizeof(btInt2) * m_numParticles; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else +#endif + { + BT_PROFILE("ComputeCellId"); + runKernelWithWorkgroupSize(PARTICLES_KERNEL_COMPUTE_CELL_ID, m_numParticles); + ciErrNum = clFinish(m_cqCommandQue); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +/* + // check + int memSize = sizeof(btInt2) * m_hashSize; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + memSize = sizeof(float) * 4 * m_numParticles; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +*/ + + { + BT_PROFILE("Copy VBO"); + // Explicit Copy (until OpenGL interop will work) + // map the PBO to copy data from the CL buffer via host + glBindBufferARB(GL_ARRAY_BUFFER, m_vbo); + // map the buffer object into client's memory + void* ptr = glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, sizeof(float) * 4 * m_numParticles, ptr, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + glUnmapBufferARB(GL_ARRAY_BUFFER); + glBindBufferARB(GL_ARRAY_BUFFER,0); + } +} + + + +static btVector3 cpu_collideTwoParticles( + btVector3& posA, + btVector3& posB, + btVector3& velA, + btVector3& velB, + float radiusA, + float radiusB, + float spring, + float damping, + float shear, + float attraction +) +{ + //Calculate relative position + btVector3 relPos = posB - posA; relPos[3] = 0.f; + float dist = sqrt(relPos[0] * relPos[0] + relPos[1] * relPos[1] + relPos[2] * relPos[2]); + float collideDist = radiusA + radiusB; + + btVector3 force = btVector3(0, 0, 0); + if(dist < collideDist) + { + btVector3 norm = relPos / dist; + + //Relative velocity + btVector3 relVel = velB - velA; relVel[3] = 0.f;; + + //Relative tangential velocity + float relVelDotNorm = relVel.dot(norm); + btVector3 tanVel = relVel - relVelDotNorm * norm; + //Spring force (potential) + float springFactor = -spring * (collideDist - dist); + force = springFactor * norm + damping * relVel + shear * tanVel + attraction * relPos; + } + return force; +} + + +void btParticlesDynamicsWorld::runCollideParticlesKernel() +{ + cl_int ciErrNum; + if(m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES]->m_active) + { // CPU version + int memSize = sizeof(btVector3) * m_numParticles; + { + BT_PROFILE("Copy from GPU"); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = sizeof(btInt2) * m_numParticles; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + memSize = m_numGridCells * sizeof(int); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + for(int index = 0; index < m_numParticles; index++) + { + btVector3 posA = m_hSortedPos[index]; + btVector3 velA = m_hSortedVel[index]; + btVector3 force = btVector3(0, 0, 0); + float particleRad = m_simParams.m_particleRad; + float collisionDamping = m_simParams.m_collisionDamping; + float spring = m_simParams.m_spring; + float shear = m_simParams.m_shear; + float attraction = m_simParams.m_attraction; + int unsortedIndex = m_hPosHash[index].y; + //Get address in grid + btInt4 gridPosA = cpu_getGridPos(posA, &m_simParams); + //Accumulate surrounding cells + btInt4 gridPosB; + for(int z = -1; z <= 1; z++) + { + gridPosB.z = gridPosA.z + z; + for(int y = -1; y <= 1; y++) + { + gridPosB.y = gridPosA.y + y; + for(int x = -1; x <= 1; x++) + { + gridPosB.x = gridPosA.x + x; + //Get start particle index for this cell + unsigned int hashB = cpu_getPosHash(gridPosB, &m_simParams); + int startI = m_hCellStart[hashB]; + //Skip empty cell + if(startI < 0) + { + continue; + } + //Iterate over particles in this cell + int endI = startI + 8; + for(int j = startI; j < endI; j++) + { + unsigned int hashC = m_hPosHash[j].x; + if(hashC != hashB) + { + break; + } + if(j == index) + { + continue; + } + btVector3 posB = m_hSortedPos[j]; + btVector3 velB = m_hSortedVel[j]; + //Collide two spheres + force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, + spring, collisionDamping, shear, attraction); + } + } + } + } + //Write new velocity back to original unsorted location + m_hVel[unsortedIndex] = velA + force; + } + memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { + runKernelWithWorkgroupSize(PARTICLES_KERNEL_COLLIDE_PARTICLES, m_numParticles); + cl_int ciErrNum = clFinish(m_cqCommandQue); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +} + + +void btParticlesDynamicsWorld::runIntegrateMotionKernel() +{ + cl_int ciErrNum; + if(m_useCpuControls[SIMSTAGE_INTEGRATE_MOTION]->m_active) + { + // CPU version +#if 1 + // read from GPU + unsigned int memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + for(int index = 0; index < m_numParticles; index++) + { + btVector3 pos = m_hPos[index]; + btVector3 vel = m_hVel[index]; + pos[3] = 1.0f; + vel[3] = 0.0f; + // apply gravity + btVector3 gravity = *((btVector3*)(m_simParams.m_gravity)); + float particleRad = m_simParams.m_particleRad; + float globalDamping = m_simParams.m_globalDamping; + float boundaryDamping = m_simParams.m_boundaryDamping; + vel += gravity * m_timeStep; + vel *= globalDamping; + // integrate position + pos += vel * m_timeStep; + // collide with world boundaries + btVector3 worldMin = *((btVector3*)(m_simParams.m_worldMin)); + btVector3 worldMax = *((btVector3*)(m_simParams.m_worldMax)); + for(int j = 0; j < 3; j++) + { + if(pos[j] < (worldMin[j] + particleRad)) + { + pos[j] = worldMin[j] + particleRad; + vel[j] *= boundaryDamping; + } + if(pos[j] > (worldMax[j] - particleRad)) + { + pos[j] = worldMax[j] - particleRad; + vel[j] *= boundaryDamping; + } + } + // write back position and velocity + m_hPos[index] = pos; + m_hVel[index] = vel; + } +#endif + // write back to GPU + memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { + // Set work size and execute the kernel + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 4, sizeof(float), &m_timeStep); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + runKernelWithWorkgroupSize(PARTICLES_KERNEL_INTEGRATE_MOTION, m_numParticles); + ciErrNum = clFinish(m_cqCommandQue); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +} + +void btParticlesDynamicsWorld::runSortHashKernel() +{ + cl_int ciErrNum; + int memSize = m_numParticles * sizeof(btInt2); + if(m_useCpuControls[SIMSTAGE_SORT_CELL_ID]->m_active) + { + // CPU version + // get hash from GPU + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + // sort + class btHashPosKey + { + public: + unsigned int hash; + unsigned int index; + void quickSort(btHashPosKey* pData, int lo, int hi) + { + int i=lo, j=hi; + btHashPosKey x = pData[(lo+hi)/2]; + do + { + while(pData[i].hash < x.hash) i++; + while(x.hash < pData[j].hash) j--; + if(i <= j) + { + btHashPosKey t = pData[i]; + pData[i] = pData[j]; + pData[j] = t; + i++; j--; + } + } while(i <= j); + if(lo < j) pData->quickSort(pData, lo, j); + if(i < hi) pData->quickSort(pData, i, hi); + } + void bitonicSort(btHashPosKey* pData, int lo, int n, bool dir) + { + if(n > 1) + { + int m = n / 2; + bitonicSort(pData, lo, m, !dir); + bitonicSort(pData, lo + m, n - m, dir); + bitonicMerge(pData, lo, n, dir); + } + } + void bitonicMerge(btHashPosKey* pData, int lo, int n, bool dir) + { + if(n > 1) + { + int m = greatestPowerOfTwoLessThan(n); + for(int i = lo; i < (lo + n - m); i++) + { + compare(pData, i, i + m, dir); + } + bitonicMerge(pData, lo, m, dir); + bitonicMerge(pData, lo + m, n - m, dir); + } + } + void compare(btHashPosKey* pData, int i, int j, bool dir) + { + if(dir == (pData[i].hash > pData[j].hash)) + { + btHashPosKey t = pData[i]; + pData[i] = pData[j]; + pData[j] = t; + } + } + int greatestPowerOfTwoLessThan(int n) + { + int k = 1; + while(k < n) + { + k = k << 1; + } + return k>>1; + } + }; + btHashPosKey* pHash = (btHashPosKey*)(&m_hPosHash[0]); + pHash->quickSort(pHash, 0, m_numParticles - 1); + // pHash->bitonicSort(pHash, 0, m_hashSize, true); + // write back to GPU + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { + // bitonic sort on GPU (shared memory) + int dir = 1; + bitonicSortNv(m_dPosHash, 1, m_hashSize, dir); + ciErrNum = clFinish(m_cqCommandQue); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +#if 0 + // check order + memSize = m_numParticles * sizeof(btInt2); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + for(int i = 1; i < m_hashSize; i++) + { + if(m_hPosHash[i-1].x > m_hPosHash[i].x) + { + printf("Hash sort error at %d\n", i); + } + } +#endif +} + + +void btParticlesDynamicsWorld::runFindCellStartKernel() +{ + cl_int ciErrNum; + if(m_useCpuControls[SIMSTAGE_FIND_CELL_START]->m_active) + { + // CPU version + // get hash from GPU + int memSize = m_numParticles * sizeof(btInt2); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + // clear cells + for(int i = 0; i < m_numGridCells; i++) + { + m_hCellStart[i] = -1; + } + // find start of each cell in sorted hash + btInt2 hash = m_hPosHash[0]; + m_hCellStart[hash.x] = 0; + int unsortedIndex = hash.y; + btVector3 pos = m_hPos[unsortedIndex]; + btVector3 vel = m_hVel[unsortedIndex]; + m_hSortedPos[0] = pos; + m_hSortedVel[0] = vel; + for(int i = 1; i < m_numParticles; i++) + { + if(m_hPosHash[i-1].x != m_hPosHash[i].x) + { + m_hCellStart[m_hPosHash[i].x] = i; + } + unsortedIndex = m_hPosHash[i].y; + pos = m_hPos[unsortedIndex]; + vel = m_hVel[unsortedIndex]; + m_hSortedPos[i] = pos; + m_hSortedVel[i] = vel; + } + // write back to GPU + memSize = m_numGridCells * sizeof(int); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + memSize = sizeof(btVector3) * m_numParticles; + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { // GPU + runKernelWithWorkgroupSize(PARTICLES_KERNEL_CLEAR_CELL_START, m_numGridCells); + runKernelWithWorkgroupSize(PARTICLES_KERNEL_FIND_CELL_START, m_numParticles); + ciErrNum = clFinish(m_cqCommandQue); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } +} + + +void btParticlesDynamicsWorld::initKernel(int kernelId, char* pName) +{ + + cl_int ciErrNum; + cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + size_t wgSize; + ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); +// if (wgSize > 64) +// wgSize = 64; + m_kernels[kernelId].m_Id = kernelId; + m_kernels[kernelId].m_kernel = kernel; + m_kernels[kernelId].m_name = pName; + m_kernels[kernelId].m_workgroupSize = wgSize; + + return; +} + +void btParticlesDynamicsWorld::runKernelWithWorkgroupSize(int kernelId, int globalSize) +{ + if(globalSize <= 0) + { + return; + } + cl_kernel kernelFunc = m_kernels[kernelId].m_kernel; + cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + int workgroupSize = m_kernels[kernelId].m_workgroupSize; + if(workgroupSize <= 0) + { // let OpenCL library calculate workgroup size + size_t globalWorkSize[2]; + globalWorkSize[0] = globalSize; + globalWorkSize[1] = 1; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 ); + } + else + { + size_t localWorkSize[2], globalWorkSize[2]; + workgroupSize = btMin(workgroupSize, globalSize); + int num_t = globalSize / workgroupSize; + int num_g = num_t * workgroupSize; + if(num_g < globalSize) + { + num_t++; + } + localWorkSize[0] = workgroupSize; + globalWorkSize[0] = num_t * workgroupSize; + localWorkSize[1] = 1; + globalWorkSize[1] = 1; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 ); + } + oclCHECKERROR(ciErrNum, CL_SUCCESS); +} + + +//Note: logically shared with BitonicSort OpenCL code! +// TODO : get parameter from OpenCL and pass it to kernel (needed for platforms other than NVIDIA) +//static const unsigned int LOCAL_SIZE_LIMIT = 1024U; + +void btParticlesDynamicsWorld::bitonicSortNv(cl_mem pKey, unsigned int batch, unsigned int arrayLength, unsigned int dir) +{ + unsigned int localSizeLimit = m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL].m_workgroupSize * 2; + if(arrayLength < 2) + return; + //Only power-of-two array lengths are supported so far + dir = (dir != 0); + cl_int ciErrNum; + size_t localWorkSize, globalWorkSize; + if(arrayLength <= localSizeLimit) + { + btAssert( (batch * arrayLength) % localSizeLimit == 0); + //Launch bitonicSortLocal + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL].m_kernel, 0, sizeof(cl_mem), (void *)&pKey); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL].m_kernel, 1, sizeof(cl_uint), (void *)&arrayLength); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL].m_kernel, 2, sizeof(cl_uint), (void *)&dir); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + localWorkSize = localSizeLimit / 2; + globalWorkSize = batch * arrayLength / 2; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL].m_kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { + //Launch bitonicSortLocal1 + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL_1].m_kernel, 0, sizeof(cl_mem), (void *)&pKey); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + localWorkSize = localSizeLimit / 2; + globalWorkSize = batch * arrayLength / 2; + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL_1].m_kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + for(unsigned int size = 2 * localSizeLimit; size <= arrayLength; size <<= 1) + { + for(unsigned stride = size / 2; stride > 0; stride >>= 1) + { + if(stride >= localSizeLimit) + { + //Launch bitonicMergeGlobal + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 0, sizeof(cl_mem), (void *)&pKey); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 1, sizeof(cl_uint), (void *)&arrayLength); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 2, sizeof(cl_uint), (void *)&size); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 3, sizeof(cl_uint), (void *)&stride); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 4, sizeof(cl_uint), (void *)&dir); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + localWorkSize = localSizeLimit / 4; + globalWorkSize = batch * arrayLength / 2; + + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL].m_kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + } + else + { + //Launch bitonicMergeLocal + ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 0, sizeof(cl_mem), (void *)&pKey); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 1, sizeof(cl_uint), (void *)&arrayLength); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 2, sizeof(cl_uint), (void *)&stride); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 3, sizeof(cl_uint), (void *)&size); + ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 4, sizeof(cl_uint), (void *)&dir); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + localWorkSize = localSizeLimit / 2; + globalWorkSize = batch * arrayLength / 2; + + ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_kernels[PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL].m_kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + break; + } + } + } + } +} + diff --git a/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h b/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h new file mode 100644 index 000000000..8a3be82f7 --- /dev/null +++ b/Demos/ParticlesOpenCL/btParticlesDynamicsWorld.h @@ -0,0 +1,181 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#ifndef BT_PARTICLES_DYNAMICS_WORLD_H +#define BT_PARTICLES_DYNAMICS_WORLD_H + + +#ifdef USE_MINICL +#include +#include +#else +#ifdef __APPLE__ + #include +#else + #include + #include +#endif __APPLE__ +#endif + + + + + + +#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h" +#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h" +#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h" + +#include "btParticlesSharedDefs.h" +#include "btParticlesSharedTypes.h" + +#define PARTICLES_MAX_PARTICLES (65536) +#define PARTICLES_MAX_NEIGHBORS (32) +#define DEF_PARTICLE_RADIUS (0.023f) + +enum +{ + PARTICLES_KERNEL_INTEGRATE_MOTION = 0, + PARTICLES_KERNEL_COMPUTE_CELL_ID, + PARTICLES_KERNEL_CLEAR_CELL_START, + PARTICLES_KERNEL_FIND_CELL_START, + PARTICLES_KERNEL_COLLIDE_PARTICLES, + PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL, + PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL_1, + PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL, + PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL, + PARTICLES_KERNEL_TOTAL +}; + + +enum +{ + SIMSTAGE_NONE = 0, + SIMSTAGE_INTEGRATE_MOTION, + SIMSTAGE_COMPUTE_CELL_ID, + SIMSTAGE_SORT_CELL_ID, + SIMSTAGE_FIND_CELL_START, + SIMSTAGE_COLLIDE_PARTICLES, + SIMSTAGE_TOTAL +}; + +struct btKernelInfo +{ + int m_Id; + cl_kernel m_kernel; + char* m_name; + int m_workgroupSize; +}; + +class btParticlesDynamicsWorld : public btDiscreteDynamicsWorld +{ +public: + int m_numParticles; + int m_usedDevice; + btScalar m_particleRad; + struct GL_ToggleControl* m_useCpuControls[SIMSTAGE_TOTAL]; + +protected: + int m_hashSize; // power of 2 >= m_numSpheres; + int m_numGridCells; + int m_maxNeighbors; + int m_numSolverIterations; + // CPU side data +public: + btAlignedObjectArray m_hPos; + btAlignedObjectArray m_hVel; + btAlignedObjectArray m_hSortedPos; + btAlignedObjectArray m_hSortedVel; +protected: + btAlignedObjectArray m_hPosHash; + btAlignedObjectArray m_hCellStart; + // GPU side data + cl_mem m_dPos; + cl_mem m_dVel; + cl_mem m_dPosHash; + cl_mem m_dCellStart; + cl_mem m_dSimParams; // copy of m_simParams : global simulation paramerers such as gravity, etc. + cl_mem m_dSortedPos; + cl_mem m_dSortedVel; + // OpenCL +public: + cl_context m_cxMainContext; + cl_device_id m_cdDevice; + cl_command_queue m_cqCommandQue; + cl_program m_cpProgram; +protected: + btKernelInfo m_kernels[PARTICLES_KERNEL_TOTAL]; + + btVector3 m_cellSize; + +public: + btVector3 m_worldMin; + btVector3 m_worldMax; + // vbo variables + GLuint m_vbo; + unsigned int m_posVbo; + unsigned int m_colVbo; + btSimParams m_simParams; + float m_timeStep; + + int getNumParticles() { return m_numParticles; } + float* getPosBuffer() { return (float*)&(m_hPos[0]); } + + + btParticlesDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration, + int maxObjs = PARTICLES_MAX_PARTICLES, int maxNeighbors = PARTICLES_MAX_NEIGHBORS) + : btDiscreteDynamicsWorld(dispatcher, pairCache, constraintSolver, collisionConfiguration) + { + m_cxMainContext = 0; + m_usedDevice = 1; +// m_particleRad = btScalar(0.5f); + m_particleRad = DEF_PARTICLE_RADIUS; + m_simParams.m_gravity[0] = 0.f; + m_simParams.m_gravity[1] = -10.f; + m_simParams.m_gravity[2] = 0.f; + m_simParams.m_gravity[3] = 0.f; + m_numSolverIterations = 4; + } + virtual ~btParticlesDynamicsWorld(); + virtual int stepSimulation( btScalar timeStep,int maxSubSteps=1, btScalar fixedTimeStep=btScalar(1.)/btScalar(60.)); + + void initDeviceData(); + void initCLKernels(int argc, char** argv); + void createVBO(); + void postInitDeviceData(); + void getShapeData(); + void allocateBuffers(); + void grabSimulationData(); + void adjustGrid(); + void runIntegrateMotionKernel(); + void runComputeCellIdKernel(); + void runSortHashKernel(); + void runFindCellStartKernel(); + void runCollideParticlesKernel(); + + void initKernel(int kernelId, char* pName); + void runKernelWithWorkgroupSize(int kernelId, int globalSize); + void bitonicSortNv(cl_mem pKey, unsigned int batch, unsigned int arrayLength, unsigned int dir); + + void scanExclusiveLocal1(cl_mem d_Dst, cl_mem d_Src, unsigned int n, unsigned int size); + void scanExclusiveLocal2(cl_mem d_Buffer, cl_mem d_Dst, cl_mem d_Src, unsigned int n, unsigned int size); + void uniformUpdate(cl_mem d_Dst, cl_mem d_Buffer, unsigned int n); + void scanExclusive(cl_mem d_Dst, cl_mem d_Src, unsigned int arrayLength); + +}; + + +#endif //BT_PARTICLES_DYNAMICS_WORLD_H diff --git a/Demos/ParticlesOpenCL/btParticlesSharedDefs.h b/Demos/ParticlesOpenCL/btParticlesSharedDefs.h new file mode 100644 index 000000000..247075b43 --- /dev/null +++ b/Demos/ParticlesOpenCL/btParticlesSharedDefs.h @@ -0,0 +1,14 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ diff --git a/Demos/ParticlesOpenCL/btParticlesSharedTypes.h b/Demos/ParticlesOpenCL/btParticlesSharedTypes.h new file mode 100644 index 000000000..8ee8690f3 --- /dev/null +++ b/Demos/ParticlesOpenCL/btParticlesSharedTypes.h @@ -0,0 +1,54 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +#ifndef BT_SPHERES_GRID_DEMO_SHARED_TYPES +#define BT_SPHERES_GRID_DEMO_SHARED_TYPES + +struct btSimParams +{ + float m_gravity[4]; + float m_worldMin[4]; + float m_worldMax[4]; + float m_cellSize[4]; + int m_gridSize[4]; + + float m_particleRad; + float m_globalDamping; + float m_boundaryDamping; + float m_collisionDamping; + + float m_spring; + float m_shear; + float m_attraction; + float m_dummy; +}; + +struct btInt2 +{ + int x; + int y; +}; + +struct btInt4 +{ + int x; + int y; + int z; + int w; +}; + + + +#endif + diff --git a/Demos/ParticlesOpenCL/main.cpp b/Demos/ParticlesOpenCL/main.cpp new file mode 100644 index 000000000..037bba7e1 --- /dev/null +++ b/Demos/ParticlesOpenCL/main.cpp @@ -0,0 +1,51 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "ParticlesDemo.h" +#include "GlutStuff.h" +#include "GLDebugDrawer.h" + +#include "btBulletDynamicsCommon.h" +#include "LinearMath/btHashMap.h" + + +// standard utility and system includes +//#include +// Extra CL/GL include +//#include + + +GLDebugDrawer gDebugDrawer; + +int main(int argc,char** argv) +{ + // start logs +// oclSetLogFileName ("appSpheresGrid.txt"); +// oclLog(LOGBOTH, 0.0, "appSpheresGrid.exe Starting...\n\n"); + + + ParticlesDemo ccdDemo(argc, argv); + ccdDemo.initPhysics(); + ccdDemo.getDynamicsWorld()->setDebugDrawer(&gDebugDrawer); + +#ifdef CHECK_MEMORY_LEAKS + ccdDemo.exitPhysics(); +#else + return glutmain(argc, argv,640,480,"Bullet Physics Demo. http://bulletphysics.com",&ccdDemo); +#endif + + //default glut doesn't return from mainloop + return 0; +} \ No newline at end of file diff --git a/Demos/ParticlesOpenCL/shaders.cpp b/Demos/ParticlesOpenCL/shaders.cpp new file mode 100644 index 000000000..644aa8147 --- /dev/null +++ b/Demos/ParticlesOpenCL/shaders.cpp @@ -0,0 +1,53 @@ +#define STRINGIFY(A) #A + +// vertex shader +const char *vertexShader = STRINGIFY( +uniform float pointRadius; // point size in world space +uniform float pointScale; // scale to calculate size in pixels +uniform float densityScale; +uniform float densityOffset; +varying vec3 posEye; +void main() +{ + // calculate window-space point size + posEye = vec3(gl_ModelViewMatrix * vec4(gl_Vertex.xyz, 1.0)); + float dist = length(posEye); + gl_PointSize = pointRadius * (pointScale / dist); +// gl_PointSize = 4.0; + + gl_TexCoord[0] = gl_MultiTexCoord0; + gl_Position = gl_ModelViewProjectionMatrix * vec4(gl_Vertex.xyz, 1.0); + + gl_FrontColor = gl_Color; +} +); + +// pixel shader for rendering points as shaded spheres +const char *spherePixelShader = STRINGIFY( +uniform float pointRadius; // point size in world space +varying vec3 posEye; // position of center in eye space +void main() +{ + const vec3 lightDir = vec3(0.577, 0.577, 0.577); + const float shininess = 40.0; + + // calculate normal from texture coordinates + vec3 N; + N.xy = gl_TexCoord[0].xy*vec2(2.0, -2.0) + vec2(-1.0, 1.0); + float mag = dot(N.xy, N.xy); + if (mag > 1.0) discard; // kill pixels outside circle + N.z = sqrt(1.0-mag); + + // point on surface of sphere in eye space + vec3 spherePosEye = posEye + N*pointRadius; + + // calculate lighting + float diffuse = max(0.0, dot(lightDir, N)); +// gl_FragColor = gl_Color * diffuse; + + vec3 v = normalize(-spherePosEye); + vec3 h = normalize(lightDir + v); + float specular = pow(max(0.0, dot(N, h)), shininess); + gl_FragColor = gl_Color * diffuse + specular; +} +); diff --git a/Demos/ParticlesOpenCL/shaders.h b/Demos/ParticlesOpenCL/shaders.h new file mode 100644 index 000000000..d90b40cef --- /dev/null +++ b/Demos/ParticlesOpenCL/shaders.h @@ -0,0 +1,2 @@ +extern const char *vertexShader; +extern const char *spherePixelShader; diff --git a/Demos/SharedOpenCL/btOclCommon.cpp b/Demos/SharedOpenCL/btOclCommon.cpp new file mode 100644 index 000000000..d412ef3c6 --- /dev/null +++ b/Demos/SharedOpenCL/btOclCommon.cpp @@ -0,0 +1,95 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include + +#include "btOclCommon.h" + + +static char* spPlatformVendor = +#if defined(CL_PLATFORM_MINI_CL) +"MiniCL, SCEA"; +#elif defined(CL_PLATFORM_AMD) +"Advanced Micro Devices, Inc."; +#elif defined(CL_PLATFORM_NVIDIA) +"NVIDIA Corporation"; +#else +"Unknown Vendor"; +#endif + + + + +cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum) +{ + cl_uint numPlatforms; + cl_platform_id platform = NULL; + cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) *pErrNum = ciErrNum; + return NULL; + } + if(numPlatforms > 0) + { + cl_platform_id* platforms = new cl_platform_id[numPlatforms]; + ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) *pErrNum = ciErrNum; + return NULL; + } + for (unsigned i = 0; i < numPlatforms; ++i) + { + char pbuf[128]; + ciErrNum = clGetPlatformInfo( platforms[i], + CL_PLATFORM_VENDOR, + sizeof(pbuf), + pbuf, + NULL); + if(ciErrNum != CL_SUCCESS) + { + if(pErrNum != NULL) *pErrNum = ciErrNum; + return NULL; + } + platform = platforms[i]; + if(!strcmp(pbuf, spPlatformVendor)) + { + break; + } + } + delete[] platforms; + } + /* + * If we could find our platform, use it. Otherwise pass a NULL and get whatever the + * implementation thinks we should be using. + */ + cl_context_properties cps[3] = + { + CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, + 0 + }; + /* Use NULL for backward compatibility */ + cl_context_properties* cprops = (NULL == platform) ? NULL : cps; + cl_context retContext = clCreateContextFromType(cprops, + CL_DEVICE_TYPE_ALL, + NULL, + NULL, + &ciErrNum); + if(pErrNum != NULL) *pErrNum = ciErrNum; + return retContext; +} + diff --git a/Demos/SharedOpenCL/btOclCommon.h b/Demos/SharedOpenCL/btOclCommon.h new file mode 100644 index 000000000..a4baa1dcb --- /dev/null +++ b/Demos/SharedOpenCL/btOclCommon.h @@ -0,0 +1,42 @@ +/* +Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org +Copyright (C) 2006 - 2010 Sony Computer Entertainment Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef BTOCLCOMMON_H +#define BTOCLCOMMON_H + +#ifdef __APPLE__ +#ifdef USE_MINICL + #include +#else + #include +#endif +#else + #ifdef USE_MINICL + #include + #else + #include + #endif +#endif //__APPLE__ + + +class btOclCommon +{ +public: + static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum); +}; + + + +#endif // BTOCLCOMMON_H \ No newline at end of file diff --git a/Demos/SharedOpenCL/btOclUtils.cpp b/Demos/SharedOpenCL/btOclUtils.cpp new file mode 100644 index 000000000..bca361449 --- /dev/null +++ b/Demos/SharedOpenCL/btOclUtils.cpp @@ -0,0 +1,330 @@ +#include +#include +#include + + + + +#include "btOclUtils.h" + + + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the nth device from the context +//! +//! @return the id or -1 when out of range +//! @param cxMainContext OpenCL context +//! @param device_idx index of the device of interest +////////////////////////////////////////////////////////////////////////////// +cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr) +{ + size_t szParmDataBytes; + cl_device_id* cdDevices; + + // get the list of GPU devices associated with context + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); + + if( szParmDataBytes / sizeof(cl_device_id) < nr ) { + return (cl_device_id)-1; + } + + cdDevices = (cl_device_id*) malloc(szParmDataBytes); + + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); + + cl_device_id device = cdDevices[nr]; + free(cdDevices); + + return device; +} + + + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of device with maximal FLOPS from the context +//! +//! @return the id +//! @param cxMainContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext) +{ + size_t szParmDataBytes; + cl_device_id* cdDevices; + + // get the list of GPU devices associated with context + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); + cdDevices = (cl_device_id*) malloc(szParmDataBytes); + size_t device_count = szParmDataBytes / sizeof(cl_device_id); + + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); + + cl_device_id max_flops_device = cdDevices[0]; + int max_flops = 0; + + size_t current_device = 0; + + // CL_DEVICE_MAX_COMPUTE_UNITS + cl_uint compute_units; + clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); + + // CL_DEVICE_MAX_CLOCK_FREQUENCY + cl_uint clock_frequency; + clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); + + max_flops = compute_units * clock_frequency; + ++current_device; + + while( current_device < device_count ) + { + // CL_DEVICE_MAX_COMPUTE_UNITS + cl_uint compute_units; + clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); + + // CL_DEVICE_MAX_CLOCK_FREQUENCY + cl_uint clock_frequency; + clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); + + int flops = compute_units * clock_frequency; + if( flops > max_flops ) + { + max_flops = flops; + max_flops_device = cdDevices[current_device]; + } + ++current_device; + } + + free(cdDevices); + + return max_flops_device; +} + + +////////////////////////////////////////////////////////////////////////////// +//! Loads a Program file and prepends the cPreamble to the code. +//! +//! @return the source string if succeeded, 0 otherwise +//! @param cFilename program filename +//! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header +//! @param szFinalLength returned length of the code string +////////////////////////////////////////////////////////////////////////////// +char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) +{ + // locals + FILE* pFileStream = NULL; + size_t szSourceLength; + + // open the OpenCL source code file + #ifdef _WIN32 // Windows version + if(fopen_s(&pFileStream, cFilename, "rb") != 0) + { + return NULL; + } + #else // Linux version + pFileStream = fopen(cFilename, "rb"); + if(pFileStream == 0) + { + return NULL; + } + #endif + + size_t szPreambleLength = strlen(cPreamble); + + // get the length of the source code + fseek(pFileStream, 0, SEEK_END); + szSourceLength = ftell(pFileStream); + fseek(pFileStream, 0, SEEK_SET); + + // allocate a buffer for the source code string and read it in + char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); + memcpy(cSourceString, cPreamble, szPreambleLength); + fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); + + // close the file and return the total length of the combined (preamble + source) string + fclose(pFileStream); + if(szFinalLength != 0) + { + *szFinalLength = szSourceLength + szPreambleLength; + } + cSourceString[szSourceLength + szPreambleLength] = '\0'; + + return cSourceString; +} + + +////////////////////////////////////////////////////////////////////////////// +//! Gets the id of the first device from the context +//! +//! @return the id +//! @param cxMainContext OpenCL context +////////////////////////////////////////////////////////////////////////////// +cl_device_id btOclGetFirstDev(cl_context cxMainContext) +{ + size_t szParmDataBytes; + cl_device_id* cdDevices; + + // get the list of GPU devices associated with context + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); + cdDevices = (cl_device_id*) malloc(szParmDataBytes); + + clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); + + cl_device_id first = cdDevices[0]; + free(cdDevices); + + return first; +} + + + +////////////////////////////////////////////////////////////////////////////// +//! Print info about the device +//! +//! @param device OpenCL id of the device +////////////////////////////////////////////////////////////////////////////// +void btOclPrintDevInfo(cl_device_id device) +{ + char device_string[1024]; + bool nv_device_attibute_query = false; + + // CL_DEVICE_NAME + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); + printf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string); + + // CL_DEVICE_VENDOR + clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL); + printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); + + // CL_DRIVER_VERSION + clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL); + printf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string); + + // CL_DEVICE_INFO + cl_device_type type; + clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); + if( type & CL_DEVICE_TYPE_CPU ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); + if( type & CL_DEVICE_TYPE_GPU ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); + if( type & CL_DEVICE_TYPE_ACCELERATOR ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); + if( type & CL_DEVICE_TYPE_DEFAULT ) + printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); + + // CL_DEVICE_MAX_COMPUTE_UNITS + cl_uint compute_units; + clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); + printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); + + // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS + size_t workitem_dims; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL); + printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims); + + // CL_DEVICE_MAX_WORK_ITEM_SIZES + size_t workitem_size[3]; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); + printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); + + // CL_DEVICE_MAX_WORK_GROUP_SIZE + size_t workgroup_size; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL); + printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size); + + // CL_DEVICE_MAX_CLOCK_FREQUENCY + cl_uint clock_frequency; + clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); + printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); + + // CL_DEVICE_ADDRESS_BITS + cl_uint addr_bits; + clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL); + printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); + + // CL_DEVICE_MAX_MEM_ALLOC_SIZE + cl_ulong max_mem_alloc_size; + clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); + printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); + + // CL_DEVICE_GLOBAL_MEM_SIZE + cl_ulong mem_size; + clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); + printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); + + // CL_DEVICE_ERROR_CORRECTION_SUPPORT + cl_bool error_correction_support; + clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL); + printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); + + // CL_DEVICE_LOCAL_MEM_TYPE + cl_device_local_mem_type local_mem_type; + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL); + printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); + + // CL_DEVICE_LOCAL_MEM_SIZE + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); + printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); + + // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE + clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL); + printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); + + // CL_DEVICE_QUEUE_PROPERTIES + cl_command_queue_properties queue_properties; + clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL); + if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) + printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); + if( queue_properties & CL_QUEUE_PROFILING_ENABLE ) + printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); + + // CL_DEVICE_IMAGE_SUPPORT + cl_bool image_support; + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); + printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); + + // CL_DEVICE_MAX_READ_IMAGE_ARGS + cl_uint max_read_image_args; + clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL); + printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); + + // CL_DEVICE_MAX_WRITE_IMAGE_ARGS + cl_uint max_write_image_args; + clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL); + printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); + + // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH + size_t szMaxDims[5]; + printf("\n CL_DEVICE_IMAGE "); + clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL); + printf("\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]); + clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL); + printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL); + printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL); + printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]); + clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL); + printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]); + + // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines + clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL); + if (device_string != 0) + { + printf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string); + } + else + { + printf(" CL_DEVICE_EXTENSIONS: None\n"); + } + + // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ + printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); + cl_uint vec_width [6]; + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL); + clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL); + printf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n\n", + vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4]); +} diff --git a/Demos/SharedOpenCL/btOclUtils.h b/Demos/SharedOpenCL/btOclUtils.h new file mode 100644 index 000000000..fba65d8c6 --- /dev/null +++ b/Demos/SharedOpenCL/btOclUtils.h @@ -0,0 +1,24 @@ + +#ifndef BT_OCL_UTILS_H +#define BT_OCL_UTILS_H + +#ifdef USE_MINICL + #include +#else + #ifdef __APPLE__ + #include + #else + #include + #endif __APPLE__ +#endif + +//#define oclCHECKERROR(a, b) btAssert((a) == (b)) +#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); btAssert((a) == (b)); } + + +void btOclPrintDevInfo(cl_device_id device); +cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr); +cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext); +char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); +cl_device_id btOclGetFirstDev(cl_context cxMainContext); +#endif //BT_OCL_UTILS_H diff --git a/Demos/ThreadingDemo/CMakeLists.txt b/Demos/ThreadingDemo/CMakeLists.txt new file mode 100644 index 000000000..2eb9afe7a --- /dev/null +++ b/Demos/ThreadingDemo/CMakeLists.txt @@ -0,0 +1,45 @@ +# This is basically the overall name of the project in Visual Studio this is the name of the Solution File + + +# For every executable you have with a main method you should have an add_executable line below. +# For every add executable line you should list every .cpp and .h file you have associated with that executable. + + +# This is the variable for Windows. I use this to define the root of my directory structure. +SET(GLUT_ROOT ${BULLET_PHYSICS_SOURCE_DIR}/Glut) + +# You shouldn't have to modify anything below this line +######################################################## + +#currently this demo has only been tested under Windows 32bit +IF (WIN32) + +INCLUDE_DIRECTORIES( + ${BULLET_PHYSICS_SOURCE_DIR}/src ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL + ${VECTOR_MATH_INCLUDE} +) + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport BulletMultiThreaded BulletDynamics BulletCollision LinearMath ${GLUT_glut_LIBRARY} ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY} + ) + + IF (WIN32) + ADD_EXECUTABLE(AppThreadingDemo + main.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/msvc/bullet.rc + ) + ELSE() + ADD_EXECUTABLE(AppThreadingDemo + main.cpp + ) + ENDIF() +ENDIF (USE_GLUT) + +IF (INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) + SET_TARGET_PROPERTIES(AppThreadingDemo PROPERTIES DEBUG_POSTFIX "_Debug") + SET_TARGET_PROPERTIES(AppThreadingDemo PROPERTIES MINSIZEREL_POSTFIX "_MinsizeRel") + SET_TARGET_PROPERTIES(AppThreadingDemo PROPERTIES RELWITHDEBINFO_POSTFIX "_RelWithDebugInfo") +ENDIF(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) + +ENDIF(WIN32) diff --git a/Demos/ThreadingDemo/main.cpp b/Demos/ThreadingDemo/main.cpp new file mode 100644 index 000000000..5ee0b4e4d --- /dev/null +++ b/Demos/ThreadingDemo/main.cpp @@ -0,0 +1,123 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2010 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +/// ThreadingDemo shows how to use the cross platform thread support interface. +/// You can start threads and perform a blocking wait for completion +/// Under Windows it uses Win32 Threads. On Mac and Linux it uses pthreads. On PlayStation 3 Cell SPU it uses SPURS. + +/// June 2010 +/// New: critical section/barriers and non-blocking pollingn for completion, currently Windows only + +#include "BulletMultiThreaded/Win32ThreadSupport.h" + +struct SampleArgs +{ + btCriticalSection* m_cs; +}; + +struct SampleThreadLocalStorage +{ + int threadId; +}; + + +void SampleThreadFunc(void* userPtr,void* lsMemory) +{ + SampleThreadLocalStorage* localStorage = (SampleThreadLocalStorage*) lsMemory; + + SampleArgs* args = (SampleArgs*) userPtr; + int workLeft = true; + while (workLeft) + { + args->m_cs->lock(); + int count = args->m_cs->getSharedParam(0); + args->m_cs->setSharedParam(0,count-1); + args->m_cs->unlock(); + if (count>0) + { + printf("thread %d processed number %d\n",localStorage->threadId, count); + } + workLeft = count>0; + } + printf("finished\n"); + //do nothing +} + + +void* SamplelsMemoryFunc() +{ + //don't create local store memory, just return 0 + return new SampleThreadLocalStorage; +} + + + +int main(int argc,char** argv) +{ + int numThreads = 4; + + Win32ThreadSupport::Win32ThreadConstructionInfo threadConstructionInfo("testThreads",SampleThreadFunc,SamplelsMemoryFunc,numThreads); + + Win32ThreadSupport* threadSupport = new Win32ThreadSupport(threadConstructionInfo); + + threadSupport->startSPU(); + + for (int i=0;igetNumTasks();i++) + { + SampleThreadLocalStorage* storage = (SampleThreadLocalStorage*)threadSupport->getThreadLocalMemory(i); + storage->threadId = i; + } + + + SampleArgs args; + args.m_cs = threadSupport->createCriticalSection(); + args.m_cs->setSharedParam(0,100); + + + unsigned int arg0,arg1; + int i; + for (i=0;isendRequest(1, (ppu_address_t) &args, i); + } + + bool blockingWait = false; + if (blockingWait) + { + for (i=0;iwaitForResponse(&arg0,&arg1); + } + } else + { + int numActiveThreads = numThreads; + while (numActiveThreads) + { + if (threadSupport->isTaskCompleted(&arg0,&arg1,0)) + { + numActiveThreads--; + printf("numActiveThreads = %d\n",numActiveThreads); + + } else + { + printf("polling\n"); + } + }; + } + + threadSupport->stopSPU(); + delete threadSupport; + return 0; +} \ No newline at end of file diff --git a/src/BulletMultiThreaded/Win32ThreadSupport.cpp b/src/BulletMultiThreaded/Win32ThreadSupport.cpp index 815ad3268..1197bbe0f 100644 --- a/src/BulletMultiThreaded/Win32ThreadSupport.cpp +++ b/src/BulletMultiThreaded/Win32ThreadSupport.cpp @@ -176,6 +176,53 @@ void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned in } +///check for messages from SPUs +bool Win32ThreadSupport::isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds) +{ + ///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response + + ///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback' + + + btAssert(m_activeSpuStatus.size()); + + int last = -1; +#ifndef SINGLE_THREADED + DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, timeOutInMilliseconds); + + if ((res != STATUS_TIMEOUT) && (res != WAIT_FAILED)) + { + + btAssert(res != WAIT_FAILED); + last = res - WAIT_OBJECT_0; + + btSpuStatus& spuStatus = m_activeSpuStatus[last]; + btAssert(spuStatus.m_threadHandle); + btAssert(spuStatus.m_eventCompletetHandle); + + //WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE); + btAssert(spuStatus.m_status > 1); + spuStatus.m_status = 0; + + ///need to find an active spu + btAssert(last>=0); + + #else + last=0; + btSpuStatus& spuStatus = m_activeSpuStatus[last]; + #endif //SINGLE_THREADED + + + + *puiArgument0 = spuStatus.m_taskId; + *puiArgument1 = spuStatus.m_status; + + return true; + } + + return false; +} + void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo) { @@ -259,4 +306,141 @@ void Win32ThreadSupport::stopSPU() } + + +class btWin32Barrier : public btBarrier +{ +private: + CRITICAL_SECTION mExternalCriticalSection; + CRITICAL_SECTION mLocalCriticalSection; + HANDLE mRunEvent,mNotifyEvent; + int mCounter,mEnableCounter; + int mMaxCount; + +public: + btWin32Barrier() + { + mCounter = 0; + mMaxCount = 1; + mEnableCounter = 0; + InitializeCriticalSection(&mExternalCriticalSection); + InitializeCriticalSection(&mLocalCriticalSection); + mRunEvent = CreateEvent(NULL,TRUE,FALSE,NULL); + mNotifyEvent = CreateEvent(NULL,TRUE,FALSE,NULL); + } + + virtual ~btWin32Barrier() + { + DeleteCriticalSection(&mExternalCriticalSection); + DeleteCriticalSection(&mLocalCriticalSection); + CloseHandle(mRunEvent); + CloseHandle(mNotifyEvent); + } + + void sync() + { + int eventId; + + EnterCriticalSection(&mExternalCriticalSection); + + //PFX_PRINTF("enter taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter); + + if(mEnableCounter > 0) { + ResetEvent(mNotifyEvent); + LeaveCriticalSection(&mExternalCriticalSection); + WaitForSingleObject(mNotifyEvent,INFINITE); + EnterCriticalSection(&mExternalCriticalSection); + } + + eventId = mCounter; + mCounter++; + + if(eventId == mMaxCount-1) { + SetEvent(mRunEvent); + + mEnableCounter = mCounter-1; + mCounter = 0; + } + else { + ResetEvent(mRunEvent); + LeaveCriticalSection(&mExternalCriticalSection); + WaitForSingleObject(mRunEvent,INFINITE); + EnterCriticalSection(&mExternalCriticalSection); + mEnableCounter--; + } + + if(mEnableCounter == 0) { + SetEvent(mNotifyEvent); + } + + //PFX_PRINTF("leave taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter); + + LeaveCriticalSection(&mExternalCriticalSection); + } + + virtual void setMaxCount(int n) {mMaxCount = n;} + virtual int getMaxCount() {return mMaxCount;} +}; + +class btWin32CriticalSection : public btCriticalSection +{ +private: + CRITICAL_SECTION mCriticalSection; + +public: + btWin32CriticalSection() + { + InitializeCriticalSection(&mCriticalSection); + } + + ~btWin32CriticalSection() + { + DeleteCriticalSection(&mCriticalSection); + } + + unsigned int getSharedParam(int i) + { + btAssert(i>=0&&i<31); + return mCommonBuff[i+1]; + } + + void setSharedParam(int i,unsigned int p) + { + btAssert(i>=0&&i<31); + mCommonBuff[i+1] = p; + } + + void lock() + { + EnterCriticalSection(&mCriticalSection); + mCommonBuff[0] = 1; + } + + void unlock() + { + mCommonBuff[0] = 0; + LeaveCriticalSection(&mCriticalSection); + } +}; + + +btBarrier* Win32ThreadSupport::createBarrier() +{ + unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32Barrier),16); + btWin32Barrier* barrier = new(mem) btWin32Barrier(); + barrier->setMaxCount(getNumTasks()); + return barrier; +} + +btCriticalSection* Win32ThreadSupport::createCriticalSection() +{ + unsigned char* mem = (unsigned char*) btAlignedAlloc(sizeof(btWin32CriticalSection),16); + btWin32CriticalSection* cs = new(mem) btWin32CriticalSection(); + return cs; +} + + + #endif //USE_WIN32_THREADING + + diff --git a/src/BulletMultiThreaded/Win32ThreadSupport.h b/src/BulletMultiThreaded/Win32ThreadSupport.h index c61ad901c..be5561ccd 100644 --- a/src/BulletMultiThreaded/Win32ThreadSupport.h +++ b/src/BulletMultiThreaded/Win32ThreadSupport.h @@ -30,10 +30,6 @@ typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory); typedef void* (*Win32lsMemorySetupFunc)(); - - - - ///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication class Win32ThreadSupport : public btThreadSupportInterface { @@ -109,6 +105,8 @@ public: ///check for messages from SPUs virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); + virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds); + ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) virtual void startSPU(); @@ -125,6 +123,14 @@ public: return m_maxNumTasks; } + virtual void* getThreadLocalMemory(int taskId) + { + return m_activeSpuStatus[taskId].m_lsMemory; + } + virtual btBarrier* createBarrier(); + + virtual btCriticalSection* createCriticalSection(); + }; #endif //WIN32_THREAD_SUPPORT_H diff --git a/src/BulletMultiThreaded/btThreadSupportInterface.h b/src/BulletMultiThreaded/btThreadSupportInterface.h index 730ffa9ea..eb5c9bbe4 100644 --- a/src/BulletMultiThreaded/btThreadSupportInterface.h +++ b/src/BulletMultiThreaded/btThreadSupportInterface.h @@ -17,10 +17,35 @@ subject to the following restrictions: #define THREAD_SUPPORT_INTERFACE_H -//#include //for uint32_t etc. +#include //for ATTRIBUTE_ALIGNED16 #include "PlatformDefinitions.h" #include "PpuAddressSpace.h" +class btBarrier { +public: + btBarrier() {} + virtual ~btBarrier() {} + + virtual void sync() = 0; + virtual void setMaxCount(int n) = 0; + virtual int getMaxCount() = 0; +}; + +class btCriticalSection { +public: + btCriticalSection() {} + virtual ~btCriticalSection() {} + + ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]); + + virtual unsigned int getSharedParam(int i) = 0; + virtual void setSharedParam(int i,unsigned int p) = 0; + + virtual void lock() = 0; + virtual void unlock() = 0; +}; + + class btThreadSupportInterface { public: @@ -33,6 +58,10 @@ public: ///check for messages from SPUs virtual void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0; + + ///non-blocking test if a task is completed. First implement all versions, and then enable this API + ///virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds)=0; + ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) virtual void startSPU() =0; @@ -44,6 +73,10 @@ public: virtual int getNumTasks() const = 0; + virtual btBarrier* createBarrier() = 0; + + virtual btCriticalSection* createCriticalSection() = 0; + }; #endif //THREAD_SUPPORT_INTERFACE_H diff --git a/src/MiniCL/cl_MiniCL_Defs.h b/src/MiniCL/cl_MiniCL_Defs.h index cad0b2590..ffdac1026 100644 --- a/src/MiniCL/cl_MiniCL_Defs.h +++ b/src/MiniCL/cl_MiniCL_Defs.h @@ -28,6 +28,9 @@ subject to the following restrictions: #define get_local_size(a) (gMiniCLNumOutstandingTasks) #define get_group_id(a) ((__guid_arg) / gMiniCLNumOutstandingTasks) +static unsigned int as_uint(float val) { return *((unsigned int*)&val); } + + #define CLK_LOCAL_MEM_FENCE 0x01 #define CLK_GLOBAL_MEM_FENCE 0x02 @@ -36,7 +39,8 @@ static void barrier(unsigned int a) // TODO : implement } -ATTRIBUTE_ALIGNED16(struct) float8 +//ATTRIBUTE_ALIGNED16(struct) float8 +struct float8 { float s0; float s1; @@ -53,7 +57,8 @@ ATTRIBUTE_ALIGNED16(struct) float8 } }; -ATTRIBUTE_ALIGNED16(struct) float4 +//ATTRIBUTE_ALIGNED16(struct) float4 +struct float4 { float x,y,z,w; float4() {} diff --git a/src/MiniCL/cl_gl.h b/src/MiniCL/cl_gl.h index 71bdaaa6e..0a69d6ecb 100644 --- a/src/MiniCL/cl_gl.h +++ b/src/MiniCL/cl_gl.h @@ -27,7 +27,7 @@ #ifdef __APPLE__ #include #else -#include +#include #endif #ifdef __cplusplus