diff --git a/Demos/DX11ClothDemo/cloth_renderer.cpp b/Demos/DX11ClothDemo/cloth_renderer.cpp index a0bbb665e..afd9db299 100644 --- a/Demos/DX11ClothDemo/cloth_renderer.cpp +++ b/Demos/DX11ClothDemo/cloth_renderer.cpp @@ -31,7 +31,7 @@ class btDX11SIMDAwareSoftBodySolver; #include "BulletSoftBody/btSoftBodySolvers.h" #include "BulletSoftBody/btDefaultSoftBodySolver.h" -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" + #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h" diff --git a/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt index 9fb579210..a4d3b5b7b 100644 --- a/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt +++ b/Demos/OpenCLClothDemo/MiniCL/CMakeLists.txt @@ -21,7 +21,6 @@ IF (USE_GLUT) LINK_LIBRARIES( OpenGLSupport BulletSoftBodySolvers_OpenCL_Mini - BulletSoftBodySolvers_CPU MiniCL BulletMultiThreaded BulletSoftBody diff --git a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp index e8c713569..8a354e37b 100644 --- a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp +++ b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp @@ -18,15 +18,18 @@ subject to the following restrictions: #endif + #ifndef USE_MINICL -#define USE_SIMDAWARE_SOLVER -#ifndef __APPLE__ +//#define USE_SIMDAWARE_SOLVER +#endif + +#if !defined (__APPLE__) #define USE_GPU_SOLVER -#if defined (_WIN32) +#if defined (_WIN32) && !defined(USE_MINICL) #define USE_GPU_COPY //only tested on Windows -#endif //_WIN32 -#endif //__APPLE__ -#endif //USE_MINICL +#endif //_WIN32 && !USE_MINICL +#endif //!__APPLE__ + @@ -43,13 +46,7 @@ const int numFlags = 5; const int clothWidth = 40; const int clothHeight = 60;//60; float _windAngle = 1.0;//0.4; -float _windStrength = 10.; - - - - - - +float _windStrength = 0.; @@ -57,7 +54,6 @@ float _windStrength = 10.; #include "LinearMath/btHashMap.h" #include "BulletSoftBody/btSoftRigidDynamicsWorld.h" #include "vectormath/vmInclude.h" -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h" #include "BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h" @@ -95,7 +91,6 @@ btCollisionDispatcher* m_dispatcher; btConstraintSolver* m_solver; btDefaultCollisionConfiguration* m_collisionConfiguration; -btCPUSoftBodySolver *g_cpuSolver = NULL; btOpenCLSoftBodySolver *g_openCLSolver = NULL; btOpenCLSoftBodySolverSIMDAware *g_openCLSIMDSolver = NULL; @@ -368,9 +363,8 @@ void initBullet(void) #endif // #ifdef USE_GPU_COPY #endif #else - g_cpuSolver = new btCPUSoftBodySolver; - g_solver = g_cpuSolver; - g_softBodyOutput = new btSoftBodySolverOutputCPUtoCPU; + g_openCLSolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext ); + g_solver = g_openCLSolver; #endif //m_collisionConfiguration = new btDefaultCollisionConfiguration(); @@ -471,11 +465,11 @@ void initBullet(void) #endif -#ifdef USE_GPU_SOLVER +//#ifdef USE_GPU_SOLVER createFlag( *g_openCLSolver, clothWidth, clothHeight, m_flags ); -#else - createFlag( *g_cpuSolver, clothWidth, clothHeight, m_flags ); -#endif +//#else + +//#endif // Create output buffer descriptions for ecah flag // These describe where the simulation should send output data to @@ -544,7 +538,7 @@ void doFlags() //debugDraw.setDebugMode(btIDebugDraw::DBG_DrawWireframe); //g_solver->copyBackToSoftBodies(); - //m_dynamicsWorld->debugDrawWorld(); + m_dynamicsWorld->debugDrawWorld(); } @@ -607,8 +601,6 @@ int main(int argc, char *argv[]) goGL(); - if( g_cpuSolver ) - delete g_cpuSolver; if( g_openCLSolver ) delete g_openCLSolver; if( g_openCLSIMDSolver ) diff --git a/Demos/SerializeDemo/AMD/CMakeLists.txt b/Demos/SerializeDemo/AMD/CMakeLists.txt new file mode 100644 index 000000000..38ae8501f --- /dev/null +++ b/Demos/SerializeDemo/AMD/CMakeLists.txt @@ -0,0 +1,131 @@ + + +INCLUDE_DIRECTORIES( +${BULLET_PHYSICS_SOURCE_DIR}/src +${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL +${BULLET_PHYSICS_SOURCE_DIR}/Extras/Serialize/BulletFileLoader +${BULLET_PHYSICS_SOURCE_DIR}/Extras/Serialize/BulletWorldImporter +${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL +${AMD_OPENCL_INCLUDES} +) + +ADD_DEFINITIONS(-DDESERIALIZE_SOFT_BODIES) +ADD_DEFINITIONS(-DUSE_AMD_OPENCL) +ADD_DEFINITIONS(-DCL_PLATFORM_AMD) +IF (CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY + ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64.lib ) +ELSE(CMAKE_CL_64) + SET(CMAK_GLEW_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32.lib ) +ENDIF(CMAKE_CL_64) + + +IF (USE_GLUT) + LINK_LIBRARIES( + OpenGLSupport + BulletWorldImporter + BulletSoftBody + BulletDynamics + BulletCollision + BulletFileLoader + LinearMath + BulletSoftBodySolvers_OpenCL_AMD + BulletMultiThreaded + ${GLUT_glut_LIBRARY} + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAKE_ATISTREAMSDK_LIBPATH}/OpenCL.lib + ) + + IF (WIN32) + ADD_EXECUTABLE(AppSerializeDemo_AMD + ../main.cpp + ../SerializeDemo.cpp + ../SerializeDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/msvc/bullet.rc + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/clew.c + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/clew.h + + ) + ELSE() + ADD_EXECUTABLE(AppSerializeDemo_AMD + ../main.cpp + ../SerializeDemo.cpp + ../SerializeDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/clew.c + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/clew.h + ) + ENDIF() + IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + IF (WIN32) + IF (CMAKE_CL_64) + ADD_CUSTOM_COMMAND( + TARGET AppSerializeDemo_AMD + POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/glut64.dll ${CMAKE_CURRENT_BINARY_DIR} + ) + ELSE(CMAKE_CL_64) + ADD_CUSTOM_COMMAND( + TARGET AppSerializeDemo_AMD + POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/GLUT32.DLL ${CMAKE_CURRENT_BINARY_DIR} + ) + ENDIF(CMAKE_CL_64) + ENDIF(WIN32) + ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) + +ELSE (USE_GLUT) + + LINK_LIBRARIES( + OpenGLSupport + BulletWorldImporter + BulletSoftBody + BulletDynamics + BulletCollision + BulletFileLoader + LinearMath + BulletSoftBodySolvers_OpenCL_AMD + BulletMultiThreaded + ${OPENGL_gl_LIBRARY} + ${OPENGL_glu_LIBRARY} + ${CMAK_GLEW_LIBRARY} + ${CMAKE_ATISTREAMSDK_LIBPATH}/OpenCL.lib + ) + + ADD_EXECUTABLE(AppSerializeDemo_AMD + WIN32 + ../../OpenGL/Win32AppMain.cpp + ../Win32SerializeDemo.cpp + ../SerializeDemo.cpp + ../SerializeDemo.h + ${BULLET_PHYSICS_SOURCE_DIR}/msvc/bullet.rc + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.h + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclUtils.cpp + ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SharedOpenCL/btOclCommon.cpp + ) +ENDIF (USE_GLUT) + +IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES AND NOT INTERNAL_UPDATE_SERIALIZATION_STRUCTURES) + ADD_CUSTOM_COMMAND( + TARGET AppSerializeDemo_AMD + POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${BULLET_PHYSICS_SOURCE_DIR}/Demos/SerializeDemo/testFile.bullet ${CMAKE_CURRENT_BINARY_DIR}/testFile.bullet + ) +ENDIF () + + +IF (INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) + SET_TARGET_PROPERTIES(AppSerializeDemo_AMD PROPERTIES DEBUG_POSTFIX "_Debug") + SET_TARGET_PROPERTIES(AppSerializeDemo_AMD PROPERTIES MINSIZEREL_POSTFIX "_MinsizeRel") + SET_TARGET_PROPERTIES(AppSerializeDemo_AMD PROPERTIES RELWITHDEBINFO_POSTFIX "_RelWithDebugInfo") +ENDIF(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) diff --git a/Demos/SerializeDemo/AMD/premake4.lua b/Demos/SerializeDemo/AMD/premake4.lua new file mode 100644 index 000000000..2f59b49e5 --- /dev/null +++ b/Demos/SerializeDemo/AMD/premake4.lua @@ -0,0 +1,65 @@ + + hasCL = findOpenCL_AMD() + + if (hasCL) then + + project "AppOpenCLClothDemo_AMD" + + defines { "USE_AMD_OPENCL","CL_PLATFORM_AMD"} + + initOpenCL_AMD() + + language "C++" + + kind "ConsoleApp" + targetdir "../../.." + + libdirs {"../../../Glut"} + + links { + "LinearMath", + "BulletCollision", + "BulletDynamics", + "BulletSoftBody", + "BulletSoftBodySolvers_OpenCL_AMD", + "opengl32" + } + + configuration "x64" + links { + "glut64", + "glew64" + } + configuration "x32" + links { + "glut32", + "glew32" + } + + configuration{} + + + includedirs { + "../../../src", + "../../../Glut", + "../../SharedOpenCL", + "../../OpenGL" + } + + files { + "../cl_cloth_demo.cpp", + "../../SharedOpenCL/btOclUtils.h", + "../../SharedOpenCL/btOclCommon.h", + "../../SharedOpenCL/btOclUtils.cpp", + "../../SharedOpenCL/btOclCommon.cpp", + "../../OpenGL/GLDebugDrawer.cpp", + "../../OpenGL/stb_image.cpp", + "../../OpenGL/stb_image.h", + "../gl_win.cpp", + "../clstuff.cpp", + "../clstuff.h", + "../gl_win.h", + "../cloth.h" + } + + end \ No newline at end of file diff --git a/Demos/SerializeDemo/CMakeLists.txt b/Demos/SerializeDemo/CMakeLists.txt index c4e96ceca..b03175073 100644 --- a/Demos/SerializeDemo/CMakeLists.txt +++ b/Demos/SerializeDemo/CMakeLists.txt @@ -11,6 +11,10 @@ SET(GLUT_ROOT ${BULLET_PHYSICS_SOURCE_DIR}/Glut) # You shouldn't have to modify anything below this line ######################################################## +IF(BUILD_AMD_OPENCL_DEMOS) + SUBDIRS(AMD) +ENDIF() + INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src ${BULLET_PHYSICS_SOURCE_DIR}/Demos/OpenGL @@ -86,4 +90,6 @@ IF (INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) SET_TARGET_PROPERTIES(AppSerializeDemo PROPERTIES DEBUG_POSTFIX "_Debug") SET_TARGET_PROPERTIES(AppSerializeDemo PROPERTIES MINSIZEREL_POSTFIX "_MinsizeRel") SET_TARGET_PROPERTIES(AppSerializeDemo PROPERTIES RELWITHDEBINFO_POSTFIX "_RelWithDebugInfo") -ENDIF(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) \ No newline at end of file +ENDIF(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES) + + diff --git a/Demos/SerializeDemo/SerializeDemo.cpp b/Demos/SerializeDemo/SerializeDemo.cpp index bf1d6d43b..8772f4616 100644 --- a/Demos/SerializeDemo/SerializeDemo.cpp +++ b/Demos/SerializeDemo/SerializeDemo.cpp @@ -51,12 +51,26 @@ subject to the following restrictions: #ifdef DESERIALIZE_SOFT_BODIES +#include "BulletSoftBody/btSoftBodySolvers.h" + + +#ifdef USE_AMD_OPENCL + #include + #include + extern cl_context g_cxMainContext; + extern cl_device_id g_cdDevice; + extern cl_command_queue g_cqCommandQue; +#endif + +btSoftBodySolver* fSoftBodySolver=0; + #include "BulletSoftBody/btSoftBodyHelpers.h" #include "BulletSoftBody/btSoftRigidDynamicsWorld.h" #include "BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h" #endif + void SerializeDemo::clientMoveAndDisplay() { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -69,8 +83,27 @@ void SerializeDemo::clientMoveAndDisplay() { m_dynamicsWorld->stepSimulation(ms / 1000000.f); - //optional but useful: debug drawing + + if (fSoftBodySolver) + fSoftBodySolver->copyBackToSoftBodies(); + m_dynamicsWorld->debugDrawWorld(); + + if (m_dynamicsWorld->getWorldType()==BT_SOFT_RIGID_DYNAMICS_WORLD) + { + //optional but useful: debug drawing + btSoftRigidDynamicsWorld* softWorld = (btSoftRigidDynamicsWorld*)m_dynamicsWorld; + + for ( int i=0;igetSoftBodyArray().size();i++) + { + btSoftBody* psb=(btSoftBody*)softWorld->getSoftBodyArray()[i]; + if (softWorld->getDebugDrawer() && !(softWorld->getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe))) + { + btSoftBodyHelpers::DrawFrame(psb,softWorld->getDebugDrawer()); + btSoftBodyHelpers::Draw(psb,softWorld->getDebugDrawer(),softWorld->getDrawFlags()); + } + } + } } renderme(); @@ -87,6 +120,22 @@ void SerializeDemo::displayCallback(void) { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + if (m_dynamicsWorld->getWorldType()==BT_SOFT_RIGID_DYNAMICS_WORLD) + { + //optional but useful: debug drawing + btSoftRigidDynamicsWorld* softWorld = (btSoftRigidDynamicsWorld*)m_dynamicsWorld; + + for ( int i=0;igetSoftBodyArray().size();i++) + { + btSoftBody* psb=(btSoftBody*)softWorld->getSoftBodyArray()[i]; + if (softWorld->getDebugDrawer() && !(softWorld->getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe))) + { + btSoftBodyHelpers::DrawFrame(psb,softWorld->getDebugDrawer()); + btSoftBodyHelpers::Draw(psb,softWorld->getDebugDrawer(),softWorld->getDrawFlags()); + } + } + } + renderme(); //optional but useful: debug drawing to detect problems @@ -97,7 +146,12 @@ void SerializeDemo::displayCallback(void) { swapBuffers(); } - +enum SolverType +{ + kSolverAccelerationOpenCL_CPU = 1, + kSolverAccelerationOpenCL_GPU = 2, + kSolverAccelerationNone = 3 +}; void SerializeDemo::setupEmptyDynamicsWorld() @@ -123,7 +177,62 @@ void SerializeDemo::setupEmptyDynamicsWorld() m_solver = sol; #ifdef DESERIALIZE_SOFT_BODIES - btSoftRigidDynamicsWorld* world = new btSoftRigidDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration); + + + + #ifdef USE_AMD_OPENCL + + int solverAccel = kSolverAccelerationOpenCL_GPU; + + if ( 1 ) { + switch (solverAccel) + { + case kSolverAccelerationOpenCL_GPU: + { + fSoftBodySolver + = new btOpenCLSoftBodySolverSIMDAware( g_cqCommandQue, + g_cxMainContext ); + // fSoftBodySolver = new btOpenCLSoftBodySolver( g_cqCommandQue, g_cxMainContext); + + /*if (!fSoftBodySolver->checkInitialized()) + { + btAssert(0); + delete fSoftBodySolver; + fSoftBodySolver = NULL; + } + */ + + break; + } + case kSolverAccelerationOpenCL_CPU: + { + //fSoftBodySolver = new btCPUSoftBodySolver(); + break; + }; + case kSolverAccelerationNone: + default: + { + fSoftBodySolver = NULL; + } + }; + } + else + { + if ( solverAccel != kSolverAccelerationNone ) + { + } + else + { + } + fSoftBodySolver = NULL; + } +#else + + fSoftBodySolver = NULL; +#endif + + btSoftRigidDynamicsWorld* world = new btSoftRigidDynamicsWorld(m_dispatcher, m_broadphase, m_solver, + m_collisionConfiguration, fSoftBodySolver); m_dynamicsWorld = world; //world->setDrawFlags(world->getDrawFlags()^fDrawFlags::Clusters); #else @@ -510,6 +619,7 @@ SerializeDemo::~SerializeDemo() void SerializeDemo::initPhysics() { + m_idle = true; setTexturing(true); setShadows(true); diff --git a/Demos/SerializeDemo/main.cpp b/Demos/SerializeDemo/main.cpp index d19b0aa8f..9a8e6d798 100644 --- a/Demos/SerializeDemo/main.cpp +++ b/Demos/SerializeDemo/main.cpp @@ -19,69 +19,93 @@ subject to the following restrictions: #include "btBulletDynamicsCommon.h" #include "LinearMath/btHashMap.h" -class OurValue - { - int m_uid; - public: - OurValue(const btVector3& initialPos) - :m_position(initialPos) - { - static int gUid=0; - m_uid=gUid; - gUid++; - } - btVector3 m_position; - int getUid() const - { - return m_uid; - } - }; +#ifdef USE_AMD_OPENCL + +#ifdef _DEBUG + bool bDebug = true; +#else + bool bDebug = false; +#endif + + +#include "btOclCommon.h" +#include "btOclUtils.h" +#include + +cl_context g_cxMainContext; +cl_device_id g_cdDevice; +cl_command_queue g_cqCommandQue; + + +// Returns true if OpenCL is initialized properly, false otherwise. +bool initCL( void* glCtx, void* glDC ) +{ + int ciErrNum = 0; + +#ifdef BT_USE_CLEW + ciErrNum = clewInit( "OpenCL.dll" ); + if ( ciErrNum != CLEW_SUCCESS ) { + return false; + } +#endif + +#if defined(CL_PLATFORM_MINI_CL) + cl_device_type deviceType = CL_DEVICE_TYPE_CPU; +#elif defined(CL_PLATFORM_AMD) + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; +#elif defined(CL_PLATFORM_NVIDIA) + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; +#else + cl_device_type deviceType = CL_DEVICE_TYPE_CPU; +#endif + + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); + //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum); + //try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel +//#ifdef USE_MINICL +// g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum); +//#else + g_cxMainContext = btOclCommon::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); +//#endif + + oclCHECKERROR(ciErrNum, CL_SUCCESS); + g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext); + + if ( bDebug ) { + btOclPrintDevInfo(g_cdDevice); + } + + // create a command-queue + g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + + return true; +} + +#endif //#ifdef USE_AMD_OPENCL int main(int argc,char** argv) { GLDebugDrawer gDebugDrawer; +#ifdef USE_AMD_OPENCL + bool initialized = initCL(0,0); + btAssert(initialized); +#endif //USE_AMD_OPENCL - ///testing the btHashMap - btHashMap,OurValue> map; - OurValue value1(btVector3(2,3,4)); - btHashKey key1(value1.getUid()); - map.insert(key1,value1); - - - OurValue value2(btVector3(5,6,7)); - btHashKey key2(value2.getUid()); - map.insert(key2,value2); - - - { - OurValue value3(btVector3(7,8,9)); - btHashKey key3(value3.getUid()); - map.insert(key3,value3); - } - - - map.remove(key2); - -// const OurValue* ourPtr = map.find(key1); -// for (int i=0;im_position.getX(),tmp->m_position.getY(),tmp->m_position.getZ()); -// } - - SerializeDemo ccdDemo; - ccdDemo.initPhysics(); - ccdDemo.getDynamicsWorld()->setDebugDrawer(&gDebugDrawer); + SerializeDemo serializeDemo; + serializeDemo.initPhysics(); + serializeDemo.getDynamicsWorld()->setDebugDrawer(&gDebugDrawer); #ifdef CHECK_MEMORY_LEAKS - ccdDemo.exitPhysics(); + serializeDemo.exitPhysics(); #else - return glutmain(argc, argv,640,480,"Bullet Physics Demo. http://bulletphysics.org",&ccdDemo); + return glutmain(argc, argv,640,480,"Bullet Physics Demo. http://bulletphysics.org",&serializeDemo); #endif //default glut doesn't return from mainloop diff --git a/Demos/SerializeDemo/testFile.bullet b/Demos/SerializeDemo/testFile.bullet index 81134e276..3a7e6a5bd 100644 Binary files a/Demos/SerializeDemo/testFile.bullet and b/Demos/SerializeDemo/testFile.bullet differ diff --git a/Demos/SharedOpenCL/btOclCommon.cpp b/Demos/SharedOpenCL/btOclCommon.cpp index c00059cbf..ed8180f6d 100644 --- a/Demos/SharedOpenCL/btOclCommon.cpp +++ b/Demos/SharedOpenCL/btOclCommon.cpp @@ -18,7 +18,7 @@ subject to the following restrictions: #include "btOclCommon.h" -static const char* spPlatformVendor = +static char* spPlatformVendor = #if defined(CL_PLATFORM_MINI_CL) "MiniCL, SCEA"; #elif defined(CL_PLATFORM_INTEL) @@ -37,6 +37,7 @@ static const char* spPlatformVendor = #endif //_WIN32 #endif + cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC ) { cl_uint numPlatforms; @@ -91,8 +92,9 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* 0, 0 }; -#ifndef CL_PLATFORM_MINI_CL +#ifndef CL_PLATFORM_MINI_CL #ifdef _WIN32 +#ifndef BT_USE_CLEW // If we have a gl context then enable interop if( pGLContext ) { @@ -101,7 +103,8 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* cps[4] = CL_WGL_HDC_KHR; cps[5] = (cl_context_properties)pGLDC; } -#endif +#endif // DONT_USE_CLEW +#endif //_WIN32 #endif //CL_PLATFORM_MINI_CL /* Use NULL for backward compatibility */ @@ -115,3 +118,4 @@ cl_context btOclCommon::createContextFromType(cl_device_type deviceType, cl_int* return retContext; } + diff --git a/Demos/SharedOpenCL/btOclCommon.h b/Demos/SharedOpenCL/btOclCommon.h index 42a222d9f..7248adb24 100644 --- a/Demos/SharedOpenCL/btOclCommon.h +++ b/Demos/SharedOpenCL/btOclCommon.h @@ -16,20 +16,7 @@ subject to the following restrictions: #ifndef BTOCLCOMMON_H #define BTOCLCOMMON_H -#ifdef __APPLE__ -#ifdef USE_MINICL - #include -#else - #include -#endif -#else - #ifdef USE_MINICL - #include - #else - #include - #endif -#endif //__APPLE__ - +#include "btOclUtils.h" class btOclCommon { @@ -38,8 +25,9 @@ public: // to have to understand GL types. // It is a HGLRC in _WIN32 or a GLXContext otherwise. static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0); + }; -#endif // BTOCLCOMMON_H \ No newline at end of file +#endif // BTOCLCOMMON_H diff --git a/Demos/SharedOpenCL/btOclUtils.cpp b/Demos/SharedOpenCL/btOclUtils.cpp index f8a22f502..80918e982 100644 --- a/Demos/SharedOpenCL/btOclUtils.cpp +++ b/Demos/SharedOpenCL/btOclUtils.cpp @@ -13,11 +13,14 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ + #include #include #include +#define myprintf printf + #include "btOclUtils.h" @@ -56,92 +59,6 @@ cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr) -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of device with maximal FLOPS from the context -//! -//! @return the id -//! @param cxMainContext OpenCL context -////////////////////////////////////////////////////////////////////////////// -cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext) -{ - size_t szParmDataBytes; - cl_device_id* cdDevices; - - // get the list of GPU devices associated with context - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); - cdDevices = (cl_device_id*) malloc(szParmDataBytes); - size_t device_count = szParmDataBytes / sizeof(cl_device_id); - - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); - - cl_device_id max_flops_device = cdDevices[0]; - int max_flops = 0; - - size_t current_device = 0; - - // CL_DEVICE_MAX_COMPUTE_UNITS - cl_uint compute_units; - clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); - - // CL_DEVICE_MAX_CLOCK_FREQUENCY - cl_uint clock_frequency; - clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); - - cl_device_type device_type; - clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL); - - int SIMDmultiplier = 1; - - if( device_type == CL_DEVICE_TYPE_CPU ) - { - // For simplicity assume that the CPU is running single SSE instructions - // This will of course depend on the kernel - SIMDmultiplier = 4; - } else if( device_type == CL_DEVICE_TYPE_GPU ) { - // Approximation to GPU compute power - // As long as this beats the CPU number that's the important thing, really -#if defined(CL_PLATFORM_INTEL) - // SSE - 4, AVX1,2 - 8 : TODO: detect AVX? - SIMDmultiplier = 4; -#elif defined(CL_PLATFORM_AMD) - // 16 processing elements, 5 ALUs each - SIMDmultiplier = 80; -#elif defined(CL_PLATFORM_NVIDIA) - // 8 processing elements, dual issue - pre-Fermi at least - SIMDmultiplier = 16; -#else - SIMDmultiplier = 1; -#endif - } - - - max_flops = compute_units * clock_frequency * SIMDmultiplier; - ++current_device; - - while( current_device < device_count ) - { - // CL_DEVICE_MAX_COMPUTE_UNITS - cl_uint compute_units; - clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); - - // CL_DEVICE_MAX_CLOCK_FREQUENCY - cl_uint clock_frequency; - clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); - - int flops = compute_units * clock_frequency; - if( flops > max_flops ) - { - max_flops = flops; - max_flops_device = cdDevices[current_device]; - } - ++current_device; - } - - free(cdDevices); - - return max_flops_device; -} - ////////////////////////////////////////////////////////////////////////////// //! Loads a Program file and prepends the cPreamble to the code. @@ -221,140 +138,139 @@ cl_device_id btOclGetFirstDev(cl_context cxMainContext) void btOclPrintDevInfo(cl_device_id device) { char device_string[1024]; - bool nv_device_attibute_query = false; // CL_DEVICE_NAME clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); - printf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string); + myprintf(" CL_DEVICE_NAME: \t\t\t%s\n", device_string); // CL_DEVICE_VENDOR clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL); - printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); + myprintf(" CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); // CL_DRIVER_VERSION clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL); - printf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string); + myprintf(" CL_DRIVER_VERSION: \t\t\t%s\n", device_string); // CL_DEVICE_INFO cl_device_type type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if( type & CL_DEVICE_TYPE_CPU ) - printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); + myprintf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); if( type & CL_DEVICE_TYPE_GPU ) - printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); + myprintf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); if( type & CL_DEVICE_TYPE_ACCELERATOR ) - printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); + myprintf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); if( type & CL_DEVICE_TYPE_DEFAULT ) - printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); + myprintf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); - printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); + myprintf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS size_t workitem_dims; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL); - printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%d\n", workitem_dims); + myprintf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims); // CL_DEVICE_MAX_WORK_ITEM_SIZES size_t workitem_size[3]; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); - printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%d / %d / %d \n", workitem_size[0], workitem_size[1], workitem_size[2]); + myprintf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); // CL_DEVICE_MAX_WORK_GROUP_SIZE size_t workgroup_size; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL); - printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%d\n", workgroup_size); + myprintf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); - printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); + myprintf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); // CL_DEVICE_ADDRESS_BITS cl_uint addr_bits; clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL); - printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); + myprintf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); // CL_DEVICE_MAX_MEM_ALLOC_SIZE cl_ulong max_mem_alloc_size; clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); - printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); + myprintf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); // CL_DEVICE_GLOBAL_MEM_SIZE cl_ulong mem_size; clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); - printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); + myprintf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); // CL_DEVICE_ERROR_CORRECTION_SUPPORT cl_bool error_correction_support; clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL); - printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); + myprintf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); // CL_DEVICE_LOCAL_MEM_TYPE cl_device_local_mem_type local_mem_type; clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL); - printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); + myprintf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); // CL_DEVICE_LOCAL_MEM_SIZE clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); - printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); + myprintf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL); - printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); + myprintf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_QUEUE_PROPERTIES cl_command_queue_properties queue_properties; clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL); if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) - printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); + myprintf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); if( queue_properties & CL_QUEUE_PROFILING_ENABLE ) - printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); + myprintf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); // CL_DEVICE_IMAGE_SUPPORT cl_bool image_support; clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); - printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); + myprintf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); // CL_DEVICE_MAX_READ_IMAGE_ARGS cl_uint max_read_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL); - printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); + myprintf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); // CL_DEVICE_MAX_WRITE_IMAGE_ARGS cl_uint max_write_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL); - printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); + myprintf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH size_t szMaxDims[5]; - printf("\n CL_DEVICE_IMAGE "); + myprintf("\n CL_DEVICE_IMAGE \n"); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL); - printf("\t\t\t2D_MAX_WIDTH\t %d\n", szMaxDims[0]); + myprintf("\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL); - printf("\t\t\t\t\t2D_MAX_HEIGHT\t %d\n", szMaxDims[1]); + myprintf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL); - printf("\t\t\t\t\t3D_MAX_WIDTH\t %d\n", szMaxDims[2]); + myprintf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL); - printf("\t\t\t\t\t3D_MAX_HEIGHT\t %d\n", szMaxDims[3]); + myprintf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL); - printf("\t\t\t\t\t3D_MAX_DEPTH\t %d\n", szMaxDims[4]); + myprintf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]); // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL); if (device_string != 0) { - printf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string); + myprintf("\n CL_DEVICE_EXTENSIONS:%s\n",device_string); } else { - printf(" CL_DEVICE_EXTENSIONS: None\n"); + myprintf(" CL_DEVICE_EXTENSIONS: None\n"); } // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ - printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); + myprintf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t\n"); cl_uint vec_width [6]; clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL); @@ -362,6 +278,7 @@ void btOclPrintDevInfo(cl_device_id device) clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL); - printf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n\n", + myprintf("CHAR %u, SHORT %u, INT %u, FLOAT %u, DOUBLE %u\n\n", vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4]); } + diff --git a/Demos/SharedOpenCL/btOclUtils.h b/Demos/SharedOpenCL/btOclUtils.h index 9362cf84e..1636dd2b1 100644 --- a/Demos/SharedOpenCL/btOclUtils.h +++ b/Demos/SharedOpenCL/btOclUtils.h @@ -18,15 +18,17 @@ subject to the following restrictions: #ifdef USE_MINICL #include +#else //USE_MINICL +#ifdef BT_USE_CLEW + #include "clew.h" #else - #ifdef __APPLE__ - #include - #else - #include - #endif __APPLE__ -#endif - -#include +#ifdef __APPLE__ + #include +#else + #include +#endif //__APPLE__ +#endif //BT_USE_CLEW +#endif //USE_MINICL //#define oclCHECKERROR(a, b) btAssert((a) == (b)) @@ -35,7 +37,6 @@ subject to the following restrictions: void btOclPrintDevInfo(cl_device_id device); cl_device_id btOclGetDev(cl_context cxMainContext, unsigned int nr); -cl_device_id btOclGetMaxFlopsDev(cl_context cxMainContext); char* btOclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); cl_device_id btOclGetFirstDev(cl_context cxMainContext); #endif //BT_OCL_UTILS_H diff --git a/Demos/SharedOpenCL/clew.c b/Demos/SharedOpenCL/clew.c new file mode 100644 index 000000000..cfc6ed0b4 --- /dev/null +++ b/Demos/SharedOpenCL/clew.c @@ -0,0 +1,313 @@ +////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009 Organic Vectory B.V. +// Written by George van Venrooij +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file license.txt) +////////////////////////////////////////////////////////////////////////// +#ifndef USE_MINICL +#include "clew.h" + +//! \file clew.c +//! \brief OpenCL run-time loader source + +#ifndef CLCC_GENERATE_DOCUMENTATION +#ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #define VC_EXTRALEAN + #define NOMINMAX + #include + + typedef HMODULE CLCC_DYNLIB_HANDLE; + + #define CLCC_DYNLIB_OPEN LoadLibrary + #define CLCC_DYNLIB_CLOSE FreeLibrary + #define CLCC_DYNLIB_IMPORT GetProcAddress +#else + #include + + typedef void* CLCC_DYNLIB_HANDLE; + + #define CLCC_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL) + #define CLCC_DYNLIB_CLOSE dlclose + #define CLCC_DYNLIB_IMPORT dlsym +#endif +#else + //typedef implementation_defined CLCC_DYNLIB_HANDLE; + //#define CLCC_DYNLIB_OPEN(path) implementation_defined + //#define CLCC_DYNLIB_CLOSE implementation_defined + //#define CLCC_DYNLIB_IMPORT implementation_defined +#endif + +#include + +//! \brief module handle +static CLCC_DYNLIB_HANDLE module = NULL; + +// Variables holding function entry points +#ifndef CLCC_GENERATE_DOCUMENTATION +PFNCLGETPLATFORMIDS __clewGetPlatformIDs = NULL; +PFNCLGETPLATFORMINFO __clewGetPlatformInfo = NULL; +PFNCLGETDEVICEIDS __clewGetDeviceIDs = NULL; +PFNCLGETDEVICEINFO __clewGetDeviceInfo = NULL; +PFNCLCREATECONTEXT __clewCreateContext = NULL; +PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType = NULL; +PFNCLRETAINCONTEXT __clewRetainContext = NULL; +PFNCLRELEASECONTEXT __clewReleaseContext = NULL; +PFNCLGETCONTEXTINFO __clewGetContextInfo = NULL; +PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue = NULL; +PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue = NULL; +PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue = NULL; +PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo = NULL; +PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty = NULL; +PFNCLCREATEBUFFER __clewCreateBuffer = NULL; +PFNCLCREATEIMAGE2D __clewCreateImage2D = NULL; +PFNCLCREATEIMAGE3D __clewCreateImage3D = NULL; +PFNCLRETAINMEMOBJECT __clewRetainMemObject = NULL; +PFNCLRELEASEMEMOBJECT __clewReleaseMemObject = NULL; +PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats = NULL; +PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo = NULL; +PFNCLGETIMAGEINFO __clewGetImageInfo = NULL; +PFNCLCREATESAMPLER __clewCreateSampler = NULL; +PFNCLRETAINSAMPLER __clewRetainSampler = NULL; +PFNCLRELEASESAMPLER __clewReleaseSampler = NULL; +PFNCLGETSAMPLERINFO __clewGetSamplerInfo = NULL; +PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource = NULL; +PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary = NULL; +PFNCLRETAINPROGRAM __clewRetainProgram = NULL; +PFNCLRELEASEPROGRAM __clewReleaseProgram = NULL; +PFNCLBUILDPROGRAM __clewBuildProgram = NULL; +PFNCLUNLOADCOMPILER __clewUnloadCompiler = NULL; +PFNCLGETPROGRAMINFO __clewGetProgramInfo = NULL; +PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo = NULL; +PFNCLCREATEKERNEL __clewCreateKernel = NULL; +PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram = NULL; +PFNCLRETAINKERNEL __clewRetainKernel = NULL; +PFNCLRELEASEKERNEL __clewReleaseKernel = NULL; +PFNCLSETKERNELARG __clewSetKernelArg = NULL; +PFNCLGETKERNELINFO __clewGetKernelInfo = NULL; +PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo = NULL; +PFNCLWAITFOREVENTS __clewWaitForEvents = NULL; +PFNCLGETEVENTINFO __clewGetEventInfo = NULL; +PFNCLRETAINEVENT __clewRetainEvent = NULL; +PFNCLRELEASEEVENT __clewReleaseEvent = NULL; +PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo = NULL; +PFNCLFLUSH __clewFlush = NULL; +PFNCLFINISH __clewFinish = NULL; +PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer = NULL; +PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer = NULL; +PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer = NULL; +PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage = NULL; +PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage = NULL; +PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage = NULL; +PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer = NULL; +PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage = NULL; +PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer = NULL; +PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage = NULL; +PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject = NULL; +PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel = NULL; +PFNCLENQUEUETASK __clewEnqueueTask = NULL; +PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel = NULL; +PFNCLENQUEUEMARKER __clewEnqueueMarker = NULL; +PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents = NULL; +PFNCLENQUEUEBARRIER __clewEnqueueBarrier = NULL; +PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL; +#endif // CLCC_GENERATE_DOCUMENTATION + + +//! \brief Unloads OpenCL dynamic library, should not be called directly +static void clewExit(void) +{ + if (module != NULL) + { + // Ignore errors + CLCC_DYNLIB_CLOSE(module); + module = NULL; + } +} + +//! \param path path to dynamic library to load +//! \return CLEW_ERROR_OPEN_FAILED if the library could not be opened +//! CLEW_ERROR_ATEXIT_FAILED if atexit(clewExit) failed +//! CLEW_SUCCESS when the library was succesfully loaded +int clewInit(const char* path) +{ + int error = 0; + + // Check if already initialized + if (module != NULL) + { + return CLEW_SUCCESS; + } + + // Load library + module = CLCC_DYNLIB_OPEN(path); + + // Check for errors + if (module == NULL) + { + return CLEW_ERROR_OPEN_FAILED; + } + + // Set unloading + error = atexit(clewExit); + + if (error) + { + // Failure queing atexit, shutdown with error + CLCC_DYNLIB_CLOSE(module); + module = NULL; + + return CLEW_ERROR_ATEXIT_FAILED; + } + + // Determine function entry-points + __clewGetPlatformIDs = (PFNCLGETPLATFORMIDS )CLCC_DYNLIB_IMPORT(module, "clGetPlatformIDs"); + __clewGetPlatformInfo = (PFNCLGETPLATFORMINFO )CLCC_DYNLIB_IMPORT(module, "clGetPlatformInfo"); + __clewGetDeviceIDs = (PFNCLGETDEVICEIDS )CLCC_DYNLIB_IMPORT(module, "clGetDeviceIDs"); + __clewGetDeviceInfo = (PFNCLGETDEVICEINFO )CLCC_DYNLIB_IMPORT(module, "clGetDeviceInfo"); + __clewCreateContext = (PFNCLCREATECONTEXT )CLCC_DYNLIB_IMPORT(module, "clCreateContext"); + __clewCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE )CLCC_DYNLIB_IMPORT(module, "clCreateContextFromType"); + __clewRetainContext = (PFNCLRETAINCONTEXT )CLCC_DYNLIB_IMPORT(module, "clRetainContext"); + __clewReleaseContext = (PFNCLRELEASECONTEXT )CLCC_DYNLIB_IMPORT(module, "clReleaseContext"); + __clewGetContextInfo = (PFNCLGETCONTEXTINFO )CLCC_DYNLIB_IMPORT(module, "clGetContextInfo"); + __clewCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clCreateCommandQueue"); + __clewRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clRetainCommandQueue"); + __clewReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE )CLCC_DYNLIB_IMPORT(module, "clReleaseCommandQueue"); + __clewGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO )CLCC_DYNLIB_IMPORT(module, "clGetCommandQueueInfo"); + __clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY )CLCC_DYNLIB_IMPORT(module, "clSetCommandQueueProperty"); + __clewCreateBuffer = (PFNCLCREATEBUFFER )CLCC_DYNLIB_IMPORT(module, "clCreateBuffer"); + __clewCreateImage2D = (PFNCLCREATEIMAGE2D )CLCC_DYNLIB_IMPORT(module, "clCreateImage2D"); + __clewCreateImage3D = (PFNCLCREATEIMAGE3D )CLCC_DYNLIB_IMPORT(module, "clCreateImage3D"); + __clewRetainMemObject = (PFNCLRETAINMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clRetainMemObject"); + __clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clReleaseMemObject"); + __clewGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS )CLCC_DYNLIB_IMPORT(module, "clGetSupportedImageFormats"); + __clewGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO )CLCC_DYNLIB_IMPORT(module, "clGetMemObjectInfo"); + __clewGetImageInfo = (PFNCLGETIMAGEINFO )CLCC_DYNLIB_IMPORT(module, "clGetImageInfo"); + __clewCreateSampler = (PFNCLCREATESAMPLER )CLCC_DYNLIB_IMPORT(module, "clCreateSampler"); + __clewRetainSampler = (PFNCLRETAINSAMPLER )CLCC_DYNLIB_IMPORT(module, "clRetainSampler"); + __clewReleaseSampler = (PFNCLRELEASESAMPLER )CLCC_DYNLIB_IMPORT(module, "clReleaseSampler"); + __clewGetSamplerInfo = (PFNCLGETSAMPLERINFO )CLCC_DYNLIB_IMPORT(module, "clGetSamplerInfo"); + __clewCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithSource"); + __clewCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY )CLCC_DYNLIB_IMPORT(module, "clCreateProgramWithBinary"); + __clewRetainProgram = (PFNCLRETAINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clRetainProgram"); + __clewReleaseProgram = (PFNCLRELEASEPROGRAM )CLCC_DYNLIB_IMPORT(module, "clReleaseProgram"); + __clewBuildProgram = (PFNCLBUILDPROGRAM )CLCC_DYNLIB_IMPORT(module, "clBuildProgram"); + __clewUnloadCompiler = (PFNCLUNLOADCOMPILER )CLCC_DYNLIB_IMPORT(module, "clUnloadCompiler"); + __clewGetProgramInfo = (PFNCLGETPROGRAMINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramInfo"); + __clewGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO )CLCC_DYNLIB_IMPORT(module, "clGetProgramBuildInfo"); + __clewCreateKernel = (PFNCLCREATEKERNEL )CLCC_DYNLIB_IMPORT(module, "clCreateKernel"); + __clewCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM )CLCC_DYNLIB_IMPORT(module, "clCreateKernelsInProgram"); + __clewRetainKernel = (PFNCLRETAINKERNEL )CLCC_DYNLIB_IMPORT(module, "clRetainKernel"); + __clewReleaseKernel = (PFNCLRELEASEKERNEL )CLCC_DYNLIB_IMPORT(module, "clReleaseKernel"); + __clewSetKernelArg = (PFNCLSETKERNELARG )CLCC_DYNLIB_IMPORT(module, "clSetKernelArg"); + __clewGetKernelInfo = (PFNCLGETKERNELINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelInfo"); + __clewGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO )CLCC_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo"); + __clewWaitForEvents = (PFNCLWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clWaitForEvents"); + __clewGetEventInfo = (PFNCLGETEVENTINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventInfo"); + __clewRetainEvent = (PFNCLRETAINEVENT )CLCC_DYNLIB_IMPORT(module, "clRetainEvent"); + __clewReleaseEvent = (PFNCLRELEASEEVENT )CLCC_DYNLIB_IMPORT(module, "clReleaseEvent"); + __clewGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO )CLCC_DYNLIB_IMPORT(module, "clGetEventProfilingInfo"); + __clewFlush = (PFNCLFLUSH )CLCC_DYNLIB_IMPORT(module, "clFlush"); + __clewFinish = (PFNCLFINISH )CLCC_DYNLIB_IMPORT(module, "clFinish"); + __clewEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadBuffer"); + __clewEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer"); + __clewEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer"); + __clewEnqueueReadImage = (PFNCLENQUEUEREADIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueReadImage"); + __clewEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueWriteImage"); + __clewEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImage"); + __clewEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer"); + __clewEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage"); + __clewEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapBuffer"); + __clewEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE )CLCC_DYNLIB_IMPORT(module, "clEnqueueMapImage"); + __clewEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT )CLCC_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject"); + __clewEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel"); + __clewEnqueueTask = (PFNCLENQUEUETASK )CLCC_DYNLIB_IMPORT(module, "clEnqueueTask"); + __clewEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL )CLCC_DYNLIB_IMPORT(module, "clEnqueueNativeKernel"); + __clewEnqueueMarker = (PFNCLENQUEUEMARKER )CLCC_DYNLIB_IMPORT(module, "clEnqueueMarker"); + __clewEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS )CLCC_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents"); + __clewEnqueueBarrier = (PFNCLENQUEUEBARRIER )CLCC_DYNLIB_IMPORT(module, "clEnqueueBarrier"); + __clewGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS )CLCC_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress"); + + return CLEW_SUCCESS; +} + +//! \param error CL error code +//! \return a string representation of the error code +const char* clewErrorString(cl_int error) +{ + static const char* strings[] = + { + // Error Codes + "CL_SUCCESS" // 0 + , "CL_DEVICE_NOT_FOUND" // -1 + , "CL_DEVICE_NOT_AVAILABLE" // -2 + , "CL_COMPILER_NOT_AVAILABLE" // -3 + , "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4 + , "CL_OUT_OF_RESOURCES" // -5 + , "CL_OUT_OF_HOST_MEMORY" // -6 + , "CL_PROFILING_INFO_NOT_AVAILABLE" // -7 + , "CL_MEM_COPY_OVERLAP" // -8 + , "CL_IMAGE_FORMAT_MISMATCH" // -9 + , "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10 + , "CL_BUILD_PROGRAM_FAILURE" // -11 + , "CL_MAP_FAILURE" // -12 + + , "" // -13 + , "" // -14 + , "" // -15 + , "" // -16 + , "" // -17 + , "" // -18 + , "" // -19 + + , "" // -20 + , "" // -21 + , "" // -22 + , "" // -23 + , "" // -24 + , "" // -25 + , "" // -26 + , "" // -27 + , "" // -28 + , "" // -29 + + , "CL_INVALID_VALUE" // -30 + , "CL_INVALID_DEVICE_TYPE" // -31 + , "CL_INVALID_PLATFORM" // -32 + , "CL_INVALID_DEVICE" // -33 + , "CL_INVALID_CONTEXT" // -34 + , "CL_INVALID_QUEUE_PROPERTIES" // -35 + , "CL_INVALID_COMMAND_QUEUE" // -36 + , "CL_INVALID_HOST_PTR" // -37 + , "CL_INVALID_MEM_OBJECT" // -38 + , "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39 + , "CL_INVALID_IMAGE_SIZE" // -40 + , "CL_INVALID_SAMPLER" // -41 + , "CL_INVALID_BINARY" // -42 + , "CL_INVALID_BUILD_OPTIONS" // -43 + , "CL_INVALID_PROGRAM" // -44 + , "CL_INVALID_PROGRAM_EXECUTABLE" // -45 + , "CL_INVALID_KERNEL_NAME" // -46 + , "CL_INVALID_KERNEL_DEFINITION" // -47 + , "CL_INVALID_KERNEL" // -48 + , "CL_INVALID_ARG_INDEX" // -49 + , "CL_INVALID_ARG_VALUE" // -50 + , "CL_INVALID_ARG_SIZE" // -51 + , "CL_INVALID_KERNEL_ARGS" // -52 + , "CL_INVALID_WORK_DIMENSION" // -53 + , "CL_INVALID_WORK_GROUP_SIZE" // -54 + , "CL_INVALID_WORK_ITEM_SIZE" // -55 + , "CL_INVALID_GLOBAL_OFFSET" // -56 + , "CL_INVALID_EVENT_WAIT_LIST" // -57 + , "CL_INVALID_EVENT" // -58 + , "CL_INVALID_OPERATION" // -59 + , "CL_INVALID_GL_OBJECT" // -60 + , "CL_INVALID_BUFFER_SIZE" // -61 + , "CL_INVALID_MIP_LEVEL" // -62 + , "CL_INVALID_GLOBAL_WORK_SIZE" // -63 + }; + + return strings[-error]; +} +#endif diff --git a/Demos/SharedOpenCL/clew.h b/Demos/SharedOpenCL/clew.h new file mode 100644 index 000000000..27198ae90 --- /dev/null +++ b/Demos/SharedOpenCL/clew.h @@ -0,0 +1,1316 @@ +#ifndef CLCC_CLEW_HPP_INCLUDED +#define CLCC_CLEW_HPP_INCLUDED +#ifndef USE_MINICL +////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009 Organic Vectory B.V. +// Written by George van Venrooij +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file license.txt) +////////////////////////////////////////////////////////////////////////// + +//! \file clew.h +//! \brief OpenCL run-time loader header +//! +//! This file contains a copy of the contents of CL.H and CL_PLATFORM.H from the +//! official OpenCL spec. The purpose of this code is to load the OpenCL dynamic +//! library at run-time and thus allow the executable to function on many +//! platforms regardless of the vendor of the OpenCL driver actually installed. +//! Some of the techniques used here were inspired by work done in the GLEW +//! library (http://glew.sourceforge.net/) + +// Run-time dynamic linking functionality based on concepts used in GLEW +#ifdef __OPENCL_CL_H +#error cl.h included before clew.h +#endif + +#ifdef __OPENCL_CL_PLATFORM_H +#error cl_platform.h included before clew.h +#endif + +#ifndef CLCC_GENERATE_DOCUMENTATION +// Prevent cl.h inclusion +#define __OPENCL_CL_H +// Prevent cl_platform.h inclusion +#define __CL_PLATFORM_H +#endif // CLCC_GENERATE_DOCUMENTATION + +/******************************************************************************* +* Copyright (c) 2008-2009 The Khronos Group Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and/or associated documentation files (the +* "Materials"), to deal in the Materials without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Materials, and to +* permit persons to whom the Materials are furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Materials. +* +* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +******************************************************************************/ +#ifdef __APPLE__ +/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef CLCC_GENERATE_DOCUMENTATION + +#if defined(_WIN32) +#define CL_API_ENTRY +#define CL_API_CALL __stdcall +#else +#define CL_API_ENTRY +#define CL_API_CALL +#endif + +#if defined(__APPLE__) +#define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) +#else +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXTENSION_WEAK_LINK +#endif + +#if defined(_WIN32) && defined(_MSC_VER) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + + +/* +* Vector types +* +* Note: OpenCL requires that all types be naturally aligned. +* This means that vector types must be naturally aligned. +* For example, a vector of four floats must be aligned to +* a 16 byte boundary (calculated as 4 * the natural 4-byte +* alignment of the float). The alignment qualifiers here +* will only function properly if your compiler supports them +* and if you don't actively work to defeat them. For example, +* in order for a cl_float4 to be 16 byte aligned in a struct, +* the start of the struct must itself be 16-byte aligned. +* +* Maintaining proper alignment is the user's responsibility. +*/ +typedef signed __int8 cl_char2[2]; +typedef signed __int8 cl_char4[4]; +typedef signed __int8 cl_char8[8]; +typedef signed __int8 cl_char16[16]; +typedef unsigned __int8 cl_uchar2[2]; +typedef unsigned __int8 cl_uchar4[4]; +typedef unsigned __int8 cl_uchar8[8]; +typedef unsigned __int8 cl_uchar16[16]; + +typedef signed __int16 cl_short2[2]; +typedef signed __int16 cl_short4[4]; +typedef signed __int16 cl_short8[8]; +typedef signed __int16 cl_short16[16]; +typedef unsigned __int16 cl_ushort2[2]; +typedef unsigned __int16 cl_ushort4[4]; +typedef unsigned __int16 cl_ushort8[8]; +typedef unsigned __int16 cl_ushort16[16]; + +typedef signed __int32 cl_int2[2]; +typedef signed __int32 cl_int4[4]; +typedef signed __int32 cl_int8[8]; +typedef signed __int32 cl_int16[16]; +typedef unsigned __int32 cl_uint2[2]; +typedef unsigned __int32 cl_uint4[4]; +typedef unsigned __int32 cl_uint8[8]; +typedef unsigned __int32 cl_uint16[16]; + +typedef signed __int64 cl_long2[2]; +typedef signed __int64 cl_long4[4]; +typedef signed __int64 cl_long8[8]; +typedef signed __int64 cl_long16[16]; +typedef unsigned __int64 cl_ulong2[2]; +typedef unsigned __int64 cl_ulong4[4]; +typedef unsigned __int64 cl_ulong8[8]; +typedef unsigned __int64 cl_ulong16[16]; + +typedef float cl_float2[2]; +typedef float cl_float4[4]; +typedef float cl_float8[8]; +typedef float cl_float16[16]; + +typedef double cl_double2[2]; +typedef double cl_double4[4]; +typedef double cl_double8[8]; +typedef double cl_double16[16]; +/* There are no vector types for half */ + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short __attribute__((aligned(2))); +typedef uint16_t cl_ushort __attribute__((aligned(2))); +typedef int32_t cl_int __attribute__((aligned(4))); +typedef uint32_t cl_uint __attribute__((aligned(4))); +typedef int64_t cl_long __attribute__((aligned(8))); +typedef uint64_t cl_ulong __attribute__((aligned(8))); + +typedef uint16_t cl_half __attribute__((aligned(2))); +typedef float cl_float __attribute__((aligned(4))); +typedef double cl_double __attribute__((aligned(8))); + +/* +* Vector types +* +* Note: OpenCL requires that all types be naturally aligned. +* This means that vector types must be naturally aligned. +* For example, a vector of four floats must be aligned to +* a 16 byte boundary (calculated as 4 * the natural 4-byte +* alignment of the float). The alignment qualifiers here +* will only function properly if your compiler supports them +* and if you don't actively work to defeat them. For example, +* in order for a cl_float4 to be 16 byte aligned in a struct, +* the start of the struct must itself be 16-byte aligned. +* +* Maintaining proper alignment is the user's responsibility. +*/ +typedef int8_t cl_char2[2] __attribute__((aligned(2))); +typedef int8_t cl_char4[4] __attribute__((aligned(4))); +typedef int8_t cl_char8[8] __attribute__((aligned(8))); +typedef int8_t cl_char16[16] __attribute__((aligned(16))); +typedef uint8_t cl_uchar2[2] __attribute__((aligned(2))); +typedef uint8_t cl_uchar4[4] __attribute__((aligned(4))); +typedef uint8_t cl_uchar8[8] __attribute__((aligned(8))); +typedef uint8_t cl_uchar16[16] __attribute__((aligned(16))); + +typedef int16_t cl_short2[2] __attribute__((aligned(4))); +typedef int16_t cl_short4[4] __attribute__((aligned(8))); +typedef int16_t cl_short8[8] __attribute__((aligned(16))); +typedef int16_t cl_short16[16] __attribute__((aligned(32))); +typedef uint16_t cl_ushort2[2] __attribute__((aligned(4))); +typedef uint16_t cl_ushort4[4] __attribute__((aligned(8))); +typedef uint16_t cl_ushort8[8] __attribute__((aligned(16))); +typedef uint16_t cl_ushort16[16] __attribute__((aligned(32))); + +typedef int32_t cl_int2[2] __attribute__((aligned(8))); +typedef int32_t cl_int4[4] __attribute__((aligned(16))); +typedef int32_t cl_int8[8] __attribute__((aligned(32))); +typedef int32_t cl_int16[16] __attribute__((aligned(64))); +typedef uint32_t cl_uint2[2] __attribute__((aligned(8))); +typedef uint32_t cl_uint4[4] __attribute__((aligned(16))); +typedef uint32_t cl_uint8[8] __attribute__((aligned(32))); +typedef uint32_t cl_uint16[16] __attribute__((aligned(64))); + +typedef int64_t cl_long2[2] __attribute__((aligned(16))); +typedef int64_t cl_long4[4] __attribute__((aligned(32))); +typedef int64_t cl_long8[8] __attribute__((aligned(64))); +typedef int64_t cl_long16[16] __attribute__((aligned(128))); +typedef uint64_t cl_ulong2[2] __attribute__((aligned(16))); +typedef uint64_t cl_ulong4[4] __attribute__((aligned(32))); +typedef uint64_t cl_ulong8[8] __attribute__((aligned(64))); +typedef uint64_t cl_ulong16[16] __attribute__((aligned(128))); + +typedef float cl_float2[2] __attribute__((aligned(8))); +typedef float cl_float4[4] __attribute__((aligned(16))); +typedef float cl_float8[8] __attribute__((aligned(32))); +typedef float cl_float16[16] __attribute__((aligned(64))); + +typedef double cl_double2[2] __attribute__((aligned(16))); +typedef double cl_double4[4] __attribute__((aligned(32))); +typedef double cl_double8[8] __attribute__((aligned(64))); +typedef double cl_double16[16] __attribute__((aligned(128))); + +/* There are no vector types for half */ + +#endif + +/******************************************************************************/ + +// Macro names and corresponding values defined by OpenCL + +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#if defined(_MSC_VER) +// MSVC doesn't understand hex floats +#define CL_FLT_MAX 3.402823466e+38F +#define CL_FLT_MIN 1.175494351e-38F +#define CL_FLT_EPSILON 1.192092896e-07F +#else +#define CL_FLT_MAX 0x1.fffffep127f +#define CL_FLT_MIN 0x1.0p-126f +#define CL_FLT_EPSILON 0x1.0p-23f +#endif + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#if defined(_MSC_VER) +// MSVC doesn't understand hex floats +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.2250738585072014e-308 +#define CL_DBL_EPSILON 2.2204460492503131e-016 +#else +#define CL_DBL_MAX 0x1.fffffffffffffp1023 +#define CL_DBL_MIN 0x1.0p-1022 +#define CL_DBL_EPSILON 0x1.0p-52 +#endif + +#include + + +// CL.h contents +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_address_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_uint cl_image_info; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + + + +/******************************************************************************/ + +// Error Codes +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 + +// OpenCL Version +#define CL_VERSION_1_0 1 + +// cl_bool +#define CL_FALSE 0 +#define CL_TRUE 1 + +// cl_platform_info +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 + +// cl_device_type - bitfield +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +// cl_device_info +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 + +// cl_device_fp_config - bitfield +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) + +// cl_device_mem_cache_type +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +// cl_device_local_mem_type +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +// cl_device_exec_capabilities - bitfield +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +// cl_command_queue_properties - bitfield +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) + +// cl_context_info +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 + +// cl_context_properties +#define CL_CONTEXT_PLATFORM 0x1084 + +// cl_command_queue_info +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 + +// cl_mem_flags - bitfield +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) + +// cl_channel_order +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 + +// cl_channel_type +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE + +// cl_mem_object_type +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 + +// cl_mem_info +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 + +// cl_image_info +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 + +// cl_addressing_mode +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 + +// cl_filter_mode +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +// cl_sampler_info +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 + +// cl_map_flags - bitfield +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) + +// cl_program_info +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 + +// cl_program_build_info +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 + +// cl_build_status +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +// cl_kernel_info +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 + +// cl_kernel_work_group_info +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 + +// cl_event_info +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 + +// cl_command_type +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 + +// command execution status +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +// cl_profiling_info +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 + +/********************************************************************************************************/ + +/********************************************************************************************************/ + +// Function signature typedef's + +// Platform API +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETPLATFORMIDS)(cl_uint /* num_entries */, + cl_platform_id * /* platforms */, + cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETPLATFORMINFO)(cl_platform_id /* platform */, + cl_platform_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETDEVICEIDS)(cl_platform_id /* platform */, + cl_device_type /* device_type */, + cl_uint /* num_entries */, + cl_device_id * /* devices */, + cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETDEVICEINFO)(cl_device_id /* device */, + cl_device_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Context APIs +typedef CL_API_ENTRY cl_context (CL_API_CALL * +PFNCLCREATECONTEXT)(const cl_context_properties * /* properties */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_context (CL_API_CALL * +PFNCLCREATECONTEXTFROMTYPE)(const cl_context_properties * /* properties */, + cl_device_type /* device_type */, + void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINCONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASECONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETCONTEXTINFO)(cl_context /* context */, + cl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +typedef CL_API_ENTRY cl_command_queue (CL_API_CALL * +PFNCLCREATECOMMANDQUEUE)(cl_context /* context */, + cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINCOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASECOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETCOMMANDQUEUEINFO)(cl_command_queue /* command_queue */, + cl_command_queue_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLSETCOMMANDQUEUEPROPERTY)(cl_command_queue /* command_queue */, + cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +typedef CL_API_ENTRY cl_mem (CL_API_CALL * +PFNCLCREATEBUFFER)(cl_context /* context */, + cl_mem_flags /* flags */, + size_t /* size */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL * +PFNCLCREATEIMAGE2D)(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL * +PFNCLCREATEIMAGE3D)(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASEMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETSUPPORTEDIMAGEFORMATS)(cl_context /* context */, + cl_mem_flags /* flags */, + cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, + cl_image_format * /* image_formats */, + cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETMEMOBJECTINFO)(cl_mem /* memobj */, + cl_mem_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETIMAGEINFO)(cl_mem /* image */, + cl_image_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Sampler APIs +typedef CL_API_ENTRY cl_sampler (CL_API_CALL * +PFNCLCREATESAMPLER)(cl_context /* context */, + cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, + cl_filter_mode /* filter_mode */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINSAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASESAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETSAMPLERINFO)(cl_sampler /* sampler */, + cl_sampler_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Program Object APIs +typedef CL_API_ENTRY cl_program (CL_API_CALL * +PFNCLCREATEPROGRAMWITHSOURCE)(cl_context /* context */, + cl_uint /* count */, + const char ** /* strings */, + const size_t * /* lengths */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_program (CL_API_CALL * +PFNCLCREATEPROGRAMWITHBINARY)(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const size_t * /* lengths */, + const unsigned char ** /* binaries */, + cl_int * /* binary_status */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASEPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLBUILDPROGRAM)(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + void (*pfn_notify)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLUNLOADCOMPILER)(void) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETPROGRAMINFO)(cl_program /* program */, + cl_program_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETPROGRAMBUILDINFO)(cl_program /* program */, + cl_device_id /* device */, + cl_program_build_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +typedef CL_API_ENTRY cl_kernel (CL_API_CALL * +PFNCLCREATEKERNEL)(cl_program /* program */, + const char * /* kernel_name */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLCREATEKERNELSINPROGRAM)(cl_program /* program */, + cl_uint /* num_kernels */, + cl_kernel * /* kernels */, + cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASEKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLSETKERNELARG)(cl_kernel /* kernel */, + cl_uint /* arg_index */, + size_t /* arg_size */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETKERNELINFO)(cl_kernel /* kernel */, + cl_kernel_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETKERNELWORKGROUPINFO)(cl_kernel /* kernel */, + cl_device_id /* device */, + cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Event Object APIs +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLWAITFOREVENTS)(cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETEVENTINFO)(cl_event /* event */, + cl_event_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRETAINEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLRELEASEEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLGETEVENTPROFILINGINFO)(cl_event /* event */, + cl_profiling_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLFLUSH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLFINISH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEREADBUFFER)(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_read */, + size_t /* offset */, + size_t /* cb */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEWRITEBUFFER)(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_write */, + size_t /* offset */, + size_t /* cb */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUECOPYBUFFER)(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_buffer */, + size_t /* src_offset */, + size_t /* dst_offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEREADIMAGE)(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_read */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* row_pitch */, + size_t /* slice_pitch */, + void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEWRITEIMAGE)(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_write */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, + const void * /* ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUECOPYIMAGE)(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_image */, + const size_t * /* src_origin[3] */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUECOPYIMAGETOBUFFER)(cl_command_queue /* command_queue */, + cl_mem /* src_image */, + cl_mem /* dst_buffer */, + const size_t * /* src_origin[3] */, + const size_t * /* region[3] */, + size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUECOPYBUFFERTOIMAGE)(cl_command_queue /* command_queue */, + cl_mem /* src_buffer */, + cl_mem /* dst_image */, + size_t /* src_offset */, + const size_t * /* dst_origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void * (CL_API_CALL * +PFNCLENQUEUEMAPBUFFER)(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + size_t /* offset */, + size_t /* cb */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void * (CL_API_CALL * +PFNCLENQUEUEMAPIMAGE)(cl_command_queue /* command_queue */, + cl_mem /* image */, + cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + size_t * /* image_row_pitch */, + size_t * /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEUNMAPMEMOBJECT)(cl_command_queue /* command_queue */, + cl_mem /* memobj */, + void * /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUENDRANGEKERNEL)(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUETASK)(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUENATIVEKERNEL)(cl_command_queue /* command_queue */, + void (*user_func)(void *), + void * /* args */, + size_t /* cb_args */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_list */, + const void ** /* args_mem_loc */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEMARKER)(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEWAITFOREVENTS)(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +PFNCLENQUEUEBARRIER)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; + +// Extension function access +// +// Returns the extension function address for the given function name, +// or NULL if a valid function can not be found. The client must +// check to make sure the address is not NULL, before using or +// calling the returned function address. +// +typedef CL_API_ENTRY void * (CL_API_CALL * PFNCLGETEXTENSIONFUNCTIONADDRESS)(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; + + +#define CLEW_STATIC + +#ifdef CLEW_STATIC +# define CLEWAPI extern +#else +# ifdef CLEW_BUILD +# define CLEWAPI extern __declspec(dllexport) +# else +# define CLEWAPI extern __declspec(dllimport) +# endif +#endif + +#if defined(_WIN32) +#define CLEW_FUN_EXPORT extern +#else +#define CLEW_FUN_EXPORT CLEWAPI +#endif + +#define CLEW_GET_FUN(x) x + + +// Variables holding function entry points +CLEW_FUN_EXPORT PFNCLGETPLATFORMIDS __clewGetPlatformIDs ; +CLEW_FUN_EXPORT PFNCLGETPLATFORMINFO __clewGetPlatformInfo ; +CLEW_FUN_EXPORT PFNCLGETDEVICEIDS __clewGetDeviceIDs ; +CLEW_FUN_EXPORT PFNCLGETDEVICEINFO __clewGetDeviceInfo ; +CLEW_FUN_EXPORT PFNCLCREATECONTEXT __clewCreateContext ; +CLEW_FUN_EXPORT PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType ; +CLEW_FUN_EXPORT PFNCLRETAINCONTEXT __clewRetainContext ; +CLEW_FUN_EXPORT PFNCLRELEASECONTEXT __clewReleaseContext ; +CLEW_FUN_EXPORT PFNCLGETCONTEXTINFO __clewGetContextInfo ; +CLEW_FUN_EXPORT PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue ; +CLEW_FUN_EXPORT PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue ; +CLEW_FUN_EXPORT PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue ; +CLEW_FUN_EXPORT PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo ; +CLEW_FUN_EXPORT PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty ; +CLEW_FUN_EXPORT PFNCLCREATEBUFFER __clewCreateBuffer ; +CLEW_FUN_EXPORT PFNCLCREATEIMAGE2D __clewCreateImage2D ; +CLEW_FUN_EXPORT PFNCLCREATEIMAGE3D __clewCreateImage3D ; +CLEW_FUN_EXPORT PFNCLRETAINMEMOBJECT __clewRetainMemObject ; +CLEW_FUN_EXPORT PFNCLRELEASEMEMOBJECT __clewReleaseMemObject ; +CLEW_FUN_EXPORT PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats ; +CLEW_FUN_EXPORT PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo ; +CLEW_FUN_EXPORT PFNCLGETIMAGEINFO __clewGetImageInfo ; +CLEW_FUN_EXPORT PFNCLCREATESAMPLER __clewCreateSampler ; +CLEW_FUN_EXPORT PFNCLRETAINSAMPLER __clewRetainSampler ; +CLEW_FUN_EXPORT PFNCLRELEASESAMPLER __clewReleaseSampler ; +CLEW_FUN_EXPORT PFNCLGETSAMPLERINFO __clewGetSamplerInfo ; +CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource ; +CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary ; +CLEW_FUN_EXPORT PFNCLRETAINPROGRAM __clewRetainProgram ; +CLEW_FUN_EXPORT PFNCLRELEASEPROGRAM __clewReleaseProgram ; +CLEW_FUN_EXPORT PFNCLBUILDPROGRAM __clewBuildProgram ; +CLEW_FUN_EXPORT PFNCLUNLOADCOMPILER __clewUnloadCompiler ; +CLEW_FUN_EXPORT PFNCLGETPROGRAMINFO __clewGetProgramInfo ; +CLEW_FUN_EXPORT PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo ; +CLEW_FUN_EXPORT PFNCLCREATEKERNEL __clewCreateKernel ; +CLEW_FUN_EXPORT PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram ; +CLEW_FUN_EXPORT PFNCLRETAINKERNEL __clewRetainKernel ; +CLEW_FUN_EXPORT PFNCLRELEASEKERNEL __clewReleaseKernel ; +CLEW_FUN_EXPORT PFNCLSETKERNELARG __clewSetKernelArg ; +CLEW_FUN_EXPORT PFNCLGETKERNELINFO __clewGetKernelInfo ; +CLEW_FUN_EXPORT PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo ; +CLEW_FUN_EXPORT PFNCLWAITFOREVENTS __clewWaitForEvents ; +CLEW_FUN_EXPORT PFNCLGETEVENTINFO __clewGetEventInfo ; +CLEW_FUN_EXPORT PFNCLRETAINEVENT __clewRetainEvent ; +CLEW_FUN_EXPORT PFNCLRELEASEEVENT __clewReleaseEvent ; +CLEW_FUN_EXPORT PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo ; +CLEW_FUN_EXPORT PFNCLFLUSH __clewFlush ; +CLEW_FUN_EXPORT PFNCLFINISH __clewFinish ; +CLEW_FUN_EXPORT PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer ; +CLEW_FUN_EXPORT PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer ; +CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer ; +CLEW_FUN_EXPORT PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage ; +CLEW_FUN_EXPORT PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage ; +CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage ; +CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer ; +CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage ; +CLEW_FUN_EXPORT PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer ; +CLEW_FUN_EXPORT PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage ; +CLEW_FUN_EXPORT PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject ; +CLEW_FUN_EXPORT PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel ; +CLEW_FUN_EXPORT PFNCLENQUEUETASK __clewEnqueueTask ; +CLEW_FUN_EXPORT PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel ; +CLEW_FUN_EXPORT PFNCLENQUEUEMARKER __clewEnqueueMarker ; +CLEW_FUN_EXPORT PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents ; +CLEW_FUN_EXPORT PFNCLENQUEUEBARRIER __clewEnqueueBarrier ; +CLEW_FUN_EXPORT PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress ; + + +#define clGetPlatformIDs CLEW_GET_FUN(__clewGetPlatformIDs ) +#define clGetPlatformInfo CLEW_GET_FUN(__clewGetPlatformInfo ) +#define clGetDeviceIDs CLEW_GET_FUN(__clewGetDeviceIDs ) +#define clGetDeviceInfo CLEW_GET_FUN(__clewGetDeviceInfo ) +#define clCreateContext CLEW_GET_FUN(__clewCreateContext ) +#define clCreateContextFromType CLEW_GET_FUN(__clewCreateContextFromType ) +#define clRetainContext CLEW_GET_FUN(__clewRetainContext ) +#define clReleaseContext CLEW_GET_FUN(__clewReleaseContext ) +#define clGetContextInfo CLEW_GET_FUN(__clewGetContextInfo ) +#define clCreateCommandQueue CLEW_GET_FUN(__clewCreateCommandQueue ) +#define clRetainCommandQueue CLEW_GET_FUN(__clewRetainCommandQueue ) +#define clReleaseCommandQueue CLEW_GET_FUN(__clewReleaseCommandQueue ) +#define clGetCommandQueueInfo CLEW_GET_FUN(__clewGetCommandQueueInfo ) +#define clSetCommandQueueProperty CLEW_GET_FUN(__clewSetCommandQueueProperty ) +#define clCreateBuffer CLEW_GET_FUN(__clewCreateBuffer ) +#define clCreateImage2D CLEW_GET_FUN(__clewCreateImage2D ) +#define clCreateImage3D CLEW_GET_FUN(__clewCreateImage3D ) +#define clRetainMemObject CLEW_GET_FUN(__clewRetainMemObject ) +#define clReleaseMemObject CLEW_GET_FUN(__clewReleaseMemObject ) +#define clGetSupportedImageFormats CLEW_GET_FUN(__clewGetSupportedImageFormats ) +#define clGetMemObjectInfo CLEW_GET_FUN(__clewGetMemObjectInfo ) +#define clGetImageInfo CLEW_GET_FUN(__clewGetImageInfo ) +#define clCreateSampler CLEW_GET_FUN(__clewCreateSampler ) +#define clRetainSampler CLEW_GET_FUN(__clewRetainSampler ) +#define clReleaseSampler CLEW_GET_FUN(__clewReleaseSampler ) +#define clGetSamplerInfo CLEW_GET_FUN(__clewGetSamplerInfo ) +#define clCreateProgramWithSource CLEW_GET_FUN(__clewCreateProgramWithSource ) +#define clCreateProgramWithBinary CLEW_GET_FUN(__clewCreateProgramWithBinary ) +#define clRetainProgram CLEW_GET_FUN(__clewRetainProgram ) +#define clReleaseProgram CLEW_GET_FUN(__clewReleaseProgram ) +#define clBuildProgram CLEW_GET_FUN(__clewBuildProgram ) +#define clUnloadCompiler CLEW_GET_FUN(__clewUnloadCompiler ) +#define clGetProgramInfo CLEW_GET_FUN(__clewGetProgramInfo ) +#define clGetProgramBuildInfo CLEW_GET_FUN(__clewGetProgramBuildInfo ) +#define clCreateKernel CLEW_GET_FUN(__clewCreateKernel ) +#define clCreateKernelsInProgram CLEW_GET_FUN(__clewCreateKernelsInProgram ) +#define clRetainKernel CLEW_GET_FUN(__clewRetainKernel ) +#define clReleaseKernel CLEW_GET_FUN(__clewReleaseKernel ) +#define clSetKernelArg CLEW_GET_FUN(__clewSetKernelArg ) +#define clGetKernelInfo CLEW_GET_FUN(__clewGetKernelInfo ) +#define clGetKernelWorkGroupInfo CLEW_GET_FUN(__clewGetKernelWorkGroupInfo ) +#define clWaitForEvents CLEW_GET_FUN(__clewWaitForEvents ) +#define clGetEventInfo CLEW_GET_FUN(__clewGetEventInfo ) +#define clRetainEvent CLEW_GET_FUN(__clewRetainEvent ) +#define clReleaseEvent CLEW_GET_FUN(__clewReleaseEvent ) +#define clGetEventProfilingInfo CLEW_GET_FUN(__clewGetEventProfilingInfo ) +#define clFlush CLEW_GET_FUN(__clewFlush ) +#define clFinish CLEW_GET_FUN(__clewFinish ) +#define clEnqueueReadBuffer CLEW_GET_FUN(__clewEnqueueReadBuffer ) +#define clEnqueueWriteBuffer CLEW_GET_FUN(__clewEnqueueWriteBuffer ) +#define clEnqueueCopyBuffer CLEW_GET_FUN(__clewEnqueueCopyBuffer ) +#define clEnqueueReadImage CLEW_GET_FUN(__clewEnqueueReadImage ) +#define clEnqueueWriteImage CLEW_GET_FUN(__clewEnqueueWriteImage ) +#define clEnqueueCopyImage CLEW_GET_FUN(__clewEnqueueCopyImage ) +#define clEnqueueCopyImageToBuffer CLEW_GET_FUN(__clewEnqueueCopyImageToBuffer ) +#define clEnqueueCopyBufferToImage CLEW_GET_FUN(__clewEnqueueCopyBufferToImage ) +#define clEnqueueMapBuffer CLEW_GET_FUN(__clewEnqueueMapBuffer ) +#define clEnqueueMapImage CLEW_GET_FUN(__clewEnqueueMapImage ) +#define clEnqueueUnmapMemObject CLEW_GET_FUN(__clewEnqueueUnmapMemObject ) +#define clEnqueueNDRangeKernel CLEW_GET_FUN(__clewEnqueueNDRangeKernel ) +#define clEnqueueTask CLEW_GET_FUN(__clewEnqueueTask ) +#define clEnqueueNativeKernel CLEW_GET_FUN(__clewEnqueueNativeKernel ) +#define clEnqueueMarker CLEW_GET_FUN(__clewEnqueueMarker ) +#define clEnqueueWaitForEvents CLEW_GET_FUN(__clewEnqueueWaitForEvents ) +#define clEnqueueBarrier CLEW_GET_FUN(__clewEnqueueBarrier ) +#define clGetExtensionFunctionAddress CLEW_GET_FUN(__clewGetExtensionFunctionAddress ) + +#endif // CLCC_GENERATE_DOCUMENTATION + +#define CLEW_SUCCESS 0 //!< Success error code +#define CLEW_ERROR_OPEN_FAILED -1 //!< Error code for failing to open the dynamic library +#define CLEW_ERROR_ATEXIT_FAILED -2 //!< Error code for failing to queue the closing of the dynamic library to atexit() + +//! \brief Load OpenCL dynamic library and set function entry points +int clewInit (const char*); +//! \brief Convert an OpenCL error code to its string equivalent +const char* clewErrorString (cl_int error); + +#ifdef __cplusplus +} +#endif +#endif //USE_MINICL +#endif // CLCC_CLEW_HPP_INCLUDED diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt index 63cc88b7a..d8951762f 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt @@ -6,7 +6,6 @@ ${BULLET_PHYSICS_SOURCE_DIR}/src SUBDIRS ( OpenCL - CPU ) IF( USE_DX11 ) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/CMakeLists.txt deleted file mode 100644 index 908ff3736..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ - -INCLUDE_DIRECTORIES( -${BULLET_PHYSICS_SOURCE_DIR}/src -${VECTOR_MATH_INCLUDE} -) - - - -SET(BulletSoftBodyCPUSolvers_SRCS - btSoftBodySolver_CPU.cpp -) - -SET(BulletSoftBodyCPUSolvers_HDRS - btSoftBodySolver_CPU.h - btSoftBodySolverData.h -) - - -ADD_LIBRARY(BulletSoftBodySolvers_CPU ${BulletSoftBodyCPUSolvers_SRCS} ${BulletSoftBodyCPUSolvers_HDRS} ) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_CPU PROPERTIES VERSION ${BULLET_VERSION}) -SET_TARGET_PROPERTIES(BulletSoftBodySolvers_CPU PROPERTIES SOVERSION ${BULLET_VERSION}) -IF (BUILD_SHARED_LIBS) - TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_CPU BulletSoftBody) -ENDIF (BUILD_SHARED_LIBS) - -IF (INSTALL_LIBS) - IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) - IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_CPU DESTINATION .) - ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS BulletSoftBodySolvers_CPU DESTINATION lib${LIB_SUFFIX}) -#headers are already installed by BulletMultiThreaded library - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_CPU PROPERTIES FRAMEWORK true) - SET_TARGET_PROPERTIES(BulletSoftBodySolvers_CPU PROPERTIES PUBLIC_HEADER "${BulletSoftBodyCPUSolvers_HDRS}") - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) -ENDIF (INSTALL_LIBS) diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.cpp deleted file mode 100644 index 51a24baff..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.cpp +++ /dev/null @@ -1,979 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h" -#include "BulletCollision/CollisionDispatch/btCollisionObject.h" -#include "BulletCollision/CollisionShapes/btCollisionShape.h" -#include "vectormath/vmInclude.h" - -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h" -#include "BulletSoftBody/btSoftBody.h" -#include "BulletCollision/CollisionShapes/btCapsuleShape.h" - - -btCPUSoftBodySolver::btCPUSoftBodySolver() -{ - // Initial we will clearly need to update solver constants - // For now this is global for the cloths linked with this solver - we should probably make this body specific - // for performance in future once we understand more clearly when constants need to be updated - m_updateSolverConstants = true; -} - -btCPUSoftBodySolver::~btCPUSoftBodySolver() -{ -} - - - - -btSoftBodyLinkData &btCPUSoftBodySolver::getLinkData() -{ - return m_linkData; -} - -btSoftBodyVertexData &btCPUSoftBodySolver::getVertexData() -{ - return m_vertexData; -} - -btSoftBodyTriangleData &btCPUSoftBodySolver::getTriangleData() -{ - return m_triangleData; -} - - - - - - -static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec ) -{ - Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() ); - return outVec; -} - -static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform ) -{ - Vectormath::Aos::Transform3 outTransform; - outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0))); - outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1))); - outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2))); - outTransform.setCol(3, toVector3(transform.getOrigin())); - return outTransform; -} - -void btCPUSoftBodySolver::btAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound ) -{ - float scalarMargin = this->getSoftBody()->getCollisionShape()->getMargin(); - btVector3 vectorMargin( scalarMargin, scalarMargin, scalarMargin ); - m_softBody->m_bounds[0] = lowerBound - vectorMargin; - m_softBody->m_bounds[1] = upperBound + vectorMargin; -} - - -void btCPUSoftBodySolver::copyBackToSoftBodies() -{ - // Loop over soft bodies, copying all the vertex positions back for each body in turn - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[ softBodyIndex ]; - btSoftBody *softBody = softBodyInterface->getSoftBody(); - - int firstVertex = softBodyInterface->getFirstVertex(); - int numVertices = softBodyInterface->getNumVertices(); - - // Copy vertices from solver back into the softbody - for( int vertex = 0; vertex < numVertices; ++vertex ) - { - using Vectormath::Aos::Point3; - Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] ); - - softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() ); - softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() ); - softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() ); - - softBody->m_nodes[vertex].m_n.setX( vertexPosition.getX() ); - softBody->m_nodes[vertex].m_n.setY( vertexPosition.getY() ); - softBody->m_nodes[vertex].m_n.setZ( vertexPosition.getZ() ); - } - } -} // btCPUSoftBodySolver::copyBackToSoftBodies - -void btCPUSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate ) -{ - if( forceUpdate || m_softBodySet.size() != softBodies.size() ) - { - // Have a change in the soft body set so update, reloading all the data - getVertexData().clear(); - getTriangleData().clear(); - getLinkData().clear(); - m_softBodySet.resize(0); - - - for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex ) - { - btSoftBody *softBody = softBodies[ softBodyIndex ]; - using Vectormath::Aos::Matrix3; - using Vectormath::Aos::Point3; - - // Create SoftBody that will store the information within the solver - btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody ); - m_softBodySet.push_back( newSoftBody ); - - m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) ); - m_perClothDampingFactor.push_back(softBody->m_cfg.kDP); - m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF ); - m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); - m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); - m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); - m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) ); - - // Add space for new vertices and triangles in the default solver for now - // TODO: Include space here for tearing too later - int firstVertex = getVertexData().getNumVertices(); - int numVertices = softBody->m_nodes.size(); - int maxVertices = numVertices; - // Allocate space for new vertices in all the vertex arrays - getVertexData().createVertices( maxVertices, softBodyIndex ); - - int firstTriangle = getTriangleData().getNumTriangles(); - int numTriangles = softBody->m_faces.size(); - int maxTriangles = numTriangles; - getTriangleData().createTriangles( maxTriangles ); - - // Copy vertices from softbody into the solver - for( int vertex = 0; vertex < numVertices; ++vertex ) - { - Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ()); - btSoftBodyVertexData::VertexDescription desc; - - // TODO: Position in the softbody might be pre-transformed - // or we may need to adapt for the pose. - //desc.setPosition( cloth.getMeshTransform()*multPoint ); - desc.setPosition( multPoint ); - - float vertexInverseMass = softBody->m_nodes[vertex].m_im; - desc.setInverseMass(vertexInverseMass); - getVertexData().setVertexAt( desc, firstVertex + vertex ); - } - - // Copy triangles similarly - // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene - for( int triangle = 0; triangle < numTriangles; ++triangle ) - { - // Note that large array storage is relative to the array not to the cloth - // So we need to add firstVertex to each value - int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0])); - int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0])); - int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0])); - btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex); - getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle ); - - // Increase vertex triangle counts for this triangle - getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++; - getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++; - getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++; - } - - int firstLink = getLinkData().getNumLinks(); - int numLinks = softBody->m_links.size(); - int maxLinks = numLinks; - - // Allocate space for the links - getLinkData().createLinks( numLinks ); - - // Add the links - for( int link = 0; link < numLinks; ++link ) - { - int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]); - int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]); - - btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST); - newLink.setLinkStrength(1.f); - getLinkData().setLinkAt(newLink, firstLink + link); - } - - newSoftBody->setFirstVertex( firstVertex ); - newSoftBody->setFirstTriangle( firstTriangle ); - newSoftBody->setNumVertices( numVertices ); - newSoftBody->setMaxVertices( maxVertices ); - newSoftBody->setNumTriangles( numTriangles ); - newSoftBody->setMaxTriangles( maxTriangles ); - newSoftBody->setFirstLink( firstLink ); - newSoftBody->setNumLinks( numLinks ); - } - - - - updateConstants(0.f); - } -} - - - - -void btCPUSoftBodySolver::updateSoftBodies() -{ - using namespace Vectormath::Aos; - - int numVertices = m_vertexData.getNumVertices(); - int numTriangles = m_triangleData.getNumTriangles(); - - // Initialise normal and vertex counts - for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex ) - { - m_vertexData.getArea(vertexIndex) = 0.f; - m_vertexData.getNormal(vertexIndex) = Vector3(0.f, 0.f, 0.f); - } - - // Update the areas for the triangles and vertices. - for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex ) - { - float &triangleArea( m_triangleData.getTriangleArea( triangleIndex ) ); - const btSoftBodyTriangleData::TriangleNodeSet &vertices( m_triangleData.getVertexSet(triangleIndex) ); - - Point3 &vertexPosition0( m_vertexData.getPosition( vertices.vertex0 ) ); - Point3 &vertexPosition1( m_vertexData.getPosition( vertices.vertex1 ) ); - Point3 &vertexPosition2( m_vertexData.getPosition( vertices.vertex2 ) ); - - triangleArea = computeTriangleArea( vertexPosition0, vertexPosition1, vertexPosition2 ); - - // Add to areas for vertices and increase the count of the number of triangles affecting the vertex - m_vertexData.getArea(vertices.vertex0) += triangleArea; - m_vertexData.getArea(vertices.vertex1) += triangleArea; - m_vertexData.getArea(vertices.vertex2) += triangleArea; - - Point3 &vertex0( m_vertexData.getPosition(vertices.vertex0) ); - Point3 &vertex1( m_vertexData.getPosition(vertices.vertex1) ); - Point3 &vertex2( m_vertexData.getPosition(vertices.vertex2) ); - - Vector3 triangleNormal = cross( vertex1-vertex0, vertex2 - vertex0 ); - - m_triangleData.getNormal(triangleIndex) = normalize(triangleNormal); - - m_vertexData.getNormal(vertices.vertex0) += triangleNormal; - m_vertexData.getNormal(vertices.vertex1) += triangleNormal; - m_vertexData.getNormal(vertices.vertex2) += triangleNormal; - - } - - // Normalise the area and normals - for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex ) - { - m_vertexData.getArea(vertexIndex) /= m_vertexData.getTriangleCount(vertexIndex); - m_vertexData.getNormal(vertexIndex) = normalize( m_vertexData.getNormal(vertexIndex) ); - } - - - // Clear the collision shape array for the next frame - m_collisionObjectDetails.clear(); - -} // updateSoftBodies - - -Vectormath::Aos::Vector3 btCPUSoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ) -{ - return a*Vectormath::Aos::dot(v, a); -} - -void btCPUSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ) -{ - float dtInverseMass = solverdt*inverseMass; - if( Vectormath::Aos::lengthSqr(force * dtInverseMass) > Vectormath::Aos::lengthSqr(vertexVelocity) ) - { - vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/dtInverseMass; - } else { - vertexForce += force; - } -} - -bool btCPUSoftBodySolver::checkInitialized() -{ - return true; -} - -void btCPUSoftBodySolver::applyForces( float solverdt ) -{ - using namespace Vectormath::Aos; - - int numVertices = m_vertexData.getNumVertices(); - for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex ) - { - btAcceleratedSoftBodyInterface *currentCloth = m_softBodySet[clothIndex]; - const int startVertex = currentCloth->getFirstVertex(); - const int numVertices = currentCloth->getNumVertices(); - - Vector3 velocityChange = m_perClothAcceleration[clothIndex]*solverdt; - for( int vertexIndex = startVertex; vertexIndex < (startVertex + numVertices); ++vertexIndex ) - { - float inverseMass = m_vertexData.getInverseMass( vertexIndex ); - Vector3 &vertexVelocity( m_vertexData.getVelocity( vertexIndex ) ); - - // First apply the global acceleration to all vertices - if( inverseMass > 0 ) - vertexVelocity += velocityChange; - - // If it's a non-static vertex - if( m_vertexData.getInverseMass(vertexIndex) > 0 ) - { - // Wind effects on a wind-per-cloth basis - float liftFactor = m_perClothLiftFactor[clothIndex]; - float dragFactor = m_perClothDragFactor[clothIndex]; - if( (liftFactor > 0.f) || (dragFactor > 0.f) ) - { - Vector3 normal = m_vertexData.getNormal(vertexIndex); - Vector3 relativeWindVelocity = m_vertexData.getVelocity(vertexIndex) - m_perClothWindVelocity[clothIndex]; - float relativeSpeedSquared = lengthSqr(relativeWindVelocity); - if( relativeSpeedSquared > FLT_EPSILON ) - { - normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : +1.f); - float dvNormal = dot(normal, relativeWindVelocity); - if( dvNormal > 0 ) - { - Vector3 force( 0.f, 0.f, 0.f ); - float c0 = m_vertexData.getArea(vertexIndex) * dvNormal * relativeSpeedSquared / 2; - float c1 = c0 * m_perClothMediumDensity[clothIndex]; - force += normal * (-c1 * liftFactor); - force += normalize(relativeWindVelocity)*(-c1 * dragFactor); - - Vectormath::Aos::Vector3 &vertexForce( m_vertexData.getForceAccumulator(vertexIndex) ); - ApplyClampedForce( solverdt, force, vertexVelocity, inverseMass, vertexForce ); - } - } - } - } - } - } -} // btCPUSoftBodySolver::applyForces - -/** - * Integrate motion on the solver. - */ -void btCPUSoftBodySolver::integrate( float solverdt ) -{ - using namespace Vectormath::Aos; - int numVertices = m_vertexData.getNumVertices(); - for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex ) - { - Point3 &position( m_vertexData.getPosition(vertexIndex) ); - Point3 &previousPosition( m_vertexData.getPreviousPosition(vertexIndex) ); - Vector3 &forceAccumulator( m_vertexData.getForceAccumulator(vertexIndex) ); - Vector3 &velocity( m_vertexData.getVelocity(vertexIndex) ); - float inverseMass = m_vertexData.getInverseMass(vertexIndex); - - previousPosition = position; - velocity += forceAccumulator * inverseMass * solverdt; - position += velocity * solverdt; - forceAccumulator = Vector3(0.f, 0.f, 0.f); - } -} // btCPUSoftBodySolver::integrate - -float btCPUSoftBodySolver::computeTriangleArea( - const Vectormath::Aos::Point3 &vertex0, - const Vectormath::Aos::Point3 &vertex1, - const Vectormath::Aos::Point3 &vertex2 ) -{ - Vectormath::Aos::Vector3 a = vertex1 - vertex0; - Vectormath::Aos::Vector3 b = vertex2 - vertex0; - Vectormath::Aos::Vector3 crossProduct = cross(a, b); - float area = length( crossProduct ); - return area; -} - -void btCPUSoftBodySolver::updateConstants( float timeStep ) -{ - using namespace Vectormath::Aos; - - if( m_updateSolverConstants ) - { - m_updateSolverConstants = false; - - // Will have to redo this if we change the structure (tear, maybe) or various other possible changes - - // Initialise link constants - const int numLinks = m_linkData.getNumLinks(); - for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex ) - { - btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) ); - m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 ))); - float invMass0 = m_vertexData.getInverseMass(vertices.vertex0); - float invMass1 = m_vertexData.getInverseMass(vertices.vertex1); - float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex); - float massLSC = (invMass0 + invMass1)/linearStiffness; - m_linkData.getMassLSC(linkIndex) = massLSC; - float restLength = m_linkData.getRestLength(linkIndex); - float restLengthSquared = restLength*restLength; - m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared; - } - } -} // btCPUSoftBodySolver::updateConstants - - - - -void btCPUSoftBodySolver::updateBounds() -{ - using Vectormath::Aos::Point3; - - for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex ) - { - btAcceleratedSoftBodyInterface *currentCloth = m_softBodySet[clothIndex]; - btVector3 startBound(FLT_MAX, FLT_MAX, FLT_MAX); - btVector3 endBound(FLT_MIN, FLT_MIN, FLT_MIN); - - const int startVertex = currentCloth->getFirstVertex(); - const int numVertices = currentCloth->getNumVertices(); - - int endVertex = startVertex + numVertices; - for(int vertexIndex = startVertex; vertexIndex < endVertex; ++vertexIndex) - { - btVector3 vertexPosition( m_vertexData.getVertexPositions()[vertexIndex].getX(), m_vertexData.getVertexPositions()[vertexIndex].getY(), m_vertexData.getVertexPositions()[vertexIndex].getZ() ); - startBound.setX( btMin( startBound.getX(), vertexPosition.getX() ) ); - startBound.setY( btMin( startBound.getY(), vertexPosition.getY() ) ); - startBound.setZ( btMin( startBound.getZ(), vertexPosition.getZ() ) ); - - endBound.setX( btMax( endBound.getX(), vertexPosition.getX() ) ); - endBound.setY( btMax( endBound.getY(), vertexPosition.getY() ) ); - endBound.setZ( btMax( endBound.getZ(), vertexPosition.getZ() ) ); - } - - m_softBodySet[clothIndex]->updateBounds( startBound, endBound ); - } -} - - -class btCPUSB_QuickSortCompare -{ - public: - - bool operator() ( const btCPUCollisionShapeDescription& a, const btCPUCollisionShapeDescription& b ) - { - return ( a.softBodyIdentifier < b.softBodyIdentifier ); - } -}; - -/** - * Sort the collision object details array and generate indexing into it for the per-cloth collision object array. - */ -void btCPUSoftBodySolver::prepareCollisionConstraints() -{ - // First do a simple sort on the collision objects - btAlignedObjectArray numObjectsPerClothPrefixSum; - btAlignedObjectArray numObjectsPerCloth; - numObjectsPerCloth.resize( m_softBodySet.size(), 0 ); - numObjectsPerClothPrefixSum.resize( m_softBodySet.size(), 0 ); - - if (!m_perClothCollisionObjects.size()) - return; - - m_collisionObjectDetails.quickSort( btCPUSB_QuickSortCompare() ); - - // Generating indexing for perClothCollisionObjects - // First clear the previous values with the "no collision object for cloth" constant - for( int clothIndex = 0; clothIndex < m_perClothCollisionObjects.size(); ++clothIndex ) - { - m_perClothCollisionObjects[clothIndex].firstObject = -1; - m_perClothCollisionObjects[clothIndex].endObject = -1; - } - int currentCloth = 0; - int startIndex = 0; - for( int collisionObject = 0; collisionObject < m_collisionObjectDetails.size(); ++collisionObject ) - { - int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier; - if( nextCloth != currentCloth ) - { - // Changed cloth in the array - // Set the end index and the range is what we need for currentCloth - m_perClothCollisionObjects[currentCloth].firstObject = startIndex; - m_perClothCollisionObjects[currentCloth].endObject = collisionObject; - currentCloth = nextCloth; - startIndex = collisionObject; - } - } - - // And update last cloth - m_perClothCollisionObjects[currentCloth].firstObject = startIndex; - m_perClothCollisionObjects[currentCloth].endObject = m_collisionObjectDetails.size(); - -} // prepareCollisionConstraints - - -void btCPUSoftBodySolver::solveConstraints( float solverdt ) -{ - using Vectormath::Aos::Vector3; - using Vectormath::Aos::Point3; - using Vectormath::Aos::lengthSqr; - using Vectormath::Aos::dot; - - // Prepare links - int numLinks = m_linkData.getNumLinks(); - int numVertices = m_vertexData.getNumVertices(); - - float kst = 1.f; - - for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex ) - { - btSoftBodyLinkData::LinkNodePair &nodePair( m_linkData.getVertexPair(linkIndex) ); - Vector3 currentLength = m_vertexData.getPreviousPosition( nodePair.vertex1 ) - m_vertexData.getPreviousPosition( nodePair.vertex0 ); - m_linkData.getCurrentLength(linkIndex) = currentLength; - - // If mass at both ends of links is 0 (both static points) then we don't want this information. - // In reality this would be a fairly pointless link, but it could have been inserted - float linkLengthRatio = 0; - if( m_linkData.getMassLSC(linkIndex) > 0 ) - linkLengthRatio = 1.f/(lengthSqr(currentLength) * m_linkData.getMassLSC(linkIndex)); - m_linkData.getLinkLengthRatio(linkIndex) = linkLengthRatio; - - } - - - prepareCollisionConstraints(); - - - for( int iteration = 0; iteration < m_numberOfVelocityIterations ; ++iteration ) - { - // Solve velocity - for(int linkIndex = 0; linkIndex < numLinks; ++linkIndex) - { - - int vertexIndex0 = m_linkData.getVertexPair(linkIndex).vertex0; - int vertexIndex1 = m_linkData.getVertexPair(linkIndex).vertex1; - - float j = -dot(m_linkData.getCurrentLength(linkIndex), m_vertexData.getVelocity(vertexIndex0) - m_vertexData.getVelocity(vertexIndex1)) * m_linkData.getLinkLengthRatio(linkIndex)*kst; - - // If both ends of the link have no mass then this will be zero. Catch that case. - // TODO: Should really catch the /0 in the link setup, too - //if(psb->m_linksc0[i]>0) - { - m_vertexData.getVelocity(vertexIndex0) = m_vertexData.getVelocity(vertexIndex0) + m_linkData.getCurrentLength(linkIndex)*j*m_vertexData.getInverseMass(vertexIndex0); - m_vertexData.getVelocity(vertexIndex1) = m_vertexData.getVelocity(vertexIndex1) - m_linkData.getCurrentLength(linkIndex)*j*m_vertexData.getInverseMass(vertexIndex1); - } - } - } - - // Compute new positions from velocity - // Also update the previous position so that our position computation is now based on the new position from the velocity solution - // rather than based directly on the original positions - if( m_numberOfVelocityIterations > 0 ) - { - for(int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex) - { - m_vertexData.getPosition(vertexIndex) = m_vertexData.getPreviousPosition(vertexIndex) + m_vertexData.getVelocity(vertexIndex) * solverdt; - m_vertexData.getPreviousPosition(vertexIndex) = m_vertexData.getPosition(vertexIndex); - } - } - - // Solve drift - for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration ) - { - for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex ) - { - btAcceleratedSoftBodyInterface *currentCloth = m_softBodySet[clothIndex]; - - const int startLink = currentCloth->getFirstLink(); - const int numLinks = currentCloth->getNumLinks(); - - int endLink = startLink + numLinks; - for(int linkIndex = startLink; linkIndex < endLink; ++linkIndex) - { - int vertexIndex0 = m_linkData.getVertexPair(linkIndex).vertex0; - int vertexIndex1 = m_linkData.getVertexPair(linkIndex).vertex1; - - float massLSC = m_linkData.getMassLSC(linkIndex); - if( massLSC > 0.f ) - { - Point3 &vertexPosition0( m_vertexData.getPosition( vertexIndex0 ) ); - Point3 &vertexPosition1( m_vertexData.getPosition( vertexIndex1 ) ); - - Vector3 del = vertexPosition1 - vertexPosition0; - float len = lengthSqr(del); - float restLength2 = m_linkData.getRestLengthSquared(linkIndex); - float k = ((restLength2 - len) / (massLSC * (restLength2 + len) ) )*kst; - - vertexPosition0 -= del*(k*m_vertexData.getInverseMass(vertexIndex0)); - vertexPosition1 += del*(k*m_vertexData.getInverseMass(vertexIndex1)); - } - } - } - } - - // Clear forces so that friction is applied correctly - for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex ) - { - btAcceleratedSoftBodyInterface *currentCloth = m_softBodySet[clothIndex]; - - const int startLink = currentCloth->getFirstLink(); - const int numLinks = currentCloth->getNumLinks(); - const int startVertex = currentCloth->getFirstVertex(); - const int numVertices = currentCloth->getNumVertices(); - const int lastVertex = startVertex + numVertices; - // Update the velocities based on the change in position - // TODO: Damping should only be applied to the action of link constraints so the cloth still falls but then moves stiffly once it hits something - float velocityCoefficient = (1.f - m_perClothDampingFactor[clothIndex]); - float velocityCorrectionCoefficient = m_perClothVelocityCorrectionCoefficient[clothIndex]; - float isolverDt = 1.f/solverdt; - - for(int vertexIndex = startVertex; vertexIndex < lastVertex; ++vertexIndex) - { - m_vertexData.getForceAccumulator( vertexIndex ) = Vector3(0.f, 0.f, 0.f); - } - } - - - - - // Solve collision constraints - // Very simple solver that pushes the vertex out of collision imposters for now - // to test integration with the broad phase code. - // May also want to put this into position solver loop every n iterations depending on - // how it behaves - for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex ) - { - btAcceleratedSoftBodyInterface *currentCloth = m_softBodySet[clothIndex]; - - float clothFriction = currentCloth->getSoftBody()->getFriction(); - - const int startVertex = currentCloth->getFirstVertex(); - const int numVertices = currentCloth->getNumVertices(); - int endVertex = startVertex + numVertices; - - float velocityCoefficient = (1.f - m_perClothDampingFactor[clothIndex]); - float velocityCorrectionCoefficient = m_perClothVelocityCorrectionCoefficient[clothIndex]; - float isolverDt = 1.f/solverdt; - - int startObject = m_perClothCollisionObjects[clothIndex].firstObject; - int endObject = m_perClothCollisionObjects[clothIndex].endObject; - - if( endObject == startObject ) - { - // No collisions so just do the force update - for(int vertexIndex = startVertex; vertexIndex < endVertex; ++vertexIndex) - { - m_vertexData.getForceAccumulator( vertexIndex ) = Vector3(0.f, 0.f, 0.f); - } - - // Recompute velocity based on updated position inclusive of drift - for(int vertexIndex = startVertex; vertexIndex < endVertex; ++vertexIndex) - { - m_vertexData.getVelocity(vertexIndex) = (m_vertexData.getPosition(vertexIndex) - m_vertexData.getPreviousPosition(vertexIndex)) * velocityCoefficient * isolverDt; - } - } else { - - for( int collisionObject = startObject; collisionObject < endObject; ++collisionObject ) - { - btCPUCollisionShapeDescription &shapeDescription( m_collisionObjectDetails[collisionObject] ); - - float colliderFriction = shapeDescription.friction; - - if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE ) - { - using namespace Vectormath::Aos; - - float capsuleHalfHeight = shapeDescription.shapeInformation.capsule.halfHeight; - float capsuleRadius = shapeDescription.shapeInformation.capsule.radius; - int capsuleUpAxis = shapeDescription.shapeInformation.capsule.upAxis; - float capsuleMargin = shapeDescription.margin; - - Transform3 worldTransform = shapeDescription.shapeTransform; - - // As this is a GPU comparison solver just iterate over the vertices - for( int vertexIndex = startVertex; vertexIndex < endVertex; ++vertexIndex ) - { - // Clear force for vertex first - m_vertexData.getForceAccumulator( vertexIndex ) = Vector3(0.f, 0.f, 0.f); - - Point3 vertex( m_vertexData.getPosition( vertexIndex ) ); - - // Correctly define the centerline depending on the upAxis - Point3 c1(0.f, 0.f, 0.f); - Point3 c2(0.f, 0.f, 0.f); - if( capsuleUpAxis == 0 ) { - c1.setX(-capsuleHalfHeight); - c2.setX(capsuleHalfHeight); - } else if( capsuleUpAxis == 1 ) { - c1.setY(-capsuleHalfHeight); - c2.setY(capsuleHalfHeight); - } else { - c1.setZ(-capsuleHalfHeight); - c2.setZ(capsuleHalfHeight); - } - - Point3 worldC1 = worldTransform * c1; - Point3 worldC2 = worldTransform * c2; - Vector3 segment = worldC2 - worldC1; - - // compute distance of tangent to vertex along line segment in capsule - float distanceAlongSegment = -( dot( worldC1 - vertex, segment ) / lengthSqr(segment) ); - - Point3 closestPoint = (worldC1 + segment * distanceAlongSegment); - float distanceFromLine = length(vertex - closestPoint); - float distanceFromC1 = length(worldC1 - vertex); - float distanceFromC2 = length(worldC2 - vertex); - - // Final distance from collision, point to push from, direction to push in - // for impulse force - float distance; - Point3 sourcePoint; - Vector3 normalVector; - if( distanceAlongSegment < 0 ) - { - distance = distanceFromC1; - sourcePoint = worldC1; - normalVector = normalize(vertex - worldC1); - } else if( distanceAlongSegment > 1.f ) { - distance = distanceFromC2; - sourcePoint = worldC2; - normalVector = normalize(vertex - worldC2); - } else { - distance = distanceFromLine; - sourcePoint = closestPoint; - normalVector = normalize(vertex - closestPoint); - } - - Vector3 colliderLinearVelocity( shapeDescription.linearVelocity ); - Vector3 colliderAngularVelocity( shapeDescription.angularVelocity ); - Vector3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, Vector3(vertex) - worldTransform.getTranslation()); - - float minDistance = capsuleRadius + capsuleMargin; - bool collided = false; - - if( distance < minDistance ) - { - // Project back to surface along normal - Vectormath::Aos::Point3 sourcePos = m_vertexData.getPosition( vertexIndex ); - Vectormath::Aos::Vector3 posChange = (minDistance - distance)*normalVector*0.9; - //if( length(posChange) > 1 ) - // std::cerr << "Poschange: " << length(posChange) << "\n"; - - Vectormath::Aos::Point3 newPos = sourcePos + posChange; - m_vertexData.getPosition( vertexIndex ) = newPos; - //m_vertexData.getPosition( vertexIndex ) = m_vertexData.getPosition( vertexIndex ) + (minDistance - distance)*normalVector*0.9; - - // Experiment with moving back along source vector. - // Removes all ability to slide because it projects back along the vector length so it would undo lateral movement. - // TODO: This isn't quite right because we should take the movement of the collider into account as well - /*Vector3 incomingMoveVector( normalize(m_vertexData.getPreviousPosition(vertexIndex) - m_vertexData.getPosition(vertexIndex)) ); - Vector3 normalDirectionMoveOut( (minDistance - distance)*normalVector*0.9 ); - float distanceOnIncomingVector = dot(normalDirectionMoveOut, incomingMoveVector); - Vector3 vectorCorrection( distanceOnIncomingVector*incomingMoveVector ); - m_vertexData.getPosition( vertexIndex ) = m_vertexData.getPosition( vertexIndex ) + vectorCorrection;*/ - - - collided = true; - } - - // Update velocity of vertex based on position - m_vertexData.getVelocity(vertexIndex) = (m_vertexData.getPosition(vertexIndex) - m_vertexData.getPreviousPosition(vertexIndex)) * velocityCoefficient * isolverDt; - - // If we collided before we are on the surface so have friction - if( collided ) - { - // Compute friction - - // TODO: Just vertex velocity not enough, really we need the velocity - // relative to closest point on the surface of the collider - Vector3 vertexVelocity( m_vertexData.getVelocity(vertexIndex) ); - Vector3 relativeVelocity( vertexVelocity - velocityOfSurfacePoint ); - - - // First compute vectors for plane perpendicular to normal vector - // Cross any vector with normal vector first then cross the normal with it again - Vector3 p1(normalize(cross(normalVector, segment))); - Vector3 p2(normalize(cross(p1, normalVector))); - // Full friction is sum of velocities in each direction of plane. - Vector3 frictionVector(p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2)); - - // Real friction is peak friction corrected by friction coefficients. - frictionVector = frictionVector*(colliderFriction*clothFriction); - - float approachSpeed = dot( relativeVelocity, normalVector ); - - // For now just update vertex position by moving to radius distance along the push vector - // Could use this as the basis for simple vector distance constraint for the point later, possibly? - // That way in the main solver loop all shape types could be the same... though when - // we need to apply bi-directionally it becomes more complicated - - // Add friction vector to the force accumulator - Vector3 ¤tForce( m_vertexData.getForceAccumulator( vertexIndex ) ); - - // Only apply if the vertex is moving towards the object to reduce jitter error - if( approachSpeed <= 0.0 ) - currentForce -= frictionVector; - } - } - } - } // for( int collisionObject = startObject; collisionObject < endObject; ++collisionObject ) - } // if( endObject == startObject ) - } - - - - -} // btCPUSoftBodySolver::solveConstraints - - -btCPUSoftBodySolver::btAcceleratedSoftBodyInterface *btCPUSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) -{ - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; - if( softBodyInterface->getSoftBody() == softBody ) - return softBodyInterface; - } - return 0; -} - -const btCPUSoftBodySolver::btAcceleratedSoftBodyInterface * const btCPUSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) const -{ - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - const btAcceleratedSoftBodyInterface *const softBodyInterface = m_softBodySet[softBodyIndex]; - if( softBodyInterface->getSoftBody() == softBody ) - return softBodyInterface; - } - return 0; -} - -int btCPUSoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody ) -{ - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex]; - if( softBodyInterface->getSoftBody() == softBody ) - return softBodyIndex; - } - return 1; -} - -void btSoftBodySolverOutputCPUtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ) -{ - // Currently only support CPU output buffers - - const btSoftBodySolver *solver = softBody->getSoftBodySolver(); - btAssert( solver->getSolverType() == btSoftBodySolver::CPU_SOLVER ); - const btCPUSoftBodySolver *cpuSolver = static_cast< const btCPUSoftBodySolver * >( solver ); - const btCPUSoftBodySolver::btAcceleratedSoftBodyInterface * const currentCloth = cpuSolver->findSoftBodyInterface( softBody ); - const btSoftBodyVertexData &vertexData( cpuSolver->m_vertexData ); - - if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER ) - { - const int firstVertex = currentCloth->getFirstVertex(); - const int lastVertex = firstVertex + currentCloth->getNumVertices(); - const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer); - float *basePointer = cpuVertexBuffer->getBasePointer(); - - if( vertexBuffer->hasVertexPositions() ) - { - const int vertexOffset = cpuVertexBuffer->getVertexOffset(); - const int vertexStride = cpuVertexBuffer->getVertexStride(); - float *vertexPointer = basePointer + vertexOffset; - - for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) - { - Vectormath::Aos::Point3 position = vertexData.getPosition(vertexIndex); - *(vertexPointer + 0) = position.getX(); - *(vertexPointer + 1) = position.getY(); - *(vertexPointer + 2) = position.getZ(); - vertexPointer += vertexStride; - } - } - if( vertexBuffer->hasNormals() ) - { - const int normalOffset = cpuVertexBuffer->getNormalOffset(); - const int normalStride = cpuVertexBuffer->getNormalStride(); - float *normalPointer = basePointer + normalOffset; - - for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex ) - { - Vectormath::Aos::Vector3 normal = vertexData.getNormal(vertexIndex); - *(normalPointer + 0) = normal.getX(); - *(normalPointer + 1) = normal.getY(); - *(normalPointer + 2) = normal.getZ(); - normalPointer += normalStride; - } - } - } else { - btAssert( 0=="Invalid vertex buffer descriptor used in CPU output." ); - } -} // btCPUSoftBodySolver::outputToVertexBuffers - -void btCPUSoftBodySolver::processCollision( btSoftBody*, btSoftBody *) -{ - -} - -// Add the collision object to the set to deal with for a particular soft body -void btCPUSoftBodySolver::processCollision( btSoftBody *softBody, btCollisionObject* collisionObject ) -{ - int softBodyIndex = findSoftBodyIndex( softBody ); - - if( softBodyIndex >= 0 ) - { - btCollisionShape *collisionShape = collisionObject->getCollisionShape(); - float friction = collisionObject->getFriction(); - int shapeType = collisionShape->getShapeType(); - if( shapeType == CAPSULE_SHAPE_PROXYTYPE ) - { - // Add to the list of expected collision objects - btCPUCollisionShapeDescription newCollisionShapeDescription; - newCollisionShapeDescription.softBodyIdentifier = softBodyIndex; - newCollisionShapeDescription.collisionShapeType = shapeType; - newCollisionShapeDescription.shapeTransform = toTransform3(collisionObject->getWorldTransform()); - btCapsuleShape *capsule = static_cast( collisionShape ); - newCollisionShapeDescription.shapeInformation.capsule.radius = capsule->getRadius(); - newCollisionShapeDescription.shapeInformation.capsule.halfHeight = capsule->getHalfHeight(); - newCollisionShapeDescription.shapeInformation.capsule.upAxis = capsule->getUpAxis(); - newCollisionShapeDescription.margin = capsule->getMargin(); - newCollisionShapeDescription.friction = friction; - btRigidBody* body = static_cast< btRigidBody* >( collisionObject ); - newCollisionShapeDescription.linearVelocity = toVector3(body->getLinearVelocity()); - newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity()); - m_collisionObjectDetails.push_back( newCollisionShapeDescription ); - } else { - btAssert("Unsupported collision shape type\n"); - } - } else { - btAssert("Unknown soft body"); - } -} // btCPUSoftBodySolver::processCollision - - -void btCPUSoftBodySolver::predictMotion( float timeStep ) -{ - // Fill the force arrays with current acceleration data etc - m_perClothWindVelocity.resize( m_softBodySet.size() ); - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody(); - - m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity()); - } - - - // Apply forces that we know about to the cloths - applyForces( timeStep * getTimeScale() ); - - // Itegrate motion for all soft bodies dealt with by the solver - integrate( timeStep * getTimeScale() ); - - // Update bounds - // Will update the bounds for all softBodies being dealt with by the solver and - // set the values in the btSoftBody object - updateBounds(); - - // End prediction work for solvers -} - - diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h deleted file mode 100644 index 25d04cd9b..000000000 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolver_CPU.h +++ /dev/null @@ -1,370 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef BT_ACCELERATED_SOFT_BODY_CPU_SOLVER_H -#define BT_ACCELERATED_SOFT_BODY_CPU_SOLVER_H - -#include "vectormath/vmInclude.h" -#include "BulletSoftBody/btSoftBodySolvers.h" -#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" - -struct btCPUCollisionShapeDescription -{ - int softBodyIdentifier; - int collisionShapeType; - Vectormath::Aos::Transform3 shapeTransform; - union - { - struct Sphere - { - float radius; - } sphere; - struct Capsule - { - float radius; - float halfHeight; - int upAxis; - } capsule; - } shapeInformation; - - float margin; - float friction; - Vectormath::Aos::Vector3 linearVelocity; - Vectormath::Aos::Vector3 angularVelocity; - - btCPUCollisionShapeDescription() - { - collisionShapeType = 0; - margin = 0; - friction = 0; - } -}; - -class btCPUSoftBodySolver : public btSoftBodySolver -{ -protected: - /** - * Entry in the collision shape array. - * Specifies the shape type, the transform matrix and the necessary details of the collisionShape. - */ - - - // Public because output classes need it. This is a better encapsulation to break in the short term - // Than having the solvers themselves need dependencies on DX, CL etc unnecessarily -public: - - struct CollisionObjectIndices - { - CollisionObjectIndices( int f, int e ) - { - firstObject = f; - endObject = e; - } - - int firstObject; - int endObject; - }; - - /** - * SoftBody class to maintain information about a soft body instance - * within a solver. - * This data addresses the main solver arrays. - */ - class btAcceleratedSoftBodyInterface - { - protected: - /** Current number of vertices that are part of this cloth */ - int m_numVertices; - /** Maximum number of vertices allocated to be part of this cloth */ - int m_maxVertices; - /** Current number of triangles that are part of this cloth */ - int m_numTriangles; - /** Maximum number of triangles allocated to be part of this cloth */ - int m_maxTriangles; - /** Index of first vertex in the world allocated to this cloth */ - int m_firstVertex; - /** Index of first triangle in the world allocated to this cloth */ - int m_firstTriangle; - /** Index of first link in the world allocated to this cloth */ - int m_firstLink; - /** Maximum number of links allocated to this cloth */ - int m_maxLinks; - /** Current number of links allocated to this cloth */ - int m_numLinks; - - /** The actual soft body this data represents */ - btSoftBody *m_softBody; - - - public: - btAcceleratedSoftBodyInterface( btSoftBody *softBody ) : - m_softBody( softBody ) - { - m_numVertices = 0; - m_maxVertices = 0; - m_numTriangles = 0; - m_maxTriangles = 0; - m_firstVertex = 0; - m_firstTriangle = 0; - m_firstLink = 0; - m_maxLinks = 0; - m_numLinks = 0; - } - int getNumVertices() const - { - return m_numVertices; - } - - int getNumTriangles() const - { - return m_numTriangles; - } - - int getMaxVertices() const - { - return m_maxVertices; - } - - int getMaxTriangles() const - { - return m_maxTriangles; - } - - int getFirstVertex() const - { - return m_firstVertex; - } - - int getFirstTriangle() const - { - return m_firstTriangle; - } - - /** - * Update the bounds in the btSoftBody object - */ - void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound ); - - // TODO: All of these set functions will have to do checks and - // update the world because restructuring of the arrays will be necessary - // Reasonable use of "friend"? - void setNumVertices( int numVertices ) - { - m_numVertices = numVertices; - } - - void setNumTriangles( int numTriangles ) - { - m_numTriangles = numTriangles; - } - - void setMaxVertices( int maxVertices ) - { - m_maxVertices = maxVertices; - } - - void setMaxTriangles( int maxTriangles ) - { - m_maxTriangles = maxTriangles; - } - - void setFirstVertex( int firstVertex ) - { - m_firstVertex = firstVertex; - } - - void setFirstTriangle( int firstTriangle ) - { - m_firstTriangle = firstTriangle; - } - - void setMaxLinks( int maxLinks ) - { - m_maxLinks = maxLinks; - } - - void setNumLinks( int numLinks ) - { - m_numLinks = numLinks; - } - - void setFirstLink( int firstLink ) - { - m_firstLink = firstLink; - } - - int getMaxLinks() const - { - return m_maxLinks; - } - - int getNumLinks() const - { - return m_numLinks; - } - - int getFirstLink() const - { - return m_firstLink; - } - - btSoftBody* getSoftBody() - { - return m_softBody; - } - - const btSoftBody* const getSoftBody() const - { - return m_softBody; - } - }; - - btSoftBodyLinkData m_linkData; - btSoftBodyVertexData m_vertexData; - btSoftBodyTriangleData m_triangleData; - -protected: - - - - - /** Variable to define whether we need to update solver constants on the next iteration */ - bool m_updateSolverConstants; - - /** - * Cloths owned by this solver. - * Only our cloths are in this array. - */ - btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet; - - /** Acceleration value to be applied to all non-static vertices in the solver. - * Index n is cloth n, array sized by number of cloths in the world not the solver. - */ - btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothAcceleration; - - /** Wind velocity to be applied normal to all non-static vertices in the solver. - * Index n is cloth n, array sized by number of cloths in the world not the solver. - */ - btAlignedObjectArray< Vectormath::Aos::Vector3 > m_perClothWindVelocity; - - /** Velocity damping factor */ - btAlignedObjectArray< float > m_perClothDampingFactor; - - /** Velocity correction coefficient */ - btAlignedObjectArray< float > m_perClothVelocityCorrectionCoefficient; - - /** Lift parameter for wind effect on cloth. */ - btAlignedObjectArray< float > m_perClothLiftFactor; - - /** Drag parameter for wind effect on cloth. */ - btAlignedObjectArray< float > m_perClothDragFactor; - - /** Density of the medium in which each cloth sits */ - btAlignedObjectArray< float > m_perClothMediumDensity; - - /** - * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects. - */ - btAlignedObjectArray< CollisionObjectIndices > m_perClothCollisionObjects; - - /** - * Collision shapes being passed across to the cloths in this solver. - */ - btAlignedObjectArray< btCPUCollisionShapeDescription > m_collisionObjectDetails; - - - void prepareCollisionConstraints(); - - Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a ); - - void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce ); - - float computeTriangleArea( - const Vectormath::Aos::Point3 &vertex0, - const Vectormath::Aos::Point3 &vertex1, - const Vectormath::Aos::Point3 &vertex2 ); - - void applyForces( float solverdt ); - void integrate( float solverdt ); - void updateConstants( float timeStep ); - int findSoftBodyIndex( const btSoftBody* const softBody ); - - /** Update the bounds of the soft body objects in the solver */ - void updateBounds(); - - -public: - btCPUSoftBodySolver(); - - virtual ~btCPUSoftBodySolver(); - - - virtual SolverTypes getSolverType() const - { - return CPU_SOLVER; - } - - - virtual btSoftBodyLinkData &getLinkData(); - - virtual btSoftBodyVertexData &getVertexData(); - - virtual btSoftBodyTriangleData &getTriangleData(); - - - - btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody ); - const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const; - - - - virtual bool checkInitialized(); - - virtual void updateSoftBodies( ); - - virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false); - - virtual void copyBackToSoftBodies(); - - virtual void solveConstraints( float solverdt ); - - virtual void predictMotion( float solverdt ); - - virtual void processCollision( btSoftBody *, btCollisionObject* ); - - virtual void processCollision( btSoftBody*, btSoftBody *); - -}; - - -/** - * Class to manage movement of data from a solver to a given target. - * This version is the CPU to CPU generic version. - */ -class btSoftBodySolverOutputCPUtoCPU : public btSoftBodySolverOutput -{ -protected: - -public: - btSoftBodySolverOutputCPUtoCPU() - { - } - - /** Output current computed vertex data to the vertex buffers for all cloths in the solver. */ - virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ); -}; - -#endif // #ifndef BT_ACCELERATED_SOFT_BODY_CPU_SOLVER_H \ No newline at end of file diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt index 393d37d6a..e488771d1 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt @@ -8,7 +8,7 @@ SET(DX11_INCLUDE_PATH "${DIRECTX_SDK_BASE_DIR}/Include" CACHE DOCSTRING "Micros INCLUDE_DIRECTORIES( -${DX11_INCLUDE_PATH} "../cpu/" +${DX11_INCLUDE_PATH} "../Shared/" ${VECTOR_MATH_INCLUDE} ) @@ -20,7 +20,7 @@ SET(BulletSoftBodyDX11Solvers_SRCS SET(BulletSoftBodyDX11Solvers_HDRS btSoftBodySolver_DX11.h btSoftBodySolver_DX11SIMDAware.h - ../cpu/btSoftBodySolverData.h + ../Shared/btSoftBodySolverData.h btSoftBodySolverVertexData_DX11.h btSoftBodySolverTriangleData_DX11.h btSoftBodySolverLinkData_DX11.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h index 555483d2d..0f753cec2 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h @@ -14,7 +14,7 @@ subject to the following restrictions: */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_DX11.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h index 92864a159..82ce46a3b 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h @@ -13,7 +13,7 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_DX11.h" #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h index 9d78d91f5..fc06a2722 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h @@ -13,7 +13,7 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_DX11.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h index 2216cb9e0..2d460174a 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h @@ -14,7 +14,7 @@ subject to the following restrictions: */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_DX11.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt index cdb825a79..995e275b7 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt @@ -18,7 +18,7 @@ SET(BulletSoftBodyOpenCLSolvers_SRCS SET(BulletSoftBodyOpenCLSolvers_HDRS ../btSoftBodySolver_OpenCL.h ../btSoftBodySolver_OpenCLSIMDAware.h - ../../CPU/btSoftBodySolverData.h + ../../Shared/btSoftBodySolverData.h ../btSoftBodySolverVertexData_OpenCL.h ../btSoftBodySolverTriangleData_OpenCL.h ../btSoftBodySolverLinkData_OpenCL.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt index 5c54c6587..3db66c9db 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt @@ -13,7 +13,7 @@ SET(BulletSoftBodyOpenCLSolvers_SRCS SET(BulletSoftBodyOpenCLSolvers_HDRS ../btSoftBodySolver_OpenCL.h - ../../CPU/btSoftBodySolverData.h + ../../Shared/btSoftBodySolverData.h ../btSoftBodySolverVertexData_OpenCL.h ../btSoftBodySolverTriangleData_OpenCL.h ../btSoftBodySolverLinkData_OpenCL.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt index a6426dcab..3b77c2100 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt @@ -18,7 +18,7 @@ SET(BulletSoftBodyOpenCLSolvers_SRCS SET(BulletSoftBodyOpenCLSolvers_HDRS ../btSoftBodySolver_OpenCL.h ../btSoftBodySolver_OpenCLSIMDAware.h - ../../CPU/btSoftBodySolverData.h + ../../Shared/btSoftBodySolverData.h ../btSoftBodySolverVertexData_OpenCL.h ../btSoftBodySolverTriangleData_OpenCL.h ../btSoftBodySolverLinkData_OpenCL.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt index d4627b0f8..72d0a7310 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt @@ -14,7 +14,7 @@ SET(BulletSoftBodyOpenCLSolvers_SRCS SET(BulletSoftBodyOpenCLSolvers_HDRS ../btSoftBodySolver_OpenCL.h - ../../CPU/btSoftBodySolverData.h + ../../Shared/btSoftBodySolverData.h ../btSoftBodySolverVertexData_OpenCL.h ../btSoftBodySolverTriangleData_OpenCL.h ../btSoftBodySolverLinkData_OpenCL.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp index 79b0ac234..0c4da2c15 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp @@ -24,9 +24,12 @@ subject to the following restrictions: #include "../OpenCLC10/UpdateNormals.cl" #include "../OpenCLC10/UpdatePositions.cl" #include "../OpenCLC10/UpdatePositionsFromVelocities.cl" -//#include "../OpenCLC10/VSolveLinks.cl" +#include "../OpenCLC10/VSolveLinks.cl" +//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" + MINICL_REGISTER(PrepareLinksKernel) +MINICL_REGISTER(VSolveLinksKernel) MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel) MINICL_REGISTER(SolvePositionsFromLinksKernel) MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel) @@ -38,3 +41,208 @@ MINICL_REGISTER(NormalizeNormalsAndAreasKernel) MINICL_REGISTER(UpdateSoftBodiesKernel) +float mydot3a(float4 a, float4 b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + + +typedef struct +{ + int firstObject; + int endObject; +} CollisionObjectIndices; + +typedef struct +{ + float4 shapeTransform[4]; // column major 4x4 matrix + float4 linearVelocity; + float4 angularVelocity; + + int softBodyIdentifier; + int collisionShapeType; + + + // Shape information + // Compressed from the union + float radius; + float halfHeight; + int upAxis; + + float margin; + float friction; + + int padding0; + +} CollisionShapeDescription; + +// From btBroadphaseProxy.h +__constant int CAPSULE_SHAPE_PROXYTYPE = 10; + +// Multiply column-major matrix against vector +float4 matrixVectorMul( float4 matrix[4], float4 vector ) +{ + float4 returnVector; + float4 row0 = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x); + float4 row1 = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y); + float4 row2 = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z); + float4 row3 = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w); + returnVector.x = dot(row0, vector); + returnVector.y = dot(row1, vector); + returnVector.z = dot(row2, vector); + returnVector.w = dot(row3, vector); + return returnVector; +} + +__kernel void +SolveCollisionsAndUpdateVelocitiesKernel( + const int numNodes, + const float isolverdt, + __global int *g_vertexClothIdentifier, + __global float4 *g_vertexPreviousPositions, + __global float * g_perClothFriction, + __global float * g_clothDampingFactor, + __global CollisionObjectIndices * g_perClothCollisionObjectIndices, + __global CollisionShapeDescription * g_collisionObjectDetails, + __global float4 * g_vertexForces, + __global float4 *g_vertexVelocities, + __global float4 *g_vertexPositions GUID_ARG) +{ + int nodeID = get_global_id(0); + float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f); + + if( get_global_id(0) < numNodes ) + { + int clothIdentifier = g_vertexClothIdentifier[nodeID]; + + // Abort if this is not a valid cloth + if( clothIdentifier < 0 ) + return; + + + float4 position (g_vertexPositions[nodeID].xyz, 1.f); + float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f); + + float clothFriction = g_perClothFriction[clothIdentifier]; + float dampingFactor = g_clothDampingFactor[clothIdentifier]; + float velocityCoefficient = (1.f - dampingFactor); + float4 difference = position - previousPosition; + float4 velocity = difference*velocityCoefficient*isolverdt; + + CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier]; + + int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject; + + if( numObjects > 0 ) + { + // We have some possible collisions to deal with + for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision ) + { + CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision]; + float colliderFriction = shapeDescription.friction; + + if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE ) + { + // Colliding with a capsule + + float capsuleHalfHeight = shapeDescription.halfHeight; + float capsuleRadius = shapeDescription.radius; + float capsuleMargin = shapeDescription.margin; + int capsuleupAxis = shapeDescription.upAxis; + + // Four columns of worldTransform matrix + float4 worldTransform[4]; + worldTransform[0] = shapeDescription.shapeTransform[0]; + worldTransform[1] = shapeDescription.shapeTransform[1]; + worldTransform[2] = shapeDescription.shapeTransform[2]; + worldTransform[3] = shapeDescription.shapeTransform[3]; + + // Correctly define capsule centerline vector + float4 c1 (0.f, 0.f, 0.f, 1.f); + float4 c2 (0.f, 0.f, 0.f, 1.f); + c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 ); + c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 ); + c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 ); + c2.x = -c1.x; + c2.y = -c1.y; + c2.z = -c1.z; + + + float4 worldC1 = matrixVectorMul(worldTransform, c1); + float4 worldC2 = matrixVectorMul(worldTransform, c2); + float4 segment = (worldC2 - worldC1); + + // compute distance of tangent to vertex along line segment in capsule + float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) ); + + float4 closestPoint = (worldC1 + (segment * distanceAlongSegment)); + float distanceFromLine = length(position - closestPoint); + float distanceFromC1 = length(worldC1 - position); + float distanceFromC2 = length(worldC2 - position); + + // Final distance from collision, point to push from, direction to push in + // for impulse force + float dist; + float4 normalVector; + if( distanceAlongSegment < 0 ) + { + dist = distanceFromC1; + normalVector = float4(normalize(position - worldC1).xyz, 0.f); + } else if( distanceAlongSegment > 1.f ) { + dist = distanceFromC2; + normalVector = float4(normalize(position - worldC2).xyz, 0.f); + } else { + dist = distanceFromLine; + normalVector = float4(normalize(position - closestPoint).xyz, 0.f); + } + + float4 colliderLinearVelocity = shapeDescription.linearVelocity; + float4 colliderAngularVelocity = shapeDescription.angularVelocity; + float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - float4(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f)); + + float minDistance = capsuleRadius + capsuleMargin; + + // In case of no collision, this is the value of velocity + velocity = (position - previousPosition) * velocityCoefficient * isolverdt; + + + // Check for a collision + if( dist < minDistance ) + { + // Project back to surface along normal + position = position + float4(normalVector*(minDistance - dist)*0.9f); + velocity = (position - previousPosition) * velocityCoefficient * isolverdt; + float4 relativeVelocity = velocity - velocityOfSurfacePoint; + + float4 p1 = normalize(cross(normalVector, segment)); + float4 p2 = normalize(cross(p1, normalVector)); + // Full friction is sum of velocities in each direction of plane + float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2); + + // Real friction is peak friction corrected by friction coefficients + frictionVector = frictionVector * (colliderFriction*clothFriction); + + float approachSpeed = dot(relativeVelocity, normalVector); + + if( approachSpeed <= 0.0f ) + forceOnVertex -= frictionVector; + } + } + } + } + + g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f); + + // Update external force + g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f); + + g_vertexPositions[nodeID] = float4(position.xyz, 0.f); + } +} + + +MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel); + + + + diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt index a7e4e8014..5dcba5723 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt @@ -17,7 +17,7 @@ SET(BulletSoftBodyOpenCLSolvers_SRCS SET(BulletSoftBodyOpenCLSolvers_HDRS ../btSoftBodySolver_OpenCL.h - ../../CPU/btSoftBodySolverData.h + ../../Shared/btSoftBodySolverData.h ../btSoftBodySolverVertexData_OpenCL.h ../btSoftBodySolverTriangleData_OpenCL.h ../btSoftBodySolverLinkData_OpenCL.h diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl index 555d07a1d..8f472d121 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl @@ -78,8 +78,8 @@ ApplyForcesKernel( float4 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim); nodeF = nodeFPlusForce; - if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) - nodeF = nodeFMinus; + //if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) ) + // nodeF = nodeFMinus; g_vertexForceAccumulator[nodeID] = nodeF; } diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl index 29b04024f..099042f03 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl @@ -1,8 +1,8 @@ MSTRINGIFY( -#pragma OPENCL EXTENSION cl_amd_printf : enable\n -float mydot3(float4 a, float4 b) + +float mydot3a(float4 a, float4 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } @@ -67,7 +67,7 @@ SolveCollisionsAndUpdateVelocitiesKernel( __global CollisionShapeDescription * g_collisionObjectDetails, __global float4 * g_vertexForces, __global float4 *g_vertexVelocities, - __global float4 *g_vertexPositions) + __global float4 *g_vertexPositions GUID_ARG) { int nodeID = get_global_id(0); float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f); @@ -134,7 +134,7 @@ SolveCollisionsAndUpdateVelocitiesKernel( float4 segment = (worldC2 - worldC1); // compute distance of tangent to vertex along line segment in capsule - float distanceAlongSegment = -( mydot3( (worldC1 - position), segment ) / mydot3(segment, segment) ); + float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) ); float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment)); float distanceFromLine = length(position - closestPoint); @@ -178,7 +178,7 @@ SolveCollisionsAndUpdateVelocitiesKernel( float4 p1 = normalize(cross(normalVector, segment)); float4 p2 = normalize(cross(p1, normalVector)); // Full friction is sum of velocities in each direction of plane - float4 frictionVector = p1*mydot3(relativeVelocity, p1) + p2*mydot3(relativeVelocity, p2); + float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2); // Real friction is peak friction corrected by friction coefficients frictionVector = frictionVector * (colliderFriction*clothFriction); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h index cef924f6f..0092c8f62 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h @@ -13,7 +13,7 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_OpenCL.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h index 5e3511040..7a6e72699 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h @@ -13,7 +13,7 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_OpenCL.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h index e1094e38a..9cafe8371 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h @@ -14,7 +14,7 @@ subject to the following restrictions: */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_OpenCL.h" diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h index 24997e726..a077978cb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h @@ -13,7 +13,7 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ -#include "BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h" +#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h" #include "btSoftBodySolverBuffer_OpenCL.h" #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp index fccafa1ab..d43b9517e 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp @@ -1642,10 +1642,11 @@ void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollision m_collisionObjectDetails.push_back( newCollisionShapeDescription ); } else { - btAssert(0 && "Unsupported collision shape type\n"); + printf("Unsupported collision shape type\n"); + //btAssert(0 && "Unsupported collision shape type\n"); } } else { - btAssert("Unknown soft body"); + btAssert(0,"Unknown soft body"); } } // btOpenCLSoftBodySolver::processCollision diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h similarity index 100% rename from src/BulletMultiThreaded/GpuSoftBodySolvers/CPU/btSoftBodySolverData.h rename to src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h diff --git a/src/BulletSoftBody/btSoftBody.cpp b/src/BulletSoftBody/btSoftBody.cpp index 4036877b1..0d7d98b3a 100644 --- a/src/BulletSoftBody/btSoftBody.cpp +++ b/src/BulletSoftBody/btSoftBody.cpp @@ -1822,7 +1822,7 @@ btScalar btSoftBody::RayFromToCaster::rayFromToTriangle( const btVector3& rayF void btSoftBody::pointersToIndices() { #define PTR2IDX(_p_,_b_) reinterpret_cast((_p_)-(_b_)) - btSoftBody::Node* base=&m_nodes[0]; + btSoftBody::Node* base=m_nodes.size() ? &m_nodes[0] : 0; int i,ni; for(i=0,ni=m_nodes.size();i