diff --git a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp index 2afa2b2fd..e5c1a825c 100644 --- a/Demos/OpenCLClothDemo/cl_cloth_demo.cpp +++ b/Demos/OpenCLClothDemo/cl_cloth_demo.cpp @@ -16,12 +16,16 @@ subject to the following restrictions: #ifdef _WIN32 #include #endif +#ifndef __APPLE__ #ifndef USE_MINICL #define USE_SIMDAWARE_SOLVER #define USE_GPU_SOLVER -#define USE_GPU_COPY +#ifdef _WIN32 +#define USE_GPU_COPY //only tested on Windows, may work under Linux +#endif //_WIN32 #endif //USE_MINICL +#endif @@ -550,7 +554,9 @@ int main(int argc, char *argv[]) preInitGL(argc, argv); +#ifdef _WIN32 glewInit(); +#endif #ifdef USE_GPU_COPY #ifdef _WIN32 diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt index 8ffe074dd..5c54c6587 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt @@ -8,6 +8,7 @@ ${BULLET_PHYSICS_SOURCE_DIR}/src SET(BulletSoftBodyOpenCLSolvers_SRCS ../btSoftBodySolver_OpenCL.cpp + ../btSoftBodySolver_OpenCLSIMDAware.cpp ) SET(BulletSoftBodyOpenCLSolvers_HDRS diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h index 4d5ae31a4..3a30b0ecb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h @@ -25,11 +25,9 @@ subject to the following restrictions: #include #else #include + #include #endif //__APPLE__ #endif//USE_MINICL -#ifndef USE_MINICL -#include -#endif //USE_MINICL #ifdef _WIN32//for glut.h @@ -165,4 +163,4 @@ public: } }; -#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H \ No newline at end of file +#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp index ad2edfbda..357bb6c4e 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp @@ -22,6 +22,7 @@ subject to the following restrictions: #include "BulletSoftBody/btSoftBody.h" #include "BulletCollision/CollisionShapes/btCapsuleShape.h" #include "LinearMath/btQuickprof.h" +#include #ifdef USE_MINICL #include "MiniCL/cl.h" @@ -65,8 +66,6 @@ static char* UpdateNormalsCLString = #include "OpenCLC/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC/VSolveLinks.cl" -static char* ComputeBoundsCLString = -#include "OpenCLC/ComputeBounds.cl" static char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC/SolveCollisionsAndUpdateVelocities.cl" #else @@ -92,8 +91,6 @@ static char* UpdateNormalsCLString = #include "OpenCLC10/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC10/VSolveLinks.cl" -static char* ComputeBoundsCLString = -#include "OpenCLC10/ComputeBounds.cl" static char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" #endif //CL_VERSION_1_1 @@ -617,8 +614,6 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true ), m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ), m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ), - m_clPerClothMinBounds( queue, ctx, &m_perClothMinBounds, false ), - m_clPerClothMaxBounds( queue, ctx, &m_perClothMaxBounds, false ), m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ), m_cqCommandQue( queue ), m_cxMainContext(ctx), @@ -644,7 +639,6 @@ btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_contex resetNormalsAndAreasKernel = 0; resetNormalsAndAreasKernel = 0; normalizeNormalsAndAreasKernel = 0; - computeBoundsKernel = 0; outputToVertexArrayKernel = 0; applyForcesKernel = 0; } @@ -668,7 +662,6 @@ void btOpenCLSoftBodySolver::releaseKernels() RELEASE_CL_KERNEL( solveCollisionsAndUpdateVelocitiesKernel ); RELEASE_CL_KERNEL( resetNormalsAndAreasKernel ); RELEASE_CL_KERNEL( normalizeNormalsAndAreasKernel ); - RELEASE_CL_KERNEL( computeBoundsKernel ); RELEASE_CL_KERNEL( outputToVertexArrayKernel ); RELEASE_CL_KERNEL( applyForcesKernel ); @@ -734,8 +727,6 @@ void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &sof m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time - m_perClothMinBounds.push_back( UIntVector3(UINT_MAX, UINT_MAX, UINT_MAX) ); - m_perClothMaxBounds.push_back( UIntVector3(0, 0, 0) ); m_perClothFriction.push_back( softBody->getFriction() ); m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) ); @@ -1039,95 +1030,11 @@ float btOpenCLSoftBodySolver::computeTriangleArea( void btOpenCLSoftBodySolver::updateBounds() { - -//#define USE_GPU_BOUNDS_COMPUTATION -#ifdef USE_GPU_BOUNDS_COMPUTATION - using Vectormath::Aos::Point3; - // Interpretation structure for float and int - - struct FPRep { - unsigned int mantissa : 23; - unsigned int exponent : 8; - unsigned int sign : 1; - }; - union FloatAsInt - { - float floatValue; - int intValue; - unsigned int uintValue; - FPRep fpRep; - }; - - - // Update bounds array to min and max int values to allow easy atomics - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - m_perClothMinBounds[softBodyIndex] = UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ); - m_perClothMaxBounds[softBodyIndex] = UIntVector3( 0, 0, 0 ); - } - - m_vertexData.moveToAccelerator(); - m_clPerClothMinBounds.moveToGPU(); - m_clPerClothMaxBounds.moveToGPU(); - - - computeBounds( ); - - - m_clPerClothMinBounds.moveFromGPU(); - m_clPerClothMaxBounds.moveFromGPU(); - - - - for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) - { - UIntVector3 minBoundUInt = m_perClothMinBounds[softBodyIndex]; - UIntVector3 maxBoundUInt = m_perClothMaxBounds[softBodyIndex]; - /*UIntVector3 minBoundUInt; - minBoundUInt.x = m_perClothMinBounds[softBodyIndex*4]; - minBoundUInt.y = m_perClothMinBounds[softBodyIndex*4+1]; - minBoundUInt.z = m_perClothMinBounds[softBodyIndex*4+2]; - UIntVector3 maxBoundUInt; - maxBoundUInt.x = m_perClothMaxBounds[softBodyIndex*4]; - maxBoundUInt.y = m_perClothMaxBounds[softBodyIndex*4+1]; - maxBoundUInt.z = m_perClothMaxBounds[softBodyIndex*4+2];*/ - - // Convert back to float - FloatAsInt fai; - - btVector3 minBound; - fai.uintValue = minBoundUInt.x; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - minBound.setX( fai.floatValue ); - fai.uintValue = minBoundUInt.y; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - minBound.setY( fai.floatValue ); - fai.uintValue = minBoundUInt.z; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - minBound.setZ( fai.floatValue ); - - btVector3 maxBound; - fai.uintValue = maxBoundUInt.x; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - maxBound.setX( fai.floatValue ); - fai.uintValue = maxBoundUInt.y; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - maxBound.setY( fai.floatValue ); - fai.uintValue = maxBoundUInt.z; - fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000); - maxBound.setZ( fai.floatValue ); - - - // And finally assign to the soft body - m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound ); - } -#else for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex ) { btVector3 minBound(-1e30,-1e30,-1e30), maxBound(1e30,1e30,1e30); m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound ); } -#endif//USE_GPU_BOUNDS_COMPUTATION } // btOpenCLSoftBodySolver::updateBounds @@ -1431,33 +1338,6 @@ void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( flo } // updateVelocitiesFromPositionsWithoutVelocities -void btOpenCLSoftBodySolver::computeBounds( ) -{ - m_vertexData.moveToAccelerator(); - - cl_int ciErrNum; - int numVerts = m_vertexData.getNumVertices(); - int numSoftBodies = m_softBodySet.size(); - ciErrNum = clSetKernelArg(computeBoundsKernel, 0, sizeof(int), &numVerts); - ciErrNum = clSetKernelArg(computeBoundsKernel, 1, sizeof(int), &numSoftBodies); - ciErrNum = clSetKernelArg(computeBoundsKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer); - ciErrNum = clSetKernelArg(computeBoundsKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer); - ciErrNum = clSetKernelArg(computeBoundsKernel, 4, sizeof(cl_mem),&m_clPerClothMinBounds.m_buffer); - ciErrNum = clSetKernelArg(computeBoundsKernel, 5, sizeof(cl_mem),&m_clPerClothMaxBounds.m_buffer); - ciErrNum = clSetKernelArg(computeBoundsKernel, 6, sizeof(cl_uint4)*256,0); - ciErrNum = clSetKernelArg(computeBoundsKernel, 7, sizeof(cl_uint4)*256,0); - - size_t numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize); - if (numWorkItems) - { - ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,computeBoundsKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0); - if( ciErrNum != CL_SUCCESS ) - { - btAssert( 0 && "enqueueNDRangeKernel(computeBoundsKernel)"); - } - } - clFinish(m_cqCommandQue); -} // btOpenCLSoftBodySolver::computeBounds void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt ) { @@ -1779,7 +1659,6 @@ bool btOpenCLSoftBodySolver::buildShaders() solvePositionsFromLinksKernel = clFunctions.compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel" ); updateVelocitiesFromPositionsWithVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel" ); updateVelocitiesFromPositionsWithoutVelocitiesKernel = clFunctions.compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel" ); - computeBoundsKernel = clFunctions.compileCLKernelFromString( ComputeBoundsCLString, "ComputeBoundsKernel" ); solveCollisionsAndUpdateVelocitiesKernel = clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel" ); integrateKernel = clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel" ); applyForcesKernel = clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel" ); diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h index 507143261..bc55594bb 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h @@ -330,24 +330,6 @@ protected: btAlignedObjectArray< CollisionShapeDescription > m_collisionObjectDetails; btOpenCLBuffer< CollisionShapeDescription > m_clCollisionObjectDetails; - /** - * Minimum bounds for each cloth. - * Updated by GPU and returned for use by broad phase. - * These are int vectors as a reminder that they store the int representation of a float, not a float. - * Bit 31 is inverted - is floats are stored with int-sortable values. - * This is really a uint4 array but thanks to a limitation of OpenCL atomics we are using uints. - */ - btAlignedObjectArray< UIntVector3 > m_perClothMinBounds; - btOpenCLBuffer< UIntVector3 > m_clPerClothMinBounds; - - /** - * Maximum bounds for each cloth. - * Updated by GPU and returned for use by broad phase. - * These are int vectors as a reminder that they store the int representation of a float, not a float. - * Bit 31 is inverted - is floats are stored with int-sortable values. - */ - btAlignedObjectArray< UIntVector3 > m_perClothMaxBounds; - btOpenCLBuffer< UIntVector3 > m_clPerClothMaxBounds; /** @@ -370,7 +352,6 @@ protected: cl_kernel solveCollisionsAndUpdateVelocitiesKernel; cl_kernel resetNormalsAndAreasKernel; cl_kernel normalizeNormalsAndAreasKernel; - cl_kernel computeBoundsKernel; cl_kernel updateSoftBodiesKernel; cl_kernel outputToVertexArrayKernel; @@ -427,7 +408,6 @@ protected: void updateVelocitiesFromPositionsWithVelocities( float isolverdt ); void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt ); - void computeBounds( ); virtual void solveCollisionsAndUpdateVelocities( float isolverdt ); // End kernel dispatches diff --git a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp index aeee3364a..19013037b 100644 --- a/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp +++ b/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp @@ -21,6 +21,7 @@ subject to the following restrictions: #include "BulletSoftBody/btSoftBodySolverVertexBuffer.h" #include "BulletSoftBody/btSoftBody.h" #include "BulletCollision/CollisionShapes/btCapsuleShape.h" +#include #define WAVEFRONT_SIZE 32 #define WAVEFRONT_BLOCK_MULTIPLIER 2 @@ -53,8 +54,6 @@ static char* UpdateNormalsCLString = #include "OpenCLC/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC/VSolveLinks.cl" -static char* ComputeBoundsCLString = -#include "OpenCLC/ComputeBounds.cl" static char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl" static char* OutputToVertexArrayCLString = @@ -80,8 +79,6 @@ static char* UpdateNormalsCLString = #include "OpenCLC10/UpdateNormals.cl" static char* VSolveLinksCLString = #include "OpenCLC10/VSolveLinks.cl" -static char* ComputeBoundsCLString = -#include "OpenCLC10/ComputeBounds.cl" static char* SolveCollisionsAndUpdateVelocitiesCLString = #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl" static char* OutputToVertexArrayCLString = @@ -241,17 +238,6 @@ void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody m_perClothLiftFactor.push_back( softBody->m_cfg.kLF ); m_perClothDragFactor.push_back( softBody->m_cfg.kDG ); m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density); - // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time - m_perClothMinBounds.push_back( UIntVector3(UINT_MAX, UINT_MAX, UINT_MAX) ); - m_perClothMaxBounds.push_back( UIntVector3(0, 0, 0) ); - /*m_perClothMinBounds.push_back( UINT_MAX ); - m_perClothMaxBounds.push_back( 0 ); - m_perClothMinBounds.push_back( UINT_MAX ); - m_perClothMaxBounds.push_back( 0 ); - m_perClothMinBounds.push_back( UINT_MAX ); - m_perClothMaxBounds.push_back( 0 ); - m_perClothMinBounds.push_back( UINT_MAX ); - m_perClothMaxBounds.push_back( 0 );*/ m_perClothFriction.push_back( softBody->getFriction() ); @@ -541,7 +527,6 @@ bool btOpenCLSoftBodySolverSIMDAware::buildShaders() integrateKernel = clFunctions.compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", "" ); applyForcesKernel = clFunctions.compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", "" ); solveCollisionsAndUpdateVelocitiesKernel = clFunctions.compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", "" ); - computeBoundsKernel = clFunctions.compileCLKernelFromString( ComputeBoundsCLString, "ComputeBoundsKernel" ); // TODO: Rename to UpdateSoftBodies resetNormalsAndAreasKernel = clFunctions.compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", "" );