Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project definition for the broadphase benchmark, AMD OpenCL flavour.
-- The project is only declared when findOpenCL_AMD() returns a truthy value.
hasCL = findOpenCL_AMD()

if (hasCL) then

	-- Shared path prefixes (relative to this script).
	local bulletSrc  = "../../../../../src"
	local broadphase = bulletSrc .. "/BulletCollision/BroadphaseCollision"
	local gridShared = "../../3dGridBroadphase/Shared"
	local clInit     = "../../basic_initialize"
	local glInterop  = "../../opengl_interop"

	project "OpenCL_broadphase_benchmark_AMD"

	initOpenCL_AMD()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers.
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		bulletSrc
	}

	files {
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3D grid broadphase
		gridShared .. "/bt3dGridBroadphaseOCL.cpp",
		gridShared .. "/bt3dGridBroadphaseOCL.h",
		gridShared .. "/btGpu3DGridBroadphase.cpp",
		gridShared .. "/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		bulletSrc .. "/LinearMath/btAlignedAllocator.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.h",
		broadphase .. "/btBroadphaseProxy.cpp",
		broadphase .. "/btOverlappingPairCache.cpp",
		broadphase .. "/btSimpleBroadphase.cpp",
		-- OpenCL utilities and OpenGL interop
		clInit .. "/btOpenCLUtils.cpp",
		clInit .. "/btOpenCLUtils.h",
		glInterop .. "/btOpenCLGLInteropBuffer.cpp",
		glInterop .. "/btOpenCLGLInteropBuffer.h",
		glInterop .. "/btStopwatch.cpp",
		glInterop .. "/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project definition for the broadphase benchmark, Intel OpenCL flavour.
-- The project is only declared when findOpenCL_Intel() returns a truthy value.
hasCL = findOpenCL_Intel()

if (hasCL) then

	-- Shared path prefixes (relative to this script).
	local bulletSrc  = "../../../../../src"
	local broadphase = bulletSrc .. "/BulletCollision/BroadphaseCollision"
	local gridShared = "../../3dGridBroadphase/Shared"
	local clInit     = "../../basic_initialize"
	local glInterop  = "../../opengl_interop"

	project "OpenCL_broadphase_benchmark_Intel"

	initOpenCL_Intel()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers.
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		bulletSrc
	}

	files {
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3D grid broadphase
		gridShared .. "/bt3dGridBroadphaseOCL.cpp",
		gridShared .. "/bt3dGridBroadphaseOCL.h",
		gridShared .. "/btGpu3DGridBroadphase.cpp",
		gridShared .. "/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		bulletSrc .. "/LinearMath/btAlignedAllocator.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.h",
		broadphase .. "/btBroadphaseProxy.cpp",
		broadphase .. "/btOverlappingPairCache.cpp",
		broadphase .. "/btSimpleBroadphase.cpp",
		-- OpenCL utilities and OpenGL interop
		clInit .. "/btOpenCLUtils.cpp",
		clInit .. "/btOpenCLUtils.h",
		glInterop .. "/btOpenCLGLInteropBuffer.cpp",
		glInterop .. "/btOpenCLGLInteropBuffer.h",
		glInterop .. "/btStopwatch.cpp",
		glInterop .. "/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,49 @@
|
||||
|
||||
-- Premake project definition for the broadphase benchmark, NVIDIA OpenCL flavour.
-- The project is only declared when findOpenCL_NVIDIA() returns a truthy value.
hasCL = findOpenCL_NVIDIA()

if (hasCL) then

	-- Shared path prefixes (relative to this script).
	local bulletSrc  = "../../../../../src"
	local broadphase = bulletSrc .. "/BulletCollision/BroadphaseCollision"
	local gridShared = "../../3dGridBroadphase/Shared"
	local clInit     = "../../basic_initialize"
	local glInterop  = "../../opengl_interop"

	project "OpenCL_broadphase_benchmark_NVIDIA"

	initOpenCL_NVIDIA()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	-- OpenGL / GLUT / GLEW setup helpers.
	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		bulletSrc
	}

	files {
		-- benchmark sources
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		-- shared 3D grid broadphase
		gridShared .. "/bt3dGridBroadphaseOCL.cpp",
		gridShared .. "/bt3dGridBroadphaseOCL.h",
		gridShared .. "/btGpu3DGridBroadphase.cpp",
		gridShared .. "/btGpu3DGridBroadphase.h",
		-- Bullet sources compiled directly into the benchmark
		bulletSrc .. "/LinearMath/btAlignedAllocator.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.cpp",
		bulletSrc .. "/LinearMath/btQuickprof.h",
		broadphase .. "/btBroadphaseProxy.cpp",
		broadphase .. "/btOverlappingPairCache.cpp",
		broadphase .. "/btSimpleBroadphase.cpp",
		-- OpenCL utilities and OpenGL interop
		clInit .. "/btOpenCLUtils.cpp",
		clInit .. "/btOpenCLUtils.h",
		glInterop .. "/btOpenCLGLInteropBuffer.cpp",
		glInterop .. "/btOpenCLGLInteropBuffer.h",
		glInterop .. "/btStopwatch.cpp",
		glInterop .. "/btStopwatch.h"
	}

end
|
||||
@@ -0,0 +1,335 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// 3x3 matrix stored as three float4 rows.
typedef struct
{
	float4 m_row[3];
} Matrix3x3;

typedef unsigned int u32;

// Per-body state written by setupBodiesKernel and read back by copyVelocitiesKernel.
// NOTE(review): field order presumably has to match a host-side struct - confirm before reordering.
typedef struct
{
	float4 m_pos;
	float4 m_quat;
	float4 m_linVel;
	float4 m_angVel;

	u32 m_shapeIdx;
	u32 m_shapeType;
	float m_invMass;
	float m_restituitionCoeff;
	float m_frictionCoeff;
} Body;

// Inverse inertia of a body: the current one (m_invInertia) and the initial one it is derived from.
typedef struct
{
	Matrix3x3 m_invInertia;
	Matrix3x3 m_initInvInertia;
} Shape;
|
||||
|
||||
|
||||
// Converts a quaternion (x y z w) into a 3x3 matrix whose entries are the
// ABSOLUTE VALUES of the corresponding rotation-matrix entries (every term is
// passed through fabs). The w component of each row is set to zero.
__inline
Matrix3x3 qtGetRotationMatrix(float4 quat)
{
	float4 sq = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
	Matrix3x3 absRot;

	absRot.m_row[0] = (float4)(
		fabs(1-2*sq.y-2*sq.z),
		fabs(2*quat.x*quat.y-2*quat.w*quat.z),
		fabs(2*quat.x*quat.z+2*quat.w*quat.y),
		0.f);

	absRot.m_row[1] = (float4)(
		fabs(2*quat.x*quat.y+2*quat.w*quat.z),
		fabs(1-2*sq.x-2*sq.z),
		fabs(2*quat.y*quat.z-2*quat.w*quat.x),
		0.f);

	absRot.m_row[2] = (float4)(
		fabs(2*quat.x*quat.z-2*quat.w*quat.y),
		fabs(2*quat.y*quat.z+2*quat.w*quat.x),
		fabs(1-2*sq.x-2*sq.y),
		0.f);

	return absRot;
}
|
||||
|
||||
|
||||
// One corner of an axis-aligned bounding box: three float coordinates plus an
// unsigned index. The AABB kernels store the min corner at [2*i] and the max
// corner at [2*i+1] and write the node id into uw of both.
typedef struct
{
	float fx;
	float fy;
	float fz;
	unsigned int uw;
} btAABBCL;
|
||||
|
||||
// Returns the transpose of m. The w component of every output row is zero.
__inline
Matrix3x3 mtTranspose(Matrix3x3 m)
{
	Matrix3x3 t;

	t.m_row[0].x = m.m_row[0].x;
	t.m_row[0].y = m.m_row[1].x;
	t.m_row[0].z = m.m_row[2].x;
	t.m_row[0].w = 0.f;

	t.m_row[1].x = m.m_row[0].y;
	t.m_row[1].y = m.m_row[1].y;
	t.m_row[1].z = m.m_row[2].y;
	t.m_row[1].w = 0.f;

	t.m_row[2].x = m.m_row[0].z;
	t.m_row[2].y = m.m_row[1].z;
	t.m_row[2].z = m.m_row[2].z;
	t.m_row[2].w = 0.f;

	return t;
}
|
||||
|
||||
// Dot product of the xyz parts of a and b; the w components are ignored.
__inline
float dot3F4(float4 a, float4 b)
{
	// a and b are by-value copies so clearing w here does not affect the caller.
	a.w = 0.f;
	b.w = 0.f;
	return dot(a, b);
}
|
||||
|
||||
|
||||
// Matrix product a * b. Each output entry is the 3-component dot product of a
// row of a with a row of transpose(b); the w component of every row is zero.
__inline
Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
{
	Matrix3x3 bT;
	bT = mtTranspose( b );

	// The w components are cleared ahead of the loop as the original author
	// reported trouble when doing it inside the loop body.
	a.m_row[0].w = 0.f;
	a.m_row[1].w = 0.f;
	a.m_row[2].w = 0.f;

	Matrix3x3 product;
	for(int r=0; r<3; r++)
	{
		float4 row = a.m_row[r];
		product.m_row[r] = (float4)(
			dot3F4(row, bT.m_row[0]),
			dot3F4(row, bT.m_row[1]),
			dot3F4(row, bT.m_row[2]),
			0.f);
	}
	return product;
}
|
||||
|
||||
|
||||
//apply gravity
|
||||
//update world inverse inertia tensor
|
||||
//copy velocity from arrays to bodies
|
||||
//copy transforms from buffer to bodies
|
||||
|
||||
// Rotation matrix of the quaternion (x y z w) with the signs of all entries
// preserved. qtGetRotationMatrix() in this file applies fabs to every entry
// and therefore cannot be used to rotate an inertia tensor.
__inline
Matrix3x3 qtGetSignedRotationMatrix(float4 quat)
{
	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
	Matrix3x3 out;

	out.m_row[0] = (float4)(
		1-2*quat2.y-2*quat2.z,
		2*quat.x*quat.y-2*quat.w*quat.z,
		2*quat.x*quat.z+2*quat.w*quat.y,
		0.f);

	out.m_row[1] = (float4)(
		2*quat.x*quat.y+2*quat.w*quat.z,
		1-2*quat2.x-2*quat2.z,
		2*quat.y*quat.z-2*quat.w*quat.x,
		0.f);

	out.m_row[2] = (float4)(
		2*quat.x*quat.z-2*quat.w*quat.y,
		2*quat.y*quat.z+2*quat.w*quat.x,
		1-2*quat2.x-2*quat2.y,
		0.f);

	return out;
}

// One work-item per body (nodeID < numNodes). For bodies with non-zero inverse mass:
//  - adds gravity (0 -9.8 0) scaled by a fixed 1/60 s time step to linVel[nodeID]
//  - copies position and orientation from g_vertexBuffer into gBodies[nodeID]
//  - copies the xyz of linVel / pAngVel into the body (w forced to zero)
//  - recomputes the inverse inertia as R * initInvInertia * transpose(R)
// For bodies with zero inverse mass only the body velocities are zeroed.
// Positions are read at [nodeID + startOffset/4] and orientations numNodes
// entries later. NOTE(review): startOffset is presumably expressed in floats
// (hence the /4 to index float4) - confirm against the host code.
__kernel void
setupBodiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
		__global float4 *linVel,
		__global float4 *pAngVel,
		__global Body* gBodies, __global Shape* bodyInertias
		)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}

	float inverseMass = gBodies[nodeID].m_invMass;
	if (inverseMass == 0.f)
	{
		gBodies[nodeID].m_linVel = (float4)(0.f,0.f,0.f,0.f);
		gBodies[nodeID].m_angVel = (float4)(0.f,0.f,0.f,0.f);
		return;
	}

	float timeStep = 0.0166666f;

	float4 position = g_vertexBuffer[nodeID + startOffset/4];
	float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

	float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);
	linVel[nodeID] += gravityAcceleration * timeStep;

	gBodies[nodeID].m_pos = position;
	gBodies[nodeID].m_quat = orientation;

	gBodies[nodeID].m_linVel = (float4)(linVel[nodeID].xyz,0.f);
	gBodies[nodeID].m_angVel = (float4)(pAngVel[nodeID].xyz,0.f);

	// The inertia must be rotated with the signed rotation matrix; using the
	// absolute-value matrix gives a wrong tensor for any rotated body.
	Matrix3x3 m = qtGetSignedRotationMatrix( orientation);
	Matrix3x3 mT = mtTranspose( m );

	Matrix3x3 tmp = mtMul(m, bodyInertias[nodeID].m_initInvInertia);
	Matrix3x3 tmp2 = mtMul(tmp, mT);
	bodyInertias[nodeID].m_invInertia = tmp2;
}
|
||||
|
||||
|
||||
// One work-item per body: for bodies with non-zero inverse mass copies the xyz
// of the body's linear and angular velocity back into linVel / pAngVel with w
// set to zero. startOffset, g_vertexBuffer and bodyInertias are not used here.
__kernel void
copyVelocitiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
		__global float4 *linVel,
		__global float4 *pAngVel,
		__global Body* gBodies, __global Shape* bodyInertias
		)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}

	if (gBodies[nodeID].m_invMass == 0.f)
	{
		return;
	}

	linVel[nodeID] = (float4)(gBodies[nodeID].m_linVel.xyz,0.f);
	pAngVel[nodeID] = (float4)(gBodies[nodeID].m_angVel.xyz,0.f);
}
|
||||
|
||||
|
||||
|
||||
// One work-item per node: resets the node colour to green and writes a world
// space AABB for a box with fixed half extents of 1.01 on every axis.
// The AABB extent along each axis is the dot product of the absolute rotation
// matrix row with the half extents. The min corner goes to pAABB[2*nodeID]
// and the max corner to pAABB[2*nodeID+1]; both carry nodeID in uw.
// Buffer layout used here: positions at [startOffset/4 + i] then orientations
// numNodes entries later then colours another numNodes entries later.
__kernel void
initializeGpuAabbsSimple( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}

	float4 position = g_vertexBuffer[nodeID + startOffset/4];
	float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

	// The previous colour is not needed so it is overwritten without being read.
	float4 green = (float4)(.4f,1.f,.4f,1.f);
	g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

	float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);

	Matrix3x3 abs_b = qtGetRotationMatrix(orientation);

	float4 extent = (float4) (
		dot(abs_b.m_row[0],halfExtents),
		dot(abs_b.m_row[1],halfExtents),
		dot(abs_b.m_row[2],halfExtents),
		0.f);

	pAABB[nodeID*2].fx = position.x-extent.x;
	pAABB[nodeID*2].fy = position.y-extent.y;
	pAABB[nodeID*2].fz = position.z-extent.z;
	pAABB[nodeID*2].uw = nodeID;

	pAABB[nodeID*2+1].fx = position.x+extent.x;
	pAABB[nodeID*2+1].fy = position.y+extent.y;
	pAABB[nodeID*2+1].fz = position.z+extent.z;
	pAABB[nodeID*2+1].uw = nodeID;
}
|
||||
|
||||
|
||||
|
||||
// One work-item per node: resets the node colour to green and then, when the
// body's shape index is non-negative after conversion to int, writes a world
// space AABB derived from the shape's local AABB.
// The local AABB is read from plocalShapeAABB[2*shape] (min) and [2*shape+1]
// (max); half extents are half of max-min. The extent along each axis is the
// dot product of the absolute rotation matrix row with the half extents.
// Output layout: min corner at pAABB[2*nodeID] and max corner at
// pAABB[2*nodeID+1]; both carry nodeID in uw.
__kernel void
initializeGpuAabbsFull( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global Body* gBodies, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}

	float4 position = g_vertexBuffer[nodeID + startOffset/4];
	float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

	// The previous colour is not needed so it is overwritten without being read.
	float4 green = (float4)(.4f,1.f,.4f,1.f);
	g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

	// m_shapeIdx is unsigned; values with the top bit set become negative here
	// and the body is skipped.
	int shapeIndex = gBodies[nodeID].m_shapeIdx;
	if (shapeIndex<0)
	{
		return;
	}

	btAABBCL minAabb = plocalShapeAABB[shapeIndex*2];
	btAABBCL maxAabb = plocalShapeAABB[shapeIndex*2+1];

	float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;

	Matrix3x3 abs_b = qtGetRotationMatrix(orientation);
	float4 extent = (float4) ( dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);

	pAABB[nodeID*2].fx = position.x-extent.x;
	pAABB[nodeID*2].fy = position.y-extent.y;
	pAABB[nodeID*2].fz = position.z-extent.z;
	pAABB[nodeID*2].uw = nodeID;

	pAABB[nodeID*2+1].fx = position.x+extent.x;
	pAABB[nodeID*2+1].fy = position.y+extent.y;
	pAABB[nodeID*2+1].fz = position.z+extent.z;
	pAABB[nodeID*2+1].uw = nodeID;
}
|
||||
|
||||
|
||||
// One work-item per overlapping pair: paints both members of the pair red.
// Colours live 2*numNodes entries after the position at startOffset/4.
__kernel void
broadphaseColorKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global int2* pOverlappingPairs, const int numOverlap)
{
	int pairIndex = get_global_id(0);
	if( pairIndex >= numOverlap )
	{
		return;
	}

	int2 pair = pOverlappingPairs[pairIndex];
	float4 red = (float4)(1.f,0.4f,0.4f,1.f);

	g_vertexBuffer[pair.x + startOffset/4+numNodes+numNodes] = red;
	g_vertexBuffer[pair.y + startOffset/4+numNodes+numNodes] = red;
}
|
||||
|
||||
|
||||
|
||||
// Brute-force proximity colouring. One work-item per node compares its
// position with every other node and writes exactly one colour:
//  - blue when another node has exactly the same xyz position
//  - red scaled by 0.25 per neighbour with squared distance below 7
//  - green otherwise
// Positions are read at [startOffset/4 + i]; colours are written 2*numNodes
// entries later.
__kernel void
broadphaseKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer)
{
	int nodeID = get_global_id(0);
	if( nodeID >= numNodes )
	{
		return;
	}

	float4 position = g_vertexBuffer[nodeID + startOffset/4];

	float4 red = (float4)(1.f,0.f,0.f,0.f);
	float4 green = (float4)(0.f,1.f,0.f,0.f);
	float4 blue = (float4)(0.f,0.f,1.f,0.f);
	float overlap=0;
	int equal = 0;

	for (int i=0;i<numNodes;i++)
	{
		if (i==nodeID)
		{
			continue;
		}

		float4 otherPosition = g_vertexBuffer[i + startOffset/4];
		if ((otherPosition.x == position.x)&&
			(otherPosition.y == position.y)&&
			(otherPosition.z == position.z))
		{
			equal=1;
		}

		float distsqr =
			((otherPosition.x - position.x)* (otherPosition.x - position.x))+
			((otherPosition.y - position.y)* (otherPosition.y - position.y))+
			((otherPosition.z - position.z)* (otherPosition.z - position.z));

		if (distsqr<7.f)
		{
			overlap+=0.25f;
		}
	}

	// Single store of the final colour; the colour slot is never read by this kernel.
	float4 result = green;
	if (equal)
	{
		result = blue;
	} else if (overlap>0.f)
	{
		result = red*overlap;
	}
	g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = result;
}
|
||||
|
||||
);
|
||||
@@ -0,0 +1,231 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifdef RELEASE_ME
|
||||
#define COMPUTE_AABB_KERNEL_PATH "computeAabbKernelOCL.cl"
|
||||
#else
|
||||
#define COMPUTE_AABB_KERNEL_PATH "..\\..\\opencl\\broadphase_benchmark\\computeAabbKernelOCL"
|
||||
#endif
|
||||
|
||||
|
||||
#include "btGridBroadphaseCl.h"
|
||||
#include "LinearMath/btQuickprof.h"
|
||||
#include "Adl/Adl.h"
|
||||
#include "AdlPrimitives/Math/Math.h"
|
||||
|
||||
#include "Adl/AdlKernel.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
#define MSTRINGIFY(A) #A
|
||||
static const char* spComputeAabbSource=
|
||||
#include "computeAabbKernelOCL.cl"
|
||||
|
||||
// Host-side AABB record: min corner plus index followed by max corner plus index.
// NOTE(review): has the same field sequence as btAabbCL in computeAabbKernelOCL.cl;
// presumably meant to mirror it - confirm before changing the layout.
struct btTmpAabb
{
	// min corner
	float			minfx;
	float			minfy;
	float			minfz;
	unsigned int	index0;

	// max corner
	float			maxfx;
	float			maxfy;
	float			maxfz;
	unsigned int	index1;
};
|
||||
|
||||
|
||||
|
||||
|
||||
// Forwards all grid parameters to bt3dGridBroadphaseOCL, then
//  - fetches the computeAabb / countOverlappingpairs / squeezePairCaches kernels
//    from the ADL device
//  - allocates the constant buffer for MyAabbConstData
//  - creates the device buffer m_dAllOverlappingPairs
//    (m_maxHandles * m_maxPairsPerBody entries of MyUint2) and zero-fills it
//    by uploading the cleared host array m_hAllOverlappingPairs.
btGridBroadphaseCl::btGridBroadphaseCl(	btOverlappingPairCache* overlappingPairCache,
							const btVector3& cellSize,
							int gridSizeX, int gridSizeY, int gridSizeZ,
							int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
							btScalar maxSmallProxySize,
							int maxSmallProxiesPerCell,
							cl_context context,
							cl_device_id device,
							cl_command_queue queue,
							adl::DeviceCL* deviceCL)
:bt3dGridBroadphaseOCL(overlappingPairCache,cellSize,
			gridSizeX, gridSizeY, gridSizeZ,
			maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy,
			maxSmallProxySize,maxSmallProxiesPerCell,
			context,device,queue,deviceCL)
{
	m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource);

	m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource);

	m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource);

	m_aabbConstBuffer = new adl::Buffer<MyAabbConstData >(m_deviceCL,1,adl::BufferBase::BUFFER_CONST);

	// One byte count for creation, clearing and upload so the three can never disagree.
	// sizeof comes first so the multiplication is carried out in size_t.
	const size_t memSize = sizeof(MyUint2) * static_cast<size_t>(m_maxHandles) * static_cast<size_t>(m_maxPairsPerBody);

	cl_int ciErrNum=0;
	m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	// Validate the buffer before it is used as an upload target.
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memset(m_hAllOverlappingPairs, 0x00, memSize);
	copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, memSize);
}
|
||||
|
||||
// Releases the resources acquired by the constructor: the device pair buffer
// and the ADL constant buffer.
btGridBroadphaseCl::~btGridBroadphaseCl()
{
	clReleaseMemObject(m_dAllOverlappingPairs);
	delete m_aabbConstBuffer;
}
|
||||
|
||||
|
||||
|
||||
// Intentionally a no-op. The previous body returned unconditionally on its
// first statement, so the handle bookkeeping that followed never executed;
// that unreachable code has been removed.
// NOTE(review): presumably the AABBs are filled in on the device before
// calculateOverlappingPairs() runs - confirm against the caller.
void btGridBroadphaseCl::prepareAABB(float* positions, int numObjects)
{
	(void)positions;
	(void)numObjects;
}
|
||||
void btGridBroadphaseCl::calcHashAABB()
|
||||
{
|
||||
bt3dGridBroadphaseOCL::calcHashAABB();
|
||||
}
|
||||
|
||||
|
||||
void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int numObjects)
|
||||
{
|
||||
btDispatcher* dispatcher=0;
|
||||
|
||||
// update constants
|
||||
{
|
||||
BT_PROFILE("setParameters");
|
||||
setParameters(&m_params);
|
||||
}
|
||||
|
||||
// prepare AABB array
|
||||
{
|
||||
BT_PROFILE("prepareAABB");
|
||||
prepareAABB(positions, numObjects);
|
||||
}
|
||||
// calculate hash
|
||||
{
|
||||
BT_PROFILE("calcHashAABB");
|
||||
calcHashAABB();
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("sortHash");
|
||||
// sort bodies based on hash
|
||||
sortHash();
|
||||
}
|
||||
|
||||
// find start of each cell
|
||||
{
|
||||
BT_PROFILE("findCellStart");
|
||||
findCellStart();
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("findOverlappingPairs");
|
||||
// findOverlappingPairs (small/small)
|
||||
findOverlappingPairs();
|
||||
}
|
||||
|
||||
// add pairs to CPU cache
|
||||
{
|
||||
BT_PROFILE("computePairCacheChanges");
|
||||
#if 0
|
||||
computePairCacheChanges();
|
||||
#else
|
||||
int ciErrNum=0;
|
||||
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
|
||||
size_t localWorkSize=64;
|
||||
size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize);
|
||||
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 );
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
ciErrNum = clFlush(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
{
|
||||
BT_PROFILE("scanOverlappingPairBuff");
|
||||
scanOverlappingPairBuff(false);
|
||||
}
|
||||
{
|
||||
BT_PROFILE("squeezeOverlappingPairBuff");
|
||||
//#define FORCE_CPU
|
||||
#ifdef FORCE_CPU
|
||||
bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff();
|
||||
copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum
|
||||
#else
|
||||
//squeezeOverlappingPairBuff();
|
||||
int ciErrNum = 0;
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs);
|
||||
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB);
|
||||
|
||||
size_t workGroupSize = 64;
|
||||
size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);
|
||||
|
||||
|
||||
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 );
|
||||
oclCHECKERROR(ciErrNum, CL_SUCCESS);
|
||||
|
||||
|
||||
// copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum
|
||||
// clFinish(m_cqCommandQue);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifndef GRID_BROADPHASE_CL_H
|
||||
#define GRID_BROADPHASE_CL_H
|
||||
|
||||
#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h"
|
||||
|
||||
#include "Adl/Adl.h"
|
||||
#include "Adl/AdlKernel.h"
|
||||
|
||||
|
||||
// Constant-buffer payload used as the element type of m_aabbConstBuffer.
// Has the same two int fields as MyAabbConstDataCL in computeAabbKernelOCL.cl.
struct MyAabbConstData
{
	int		bla;		// NOTE(review): purpose not evident from this file - confirm or rename
	int		numElem;	// compared against the work-item id in the computeAabb kernel
};
|
||||
|
||||
|
||||
|
||||
class btGridBroadphaseCl : public bt3dGridBroadphaseOCL
|
||||
{
|
||||
protected:
|
||||
|
||||
adl::Kernel* m_computeAabbKernel;
|
||||
adl::Kernel* m_countOverlappingPairs;
|
||||
adl::Kernel* m_squeezePairCaches;
|
||||
|
||||
|
||||
adl::Buffer<MyAabbConstData>* m_aabbConstBuffer;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
cl_mem m_dAllOverlappingPairs;
|
||||
|
||||
|
||||
btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache,
|
||||
const btVector3& cellSize,
|
||||
int gridSizeX, int gridSizeY, int gridSizeZ,
|
||||
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
|
||||
btScalar maxSmallProxySize,
|
||||
int maxSmallProxiesPerCell = 4,
|
||||
cl_context context = NULL,
|
||||
cl_device_id device = NULL,
|
||||
cl_command_queue queue = NULL,
|
||||
adl::DeviceCL* deviceCL=0
|
||||
);
|
||||
|
||||
virtual void prepareAABB(float* positions, int numObjects);
|
||||
virtual void calcHashAABB();
|
||||
|
||||
void calculateOverlappingPairs(float* positions, int numObjects);
|
||||
|
||||
virtual ~btGridBroadphaseCl();
|
||||
|
||||
};
|
||||
|
||||
#endif //GRID_BROADPHASE_CL_H
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Constant data passed by value to computeAabb; numElem bounds the work-item id.
typedef struct
{
	int bla;
	int numElem;
} MyAabbConstDataCL ;

// Axis-aligned bounding box: min corner plus index followed by max corner plus index.
typedef struct
{
	float minfx;
	float minfy;
	float minfz;
	unsigned int index0;

	float maxfx;
	float maxfy;
	float maxfz;
	unsigned int index1;
} btAabbCL;
|
||||
|
||||
|
||||
// One work-item per element (nodeID < cb.numElem): writes an AABB of half
// extent 1 on every axis centred on positions[nodeID] and stores nodeID in
// both index fields.
__kernel void computeAabb( __global btAabbCL* aabbs,__global float4* positions, MyAabbConstDataCL cb)
{
	int nodeID = get_global_id(0);
	if( nodeID >= cb.numElem )
	{
		return;
	}

	float4 center = positions[nodeID];
	__global btAabbCL* box = aabbs + nodeID;

	box->minfx = center.x -1.f;
	box->minfy = center.y -1.f;
	box->minfz = center.z -1.f;
	box->index0 = nodeID;

	box->maxfx = center.x +1.f;
	box->maxfy = center.y +1.f;
	box->maxfz = center.z +1.f;
	box->index1 = nodeID;
}
|
||||
|
||||
|
||||
// One work-item per object: counts the entries in the object's slice of
// pPairBuff that have any of the 0x60000000 flag bits set (new or existing
// pairs) and stores the count in pPairScan[index+1].
// The object's handle is the bit pattern of the w component of its min AABB
// corner (pAABB[index*2]); its slice is described by
// pPairBuffStartCurr[handle] as (start and current length).
__kernel void countOverlappingpairs(  int numObjects,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global int* pPairScan,
										__global float4* pAABB )
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}

	int handleIndex = as_int(pAABB[index * 2].w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	__global int* pairs = pPairBuff + start_curr.x;
	int pairCount = start_curr.y;

	int num_changes = 0;
	for(int k = 0; k < pairCount; k++)
	{
		// either new or existing pairs (ignore old non-overlapping pairs)
		if((pairs[k] & 0x60000000) != 0)
		{
			num_changes++;
		}
	}
	pPairScan[index+1] = num_changes;
}
|
||||
|
||||
|
||||
// One work-item per object: walks the object's slice of pPairBuff and for
// every entry with any 0x60000000 flag bit set
//  - appends (handle and entry-without-flags) to pPairOut starting at offset
//    pPairScan[index+1]
//  - compacts the entry-without-flags to the front of the slice in place.
// Finally stores (start and number of kept entries) back into
// pPairBuffStartCurr[handle]. The handle is the bit pattern of the w
// component of the object's min AABB corner.
__kernel void squeezePairCaches(  int numObjects,
									__global int* pPairBuff,
									__global int2* pPairBuffStartCurr,
									__global int* pPairScan,
									__global int2* pPairOut,
									__global float4* pAABB )
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}

	int handleIndex = as_int(pAABB[index * 2].w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;

	__global int* slice = pPairBuff + start;
	__global int2* pOut = pPairOut + pPairScan[index+1];

	int num = 0;
	for(int k = 0; k < curr; k++)
	{
		int entry = slice[k];
		if((entry & 0x60000000) == 0)
		{
			continue;
		}

		int cleared = entry & (~0x60000000);

		int2 newpair;
		newpair.x = handleIndex;
		newpair.y = cleared;
		pOut[num] = newpair;

		// num never exceeds k so the in-place write cannot clobber unread entries.
		slice[num] = cleared;
		num++;
	}

	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = num;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
}
|
||||
);
|
||||
@@ -0,0 +1,204 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#include "findPairsOpenCL.h"
|
||||
#include "../basic_initialize/btOpenCLUtils.h"
|
||||
|
||||
#define MSTRINGIFY(A) #A
|
||||
static char* broadphaseKernelString =
|
||||
#include "broadphaseKernel.cl"
|
||||
|
||||
#define GRID_BROADPHASE_PATH "..\\..\\opencl\\broadphase_benchmark\\broadphaseKernel.cl"
|
||||
|
||||
|
||||
|
||||
|
||||
// Stores the OpenCL context / queue / device in fpio, compiles the embedded
// broadphase kernel source once and creates the six kernels used by the
// benchmark from the resulting program.
// maxHandles and maxPairsPerBody are not used by this function.
// NOTE(review): pErrNum is overwritten by each call and never inspected.
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles, int maxPairsPerBody)
{
	fpio.m_mainContext = cxMainContext;
	fpio.m_cqCommandQue = commandQueue;
	fpio.m_device = device;

	cl_int pErrNum;
	cl_program prog = btOpenCLUtils::compileCLProgramFromString(cxMainContext, device, broadphaseKernelString, &pErrNum ,"",GRID_BROADPHASE_PATH);

	// brute-force broadphase and AABB initialisation
	fpio.m_broadphaseBruteForceKernel =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseKernel" ,&pErrNum,prog);
	fpio.m_initializeGpuAabbsKernelSimple =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsSimple" ,&pErrNum,prog);
	fpio.m_initializeGpuAabbsKernelFull =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsFull" ,&pErrNum,prog);

	// colouring of overlapping pairs
	fpio.m_broadphaseColorKernel =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseColorKernel" ,&pErrNum,prog);

	// body setup and velocity copy-back
	fpio.m_setupBodiesKernel =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "setupBodiesKernel" ,&pErrNum,prog);
	fpio.m_copyVelocitiesKernel =
		btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "copyVelocitiesKernel" ,&pErrNum,prog);
}
|
||||
|
||||
/// Enqueues the brute-force broadphase kernel with one work-item per object.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer. Every OpenCL status code is checked with
/// oclCHECKERROR. Nothing is enqueued when fpio.m_numObjects is not positive.
void findPairsOpenCLBruteForce(btFindPairsIO& fpio)
{
	cl_int ciErrNum = CL_SUCCESS;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects <= 0)
	{
		// An empty NDRange is rejected by clEnqueueNDRangeKernel.
		return;
	}

	size_t numWorkItems = (size_t)numObjects;
	size_t workGroupSize = 64;

	// An explicit local size is only valid when it divides the global size.
	// Otherwise pass NULL so the runtime picks one and exactly numObjects
	// work-items still run (no reliance on a bounds check inside the kernel).
	const size_t* localSize = (numWorkItems % workGroupSize == 0) ? &workGroupSize : NULL;

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseBruteForceKernel, 1, NULL, &numWorkItems, localSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
/// Enqueues the initializeGpuAabbsFull kernel.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer, 3 = bodies, 4 = fpio.m_dlocalShapeAABB,
/// 5 = fpio.m_dAABB.
///
/// @param fpio    kernel, queue and buffers to use
/// @param bodies  buffer bound to kernel argument 3
///
/// Every OpenCL status code is checked with oclCHECKERROR. The global size is
/// fpio.m_numObjects rounded up to a whole number of 64-wide work-groups;
/// nothing is enqueued when there are no objects.
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies)
{
	cl_int ciErrNum = CL_SUCCESS;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 3, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 4, sizeof(cl_mem), (void*)&fpio.m_dlocalShapeAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 5, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects <= 0)
	{
		// Nothing to process; an empty NDRange would be rejected.
		return;
	}

	size_t workGroupSize = 64;
	// Round up to the next multiple of the work-group size without adding a
	// spare group when numObjects is already a multiple of it.
	size_t numWorkItems = workGroupSize*(((size_t)numObjects + workGroupSize - 1) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelFull, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
/// Enqueues the initializeGpuAabbsSimple kernel.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer, 3 = fpio.m_dAABB.
///
/// Every OpenCL status code is checked with oclCHECKERROR. The global size is
/// fpio.m_numObjects rounded up to a whole number of 64-wide work-groups;
/// nothing is enqueued when there are no objects.
void setupGpuAabbsSimple(btFindPairsIO& fpio)
{
	cl_int ciErrNum = CL_SUCCESS;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 1, sizeof(int), &numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 3, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects <= 0)
	{
		// Nothing to process; an empty NDRange would be rejected.
		return;
	}

	size_t workGroupSize = 64;
	// Round up to the next multiple of the work-group size without adding a
	// spare group when numObjects is already a multiple of it.
	size_t numWorkItems = workGroupSize*(((size_t)numObjects + workGroupSize - 1) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelSimple, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
|
||||
|
||||
|
||||
/// Enqueues the setupBodiesKernel.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer, 3 = linVelMem, 4 = angVelMem, 5 = bodies,
/// 6 = bodyInertias.
///
/// @param fpio          kernel, queue and object buffer to use
/// @param linVelMem     buffer bound to kernel argument 3
/// @param angVelMem     buffer bound to kernel argument 4
/// @param bodies        buffer bound to kernel argument 5
/// @param bodyInertias  buffer bound to kernel argument 6
///
/// Every OpenCL status code is checked with oclCHECKERROR. The kernel is only
/// enqueued when there is at least one object.
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	cl_int ciErrNum = CL_SUCCESS;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 1, sizeof(int), &fpio.m_numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 5, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_setupBodiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects > 0)
	{
		size_t workGroupSize = 64;
		// Round up to the next multiple of the work-group size without adding
		// a spare group when numObjects is already a multiple of it.
		size_t numWorkItems = workGroupSize*(((size_t)numObjects + workGroupSize - 1) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_setupBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
|
||||
/// Enqueues the copyVelocitiesKernel.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer, 3 = linVelMem, 4 = angVelMem, 5 = bodies,
/// 6 = bodyInertias.
///
/// @param fpio          kernel, queue and object buffer to use
/// @param linVelMem     buffer bound to kernel argument 3
/// @param angVelMem     buffer bound to kernel argument 4
/// @param bodies        buffer bound to kernel argument 5
/// @param bodyInertias  buffer bound to kernel argument 6
///
/// Every OpenCL status code is checked with oclCHECKERROR. The kernel is only
/// enqueued when there is at least one object.
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	cl_int ciErrNum = CL_SUCCESS;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 1, sizeof(int), &fpio.m_numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 5, sizeof(cl_mem), (void*)&bodies);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_copyVelocitiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects > 0)
	{
		size_t workGroupSize = 64;
		// Round up to the next multiple of the work-group size without adding
		// a spare group when numObjects is already a multiple of it.
		size_t numWorkItems = workGroupSize*(((size_t)numObjects + workGroupSize - 1) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_copyVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
/// Enqueues the broadphaseColorKernel over the overlapping pairs.
///
/// Kernel arguments: 0 = fpio.m_positionOffset, 1 = fpio.m_numObjects,
/// 2 = fpio.m_clObjectsBuffer, 3 = fpio.m_dAllOverlappingPairs,
/// 4 = fpio.m_numOverlap.
///
/// Every OpenCL status code is checked with oclCHECKERROR. The global size is
/// fpio.m_numOverlap rounded up to a whole number of 64-wide work-groups; the
/// kernel is only enqueued when there is at least one overlapping pair.
void colorPairsOpenCL(btFindPairsIO& fpio)
{
	cl_int ciErrNum = CL_SUCCESS;

	int offset = fpio.m_positionOffset;

	ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 0, sizeof(int), &offset);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 1, sizeof(int), &fpio.m_numObjects);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 3, sizeof(cl_mem), (void*)&fpio.m_dAllOverlappingPairs);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clSetKernelArg(fpio.m_broadphaseColorKernel, 4, sizeof(int), &fpio.m_numOverlap);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (fpio.m_numOverlap > 0)
	{
		size_t workGroupSize = 64;
		// Round up to the next multiple of the work-group size without adding
		// a spare group when the pair count is already a multiple of it.
		size_t numWorkItems = workGroupSize*(((size_t)fpio.m_numOverlap + workGroupSize - 1) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseColorKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
|
||||
|
||||
|
||||
|
||||
/// Releases the six kernels that initFindPairs stored in fpio.
/// The context, queue, device and buffers in fpio are left untouched.
void releaseFindPairs(btFindPairsIO& fpio)
{
	// Same release order as the kernels were listed originally.
	const cl_kernel ownedKernels[] =
	{
		fpio.m_initializeGpuAabbsKernelSimple,
		fpio.m_initializeGpuAabbsKernelFull,
		fpio.m_broadphaseColorKernel,
		fpio.m_broadphaseBruteForceKernel,
		fpio.m_setupBodiesKernel,
		fpio.m_copyVelocitiesKernel
	};
	const int numOwnedKernels = (int)(sizeof(ownedKernels) / sizeof(ownedKernels[0]));

	for (int k = 0; k < numOwnedKernels; ++k)
	{
		clReleaseKernel(ownedKernels[k]);
	}
}
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Roman Ponomarev, Erwin Coumans
|
||||
|
||||
#ifndef FIND_PAIRS_H
|
||||
#define FIND_PAIRS_H
|
||||
|
||||
#include "../basic_initialize/btOpenCLInclude.h"
|
||||
|
||||
struct btKernelInfo
|
||||
{
|
||||
int m_Id;
|
||||
cl_kernel m_kernel;
|
||||
char* m_name;
|
||||
int m_workgroupSize;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct btFindPairsIO
|
||||
{
|
||||
int m_numObjects;
|
||||
|
||||
cl_mem m_clObjectsBuffer; //for memory layout details see main.cpp (todo, make it flexible)
|
||||
int m_positionOffset;//offset in m_clObjectsBuffer where position array starts
|
||||
|
||||
cl_command_queue m_cqCommandQue;
|
||||
cl_kernel m_initializeGpuAabbsKernelSimple;
|
||||
cl_kernel m_initializeGpuAabbsKernelFull;
|
||||
cl_kernel m_broadphaseColorKernel;
|
||||
cl_kernel m_broadphaseBruteForceKernel;
|
||||
|
||||
cl_kernel m_setupBodiesKernel;
|
||||
cl_kernel m_copyVelocitiesKernel;
|
||||
|
||||
cl_context m_mainContext;
|
||||
cl_device_id m_device;
|
||||
|
||||
cl_kernel m_calcHashAabbKernel;
|
||||
cl_kernel m_clearCellStartKernel;
|
||||
cl_kernel m_findCellStartKernel;
|
||||
cl_kernel m_findOverlappingPairsKernel;
|
||||
cl_kernel m_computePairChangeKernel;
|
||||
cl_kernel m_squeezePairBuffKernel;
|
||||
|
||||
|
||||
cl_mem m_dAllOverlappingPairs;
|
||||
int m_numOverlap;
|
||||
|
||||
cl_mem m_dBpParams;
|
||||
cl_mem m_dBodiesHash;
|
||||
cl_mem m_dCellStart;
|
||||
cl_mem m_dPairBuff;
|
||||
cl_mem m_dPairBuffStartCurr;
|
||||
cl_mem m_dlocalShapeAABB;
|
||||
cl_mem m_dAABB;
|
||||
cl_mem m_dPairScan;
|
||||
cl_mem m_dPairOut;
|
||||
};
|
||||
|
||||
|
||||
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles,int maxPairsPerBody = 16);
|
||||
|
||||
void findPairsOpenCLBruteForce(btFindPairsIO& fpio);
|
||||
|
||||
void setupGpuAabbsSimple(btFindPairsIO& fpio);
|
||||
|
||||
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies);
|
||||
|
||||
|
||||
void colorPairsOpenCL(btFindPairsIO& fpio);
|
||||
|
||||
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
|
||||
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
|
||||
|
||||
void releaseFindPairs(btFindPairsIO& fpio);
|
||||
|
||||
#endif //FIND_PAIRS_H
|
||||
@@ -0,0 +1,116 @@
|
||||
MSTRINGIFY(
|
||||
|
||||
// Hamilton product q1 * q2 of two quaternions stored as xyzw with w the scalar part.
float4 quatMult(float4 q1, float4 q2)
{
	return (float4)(
		q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y,
		q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z,
		q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x,
		q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z);
}
|
||||
|
||||
// Returns q scaled to unit length. When the length is not greater than zero
// the identity quaternion 0 0 0 1 is returned instead.
float4 quatNorm(float4 q)
{
	const float len = native_sqrt(dot(q, q));
	if (!(len > 0.f))
	{
		return (float4)(0.f, 0.f, 0.f, 1.f);
	}
	return q * (1.f / len);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel void
|
||||
integrateTransformsKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
|
||||
__global float4 *linVel,
|
||||
__global float4 *pAngVel,
|
||||
__global float* pBodyTimes)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
|
||||
|
||||
|
||||
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
|
||||
float mAmplitude = 66.f;
|
||||
float timeStep = 0.0166666f;
|
||||
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
|
||||
//g_vertexBuffer[nodeID + startOffset/4+numNodes] += pAngVel[nodeID];
|
||||
if (1)
|
||||
{
|
||||
float4 axis;
|
||||
//add some hardcoded angular damping
|
||||
pAngVel[nodeID].x *= 0.99f;
|
||||
pAngVel[nodeID].y *= 0.99f;
|
||||
pAngVel[nodeID].z *= 0.99f;
|
||||
|
||||
float4 angvel = pAngVel[nodeID];
|
||||
float fAngle = native_sqrt(dot(angvel, angvel));
|
||||
//limit the angular motion
|
||||
if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
|
||||
{
|
||||
fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
|
||||
}
|
||||
if(fAngle < 0.001f)
|
||||
{
|
||||
// use Taylor's expansions of sync function
|
||||
axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);
|
||||
}
|
||||
else
|
||||
{
|
||||
// sync(fAngle) = sin(c*fAngle)/t
|
||||
axis = angvel * ( native_sin(0.5f * fAngle * timeStep) / fAngle);
|
||||
}
|
||||
float4 dorn = axis;
|
||||
dorn.w = native_cos(fAngle * timeStep * 0.5f);
|
||||
float4 orn0 = g_vertexBuffer[nodeID + startOffset/4+numNodes];
|
||||
float4 predictedOrn = quatMult(dorn, orn0);
|
||||
predictedOrn = quatNorm(predictedOrn);
|
||||
g_vertexBuffer[nodeID + startOffset/4+numNodes]=predictedOrn;
|
||||
}
|
||||
|
||||
//linear velocity
|
||||
g_vertexBuffer[nodeID + startOffset/4] += linVel[nodeID] * timeStep;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
sineWaveKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
|
||||
__global float4 *linVel,
|
||||
__global float4 *pAngVel,
|
||||
__global float* pBodyTimes)
|
||||
{
|
||||
int nodeID = get_global_id(0);
|
||||
float timeStepPos = 0.000166666;
|
||||
|
||||
float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);
|
||||
float mAmplitude = 166.f;
|
||||
|
||||
|
||||
if( nodeID < numNodes )
|
||||
{
|
||||
pBodyTimes[nodeID] += timeStepPos;
|
||||
float4 position = g_vertexBuffer[nodeID + startOffset/4];
|
||||
position.x = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID])*mAmplitude*0.5f;
|
||||
position.y = native_cos(pBodyTimes[nodeID]*1.38f)*mAmplitude + native_sin(pBodyTimes[nodeID]*mAmplitude);
|
||||
position.z = native_cos(pBodyTimes[nodeID]*2.17f)*mAmplitude + native_sin(pBodyTimes[nodeID]*0.777f)*mAmplitude;
|
||||
g_vertexBuffer[nodeID + startOffset/4] = position;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
);
|
||||
1565
Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp
Normal file
1565
Extras/RigidBodyGpuPipeline/opencl/broadphase_benchmark/main.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
|
||||
-- Pulls in one sub-script per OpenCL vendor.
-- NOTE(review): presumably each directory holds the vendor-specific premake
-- project for this benchmark (as the AMD and Intel scripts do) -- confirm for NVIDIA.
include "AMD"
|
||||
include "Intel"
|
||||
include "NVIDIA"
|
||||
|
||||
Reference in New Issue
Block a user