Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

This commit is contained in:
erwin.coumans
2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
-- Premake script: OpenCL broadphase benchmark built against the AMD OpenCL SDK.
-- findOpenCL_AMD, initOpenCL_AMD, initOpenGL, initGlut and initGlew are not defined
-- in this script (presumably provided by the top-level premake file -- confirm).
-- NOTE: hasCL is assigned without 'local', so it is a global variable.
hasCL = findOpenCL_AMD()
-- The project is only declared when the SDK lookup returned a truthy value.
if (hasCL) then
project "OpenCL_broadphase_benchmark_AMD"
initOpenCL_AMD()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
initOpenGL()
initGlut()
initGlew()
-- Header search paths.
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Benchmark sources plus the individual Bullet source files that are compiled
-- directly into the executable.
files {
"../main.cpp",
"../findPairsOpenCL.cpp",
"../findPairsOpenCL.h",
"../btGridBroadphaseCL.cpp",
"../btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,49 @@
-- Premake script: OpenCL broadphase benchmark built against the Intel OpenCL SDK.
-- findOpenCL_Intel, initOpenCL_Intel, initOpenGL, initGlut and initGlew are not defined
-- in this script (presumably provided by the top-level premake file -- confirm).
-- NOTE: hasCL is assigned without 'local', so it is a global variable.
hasCL = findOpenCL_Intel()
-- The project is only declared when the SDK lookup returned a truthy value.
if (hasCL) then
project "OpenCL_broadphase_benchmark_Intel"
initOpenCL_Intel()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
initOpenGL()
initGlut()
initGlew()
-- Header search paths.
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Benchmark sources plus the individual Bullet source files that are compiled
-- directly into the executable.
files {
"../main.cpp",
"../findPairsOpenCL.cpp",
"../findPairsOpenCL.h",
"../btGridBroadphaseCL.cpp",
"../btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,49 @@
-- Premake script: OpenCL broadphase benchmark built against the NVIDIA OpenCL SDK.
-- findOpenCL_NVIDIA, initOpenCL_NVIDIA, initOpenGL, initGlut and initGlew are not defined
-- in this script (presumably provided by the top-level premake file -- confirm).
-- NOTE: hasCL is assigned without 'local', so it is a global variable.
hasCL = findOpenCL_NVIDIA()
-- The project is only declared when the SDK lookup returned a truthy value.
if (hasCL) then
project "OpenCL_broadphase_benchmark_NVIDIA"
initOpenCL_NVIDIA()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
initOpenGL()
initGlut()
initGlew()
-- Header search paths.
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Benchmark sources plus the individual Bullet source files that are compiled
-- directly into the executable.
files {
"../main.cpp",
"../findPairsOpenCL.cpp",
"../findPairsOpenCL.h",
"../btGridBroadphaseCL.cpp",
"../btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,335 @@
MSTRINGIFY(
// 3x3 matrix stored as three float4 rows. Only x, y and z of each row carry matrix
// elements; the helpers in this file write the w lane as 0.
typedef struct
{
float4 m_row[3];
} Matrix3x3;
// Shorthand for unsigned int, used for the index fields of Body.
typedef unsigned int u32;
// Per-body state read and written by the kernels in this file.
// NOTE(review): presumably mirrors a host-side struct with the same field order --
// confirm before reordering or resizing fields.
typedef struct
{
float4 m_pos; // position; setupBodiesKernel copies it from the vertex buffer
float4 m_quat; // orientation quaternion; setupBodiesKernel copies it from the vertex buffer
float4 m_linVel; // linear velocity; the kernels write w as 0
float4 m_angVel; // angular velocity; the kernels write w as 0
u32 m_shapeIdx; // index used by initializeGpuAabbsFull to look up the local shape AABB
u32 m_shapeType; // not referenced by the kernels in this file
float m_invMass; // inverse mass; the kernels treat 0 as a body that is not integrated
float m_restituitionCoeff; // (sic) not referenced by the kernels in this file
float m_frictionCoeff; // not referenced by the kernels in this file
} Body;
// Per-body inverse inertia tensors used by setupBodiesKernel.
typedef struct
{
Matrix3x3 m_invInertia; // output: rewritten by setupBodiesKernel for every body with non-zero inverse mass
Matrix3x3 m_initInvInertia; // input: only read by setupBodiesKernel (presumably the body-space tensor -- confirm)
} Shape;
// Builds the rotation matrix of quaternion quat (components x,y,z,w) and returns the
// ELEMENT-WISE ABSOLUTE VALUE of that matrix, not the rotation itself.
// The formula assumes quat is normalized (not checked here).
// The w lane of every returned row is 0.
__inline
Matrix3x3 qtGetRotationMatrix(float4 quat)
{
	// Squared components, used on the diagonal.
	const float xx = quat.x*quat.x;
	const float yy = quat.y*quat.y;
	const float zz = quat.z*quat.z;

	Matrix3x3 absRot;
	absRot.m_row[0] = (float4)(
		fabs(1-2*yy-2*zz),
		fabs(2*quat.x*quat.y-2*quat.w*quat.z),
		fabs(2*quat.x*quat.z+2*quat.w*quat.y),
		0.f);
	absRot.m_row[1] = (float4)(
		fabs(2*quat.x*quat.y+2*quat.w*quat.z),
		fabs(1-2*xx-2*zz),
		fabs(2*quat.y*quat.z-2*quat.w*quat.x),
		0.f);
	absRot.m_row[2] = (float4)(
		fabs(2*quat.x*quat.z-2*quat.w*quat.y),
		fabs(2*quat.y*quat.z+2*quat.w*quat.x),
		fabs(1-2*xx-2*yy),
		0.f);
	return absRot;
}
// One corner of an axis-aligned bounding box plus an integer payload.
// The AABB kernels in this file store two consecutive entries per body:
// element 2*i holds the minimum corner and element 2*i+1 the maximum corner.
typedef struct
{
float fx;
float fy;
float fz;
unsigned int uw; // the AABB kernels store the body index here
} btAABBCL;
// Returns the transpose of the 3x3 part of m. The w lane of every result row is 0.
__inline
Matrix3x3 mtTranspose(Matrix3x3 m)
{
	Matrix3x3 t;

	// Row i of the result is column i of the input.
	t.m_row[0].x = m.m_row[0].x;
	t.m_row[0].y = m.m_row[1].x;
	t.m_row[0].z = m.m_row[2].x;
	t.m_row[0].w = 0.f;

	t.m_row[1].x = m.m_row[0].y;
	t.m_row[1].y = m.m_row[1].y;
	t.m_row[1].z = m.m_row[2].y;
	t.m_row[1].w = 0.f;

	t.m_row[2].x = m.m_row[0].z;
	t.m_row[2].y = m.m_row[1].z;
	t.m_row[2].z = m.m_row[2].z;
	t.m_row[2].w = 0.f;

	return t;
}
// Dot product of the xyz parts of a and b; the w lanes are ignored.
__inline
float dot3F4(float4 a, float4 b)
{
	// a and b are by-value copies, so their w lanes can be cleared in place
	// before handing them to the 4-component built-in.
	a.w = 0.f;
	b.w = 0.f;
	return dot(a, b);
}
// Returns the 3x3 matrix product a*b.
// b is transposed first so that each result element is a dot3F4 of a row of a with a
// row of the transposed b. The w lane of every result row is written as 0.
__inline
Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
{
Matrix3x3 transB;
transB = mtTranspose( b );
Matrix3x3 ans;
// The w lanes of a are cleared here, before the loop. The original author noted that
// clearing them inside the loop (the disabled line below) did not run; the cause is
// not recorded, so this ordering is kept as is.
a.m_row[0].w = 0.f;
a.m_row[1].w = 0.f;
a.m_row[2].w = 0.f;
for(int i=0; i<3; i++)
{
// a.m_row[i].w = 0.f;
ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
ans.m_row[i].w = 0.f;
}
return ans;
}
// Converts quaternion quat (components x,y,z,w, assumed normalized) to a rotation
// matrix, keeping the sign of every element. qtGetRotationMatrix in this file returns
// element-wise absolute values, which suits the AABB kernels but is not a rotation
// and therefore cannot be used to transform an inertia tensor.
__inline
Matrix3x3 qtGetSignedRotationMatrix(float4 quat)
{
	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
	Matrix3x3 out;

	out.m_row[0].x=1-2*quat2.y-2*quat2.z;
	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
	out.m_row[0].w = 0.f;

	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
	out.m_row[1].y=1-2*quat2.x-2*quat2.z;
	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
	out.m_row[1].w = 0.f;

	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
	out.m_row[2].z=1-2*quat2.x-2*quat2.y;
	out.m_row[2].w = 0.f;

	return out;
}

// Per-body setup, one work-item per body (work-items with id >= numNodes do nothing).
// For bodies with non-zero inverse mass:
//  - adds gravity (0,-9.8,0) over a fixed time step of 0.0166666 to linVel
//  - copies position and orientation from g_vertexBuffer into gBodies
//  - copies linVel / pAngVel into gBodies (w forced to 0)
//  - recomputes bodyInertias[i].m_invInertia = R * m_initInvInertia * R^T
// For bodies with zero inverse mass only the two velocities in gBodies are zeroed.
//
// startOffset    offset of the position array inside g_vertexBuffer; it is divided by 4
//                before being used as a float4 index (presumably a float count -- confirm)
// numNodes       number of bodies; orientations start numNodes float4s after the positions
// g_vertexBuffer buffer holding positions followed by orientations
// linVel         per-body linear velocity, updated in place
// pAngVel        per-body angular velocity, read only
// gBodies        per-body state, written
// bodyInertias   per-body inertia data; m_initInvInertia is read, m_invInertia written
__kernel void
setupBodiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
	__global float4 *linVel,
	__global float4 *pAngVel,
	__global Body* gBodies, __global Shape* bodyInertias
	)
{
	int nodeID = get_global_id(0);
	// Fixed simulation step; the kernel signature has no time-step argument.
	float timeStep = 0.0166666f;

	if( nodeID < numNodes )
	{
		float inverseMass = gBodies[nodeID].m_invMass;
		if (inverseMass != 0.f)
		{
			float4 position = g_vertexBuffer[nodeID + startOffset/4];
			float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

			float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);
			linVel[nodeID] += gravityAcceleration * timeStep;

			gBodies[nodeID].m_pos = position;
			gBodies[nodeID].m_quat = orientation;
			gBodies[nodeID].m_linVel = (float4)(linVel[nodeID].xyz,0.f);
			gBodies[nodeID].m_angVel = (float4)(pAngVel[nodeID].xyz,0.f);

			// The signed rotation is required here: with the absolute-value matrix
			// the product would not equal R * I * R^T for a rotated body.
			Matrix3x3 m = qtGetSignedRotationMatrix( orientation);
			Matrix3x3 mT = mtTranspose( m );
			Matrix3x3 tmp = mtMul(m, bodyInertias[nodeID].m_initInvInertia);
			Matrix3x3 tmp2 = mtMul(tmp, mT);
			bodyInertias[nodeID].m_invInertia = tmp2;
		} else
		{
			gBodies[nodeID].m_linVel = (float4)(0.f,0.f,0.f,0.f);
			gBodies[nodeID].m_angVel = (float4)(0.f,0.f,0.f,0.f);
		}
	}
}
// Copies the velocities stored in gBodies back into the linVel / pAngVel arrays
// (w forced to 0), one work-item per body. Bodies with zero inverse mass and
// work-items with id >= numNodes are skipped.
// startOffset, g_vertexBuffer and bodyInertias are not used by this kernel.
__kernel void
copyVelocitiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
	__global float4 *linVel,
	__global float4 *pAngVel,
	__global Body* gBodies, __global Shape* bodyInertias
	)
{
	const int bodyIndex = get_global_id(0);
	if( bodyIndex >= numNodes )
	{
		return;
	}

	if( gBodies[bodyIndex].m_invMass == 0.f )
	{
		return;
	}

	linVel[bodyIndex] = (float4)(gBodies[bodyIndex].m_linVel.xyz,0.f);
	pAngVel[bodyIndex] = (float4)(gBodies[bodyIndex].m_angVel.xyz,0.f);
}
// Writes a world-space AABB for every body assuming a box with half extents
// (1.01, 1.01, 1.01), and resets the body colour in g_vertexBuffer to green.
// One work-item per body; work-items with id >= numNodes do nothing.
//
// g_vertexBuffer holds, starting at float4 index startOffset/4, numNodes positions,
// then numNodes orientations, then numNodes colours.
// pAABB receives two entries per body: 2*i = minimum corner, 2*i+1 = maximum corner,
// both tagged with the body index in uw.
__kernel void
initializeGpuAabbsSimple( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);

	if( nodeID < numNodes )
	{
		float4 position = g_vertexBuffer[nodeID + startOffset/4];
		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

		// The previous colour is not needed, so it is overwritten without being read.
		float4 green = (float4)(.4f,1.f,.4f,1.f);
		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

		float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);

		// qtGetRotationMatrix returns the element-wise absolute rotation matrix, so
		// each dot product is the extent of the rotated box along one world axis.
		Matrix3x3 abs_b = qtGetRotationMatrix(orientation);
		float4 extent = (float4) (
			dot(abs_b.m_row[0],halfExtents),
			dot(abs_b.m_row[1],halfExtents),
			dot(abs_b.m_row[2],halfExtents),
			0.f);

		pAABB[nodeID*2].fx = position.x-extent.x;
		pAABB[nodeID*2].fy = position.y-extent.y;
		pAABB[nodeID*2].fz = position.z-extent.z;
		pAABB[nodeID*2].uw = nodeID;

		pAABB[nodeID*2+1].fx = position.x+extent.x;
		pAABB[nodeID*2+1].fy = position.y+extent.y;
		pAABB[nodeID*2+1].fz = position.z+extent.z;
		pAABB[nodeID*2+1].uw = nodeID;
	}
}
// Writes a world-space AABB for every body from the local AABB of its shape, and
// resets the body colour in g_vertexBuffer to green.
// One work-item per body; work-items with id >= numNodes do nothing.
//
// g_vertexBuffer  positions, orientations and colours (numNodes each), starting at
//                 float4 index startOffset/4
// gBodies         only m_shapeIdx is read
// plocalShapeAABB two entries per shape: 2*s = minimum corner, 2*s+1 = maximum corner
// pAABB           two entries per body, written the same way and tagged with the body
//                 index in uw
// Only the size of the local AABB is used; the box is centred on the body position.
__kernel void
initializeGpuAabbsFull( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global Body* gBodies, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)
{
	int nodeID = get_global_id(0);

	if( nodeID < numNodes )
	{
		float4 position = g_vertexBuffer[nodeID + startOffset/4];
		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];

		// The previous colour is not needed, so it is overwritten without being read.
		float4 green = (float4)(.4f,1.f,.4f,1.f);
		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;

		// m_shapeIdx is unsigned; converted to int, a negative value skips the body
		// and leaves its pAABB entries untouched.
		int shapeIndex = gBodies[nodeID].m_shapeIdx;
		if (shapeIndex>=0)
		{
			btAABBCL minAabb = plocalShapeAABB[shapeIndex*2];
			btAABBCL maxAabb = plocalShapeAABB[shapeIndex*2+1];

			float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;

			// Element-wise absolute rotation matrix: each dot product is the extent
			// of the rotated box along one world axis.
			Matrix3x3 abs_b = qtGetRotationMatrix(orientation);
			float4 extent = (float4) ( dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);

			pAABB[nodeID*2].fx = position.x-extent.x;
			pAABB[nodeID*2].fy = position.y-extent.y;
			pAABB[nodeID*2].fz = position.z-extent.z;
			pAABB[nodeID*2].uw = nodeID;

			pAABB[nodeID*2+1].fx = position.x+extent.x;
			pAABB[nodeID*2+1].fy = position.y+extent.y;
			pAABB[nodeID*2+1].fz = position.z+extent.z;
			pAABB[nodeID*2+1].uw = nodeID;
		}
	}
}
// Colours both bodies of every overlapping pair red, one work-item per pair.
// Work-items with id >= numOverlap do nothing.
// Colours live in g_vertexBuffer after the numNodes positions and numNodes
// orientations that start at float4 index startOffset/4.
__kernel void
broadphaseColorKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global int2* pOverlappingPairs, const int numOverlap)
{
	const int pairIndex = get_global_id(0);
	if( pairIndex >= numOverlap )
	{
		return;
	}

	const int2 bodies = pOverlappingPairs[pairIndex];
	const float4 highlight = (float4)(1.f,0.4f,0.4f,1.f);

	g_vertexBuffer[bodies.x + startOffset/4+numNodes+numNodes] = highlight;
	g_vertexBuffer[bodies.y + startOffset/4+numNodes+numNodes] = highlight;
}
// Brute-force overlap visualisation, one work-item per body: every body is compared
// against all other bodies and its colour slot in g_vertexBuffer is set to
//  - blue            if another body has exactly the same position
//  - red * overlap   otherwise, if any body is closer than sqrt(7) (overlap grows
//                    by 0.25 per such body)
//  - green           otherwise
// Work-items with id >= numNodes do nothing. Only positions are read; colours are
// written once, after the loop.
__kernel void
broadphaseKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer)
{
	int nodeID = get_global_id(0);

	if( nodeID < numNodes )
	{
		float4 position = g_vertexBuffer[nodeID + startOffset/4];

		float4 red = (float4)(1.f,0.f,0.f,0.f);
		float4 green = (float4)(0.f,1.f,0.f,0.f);
		float4 blue = (float4)(0.f,0.f,1.f,0.f);
		float overlap=0;
		int equal = 0;

		for (int i=0;i<numNodes;i++)
		{
			if (i!=nodeID)
			{
				float4 otherPosition = g_vertexBuffer[i + startOffset/4];
				if ((otherPosition.x == position.x)&&
					(otherPosition.y == position.y)&&
					(otherPosition.z == position.z))
				{
					equal=1;
				}

				float distsqr =
					((otherPosition.x - position.x)* (otherPosition.x - position.x))+
					((otherPosition.y - position.y)* (otherPosition.y - position.y))+
					((otherPosition.z - position.z)* (otherPosition.z - position.z));

				if (distsqr<7.f)
				{
					overlap+=0.25f;
				}
			}
		}

		float4 result = green;
		if (equal)
		{
			result = blue;
		} else
		{
			if (overlap>0.f)
			{
				result = red*overlap;
			}
		}
		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = result;
	}
}
);

View File

@@ -0,0 +1,231 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Roman Ponomarev, Erwin Coumans
#ifdef RELEASE_ME
#define COMPUTE_AABB_KERNEL_PATH "computeAabbKernelOCL.cl"
#else
#define COMPUTE_AABB_KERNEL_PATH "..\\..\\opencl\\broadphase_benchmark\\computeAabbKernelOCL"
#endif
#include "btGridBroadphaseCl.h"
#include "LinearMath/btQuickprof.h"
#include "Adl/Adl.h"
#include "AdlPrimitives/Math/Math.h"
#include "Adl/AdlKernel.h"
#include "../basic_initialize/btOpenCLUtils.h"
#define MSTRINGIFY(A) #A
static const char* spComputeAabbSource=
#include "computeAabbKernelOCL.cl"
// Host-side AABB record: minimum corner + index, then maximum corner + index.
// The field order matches btAabbCL in computeAabbKernelOCL.cl.
// It is not referenced by the functions visible in this file.
struct btTmpAabb
{
float minfx;
float minfy;
float minfz;
unsigned int index0;
float maxfx;
float maxfy;
float maxfz;
unsigned int index1;
} ;
// Constructs the base grid broadphase, fetches the three kernels from
// computeAabbKernelOCL.cl, and creates the device buffer that receives the compacted
// overlapping pairs (m_maxHandles * m_maxPairsPerBody entries of MyUint2), which is
// initialised to zero.
// All parameters are forwarded unchanged to bt3dGridBroadphaseOCL.
btGridBroadphaseCl::btGridBroadphaseCl(	btOverlappingPairCache* overlappingPairCache,
							const btVector3& cellSize,
							int gridSizeX, int gridSizeY, int gridSizeZ,
							int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
							btScalar maxSmallProxySize,
							int maxSmallProxiesPerCell,
							cl_context context,
							cl_device_id device,
							cl_command_queue queue,
							adl::DeviceCL* deviceCL)
:bt3dGridBroadphaseOCL(overlappingPairCache,cellSize,
			gridSizeX, gridSizeY, gridSizeZ,
			maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy,
			maxSmallProxySize,maxSmallProxiesPerCell,
			context,device,queue,deviceCL)
{
	m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource);
	m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource);
	m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource);

	m_aabbConstBuffer = new adl::Buffer<MyAabbConstData >(m_deviceCL,1,adl::BufferBase::BUFFER_CONST);

	// One size for allocation, clearing and upload, so the three can never disagree.
	// sizeof comes first so the multiplication is carried out in size_t.
	const size_t memSize = sizeof(MyUint2) * m_maxHandles * m_maxPairsPerBody;

	cl_int ciErrNum=0;
	m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	// Check the allocation before the buffer is used as a copy destination.
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memset(m_hAllOverlappingPairs, 0x00, memSize);
	copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, memSize);
}
// Releases the device buffer created in the constructor and deletes the constant
// buffer. The adl::Kernel pointers obtained through getKernel() are not released here.
btGridBroadphaseCl::~btGridBroadphaseCl()
{
clReleaseMemObject(m_dAllOverlappingPairs);
delete m_aabbConstBuffer;
}
// Intentionally a no-op: this override replaces the base-class AABB preparation with
// nothing. (The AABBs are presumably filled on the GPU by the setupGpuAabbs* functions
// in findPairsOpenCL.cpp before calculateOverlappingPairs runs -- confirm.)
//
// The previous body returned unconditionally on its first statement; the handle-index
// bookkeeping that followed was unreachable and has been removed.
//
// positions   unused
// numObjects  unused
void btGridBroadphaseCl::prepareAABB(float* positions, int numObjects)
{
	(void)positions;
	(void)numObjects;
}
// Forwards to the base-class implementation without adding any behaviour.
void btGridBroadphaseCl::calcHashAABB()
{
bt3dGridBroadphaseOCL::calcHashAABB();
}
void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int numObjects)
{
btDispatcher* dispatcher=0;
// update constants
{
BT_PROFILE("setParameters");
setParameters(&m_params);
}
// prepare AABB array
{
BT_PROFILE("prepareAABB");
prepareAABB(positions, numObjects);
}
// calculate hash
{
BT_PROFILE("calcHashAABB");
calcHashAABB();
}
{
BT_PROFILE("sortHash");
// sort bodies based on hash
sortHash();
}
// find start of each cell
{
BT_PROFILE("findCellStart");
findCellStart();
}
{
BT_PROFILE("findOverlappingPairs");
// findOverlappingPairs (small/small)
findOverlappingPairs();
}
// add pairs to CPU cache
{
BT_PROFILE("computePairCacheChanges");
#if 0
computePairCacheChanges();
#else
int ciErrNum=0;
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects);
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB);
size_t localWorkSize=64;
size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 );
oclCHECKERROR(ciErrNum, CL_SUCCESS);
ciErrNum = clFlush(m_cqCommandQue);
#endif
}
{
BT_PROFILE("scanOverlappingPairBuff");
scanOverlappingPairBuff(false);
}
{
BT_PROFILE("squeezeOverlappingPairBuff");
//#define FORCE_CPU
#ifdef FORCE_CPU
bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff();
copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum
#else
//squeezeOverlappingPairBuff();
int ciErrNum = 0;
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects);
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs);
ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB);
size_t workGroupSize = 64;
size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 );
oclCHECKERROR(ciErrNum, CL_SUCCESS);
// copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum
// clFinish(m_cqCommandQue);
#endif
}
return;
}

View File

@@ -0,0 +1,73 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Roman Ponomarev, Erwin Coumans
#ifndef GRID_BROADPHASE_CL_H
#define GRID_BROADPHASE_CL_H
#include "../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h"
#include "Adl/Adl.h"
#include "Adl/AdlKernel.h"
// Constant data for the computeAabb kernel; same two int fields as MyAabbConstDataCL
// in computeAabbKernelOCL.cl.
struct MyAabbConstData
{
int bla; // not read by the computeAabb kernel; purpose not evident from this code
int numElem; // number of elements; computeAabb skips work-items with id >= numElem
};
// Grid broadphase that extends bt3dGridBroadphaseOCL with OpenCL kernels for counting
// and compacting overlapping pairs, leaving the resulting pair list in a device buffer.
class btGridBroadphaseCl : public bt3dGridBroadphaseOCL
{
protected:
// Kernels fetched in the constructor from computeAabbKernelOCL.cl.
adl::Kernel* m_computeAabbKernel;
adl::Kernel* m_countOverlappingPairs;
adl::Kernel* m_squeezePairCaches;
// Single-element constant buffer, allocated in the constructor and deleted in the destructor.
adl::Buffer<MyAabbConstData>* m_aabbConstBuffer;
public:
// Device buffer that receives the compacted overlapping pairs; created in the
// constructor and released in the destructor. Public so other code can bind it.
cl_mem m_dAllOverlappingPairs;
// All arguments are forwarded to bt3dGridBroadphaseOCL.
btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
btScalar maxSmallProxySize,
int maxSmallProxiesPerCell = 4,
cl_context context = NULL,
cl_device_id device = NULL,
cl_command_queue queue = NULL,
adl::DeviceCL* deviceCL=0
);
// In the accompanying .cpp this returns immediately without touching its arguments.
virtual void prepareAABB(float* positions, int numObjects);
// Forwards to the base-class implementation.
virtual void calcHashAABB();
// Runs one full broadphase pass on the device. Not declared virtual here.
void calculateOverlappingPairs(float* positions, int numObjects);
virtual ~btGridBroadphaseCl();
};
#endif //GRID_BROADPHASE_CL_H

View File

@@ -0,0 +1,112 @@
MSTRINGIFY(
// Constant data passed by value to computeAabb.
typedef struct
{
int bla; // not read by any kernel in this file
int numElem; // number of elements to process; computeAabb ignores work-items beyond it
} MyAabbConstDataCL ;
// Axis-aligned bounding box: minimum corner followed by maximum corner, each
// followed by an integer slot. computeAabb stores the element index in both slots.
typedef struct
{
float minfx;
float minfy;
float minfz;
unsigned int index0;
float maxfx;
float maxfy;
float maxfz;
unsigned int index1;
} btAabbCL;
// Writes, for every element i < cb.numElem, an AABB that extends 1 unit from
// positions[i] along each axis, and stores i in both index slots.
// One work-item per element; work-items beyond cb.numElem do nothing.
__kernel void computeAabb( __global btAabbCL* aabbs,__global float4* positions, MyAabbConstDataCL cb)
{
	const int elem = get_global_id(0);
	if( elem >= cb.numElem )
	{
		return;
	}

	// Load the centre once and derive both corners from it.
	const float4 center = positions[elem];

	aabbs[elem].minfx = center.x -1.f;
	aabbs[elem].minfy = center.y -1.f;
	aabbs[elem].minfz = center.z -1.f;
	aabbs[elem].index0 = elem;

	aabbs[elem].maxfx = center.x +1.f;
	aabbs[elem].maxfy = center.y +1.f;
	aabbs[elem].maxfz = center.z +1.f;
	aabbs[elem].index1 = elem;
}
// For every object, counts the entries of its pair list that have at least one of the
// bits in 0x60000000 set (new or existing pairs; old non-overlapping pairs are
// ignored) and stores the count in pPairScan[index+1].
//
// numObjects         number of objects; work-items beyond it do nothing
// pPairBuff          pair entries of all objects
// pPairBuffStartCurr per handle: x = first entry in pPairBuff, y = number of entries
// pPairScan          output counts, shifted by one element
// pAABB              two float4 per object; the w lane of the first one holds the
//                    handle index as raw integer bits
__kernel void countOverlappingpairs(	int numObjects,
										__global int* pPairBuff,
										__global int2* pPairBuffStartCurr,
										__global int* pPairScan,
										__global float4* pAABB )
{
	const int index = get_global_id(0);
	if(index < numObjects)
	{
		const int handleIndex = as_int(pAABB[index * 2].w);
		const int2 range = pPairBuffStartCurr[handleIndex];
		const int first = range.x;
		const int count = range.y;

		int flagged = 0;
		for(int k = 0; k < count; k++)
		{
			if(pPairBuff[first + k] & 0x60000000)
			{
				flagged++;
			}
		}
		pPairScan[index+1] = flagged;
	}
}
// Compacts the pair list of every object. Each entry with a bit of 0x60000000 set is
//  - appended as (handleIndex, entry without the flag bits) to pPairOut, starting at
//    offset pPairScan[index+1], and
//  - written back, flag bits cleared, to the front of the object's own pair list.
// The object's entry count in pPairBuffStartCurr is then replaced by the number of
// entries kept; the start offset is unchanged.
//
// numObjects         number of objects; work-items beyond it do nothing
// pPairBuff          pair entries of all objects, compacted in place
// pPairBuffStartCurr per handle: x = first entry in pPairBuff, y = number of entries
// pPairScan          per-object output offsets into pPairOut
// pPairOut           output pair list
// pAABB              two float4 per object; the w lane of the first one holds the
//                    handle index as raw integer bits
__kernel void squeezePairCaches(	int numObjects,
									__global int* pPairBuff,
									__global int2* pPairBuffStartCurr,
									__global int* pPairScan,
									__global int2* pPairOut,
									__global float4* pAABB )
{
	const int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}

	const int handleIndex = as_int(pAABB[index * 2].w);
	const int2 range = pPairBuffStartCurr[handleIndex];
	const int first = range.x;
	const int count = range.y;

	__global int* entries = pPairBuff + first;
	__global int2* outPairs = pPairOut + pPairScan[index+1];

	int kept = 0;
	for(int k = 0; k < count; k++)
	{
		const int entry = entries[k];
		if(entry & 0x60000000)
		{
			const int other = entry & (~0x60000000);

			int2 newpair;
			newpair.x = handleIndex;
			newpair.y = other;
			outPairs[kept] = newpair;

			// kept never exceeds k, so this only overwrites entries already visited.
			entries[kept] = other;
			kept++;
		}
	}

	int2 newStartCurr;
	newStartCurr.x = first;
	newStartCurr.y = kept;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
}
);

View File

@@ -0,0 +1,204 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Roman Ponomarev, Erwin Coumans
#include "findPairsOpenCL.h"
#include "../basic_initialize/btOpenCLUtils.h"
#define MSTRINGIFY(A) #A
// Source of broadphaseKernel.cl as one string literal: the .cl file wraps its whole
// content in MSTRINGIFY( ... ); so including it here supplies the initializer and the
// terminating semicolon. Declared const because it points at a string literal.
static const char* broadphaseKernelString =
#include "broadphaseKernel.cl"
#define GRID_BROADPHASE_PATH "..\\..\\opencl\\broadphase_benchmark\\broadphaseKernel.cl"
// Stores the OpenCL context, queue and device in fpio, compiles the broadphase kernel
// source once and creates the six kernels used by the functions in this file.
// The kernels must later be released with releaseFindPairs().
//
// maxHandles and maxPairsPerBody are currently unused.
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles, int maxPairsPerBody)
{
	(void)maxHandles;
	(void)maxPairsPerBody;

	fpio.m_mainContext = cxMainContext;
	fpio.m_cqCommandQue = commandQueue;
	fpio.m_device = device;

	cl_int pErrNum = CL_SUCCESS;
	cl_program prog = btOpenCLUtils::compileCLProgramFromString(cxMainContext, device, broadphaseKernelString, &pErrNum ,"",GRID_BROADPHASE_PATH);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	fpio.m_broadphaseBruteForceKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_initializeGpuAabbsKernelSimple = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsSimple" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_initializeGpuAabbsKernelFull = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "initializeGpuAabbsFull" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_broadphaseColorKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "broadphaseColorKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_setupBodiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "setupBodiesKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	fpio.m_copyVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString(cxMainContext,device, broadphaseKernelString, "copyVelocitiesKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	// The program handle is not stored anywhere, so drop this function's reference.
	// Kernel objects keep their program alive until they are released themselves.
	if (prog)
	{
		clReleaseProgram(prog);
	}
}
// Launches the brute-force broadphase kernel over all fpio.m_numObjects objects.
// The kernel recolours every object in fpio.m_clObjectsBuffer according to its
// proximity to the other objects.
void findPairsOpenCLBruteForce(btFindPairsIO& fpio)
{
	int ciErrNum = 0;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 1, sizeof(int), &numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseBruteForceKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// The global size must be a whole number of work-groups when an explicit local
	// size is given, so round up the same way the other launchers in this file do.
	// The kernel ignores work-items with id >= numObjects.
	size_t workGroupSize = 64;
	size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseBruteForceKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
// Launches initializeGpuAabbsFull: computes the world-space AABB of every object from
// the local AABB of its shape and writes it to fpio.m_dAABB.
//
// bodies  device buffer bound to the kernel's gBodies argument
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies)
{
	int ciErrNum = 0;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 1, sizeof(int), &numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 3, sizeof(cl_mem), (void*)&bodies);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 4, sizeof(cl_mem), (void*)&fpio.m_dlocalShapeAABB);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelFull, 5, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// Whole number of work-groups; the kernel ignores work-items beyond numObjects.
	size_t workGroupSize = 64;
	size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelFull, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
// Launches initializeGpuAabbsSimple: computes the world-space AABB of every object
// using the fixed box size built into the kernel and writes it to fpio.m_dAABB.
void setupGpuAabbsSimple(btFindPairsIO& fpio)
{
	int ciErrNum = 0;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 1, sizeof(int), &numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	ciErrNum |= clSetKernelArg(fpio.m_initializeGpuAabbsKernelSimple, 3, sizeof(cl_mem), (void*)&fpio.m_dAABB);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// Whole number of work-groups; the kernel ignores work-items beyond numObjects.
	size_t workGroupSize = 64;
	size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_initializeGpuAabbsKernelSimple, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
// Launches setupBodiesKernel, which applies gravity and copies transforms and
// velocities into the body array. Nothing is enqueued when there are no objects.
//
// linVelMem, angVelMem  device buffers with per-object linear / angular velocity
// bodies                device buffer bound to the kernel's gBodies argument
// bodyInertias          device buffer bound to the kernel's bodyInertias argument
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	int ciErrNum = 0;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 1, sizeof(int), &fpio.m_numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 5, sizeof(cl_mem), (void*)&bodies);
	ciErrNum |= clSetKernelArg(fpio.m_setupBodiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects)
	{
		// Whole number of work-groups; the kernel ignores work-items beyond numObjects.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_setupBodiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
// Launches copyVelocitiesKernel, which copies the velocities stored in the body array
// back into the separate velocity buffers. Nothing is enqueued when there are no objects.
//
// linVelMem, angVelMem  device buffers that receive linear / angular velocity
// bodies                device buffer bound to the kernel's gBodies argument
// bodyInertias          bound to the kernel for signature compatibility; the kernel
//                       does not read it
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias)
{
	int ciErrNum = 0;

	int numObjects = fpio.m_numObjects;
	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 1, sizeof(int), &fpio.m_numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 3, sizeof(cl_mem), (void*)&linVelMem);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 4, sizeof(cl_mem), (void*)&angVelMem);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 5, sizeof(cl_mem), (void*)&bodies);
	ciErrNum |= clSetKernelArg(fpio.m_copyVelocitiesKernel, 6, sizeof(cl_mem), (void*)&bodyInertias);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (numObjects)
	{
		// Whole number of work-groups; the kernel ignores work-items beyond numObjects.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_copyVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
// Launches broadphaseColorKernel, which colours both objects of each of the
// fpio.m_numOverlap pairs in fpio.m_dAllOverlappingPairs.
// Nothing is enqueued when there are no overlapping pairs.
void colorPairsOpenCL(btFindPairsIO& fpio)
{
	int ciErrNum = 0;

	int offset = fpio.m_positionOffset;

	// CL_SUCCESS is 0, so OR-ing the results leaves a non-zero value on any failure.
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseColorKernel, 0, sizeof(int), &offset);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseColorKernel, 1, sizeof(int), &fpio.m_numObjects);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseColorKernel, 2, sizeof(cl_mem), (void*)&fpio.m_clObjectsBuffer);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseColorKernel, 3, sizeof(cl_mem), (void*)&fpio.m_dAllOverlappingPairs);
	ciErrNum |= clSetKernelArg(fpio.m_broadphaseColorKernel, 4, sizeof(int), &fpio.m_numOverlap);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	if (fpio.m_numOverlap)
	{
		// One work-item per pair, rounded up to whole work-groups; the kernel ignores
		// work-items beyond m_numOverlap.
		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((fpio.m_numOverlap+ (workGroupSize)) / workGroupSize);

		ciErrNum = clEnqueueNDRangeKernel(fpio.m_cqCommandQue, fpio.m_broadphaseColorKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
}
/// Releases the six kernel handles listed below, in the listed order.
/// No other member of fpio is touched here.
void releaseFindPairs(btFindPairsIO& fpio)
{
	cl_kernel kernelsToRelease[] =
	{
		fpio.m_initializeGpuAabbsKernelSimple,
		fpio.m_initializeGpuAabbsKernelFull,
		fpio.m_broadphaseColorKernel,
		fpio.m_broadphaseBruteForceKernel,
		fpio.m_setupBodiesKernel,
		fpio.m_copyVelocitiesKernel
	};
	const int numKernels = (int)(sizeof(kernelsToRelease) / sizeof(kernelsToRelease[0]));

	for (int k = 0; k < numKernels; ++k)
	{
		clReleaseKernel(kernelsToRelease[k]);
	}
}

View File

@@ -0,0 +1,90 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Roman Ponomarev, Erwin Coumans
#ifndef FIND_PAIRS_H
#define FIND_PAIRS_H
#include "../basic_initialize/btOpenCLInclude.h"
// Small record describing one OpenCL kernel.
// NOTE(review): nothing in the visible part of this file uses this struct; the field
// descriptions below are inferred from the field names and types only - confirm against users.
struct btKernelInfo
{
// presumably an application-defined identifier for the kernel
int m_Id;
// the OpenCL kernel handle
cl_kernel m_kernel;
// presumably the kernel function name; ownership of the string is not visible here
char* m_name;
// presumably the local work-group size to launch the kernel with
int m_workgroupSize;
};
// State shared by the find-pairs helper functions declared below: OpenCL handles
// (context, device, queue, kernels), device buffers, and a few scalar parameters.
struct btFindPairsIO
{
// number of objects; passed to the kernels as an int argument and used to size their 1D launch
int m_numObjects;
cl_mem m_clObjectsBuffer; //for memory layout details see main.cpp (todo, make it flexible)
int m_positionOffset;//offset in m_clObjectsBuffer where position array starts
// command queue on which the helper functions enqueue their kernels
cl_command_queue m_cqCommandQue;
// the six kernels below are the ones released by releaseFindPairs()
cl_kernel m_initializeGpuAabbsKernelSimple;
cl_kernel m_initializeGpuAabbsKernelFull;
cl_kernel m_broadphaseColorKernel;
cl_kernel m_broadphaseBruteForceKernel;
cl_kernel m_setupBodiesKernel;
cl_kernel m_copyVelocitiesKernel;
// presumably the context and device handed to initFindPairs() - TODO confirm
cl_context m_mainContext;
cl_device_id m_device;
// NOTE(review): the kernels below are not released by releaseFindPairs(); judging by their
// names they belong to a grid-based broadphase - confirm who creates and owns them
cl_kernel m_calcHashAabbKernel;
cl_kernel m_clearCellStartKernel;
cl_kernel m_findCellStartKernel;
cl_kernel m_findOverlappingPairsKernel;
cl_kernel m_computePairChangeKernel;
cl_kernel m_squeezePairBuffKernel;
// device buffer of overlapping pairs; bound as argument 3 of m_broadphaseColorKernel
cl_mem m_dAllOverlappingPairs;
// number of overlapping pairs; colorPairsOpenCL() launches one work item per pair (rounded up)
int m_numOverlap;
// NOTE(review): the remaining buffers are not referenced in the visible host code;
// the "m_d" prefix suggests device-side memory - confirm contents against the broadphase code
cl_mem m_dBpParams;
cl_mem m_dBodiesHash;
cl_mem m_dCellStart;
cl_mem m_dPairBuff;
cl_mem m_dPairBuffStartCurr;
cl_mem m_dlocalShapeAABB;
cl_mem m_dAABB;
cl_mem m_dPairScan;
cl_mem m_dPairOut;
};
// Initializes fpio for the given context, device and queue.
// NOTE(review): the definition is outside the visible part of the file; presumably it builds the
// kernels and allocates the buffers sized by maxHandles and maxPairsPerBody - confirm.
void initFindPairs(btFindPairsIO& fpio,cl_context cxMainContext, cl_device_id device, cl_command_queue commandQueue, int maxHandles,int maxPairsPerBody = 16);
// presumably runs fpio.m_broadphaseBruteForceKernel - definition not visible here
void findPairsOpenCLBruteForce(btFindPairsIO& fpio);
// presumably runs fpio.m_initializeGpuAabbsKernelSimple - definition not visible here
void setupGpuAabbsSimple(btFindPairsIO& fpio);
// presumably runs fpio.m_initializeGpuAabbsKernelFull on the given bodies buffer - definition not visible here
void setupGpuAabbsFull(btFindPairsIO& fpio, cl_mem bodies);
// Enqueues fpio.m_broadphaseColorKernel over fpio.m_numOverlap pairs; does nothing when there are no pairs.
void colorPairsOpenCL(btFindPairsIO& fpio);
// Enqueues fpio.m_setupBodiesKernel over fpio.m_numObjects objects with the given buffers as kernel arguments.
void setupBodies(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
// Enqueues fpio.m_copyVelocitiesKernel over fpio.m_numObjects objects with the given buffers as kernel arguments.
void copyBodyVelocities(btFindPairsIO& fpio, cl_mem linVelMem, cl_mem angVelMem, cl_mem bodies, cl_mem bodyInertias);
// Releases the six kernels used by the functions above; buffers, queue and context are not released here.
void releaseFindPairs(btFindPairsIO& fpio);
#endif //FIND_PAIRS_H

View File

@@ -0,0 +1,116 @@
MSTRINGIFY(
// Quaternion product q1*q2 with both operands stored as (x, y, z, w) and w the scalar part.
float4 quatMult(float4 q1, float4 q2)
{
	float rx = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;
	float ry = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;
	float rz = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;
	float rw = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z;
	return (float4)(rx, ry, rz, rw);
}
// Returns q scaled to unit length. When the length is not greater than zero
// the identity quaternion (0, 0, 0, 1) is returned instead.
float4 quatNorm(float4 q)
{
	float len = native_sqrt(dot(q, q));
	if (len > 0.f)
	{
		return q * (1.f / len);
	}
	return (float4)(0.f, 0.f, 0.f, 1.f);
}
// Advances one body per work item by a fixed time step of 0.0166666f.
// Positions are read and written at g_vertexBuffer[startOffset/4 + id] and orientations
// (quaternions) at g_vertexBuffer[startOffset/4 + numNodes + id].
// The angular velocity is damped by 0.99 per call and written back to pAngVel.
// pBodyTimes is not used by this kernel.
__kernel void
integrateTransformsKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
__global float4 *linVel,
__global float4 *pAngVel,
__global float* pBodyTimes)
{
	int gid = get_global_id(0);
	if (gid >= numNodes)
	{
		// padding work item from rounding the global size up
		return;
	}

	const float maxAngle = (0.25f * 3.14159254f);
	const float dt = 0.0166666f;
	int posIndex = gid + startOffset/4;
	int ornIndex = posIndex + numNodes;

	// hardcoded angular damping on x, y, z; the w component is stored back unchanged
	float4 omega = pAngVel[gid];
	omega.x *= 0.99f;
	omega.y *= 0.99f;
	omega.z *= 0.99f;
	pAngVel[gid] = omega;

	// limit the rotation applied in a single step
	float speed = native_sqrt(dot(omega, omega));
	if (speed*dt > maxAngle)
	{
		speed = maxAngle / dt;
	}

	// scale that turns the angular velocity into the vector part of the step quaternion
	float scale;
	if (speed < 0.001f)
	{
		// small angle: series expansion avoids dividing by a tiny speed
		scale = 0.5f*dt-(dt*dt*dt)*0.020833333333f * speed * speed;
	}
	else
	{
		scale = native_sin(0.5f * speed * dt) / speed;
	}

	float4 stepOrn = omega * scale;
	stepOrn.w = native_cos(speed * dt * 0.5f);

	float4 currentOrn = g_vertexBuffer[ornIndex];
	g_vertexBuffer[ornIndex] = quatNorm(quatMult(stepOrn, currentOrn));

	// linear velocity
	g_vertexBuffer[posIndex] += linVel[gid] * dt;
}
// Moves one body per work item along a fixed trigonometric path.
// Each call advances the body's own time value in pBodyTimes by a constant increment and
// overwrites x, y, z of the position stored at g_vertexBuffer[startOffset/4 + id];
// the w component of the position is kept. linVel and pAngVel are not used by this kernel.
__kernel void
sineWaveKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,
__global float4 *linVel,
__global float4 *pAngVel,
__global float* pBodyTimes)
{
	int nodeID = get_global_id(0);
	// single-precision literal: the kernel is float-only and must not depend on double support
	float timeStepPos = 0.000166666f;
	float mAmplitude = 166.f;
	if( nodeID < numNodes )
	{
		// advance and store the per-body time once; reuse the value from a register below
		float t = pBodyTimes[nodeID] + timeStepPos;
		pBodyTimes[nodeID] = t;

		float4 position = g_vertexBuffer[nodeID + startOffset/4];
		position.x = native_cos(t*2.17f)*mAmplitude + native_sin(t)*mAmplitude*0.5f;
		// NOTE(review): unlike x and z the amplitude is inside the sine argument here;
		// kept as in the original - confirm whether that is intended
		position.y = native_cos(t*1.38f)*mAmplitude + native_sin(t*mAmplitude);
		position.z = native_cos(t*2.17f)*mAmplitude + native_sin(t*0.777f)*mAmplitude;
		g_vertexBuffer[nodeID + startOffset/4] = position;
	}
}
);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
-- Pull in the per-vendor build scripts, one per OpenCL platform.
-- NOTE(review): each vendor script presumably defines its project only when its
-- findOpenCL_<vendor>() check succeeds (as the AMD and Intel scripts do) - confirm for NVIDIA.
include "AMD"
include "Intel"
include "NVIDIA"