Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

This commit is contained in:
erwin.coumans
2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,36 @@
--- Detect the DirectX 11 SDK.
-- Reads the DXSDK_DIR environment variable and checks that
-- "Include/D3D11.h" can be opened for reading beneath it.
-- The variable's value is concatenated as-is (no separator is inserted).
-- @treturn boolean true when the header could be opened, false otherwise
function findDirectX11()
	local dx11path = os.getenv("DXSDK_DIR")
	if not dx11path then
		return false
	end

	local filepath = string.format("%s%s", dx11path, "Include/D3D11.h")
	local headerdx11 = io.open(filepath, "r")
	if not headerdx11 then
		return false
	end

	-- The handle is only an existence probe; release it immediately.
	headerdx11:close()
	printf("Found DX11: '%s'", filepath)
	return true
end
--- Configure the current premake scope to build against DirectX 11.
-- Adds the ADL_ENABLE_DX11 define, the SDK include directory, one library
-- directory per platform (x32 / x64) and the DirectX link libraries.
-- All paths are passed as the literal "$(DXSDK_DIR)" macro; nothing is
-- expanded or validated here.
-- @treturn boolean always true
function initDirectX11()
	configuration {}
	defines { "ADL_ENABLE_DX11" }
	includedirs { "$(DXSDK_DIR)/include" }

	configuration "x32"
		libdirs { "$(DXSDK_DIR)/Lib/x86" }
	configuration "x64"
		libdirs { "$(DXSDK_DIR)/Lib/x64" }

	-- Reset the filter so the link list is not tied to the x64 platform.
	configuration {}
	links {
		"d3dcompiler",
		"dxerr",
		"dxguid",
		"d3dx9",
		"d3d9",
		"winmm",
		"comctl32",
		"d3dx11"
	}
	return true
end

View File

@@ -0,0 +1,84 @@
-- todo: add Apple OpenCL environment vars
--- Report whether the AMD OpenCL SDK appears to be installed.
-- The only check performed is that AMDAPPSDKROOT is set in the environment.
-- @treturn boolean true when the variable is set, false otherwise
function findOpenCL_AMD()
	return os.getenv("AMDAPPSDKROOT") ~= nil
end
--- Report whether the NVIDIA OpenCL SDK appears to be installed.
-- The only check performed is that CUDA_PATH is set in the environment.
-- @treturn boolean true when the variable is set, false otherwise
function findOpenCL_NVIDIA()
	return os.getenv("CUDA_PATH") ~= nil
end
--- Report whether the Intel OpenCL SDK appears to be installed.
-- The only check performed is that INTELOCLSDKROOT is set in the environment.
-- @treturn boolean true when the variable is set, false otherwise
function findOpenCL_Intel()
	return os.getenv("INTELOCLSDKROOT") ~= nil
end
--- Shared implementation behind the vendor-specific initOpenCL_* functions.
-- Always resets the configuration filter first. When `envVar` is set in the
-- environment it then adds the OpenCL defines, the SDK include directory,
-- the per-platform library directories and links against "OpenCL".
-- Paths are emitted as the literal "$(ENVVAR)" macro, not the expanded value.
-- @tparam string envVar          name of the SDK root environment variable
-- @tparam string platformDefine  vendor define added next to ADL_ENABLE_CL
-- @tparam string libDir32        sub-directory of "lib" used for the x32 platform
-- @tparam string libDir64        sub-directory of "lib" used for the x64 platform
-- @treturn boolean true when the SDK variable is set, false otherwise
local function initOpenCLPlatform(envVar, platformDefine, libDir32, libDir64)
	configuration {}
	if not os.getenv(envVar) then
		return false
	end

	local sdkRoot = "$(" .. envVar .. ")"
	defines { "ADL_ENABLE_CL", platformDefine }
	includedirs { sdkRoot .. "/include" }

	configuration "x32"
		libdirs { sdkRoot .. "/lib/" .. libDir32 }
	configuration "x64"
		libdirs { sdkRoot .. "/lib/" .. libDir64 }

	-- Reset the filter so the link applies to every configuration.
	configuration {}
	links { "OpenCL" }
	return true
end

--- Configure the current premake scope for the AMD OpenCL SDK (AMDAPPSDKROOT).
-- @treturn boolean true when AMDAPPSDKROOT is set, false otherwise
function initOpenCL_AMD()
	return initOpenCLPlatform("AMDAPPSDKROOT", "CL_PLATFORM_AMD", "x86", "x86_64")
end

--- Configure the current premake scope for the NVIDIA OpenCL SDK (CUDA_PATH).
-- @treturn boolean true when CUDA_PATH is set, false otherwise
function initOpenCL_NVIDIA()
	return initOpenCLPlatform("CUDA_PATH", "CL_PLATFORM_NVIDIA", "Win32", "x64")
end

--- Configure the current premake scope for the Intel OpenCL SDK (INTELOCLSDKROOT).
-- @treturn boolean true when INTELOCLSDKROOT is set, false otherwise
function initOpenCL_Intel()
	return initOpenCLPlatform("INTELOCLSDKROOT", "CL_PLATFORM_INTEL", "x86", "x64")
end

View File

@@ -0,0 +1,52 @@
-- todo: add Apple OpenCL environment vars
--- Add the OpenGL link libraries for the current premake scope.
-- One configuration filter is applied per platform group, followed by the
-- libraries linked under that filter; the filter is reset before and after.
function initOpenGL()
	local linksPerPlatform = {
		{ filter = { "Windows" },                   libs = { "opengl32" } },
		{ filter = { "MacOSX" },                    libs = { "Carbon.framework", "OpenGL.framework", "AGL.framework" } },
		{ filter = { "not Windows", "not MacOSX" }, libs = { "GL", "GLU" } },
	}

	configuration({})
	for _, entry in ipairs(linksPerPlatform) do
		configuration(entry.filter)
		links(entry.libs)
	end
	configuration({})
end
--- Add GLUT include/library directories and link libraries.
-- On Windows the headers and libraries are taken from the "Glut" directory
-- addressed relative to the global `projectRootDir`, and the library name
-- depends on the platform (glut32 / glut64). MacOSX links the Glut framework;
-- every other system links "glut".
function initGlut()
	configuration({})

	configuration({ "Windows" })
	-- Same directory serves both the headers and the import libraries.
	local glutDir = projectRootDir .. "../../Glut"
	includedirs({ glutDir })
	libdirs({ projectRootDir .. "../../Glut" })

	configuration({ "Windows", "x32" })
	links({ "glut32" })

	configuration({ "Windows", "x64" })
	links({ "glut64" })

	configuration({ "MacOSX" })
	links({ "Glut.framework" })

	configuration({ "not Windows", "not MacOSX" })
	links({ "glut" })

	configuration({})
end
--- Add GLEW (static) settings; only Windows configurations are touched.
-- Defines GLEW_STATIC and links glew32s / glew64s depending on the platform.
-- Note that the include and library directories point at the "Glut"
-- directory relative to the global `projectRootDir`.
function initGlew()
	configuration({})

	configuration({ "Windows" })
	defines({ "GLEW_STATIC" })
	local glewDir = projectRootDir .. "../../Glut"
	includedirs({ glewDir })
	libdirs({ projectRootDir .. "../../Glut" })

	configuration({ "Windows", "x32" })
	links({ "glew32s" })

	configuration({ "Windows", "x64" })
	links({ "glew64s" })

	configuration({})
end

View File

@@ -0,0 +1,55 @@
-- Top-level premake4 solution: global build settings, helper scripts and
-- the list of OpenCL sample projects.
solution "0MySolution"

	-- Multithreaded compiling
	if _ACTION == "vs2010" then
		buildoptions { "/MP" }
	end

	configurations { "Release", "Debug" }

	configuration "Release"
		flags { "Optimize", "StaticRuntime", "NoMinimalRebuild", "FloatFast" }

	configuration "Debug"
		flags { "Symbols", "StaticRuntime", "NoMinimalRebuild", "NoEditAndContinue", "FloatFast" }

	platforms { "x32", "x64" }

	-- Target name suffix per platform/configuration pair.
	configuration "x64"
		targetsuffix "_64"
	configuration { "x64", "debug" }
		targetsuffix "_x64_debug"
	configuration { "x64", "release" }
		targetsuffix "_x64"
	configuration { "x32", "debug" }
		targetsuffix "_debug"

	configuration {}

	flags { "NoRTTI", "NoExceptions" }
	defines { "_HAS_EXCEPTIONS=0" }
	targetdir "../bin"

	-- _ACTION is nil when premake runs without an action (e.g. --help);
	-- concatenating nil would abort the script, so fall back to "".
	location("./" .. (_ACTION or ""))

	-- Intentionally global: read by the helper scripts loaded below.
	projectRootDir = os.getcwd() .. "/../"
	print("Project root directory: " .. projectRootDir)

	dofile ("findOpenCL.lua")
	dofile ("findDirectX11.lua")
	dofile ("findOpenGLGlewGlut.lua")

	language "C++"

	include "../opencl/gpu_rigidbody_pipeline2"
	include "../opencl/gpu_rigidbody_pipeline"
	include "../opencl/basic_initialize"
	include "../opencl/vector_add"
	include "../opencl/primitives/AdlTest"
	include "../opencl/primitives/benchmark"
	include "../opencl/3dGridBroadphase"
	include "../opencl/broadphase_benchmark"

View File

@@ -0,0 +1,10 @@
rem premake4 --no-pelibs vs2008
rem premake4 --no-pedemos vs2008
rem premake4 --no-bulletlibs --no-pelibs vs2008
rem premake4 --with-nacl vs2008

rem Generate the Visual Studio 2008 files, then make sure the cache folder exists.
..\..\..\msvc\premake4 vs2008

rem Guarded so that re-running the script does not report an mkdir error.
if not exist vs2008\cache mkdir vs2008\cache

pause

View File

@@ -0,0 +1,5 @@
rem Generate the Visual Studio 2010 files, then make sure the cache folder exists.
..\..\..\msvc\premake4 vs2010

rem Guarded so that re-running the script does not report an mkdir error.
if not exist vs2010\cache mkdir vs2010\cache

pause

View File

@@ -0,0 +1,45 @@
-- Defines the "basic_bullet2_demo_AMD" console project.
-- The project is only emitted on Windows hosts and only when
-- findOpenCL_AMD() reports that the AMD OpenCL SDK is available.
if os.is("Windows") then

	-- Kept local so the detection result does not leak into the shared
	-- premake global namespace.
	local hasCL = findOpenCL_AMD()

	if (hasCL) then

		project "basic_bullet2_demo_AMD"

		initOpenCL_AMD()

		language "C++"

		kind "ConsoleApp"
		targetdir "../../../bin"

		includedirs {
			"..",
			"../../../bullet2",
			"../../testbed",
			"../../../rendering/Gwen",
			"../../../opencl/basic_initialize",
			"../../../opencl/primitives"
		}

		links {
			"testbed",
			"bullet2",
			"gwen"
		}

		initOpenGL()
		initGlut()

		files {
			"../**.cpp",
			"../**.h",
			"../../../opencl/basic_initialize/btOpenCLUtils.cpp",
			"../../../opencl/basic_initialize/btOpenCLUtils.h"
		}

	end
end

View File

@@ -0,0 +1,538 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "BasicDemo.h"
#include "GlutStuff.h"
///btBulletDynamicsCommon.h is the main Bullet include file, contains most common include files.
#include "btBulletDynamicsCommon.h"
#include "CustomConvexShape.h"
#include "CustomConvexPairCollision.h"
#include "CustomCollisionDispatcher.h"
#include "ConvexHeightFieldShape.h"
#include "GLDebugDrawer.h"
static GLDebugDrawer sDebugDraw;
#include <stdio.h> //printf debugging
#ifdef CL_PLATFORM_AMD
#include "../../opencl/basic_initialize/btOpenCLUtils.h"
cl_context g_cxMainContext=0;
cl_command_queue g_cqCommandQue=0;
cl_device_id g_clDevice=0;
#endif
///create 144 (6x6x4) dynamic objects: ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z
#define ARRAY_SIZE_X 6
#define ARRAY_SIZE_Y 6
#define ARRAY_SIZE_Z 4
//maximum number of objects (and allow user to shoot additional boxes)
#define MAX_PROXIES (ARRAY_SIZE_X*ARRAY_SIZE_Y*ARRAY_SIZE_Z + 1024)
///scaling of the objects (0.1 = 20 centimeter boxes )
#define SCALING 1.
//origin offsets of the object grid built in initPhysics
#define START_POS_X 0
#define START_POS_Y -0.8
#define START_POS_Z 0
//number of xyz triples in BoxVtx and in BoxVtx2
#define BoxVtxCount 8
// Eight xyz triples: the corners of an axis-aligned box centered on the
// origin with half-extent 0.5 on every axis (BoxVtxCount entries).
// In the visible part of this file it is only referenced from a
// commented-out line in initPhysics.
static float BoxVtx[] = {
-0.5,-0.5,-0.5,
-0.5,-0.5,0.5,
-0.5,0.5,-0.5,
-0.5,0.5,0.5,
0.5,-0.5,-0.5,
0.5,-0.5,0.5,
0.5,0.5,-0.5,
0.5,0.5,0.5,
};
// Eight xyz triples: the corners of an axis-aligned box centered on the
// origin with half-extents 20.3 x 10.3 x 20.3. initPhysics passes it, with
// BoxVtxCount and a stride of 3 floats, to build the ground shape.
static float BoxVtx2[] = {
-20.3,-10.3,-20.3,
-20.3,-10.3,20.3,
-20.3,10.3,-20.3,
-20.3,10.3,20.3,
20.3,-10.3,-20.3,
20.3,-10.3,20.3,
20.3,10.3,-20.3,
20.3,10.3,20.3,
};
// Number of rows in BarrelVtx2.
#define BarrelVtxCount2 57
// Barrel mesh vertex data: BarrelVtxCount2 rows of 6 floats each.
// initPhysics passes it with a stride of 6*sizeof(float), so only the first
// xyz triple of each row is stepped over as a position there.
// NOTE(review): the second triple of each row looks like a per-vertex
// normal (roughly unit length) - confirm against the consumer.
static float BarrelVtx2[] = {
0.0f,-0.5f,0.0f, 0.0f,-1.0f,0.0f,
0.282362f,-0.5f,-0.205148f, 0.0f,-1.0f,0.0f,
0.349018f,-0.5f,0.0f, 0.0f,-1.0f,0.0f,
0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f,
-0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f,
0.107853f,-0.5f,-0.331936f, 0.0f,-1.0f,0.0f,
-0.282362f,-0.5f,-0.205148f, 0.0f,-1.0f,0.0f,
-0.349018f,-0.5f,0.0f, 0.0f,-1.0f,0.0f,
-0.282362f,-0.5f,0.205148f, 0.0f,-1.0f,0.0f,
-0.107853f,-0.5f,0.331936f, 0.0f,-1.0f,0.0f,
0.107853f,-0.5f,0.331936f, 0.0f,-1.0f,0.0f,
0.282362f,-0.5f,0.205148f, 0.0f,-1.0f,0.0f,
0.0f,0.5f,0.0f, 0.0f,1.0f,0.0f,
0.349018f,0.5f,0.0f, 0.0f,1.0f,0.0f,
0.282362f,0.5f,-0.205148f, 0.0f,1.0f,0.0f,
0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f,
0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f,
-0.107853f,0.5f,-0.331936f, 0.0f,1.0f,0.0f,
-0.282362f,0.5f,-0.205148f, 0.0f,1.0f,0.0f,
-0.349018f,0.5f,0.0f, 0.0f,1.0f,0.0f,
-0.282362f,0.5f,0.205148f, 0.0f,1.0f,0.0f,
-0.107853f,0.5f,0.331936f, 0.0f,1.0f,0.0f,
0.107853f,0.5f,0.331936f, 0.0f,1.0f,0.0f,
0.282362f,0.5f,0.205148f, 0.0f,1.0f,0.0f,
0.349018f,-0.5f,0.0f, 0.957307f,-0.289072f,0.0f,
0.404509f,0.0f,-0.293893f, 0.809017f,0.0f,-0.587785f,
0.5f,0.0f,0.0f, 1.0f,0.0f,0.0f,
0.282362f,-0.5f,-0.205148f, 0.774478f,-0.289072f,-0.562691f,
0.154508f,0.0f,-0.475528f, 0.309017f,0.0f,-0.951057f,
0.107853f,-0.5f,-0.331936f, 0.295824f,-0.289072f,-0.910453f,
0.107853f,-0.5f,-0.331936f, 0.295824f,-0.289072f,-0.910453f,
-0.154509f,0.0f,-0.475528f, -0.309017f,0.0f,-0.951057f,
0.154508f,0.0f,-0.475528f, 0.309017f,0.0f,-0.951057f,
-0.107853f,-0.5f,-0.331936f, -0.295824f,-0.289072f,-0.910453f,
-0.404509f,0.0f,-0.293893f, -0.809017f,0.0f,-0.587785f,
-0.282362f,-0.5f,-0.205148f, -0.774478f,-0.289072f,-0.562691f,
-0.5f,0.0f,0.0f, -1.0f,0.0f,0.0f,
-0.349018f,-0.5f,0.0f, -0.957307f,-0.289072f,0.0f,
-0.404508f,0.0f,0.293893f, -0.809017f,0.0f,0.587785f,
-0.282362f,-0.5f,0.205148f, -0.774478f,-0.289072f,0.562691f,
-0.154509f,0.0f,0.475528f, -0.309017f,0.0f,0.951056f,
-0.107853f,-0.5f,0.331936f, -0.295824f,-0.289072f,0.910453f,
0.154509f,0.0f,0.475528f, 0.309017f,0.0f,0.951056f,
0.107853f,-0.5f,0.331936f, 0.295824f,-0.289072f,0.910453f,
0.404509f,0.0f,0.293892f, 0.809017f,0.0f,0.587785f,
0.282362f,-0.5f,0.205148f, 0.774478f,-0.289072f,0.562691f,
0.282362f,0.5f,-0.205148f, 0.774478f,0.289072f,-0.562691f,
0.349018f,0.5f,0.0f, 0.957307f,0.289072f,0.0f,
0.107853f,0.5f,-0.331936f, 0.295824f,0.289072f,-0.910453f,
-0.107853f,0.5f,-0.331936f, -0.295824f,0.289072f,-0.910453f,
0.107853f,0.5f,-0.331936f, 0.295824f,0.289072f,-0.910453f,
-0.282362f,0.5f,-0.205148f, -0.774478f,0.289072f,-0.562691f,
-0.349018f,0.5f,0.0f, -0.957307f,0.289072f,0.0f,
-0.282362f,0.5f,0.205148f, -0.774478f,0.289072f,0.562691f,
-0.107853f,0.5f,0.331936f, -0.295824f,0.289072f,0.910453f,
0.107853f,0.5f,0.331936f, 0.295824f,0.289072f,0.910453f,
0.282362f,0.5f,0.205148f, 0.774478f,0.289072f,0.562691f,
};
// 60 index triples with values in the range 0..56, i.e. valid row indices
// into BarrelVtx2.
// NOTE(review): presumably the triangle list of the barrel mesh; the array
// is not referenced in the visible part of this file - confirm it is used.
static int BarrelIdx[] = {
0,1,2,
0,3,1,
0,4,5,
0,6,4,
0,7,6,
0,8,7,
0,9,8,
0,10,9,
0,11,10,
0,2,11,
12,13,14,
12,14,15,
12,16,17,
12,17,18,
12,18,19,
12,19,20,
12,20,21,
12,21,22,
12,22,23,
12,23,13,
24,25,26,
24,27,25,
27,28,25,
27,29,28,
30,31,32,
30,33,31,
33,34,31,
33,35,34,
35,36,34,
35,37,36,
37,38,36,
37,39,38,
39,40,38,
39,41,40,
41,42,40,
41,43,42,
43,44,42,
43,45,44,
45,26,44,
45,24,26,
26,46,47,
26,25,46,
25,48,46,
25,28,48,
32,49,50,
32,31,49,
31,51,49,
31,34,51,
34,52,51,
34,36,52,
36,53,52,
36,38,53,
38,54,53,
38,40,54,
40,55,54,
40,42,55,
42,56,55,
42,44,56,
44,47,56,
44,26,47,
};
__inline void glVertexFloat4( const float4& v )
{
glVertex3f( v.x, v.y, v.z );
}
__inline void drawPointListTransformed(const float4* vtx, int nVtx, const float4& translation, const Quaternion& quat)
{
glPushMatrix();
Matrix3x3 rotMat = mtTranspose( qtGetRotationMatrix( quat ) );
float transformMat[16] =
{
rotMat.m_row[0].x, rotMat.m_row[0].y, rotMat.m_row[0].z, 0,
rotMat.m_row[1].x, rotMat.m_row[1].y, rotMat.m_row[1].z, 0,
rotMat.m_row[2].x, rotMat.m_row[2].y, rotMat.m_row[2].z, 0,
translation.x, translation.y, translation.z,1
};
glMultMatrixf( transformMat );
float4 c = make_float4(1,1,0,0);
glPointSize(3.f);
glBegin(GL_POINTS);
for(int i=0; i<nVtx; i++)
{
glColor4f(c.x, c.y, c.z, 1);
glVertexFloat4( vtx[i] );
}
glEnd();
glPopMatrix();
}
/// Draws a list of sample points under the given translation and rotation.
/// Thin forwarder to drawPointListTransformed.
void displaySamples(const float4* vertices, int numVertices, const float4& translation, const Quaternion& quaternion)
{
	drawPointListTransformed( vertices, numVertices, translation, quaternion );
}
/// Draws the height-field sample points of every CustomConvexShape in the
/// world, positioned with the owning object's world transform.
/// Nothing is drawn unless the debug drawer has DBG_DrawContactPoints set.
void BasicDemo::renderSurfacePoints()
{
	// Without a world or an attached debug drawer there is no mode to query.
	if (!m_dynamicsWorld || !m_dynamicsWorld->getDebugDrawer())
		return;

	if (!(m_dynamicsWorld->getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawContactPoints))
		return;

	for (int i=0;i<m_dynamicsWorld->getCollisionObjectArray().size();i++)
	{
		btCollisionObject* ob = m_dynamicsWorld->getCollisionObjectArray()[i];
		if (ob->getCollisionShape()->getShapeType() != CUSTOM_POLYHEDRAL_SHAPE_TYPE)
			continue;

		CustomConvexShape* customConvex = (CustomConvexShape*)ob->getCollisionShape();
		ConvexHeightField* cvxShape = customConvex->m_ConvexHeightField;
		if (!cvxShape)
		{
			// Report and skip: dereferencing the missing height field would crash.
			printf("aargh\n");
			continue;
		}

		// Convert the Bullet transform into the float4/Quaternion pair
		// expected by displaySamples.
		const btVector3& pA = ob->getWorldTransform().getOrigin();
		btQuaternion qA = ob->getWorldTransform().getRotation();

		float4 bodyApos;
		bodyApos.x = pA.getX();
		bodyApos.y = pA.getY();
		bodyApos.z = pA.getZ();
		bodyApos.w = 0.f;

		Quaternion bodyAquat;
		bodyAquat.x = qA.getX();
		bodyAquat.y = qA.getY();
		bodyAquat.z = qA.getZ();
		bodyAquat.w = qA.getW();

		displaySamples(cvxShape->getSamplePoints(),cvxShape->getNumSamplePoints(),bodyApos,bodyAquat);
	}
}
/// Per-frame update: clears the frame, advances the simulation by the
/// elapsed time, then renders the scene, the debug geometry and the
/// surface sample points before presenting the frame.
void BasicDemo::clientMoveAndDisplay()
{
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	// The elapsed time is reported in microseconds; stepSimulation is fed seconds.
	float elapsedMicroseconds = getDeltaTimeMicroseconds();

	if (m_dynamicsWorld)
	{
		///step the simulation
		m_dynamicsWorld->stepSimulation(elapsedMicroseconds / 1000000.f);

		//optional but useful: debug drawing
		m_dynamicsWorld->debugDrawWorld();
	}

	renderme();
	renderSurfacePoints();

	glFlush();
	swapBuffers();
}
/// Redraws the current state without stepping the simulation: scene,
/// surface sample points, then the world's debug geometry.
void BasicDemo::displayCallback(void)
{
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	renderme();
	renderSurfacePoints();

	//optional but useful: debug drawing to detect problems
	if (m_dynamicsWorld)
	{
		m_dynamicsWorld->debugDrawWorld();
	}

	glFlush();
	swapBuffers();
}
/// Builds the demo scene: collision configuration, custom dispatcher,
/// broadphase, solver and dynamics world, one static ground body and an
/// ARRAY_SIZE_X x ARRAY_SIZE_Y x ARRAY_SIZE_Z grid of dynamic barrel bodies.
/// Every object allocated here is released again by exitPhysics().
void BasicDemo::initPhysics()
{
	setTexturing(true);
	setShadows(true);

	m_acceleratedRigidBodies = 0;

	setCameraDistance(btScalar(SCALING*20.));

	///collision configuration contains default setup for memory, collision setup
	m_collisionConfiguration = new btDefaultCollisionConfiguration();
	//m_collisionConfiguration->setConvexConvexMultipointIterations();

	// Only the custom dispatcher is created. Allocating a default
	// btCollisionDispatcher first and then overwriting the pointer would
	// leak that object on every initPhysics()/clientResetScene() call.
#ifdef CL_PLATFORM_AMD
	m_dispatcher = new CustomCollisionDispatcher(m_collisionConfiguration, g_cxMainContext,g_clDevice,g_cqCommandQue);
#else
	m_dispatcher = new CustomCollisionDispatcher(m_collisionConfiguration);
#endif

	// Pairs of custom polyhedral shapes are routed to the custom convex/convex algorithm.
	m_dispatcher->registerCollisionCreateFunc(CUSTOM_POLYHEDRAL_SHAPE_TYPE,CUSTOM_POLYHEDRAL_SHAPE_TYPE,new CustomConvexConvexPairCollision::CreateFunc(m_collisionConfiguration->getSimplexSolver(), m_collisionConfiguration->getPenetrationDepthSolver()));

	m_broadphase = new btDbvtBroadphase();

	///the default constraint solver. For parallel processing you can use a different solver (see Extras/BulletMultiThreaded)
	btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver;
	m_solver = sol;

	m_dynamicsWorld = new btDiscreteDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration);

	m_dynamicsWorld->setGravity(btVector3(0,-10,0));
	m_dynamicsWorld->setDebugDrawer(&sDebugDraw);

	///create a few basic rigid bodies
	//btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
#if 1
	CustomConvexShape* groundShape = new CustomConvexShape(BoxVtx2,BoxVtxCount,3*sizeof(float));
	//btCollisionShape* groundShape = new btStaticPlaneShape(btVector3(0,1,0),0);

	m_collisionShapes.push_back(groundShape);

	btTransform groundTransform;
	groundTransform.setIdentity();
	groundTransform.setOrigin(btVector3(0,-11,0));

	//We can also use DemoApplication::localCreateRigidBody, but for clarity it is provided here:
	{
		btScalar mass(0.);

		//rigidbody is dynamic if and only if mass is non zero, otherwise static
		bool isDynamic = (mass != 0.f);

		btVector3 localInertia(0,0,0);
		if (isDynamic)
			groundShape->calculateLocalInertia(mass,localInertia);

		//using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects
		btDefaultMotionState* myMotionState = new btDefaultMotionState(groundTransform);
		btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,groundShape,localInertia);
		btRigidBody* body = new btRigidBody(rbInfo);

		//add the body to the dynamics world
		m_dynamicsWorld->addRigidBody(body);
	}
#endif

	{
		//create a few dynamic rigidbodies
		// Re-using the same collision is better for memory usage and performance

		//btCollisionShape* colShape = new btBoxShape(btVector3(SCALING*1,SCALING*1,SCALING*1));
		//btCollisionShape* colShape = new btSphereShape(btScalar(1.));
#define USE_CUSTOM_HEIGHTFIELD_SHAPE
#ifdef USE_CUSTOM_HEIGHTFIELD_SHAPE
		CustomConvexShape* colShape = new CustomConvexShape(BarrelVtx2,BarrelVtxCount2,6*sizeof(float));
		//CustomConvexShape* colShape = new CustomConvexShape(BoxVtx,BoxVtxCount,3*sizeof(float));
#else
		btConvexHullShape* colShape = new btConvexHullShape(BarrelVtx2,BarrelVtxCount2,6*sizeof(float));
		colShape->setLocalScaling(btVector3(0.9,0.9,0.9));
#endif //USE_CUSTOM_HEIGHTFIELD_SHAPE

		btScalar scale = 0.5f;
		//btScalar scale = 1.f;

		//next line is already called inside the CustomConvexShape constructor
		//colShape->initializePolyhedralFeatures();

		m_collisionShapes.push_back(colShape);

		/// Create Dynamic Objects
		btTransform startTransform;
		startTransform.setIdentity();

		btScalar mass(1.f);

		//rigidbody is dynamic if and only if mass is non zero, otherwise static
		bool isDynamic = (mass != 0.f);

		btVector3 localInertia(0,0,0);
		if (isDynamic)
			colShape->calculateLocalInertia(mass,localInertia);

		// ARRAY_SIZE_X/2 and ARRAY_SIZE_Z/2 are integer divisions.
		float start_x = START_POS_X - ARRAY_SIZE_X/2;
		float start_y = START_POS_Y;
		float start_z = START_POS_Z - ARRAY_SIZE_Z/2;

		for (int k=0;k<ARRAY_SIZE_Y;k++)
		{
			for(int j = 0;j<ARRAY_SIZE_Z;j++)
			{
				for (int i=0;i<ARRAY_SIZE_X;i++)
				{
					{
						// if ((k>0) && ((j<2) || (j>(ARRAY_SIZE_Z-3))))
						//	continue;
						// if ((k>0) && ((i<2) || (i>(ARRAY_SIZE_X-3))))
						//	continue;

						startTransform.setOrigin(SCALING*btVector3(
							btScalar(scale*2.0*i + start_x),
							btScalar(scale*1+scale*2.0*k + start_y),
							btScalar(scale*2.0*j + start_z)));

						//using motionstate is recommended, it provides interpolation capabilities, and only synchronizes 'active' objects
						btDefaultMotionState* myMotionState = new btDefaultMotionState(startTransform);

						btRigidBody* body=0;

						// Disabled toggle: would make the bottom layer (k==0) static.
						if (0)//k==0)
						{
							btVector3 zeroInertia(0,0,0);
							btRigidBody::btRigidBodyConstructionInfo rbInfo(0.f,myMotionState,colShape,zeroInertia);
							body = new btRigidBody(rbInfo);
						} else
						{
							btRigidBody::btRigidBodyConstructionInfo rbInfo(mass,myMotionState,colShape,localInertia);
							body = new btRigidBody(rbInfo);
						}

						//m_acceleratedRigidBodies is used as a mapping to the accelerated rigid body index
						body->setCompanionId(m_acceleratedRigidBodies++);
						m_dynamicsWorld->addRigidBody(body);
					}
				}
			}
		}
	}
}
/// Resets the demo by tearing the whole physics setup down and building
/// it again from scratch.
void BasicDemo::clientResetScene()
{
	exitPhysics();	// release the current world and everything it owns
	initPhysics();	// rebuild the initial scene
}
/// Releases everything created by initPhysics(), in reverse order of
/// creation: rigid bodies with their motion states, collision shapes, then
/// the world, solver, broadphase, dispatcher and collision configuration.
/// Every pointer is reset to 0 afterwards, so a second call (for example
/// an explicit call followed by the destructor) does not delete twice.
void BasicDemo::exitPhysics()
{
	//cleanup in the reverse order of creation/initialization

	//remove the rigidbodies from the dynamics world and delete them
	if (m_dynamicsWorld)
	{
		// Iterate backwards: objects are removed while walking the array.
		for (int i=m_dynamicsWorld->getNumCollisionObjects()-1; i>=0 ;i--)
		{
			btCollisionObject* obj = m_dynamicsWorld->getCollisionObjectArray()[i];
			btRigidBody* body = btRigidBody::upcast(obj);
			if (body && body->getMotionState())
			{
				delete body->getMotionState();
			}
			m_dynamicsWorld->removeCollisionObject( obj );
			delete obj;
		}
	}

	//delete collision shapes
	for (int j=0;j<m_collisionShapes.size();j++)
	{
		btCollisionShape* shape = m_collisionShapes[j];
		delete shape;
	}
	m_collisionShapes.clear();

	delete m_dynamicsWorld;
	m_dynamicsWorld = 0;

	delete m_solver;
	m_solver = 0;

	delete m_broadphase;
	m_broadphase = 0;

	delete m_dispatcher;
	m_dispatcher = 0;

	delete m_collisionConfiguration;
	m_collisionConfiguration = 0;
}

View File

@@ -0,0 +1,86 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BASIC_DEMO_H
#define BASIC_DEMO_H
#ifdef _WINDOWS
#include "Win32DemoApplication.h"
#define PlatformDemoApplication Win32DemoApplication
#else
#include "GlutDemoApplication.h"
#define PlatformDemoApplication GlutDemoApplication
#endif
#include "LinearMath/btAlignedObjectArray.h"
class btBroadphaseInterface;
class btCollisionShape;
class btOverlappingPairCache;
class btCollisionDispatcher;
class btConstraintSolver;
struct btCollisionAlgorithmCreateFunc;
class btDefaultCollisionConfiguration;
///BasicDemo is good starting point for learning the code base and porting.
class BasicDemo : public PlatformDemoApplication
{
//keep the collision shapes, for deletion/cleanup
btAlignedObjectArray<btCollisionShape*> m_collisionShapes;
btBroadphaseInterface* m_broadphase;
btCollisionDispatcher* m_dispatcher;
btConstraintSolver* m_solver;
btDefaultCollisionConfiguration* m_collisionConfiguration;
int m_acceleratedRigidBodies;
public:
BasicDemo()
{
}
virtual ~BasicDemo()
{
exitPhysics();
}
void initPhysics();
void exitPhysics();
virtual void clientMoveAndDisplay();
virtual void displayCallback();
virtual void clientResetScene();
static DemoApplication* Create()
{
BasicDemo* demo = new BasicDemo;
demo->myinit();
demo->initPhysics();
return demo;
}
void renderSurfacePoints();
};
#endif //BASIC_DEMO_H

View File

@@ -0,0 +1,507 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#include "ConvexHeightFieldShape.h"
#include "Stubs/AdlCollideUtils.h"
#include "CubeMapUtils.h"
//#include <common/Physics/ShapeBase.h>
//#include <common/Physics/SphereShape.h>
//#include "GlutStuff.h"
//#define USE_OLD
/// Constructs the height field from an indexed triangle mesh.
/// All of the work is delegated to create().
ConvexHeightField::ConvexHeightField(const float4* vtxBuffer, const int4* idxBuffer, int nTriangles)
	: CollisionShape( SHAPE_CONVEX_HEIGHT_FIELD )
{
	create( vtxBuffer, idxBuffer, nTriangles );
}
/// Builds the cube-map height field from an indexed triangle mesh.
///
/// Steps:
///  1. m_scale becomes the distance from the origin to the farthest
///     vertex referenced by any triangle.
///  2. For every texel of the 6 faces a ray is cast from the origin along
///     the texel direction; the nearest positive hit is stored as a
///     quantized height together with the hit triangle's normal.
///  3. The sample points are generated from the quantized heights.
///  4. For every texel direction the support height (largest projection
///     of any sample point) is stored, quantized and rounded up.
///  5. The overall AABB (from the mesh vertices) and the per-face AABBs
///     (from the sample points) are computed and grown by the margin.
///
/// @param vtxBuffer   vertex positions, indexed by idxBuffer
/// @param idxBuffer   one int4 per triangle; x/y/z (s[0..2]) are read
/// @param nTriangles  number of entries in idxBuffer
void ConvexHeightField::create( const float4* vtxBuffer, const int4* idxBuffer, int nTriangles )
{
	// 1. bounding radius of the mesh around the origin
	{
		float maxDx2 = -1.f;
		int maxIdx = -1;
		for(int i=0; i<nTriangles; i++)
		{
			const int4& idx = idxBuffer[i];
			for(int j=0; j<3; j++)
			{
				float dx2 = dot3F4( vtxBuffer[idx.s[j]], vtxBuffer[idx.s[j]] );
				if( dx2 > maxDx2 )
				{
					maxDx2 = dx2;
					maxIdx = idx.s[j];
				}
			}
		}
		ADLASSERT( maxIdx != -1 );
		m_scale = sqrtf( maxDx2 );
	}

	// 2. cast rays to find the intersections with the mesh
	{
		for(u32 faceIdx=0; faceIdx<6; faceIdx++)
		{
			for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
			{
				float4 v;
				float x = (i+0.5f)/(float)HEIGHT_RES;
				float y = (j+0.5f)/(float)HEIGHT_RES;
				v = CubeMapUtils::calcVector(faceIdx, x, y);
				v = normalize3( v );
				v *= m_scale;

				float minFraction = FLT_MAX;
				// Zero-initialized so that a ray without any hit does not
				// read indeterminate values below.
				float4 minNormal = make_float4(0.f);
				float4 minBCrd = make_float4(0.f);
				for(int itri=0; itri<nTriangles; itri++)
				{
					float4 from = make_float4(0.f);
					float4 bCrd;
					float fraction = CollideUtils::castRay( vtxBuffer[idxBuffer[itri].x], vtxBuffer[idxBuffer[itri].y], vtxBuffer[idxBuffer[itri].z],
						from, v, 0.0f, &bCrd );

					// Keep normal and barycentric data of the NEAREST hit
					// only, so they always describe the same triangle as
					// minFraction (same rule as the plane-based constructor).
					if( fraction > 0.f && fraction < minFraction )
					{
						minFraction = fraction;

						float4 ab = vtxBuffer[idxBuffer[itri].y]-vtxBuffer[idxBuffer[itri].x];
						float4 ac = vtxBuffer[idxBuffer[itri].z]-vtxBuffer[idxBuffer[itri].x];
						minNormal = cross3( ab, ac );
						minBCrd = bCrd;
					}
				}

				// No hit at all: store height 0 for this texel.
				if( minFraction == FLT_MAX )
					minFraction = 0.f;

				{
					u8 quantizedHeight = (u8)(minFraction*255.f);
					sample( (Face)faceIdx, i,j ) = quantizedHeight;
					sampleNormal( (Face)faceIdx, i,j ) = normalize3(minNormal);
					// minValue == 1/3. NOTE(review): presumably the smallest
					// possible dot(b,b) for barycentric coordinates (reached
					// at the centroid), making w a value normalized to
					// [0,1] - confirm against castRay.
					float minValue = 3.f*(1.f/3.f)*(1.f/3.f);
					sampleNormal( (Face)faceIdx, i,j ).w = (dot3F4( minBCrd, minBCrd ) - minValue )/(1.f-minValue);
				}
			}
		}
	}

	// 3. world-space sample points from the quantized heights
	calcSamplePoints( m_samplePoints );

	// 4. calc support height using m_samplePoints
	{
		for(u32 faceIdx=0; faceIdx<6; faceIdx++) for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
		{
			float4 v;
			float x = (i+0.5f)/(float)HEIGHT_RES;
			float y = (j+0.5f)/(float)HEIGHT_RES;
			v = CubeMapUtils::calcVector(faceIdx, x, y);
			v = normalize3( v );

			float maxHeight = -1;
			for(int ie=0; ie<6*HEIGHT_RES*HEIGHT_RES; ie++)
			{
				float h = dot3F4( v, m_samplePoints[ie] )/m_scale;
				ADLASSERT( h <= 1.f );
				if( h > maxHeight ) maxHeight = h;
			}

			{
				// +1 rounds the quantized support height up; capped at 255.
				u8 quantizedHeight = min2((u8)(maxHeight*255.f)+1, 255);
				sampleSupport( (Face)faceIdx, i, j ) = quantizedHeight;
			}
		}
	}

	// 5. bounding volumes
	m_aabb.setEmpty();
	for(int i=0; i<nTriangles; i++)
	{
		const int4& idx = idxBuffer[i];
		m_aabb.includePoint( vtxBuffer[idx.x] );
		m_aabb.includePoint( vtxBuffer[idx.y] );
		m_aabb.includePoint( vtxBuffer[idx.z] );
	}
	m_aabb.expandBy( make_float4( m_collisionMargin ) );

	for(int i=0; i<6; i++)
	{
		m_faceAabbs[i].setEmpty();
		for(int j=0; j<HEIGHT_RES*HEIGHT_RES; j++)
		{
			float4 p = m_samplePoints[i*HEIGHT_RES*HEIGHT_RES + j];
			m_faceAabbs[i].includePoint(p);
		}
		m_faceAabbs[i].expandBy( make_float4( m_collisionMargin ) );
	}
}
/// Returns the parameter t at which the line orig + t*vec meets the plane
/// dot(planeEqn.xyz, p) + planeEqn.w = 0.
/// No special handling exists for a line parallel to the plane: the
/// division is then by zero.
static __inline float localIntersectPlaneLine( const float4& planeEqn, const float4& vec, const float4& orig )
{
	const float numerator = -planeEqn.w - dot3F4(planeEqn, orig);
	const float denominator = dot3F4(planeEqn, vec);
	return numerator/denominator;
}
/// Builds the cube-map height field from a set of plane equations.
///
/// @param eqn   plane equations; xyz and w are used as in
///              localIntersectPlaneLine (dot(xyz, p) + w = 0)
/// @param nEqn  number of entries in eqn
///
/// Unlike create(), m_scale is derived here from the largest hit distance
/// found while sampling, and sampleNormal().w holds that hit distance
/// rather than a barycentric measure.
ConvexHeightField::ConvexHeightField(const float4* eqn, int nEqn)
: CollisionShape( SHAPE_CONVEX_HEIGHT_FIELD )
{
{ // cast ray to find intersections with the planes
for(u32 faceIdx=0; faceIdx<6; faceIdx++)
{
for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
{
float4 v;
float x = (i+0.5f)/(float)HEIGHT_RES;
float y = (j+0.5f)/(float)HEIGHT_RES;
v = CubeMapUtils::calcVector(faceIdx, x, y);
v = normalize3( v );
float minFraction = FLT_MAX;
float4 minNormal;
// nearest plane with a positive intersection parameter along v
for(int ii=0; ii<nEqn; ii++)
{
const float4& iEqn = eqn[ii];
float fraction = localIntersectPlaneLine( iEqn, v, make_float4(0.f) );
if( fraction > 0.f )
{
if( fraction < minFraction )
{
minFraction = fraction;
minNormal = iEqn;
}
}
}
ADLASSERT( minFraction != FLT_MAX );
// v is unit length, so the fraction is the hit distance; it is
// kept in w until the heights are quantized below
minNormal.w = minFraction;
sampleNormal( (Face)faceIdx, i, j ) = minNormal;
}
}
}
{
// m_scale = largest hit distance over all texels
m_scale = -FLT_MAX;
for(u32 faceIdx=0; faceIdx<6; faceIdx++)
{
for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
{
float4& n = sampleNormal( (Face)faceIdx, i, j );
m_scale = max2( m_scale, n.w );
}
}
// quantize each hit distance relative to m_scale into 0..255
for(u32 faceIdx=0; faceIdx<6; faceIdx++)
{
for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
{
float4& n = sampleNormal( (Face)faceIdx, i, j );
u8 quantizedHeight = (u8)(n.w/m_scale*255.f);
sample( (Face)faceIdx, i, j ) = quantizedHeight;
}
}
}
calcSamplePoints( m_samplePoints );
// calc support height using m_samplePoints
{
for(u32 faceIdx=0; faceIdx<6; faceIdx++) for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
{
float4 v;
float x = (i+0.5f)/(float)HEIGHT_RES;
float y = (j+0.5f)/(float)HEIGHT_RES;
v = CubeMapUtils::calcVector(faceIdx, x, y);
v = normalize3( v );
float maxHeight = -1;
for(int ie=0; ie<6*HEIGHT_RES*HEIGHT_RES; ie++)
{
float h = dot3F4( v, m_samplePoints[ie] )/m_scale;
// clamped here instead of asserted (create() asserts h <= 1)
if (h>1.f)
h=1.f;
// ADLASSERT( h <= 1.f );
if( h > maxHeight ) maxHeight = h;
}
{
// +1 rounds the quantized support height up; capped at 255
u8 quantizedHeight = min2((u8)(maxHeight*255.f)+1, 255);
sampleSupport( (Face)faceIdx, i, j ) = quantizedHeight;
}
}
}
// per-face bounds from the sample points, grown by the collision margin
for(int i=0; i<6; i++)
{
m_faceAabbs[i].setEmpty();
for(int j=0; j<HEIGHT_RES*HEIGHT_RES; j++)
{
float4 p = m_samplePoints[i*HEIGHT_RES*HEIGHT_RES + j];
m_faceAabbs[i].includePoint(p);
}
m_faceAabbs[i].expandBy( make_float4( m_collisionMargin ) );
}
// overall bounds = union of the six face bounds
m_aabb.setEmpty();
for(int i=0; i<6; i++)
{
m_aabb.includeVolume( m_faceAabbs[i] );
}
}
// Disabled (compiled out by #if 0): constructor taking a generic ShapeBase.
// Its ShapeBase/SphereShape includes are commented out at the top of the file.
// Spheres get a constant full-scale height with the radial direction as
// normal; every other shape is forwarded to create() with its buffers.
#if 0
ConvexHeightField::ConvexHeightField(const ShapeBase* shape)
: CollisionShape( SHAPE_CONVEX_HEIGHT_FIELD )
{
if( shape->m_type == ADL_SHAPE_SPHERE )
{
SphereShape* sphere = (SphereShape*)shape;
m_scale = sphere->m_radius;
for(u32 faceIdx=0; faceIdx<6; faceIdx++)
{
for(int i=0; i<HEIGHT_RES; i++) for(int j=0; j<HEIGHT_RES; j++)
{
float4 minNormal;
float x = (i+0.5f)/(float)HEIGHT_RES;
float y = (j+0.5f)/(float)HEIGHT_RES;
minNormal = CubeMapUtils::calcVector(faceIdx, x, y);
minNormal = normalize3( minNormal );
{
u8 quantizedHeight = (u8)(1.f*255.f);
sample( (Face)faceIdx, i,j ) = quantizedHeight;
sampleNormal( (Face)faceIdx, i,j ) = normalize3(minNormal);
// float minValue = 3.f*(1.f/3.f)*(1.f/3.f);
// sampleNormal( (Face)faceIdx, i,j ).w = (dot3F4( minBCrd, minBCrd ) - minValue )/(1.f-minValue);
}
}
}
calcSamplePoints( m_samplePoints );
m_aabb.m_max = make_float4( sphere->m_radius );
m_aabb.m_min = make_float4( -sphere->m_radius );
m_aabb.expandBy( make_float4( m_collisionMargin ) );
for(int i=0; i<6; i++)
{
m_faceAabbs[i].setEmpty();
for(int j=0; j<HEIGHT_RES*HEIGHT_RES; j++)
{
float4 p = m_samplePoints[i*HEIGHT_RES*HEIGHT_RES + j];
m_faceAabbs[i].includePoint(p);
}
m_faceAabbs[i].expandBy( make_float4( m_collisionMargin ) );
}
}
else
{
ShapeBase* s = (ShapeBase*)shape;
create( s->getVertexBuffer(), s->getTriangleBuffer(), s->getNumTris() );
}
}
#endif
/// Destructor; the body is empty, nothing is released explicitly here.
ConvexHeightField::~ConvexHeightField()
{
}
/// Signed distance estimate of point p (in shape-local space) to the
/// height-field surface, measured along the direction from the origin.
///
/// @param p  query point
/// @return   FLT_MAX when p lies on or outside the sphere of radius
///           m_scale; otherwise length(p) minus the bilinearly
///           interpolated surface height in the direction of p
float ConvexHeightField::queryDistance(const float4& p ) const
{
	// Outside the bounding sphere: no height-field lookup is attempted.
	if( dot3F4( p, p ) >= m_scale*m_scale ) return FLT_MAX;

	int faceIdx;
	float x, y;
	CubeMapUtils::calcCrd( p, faceIdx, x, y );

	// Convert to texel space; texel centers sit at integer + 0.5.
	x = (x*HEIGHT_RES) - 0.5f;
	y = (y*HEIGHT_RES) - 0.5f;

	float height;
	{
		int xi = (int)(x);
		int yi = (int)(y);
		float dx = x-xi;
		float dy = y-yi;

		{
			// Neighbouring texels, clamped at the face border.
			int xip = min2((int)(HEIGHT_RES-1), xi+1);
			int yip = min2((int)(HEIGHT_RES-1), yi+1);

			u8 xy = sample( (Face)faceIdx, xi, yi );
			u8 xpy = sample( (Face)faceIdx, xip, yi );
			u8 xpyp = sample( (Face)faceIdx, xip, yip );
			u8 xyp = sample( (Face)faceIdx, xi, yip );

			// Bilinear interpolation of the four quantized heights.
			height = (xy*(1.f-dx)+xpy*dx)*(1.f-dy) + (xyp*(1.f-dx)+xpyp*dx)*dy;
			height = height/255.f*m_scale;

			height = length3( p ) - height;
		}
	}

	return height;
}
float ConvexHeightField::querySupportHeight(const float4& p ) const
{
const float4 majorAxes[] = {make_float4(1,0,0,0), make_float4(0,1,0,0), make_float4(0,0,1,0)};
// if( dot3F4( p, p ) >= m_scale*m_scale ) return FLT_MAX;
int faceIdx;
float x, y;
CubeMapUtils::calcCrd( p, faceIdx, x, y );
x = (x*HEIGHT_RES) - 0.5f;
y = (y*HEIGHT_RES) - 0.5f;
float height;
{
int xi = (int)(x);
int yi = (int)(y);
float dx = x-xi;
float dy = y-yi;
{
int xip = min2((int)(HEIGHT_RES-1), xi+1);
int yip = min2((int)(HEIGHT_RES-1), yi+1);
u8 xy = sampleSupport( (Face)faceIdx, xi, yi );
u8 xpy = sampleSupport( (Face)faceIdx, xip, yi );
u8 xpyp = sampleSupport( (Face)faceIdx, xip, yip );
u8 xyp = sampleSupport( (Face)faceIdx, xi, yip );
height = max2( xy, max2( xpy, max2( xpyp, xyp ) ) );
height = height/255.f*m_scale;
}
}
return height;
}
// Returns the w component of the normal stored for the cell that the
// direction of p falls into, or 0 when p lies at or beyond radius m_scale.
// The meaning of the stored w value is not documented in this file.
//
// p : query point (only x, y, z are used).
float ConvexHeightField::queryW(const float4& p ) const
{
	if( dot3F4( p, p ) >= m_scale*m_scale ) return 0;

	int faceIdx;
	float x, y;
	CubeMapUtils::calcCrd( p, faceIdx, x, y );

	// Map [0,1] face coordinates to cell indices (nearest-lower cell, no blend).
	x = (x*HEIGHT_RES) - 0.5f;
	y = (y*HEIGHT_RES) - 0.5f;
	const int xi = (int)(x);
	const int yi = (int)(y);

	return sampleNormal( (Face)faceIdx, xi, yi ).w;
}
bool ConvexHeightField::queryDistanceWithNormal( const float4& p, float4& normalOut ) const
{
int faceIdx;
float x, y;
CubeMapUtils::calcCrd( p, faceIdx, x, y );
x = (x*HEIGHT_RES) - 0.5f;
y = (y*HEIGHT_RES) - 0.5f;
{
int xi = (int)(x);
int yi = (int)(y);
normalOut = sampleNormal( (Face)faceIdx, xi, yi );
}
return true;
}
// Fills `points` with one position per height sample: the unit direction of
// each cell centre scaled by that cell's de-quantized height.
// The output uses the same layout as the height data: face-major, then
// index = x + y*HEIGHT_RES within a face. The caller must supply room for
// HEIGHT_RES*HEIGHT_RES*6 entries.
void ConvexHeightField::calcSamplePoints(float4* points) const
{
	for(u32 face=0; face<6; face++)
	{
		float4* facePoints = points + HEIGHT_RES*HEIGHT_RES*face;

		for(int col=0; col<HEIGHT_RES; col++)
		{
			// Cell-centre coordinate in [0,1] along the face's first axis.
			const float u = (col+0.5f)/(float)HEIGHT_RES;

			for(int row=0; row<HEIGHT_RES; row++)
			{
				const float w = (row+0.5f)/(float)HEIGHT_RES;

				float4 dir = CubeMapUtils::calcVector(face, u, w);
				dir = normalize3( dir );

				const int quantized = sample( (Face)face, col, row );
				const float radius = quantized/255.f*m_scale;

				facePoints[ col + row*HEIGHT_RES ] = radius*dir;
			}
		}
	}
}
// Computes a single sample position for flat sample index sIdx: a unit
// direction derived from the face and cell, scaled by the de-quantized height
// read for that cell.
//
// NOTE(review): this routine decodes the cell as i = pIdx/HEIGHT_RES,
// j = pIdx%HEIGHT_RES and builds the direction from axis-permuted view vectors,
// whereas calcSamplePoints stores cell (i, j) at i + j*HEIGHT_RES and uses
// CubeMapUtils::calcVector. The two therefore do not appear to produce the
// same point for the same index — confirm which convention callers expect.
float4 ConvexHeightField::calcSamplePoint( int sIdx ) const
{
	const int cellsPerFace = HEIGHT_RES*HEIGHT_RES;

	Face face = (Face)(sIdx/cellsPerFace);
	const int cell = sIdx%cellsPerFace;
	const int axis = (face/2);          // 0 = X, 1 = Y, 2 = Z
	const int positive = face & 1;      // odd face index = positive side

	// Unit vector through the centre of the face.
	float4 view = make_float4((axis==0)?1.f:0.f, (axis==1)?1.f:0.f, (axis==2)?1.f:0.f );
	if( positive==0 ) view *= -1.f;

	// In-face axes obtained by rotating the components of the view vector.
	float4 xVector = make_float4( view.z, view.x, view.y );
	float4 yVector = make_float4( view.y, view.z, view.x );
	float4 corner = view-xVector-yVector;

	const int i = cell/HEIGHT_RES;
	const int j = cell%HEIGHT_RES;

	// Walk from the face corner to the centre of cell (i, j), then project
	// onto the unit sphere.
	float4 dir = corner + (i+0.5f)*xVector/(HEIGHT_RES*0.5f) + (j+0.5f)*yVector/(HEIGHT_RES*0.5f);
	dir = normalize3( dir );

	const int quantized = sample( face, i, j );
	const float radius = quantized/255.f*m_scale;
	return radius*dir;
}
// Read-only pointer to the first entry of the cached sample-point array
// (getNumSamplePoints() entries).
const float4* ConvexHeightField::getSamplePoints() const
{
	return &m_samplePoints[0];
}
// Total number of sample points: HEIGHT_RES x HEIGHT_RES cells on each of the
// six cube faces.
int ConvexHeightField::getNumSamplePoints() const
{
	const int cellsPerFace = HEIGHT_RES*HEIGHT_RES;
	return cellsPerFace*6;
}
// Maps a scalar s to a three-component colour. Each component is an inverted
// parabola 1 - (4*(s - c))^2 whose peak is at c = 0.75, 0.5 and 0.25 for the
// first, second and third component respectively. Values are not clamped.
__inline
float4 rainbowMap( float s )
{
	const float gain = 4.f;
	const float r = gain*(s-0.75f);
	const float g = gain*(s-0.5f);
	const float b = gain*(s-0.25f);
	return make_float4( 1.f-r*r, 1.f-g*g, 1.f-b*b );
}

View File

@@ -0,0 +1,143 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef CONVEX_HEIGHT_FIELD_SHAPE_H
#define CONVEX_HEIGHT_FIELD_SHAPE_H
#include "Stubs/AdlQuaternion.h"
#include "Stubs/AdlCollisionShape.h"
#include "Stubs/AdlAabb.h"
class ShapeBase;
// Collision shape that stores a cube map of quantized heights: for each of the
// six cube faces it keeps HEIGHT_RES x HEIGHT_RES cells holding a u8 height,
// a u8 support height and a float4 normal. Heights are de-quantized as
// value/255 * m_scale.
class ConvexHeightField : public CollisionShape
{
public:
enum
{
// Number of cells along each edge of a cube face.
HEIGHT_RES = 4, //was 4 originally
};
// Cube face identifiers: axis = value/2 (X, Y, Z); odd values are the
// positive side ("P"), even values the negative side ("M").
enum Face
{
FACE_XM,
FACE_XP,
FACE_YM,
FACE_YP,
FACE_ZM,
FACE_ZP,
NUM_FACES,
};
// Build from a triangle mesh (vertex buffer + index buffer).
ConvexHeightField(const float4* vtxBuffer, const int4* idxBuffer, int nTriangles);
// Build from an existing shape object.
ConvexHeightField(const ShapeBase* shape);
// Build from nEqn float4 equations (presumably plane equations — TODO confirm).
ConvexHeightField(const float4* eqn, int nEqn);
// Default constructor: only tags the shape type; the data arrays and m_scale
// are left uninitialized.
ConvexHeightField(): CollisionShape( SHAPE_CONVEX_HEIGHT_FIELD ){}
virtual ~ConvexHeightField();
// CollisionShape interface
// Returns length(p) minus the interpolated height in the direction of p;
// FLT_MAX when p is at or beyond radius m_scale.
virtual float queryDistance(const float4& p ) const;
// distance is not written to normalOut.w
virtual bool queryDistanceWithNormal( const float4& p, float4& normalOut ) const;
// Largest of the four support samples around the direction of p, de-quantized.
float querySupportHeight(const float4& p ) const;
// Returns the w component of the stored normal for the cell hit by the
// direction of p, or 0 when p is at or beyond radius m_scale. The meaning of
// that w value is not documented in this file.
float queryW(const float4& p ) const;
// others
// Per-cell accessors. Storage index is HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES.
u8& sample(Face face, int x, int y);
u8 sample(Face face, int x, int y) const;
u8& sampleSupport(Face face, int x, int y);
u8 sampleSupport(Face face, int x, int y) const;
float4& sampleNormal(Face face, int x, int y);
float4 sampleNormal(Face face, int x, int y) const;
// Writes one position per cell into `points` (HEIGHT_RES*HEIGHT_RES*6 entries).
void calcSamplePoints(float4* points) const;
// Computes the position for a single flat sample index.
float4 calcSamplePoint(int sIdx) const;
// Cached sample points (m_samplePoints) and their count.
const float4* getSamplePoints() const;
int getNumSamplePoints() const;
//void displaySamples(const float4& translation, const Quaternion& quaternion) const;
private:
// Shared construction path from a triangle mesh.
void create( const float4* vtxBuffer, const int4* idxBuffer, int nTriangles );
public:
// Quantized heights, one u8 per cell.
u8 m_data[HEIGHT_RES*HEIGHT_RES*6];
// Normal stored per cell.
float4 m_normal[HEIGHT_RES*HEIGHT_RES*6];
// De-quantization scale: height = value/255 * m_scale. Also used as the
// bounding radius test in queryDistance/queryW.
float m_scale;
// Quantized support heights, one u8 per cell.
u8 m_supportHeight[HEIGHT_RES*HEIGHT_RES*6];
// Cached positions of every cell's sample point.
float4 m_samplePoints[HEIGHT_RES*HEIGHT_RES*6];
// One bounding box per cube face.
Aabb m_faceAabbs[6];
};
// Mutable access to the quantized height of cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
u8& ConvexHeightField::sample(Face face, int x, int y)
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_data[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
// Read-only copy of the quantized height of cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
u8 ConvexHeightField::sample(Face face, int x, int y) const
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_data[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
// Mutable access to the quantized support height of cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
u8& ConvexHeightField::sampleSupport(Face face, int x, int y)
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_supportHeight[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
// Read-only copy of the quantized support height of cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
u8 ConvexHeightField::sampleSupport(Face face, int x, int y) const
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_supportHeight[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
// Mutable access to the normal stored for cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
float4& ConvexHeightField::sampleNormal(Face face, int x, int y)
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_normal[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
// Read-only copy of the normal stored for cell (x, y) on `face`.
// Cells are stored face-major; within a face the index is x + y*HEIGHT_RES.
__inline
float4 ConvexHeightField::sampleNormal(Face face, int x, int y) const
{
	// Check both ends of every index: a negative x or y, or a face outside
	// [0, NUM_FACES), would otherwise silently address a different cell or
	// memory outside the array.
	ADLASSERT( (int)face >= 0 );
	ADLASSERT( (int)face < NUM_FACES );
	ADLASSERT( x >= 0 );
	ADLASSERT( x < HEIGHT_RES );
	ADLASSERT( y >= 0 );
	ADLASSERT( y < HEIGHT_RES );
	return m_normal[ HEIGHT_RES*HEIGHT_RES*face + x + y*HEIGHT_RES ];
}
#endif

View File

@@ -0,0 +1,111 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#pragma once
// Coords are 0.5f shifted. See CubeMapDemo.cpp for usage.
// Stateless helpers that convert between 3D directions and cube-map
// coordinates (face index plus two coordinates in [0,1] on that face).
// Face indices follow axis*2 + side, where side is 0 for the negative
// direction and 1 for the positive direction of axis X(0), Y(1) or Z(2).
class CubeMapUtils
{
public:
//enum Face
//{
// FACE_XM,
// FACE_XP,
// FACE_YM,
// FACE_YP,
// FACE_ZM,
// FACE_ZP,
// NUM_FACES,
//};
// Direction p -> (face index, x, y) with x and y in [0,1].
__inline
static void calcCrd(const float4& p, int& faceIdxOut, float& x, float& y);
// (face index, x, y in [0,1]) -> point on the cube surface (not normalized).
__inline
static float4 calcVector(int faceIdx, float x, float y);
};
__inline
void CubeMapUtils::calcCrd(const float4& p, int& faceIdxOut, float& x, float& y)
{
const float4 majorAxes[] = {make_float4(1,0,0,0), make_float4(0,1,0,0), make_float4(0,0,1,0)};
float4 majorAxis;
{
int idx;
float r2[] = {p.x*p.x, p.y*p.y, p.z*p.z};
idx = (r2[1]>r2[0])? 1:0;
idx = (r2[2]>r2[idx])? 2:idx;
majorAxis = majorAxes[idx];
bool isNeg = dot3F4( p, majorAxis ) < 0.f;
faceIdxOut = (idx*2+((isNeg)? 0:1));
//==
float4 abs = make_float4( fabs(p.x), fabs(p.y), fabs(p.z), 0.f );
float d;
if( idx == 0 )
{
x = p.y;
y = p.z;
d = abs.x;
}
else if( idx == 1 )
{
x = p.z;
y = p.x;
d = abs.y;
}
else
{
x = p.x;
y = p.y;
d = abs.z;
}
float dInv = (d==0.f)? 0.f: (1.f/d);
x = (x*dInv+1.f)*0.5f;
y = (y*dInv+1.f)*0.5f;
}
}
// Inverse of calcCrd: returns the point on the cube surface for face faceIdx
// at face coordinates (x, y) in [0,1]. The result is not normalized.
// Even face indices lie on the -1 side of their axis, odd ones on the +1 side.
__inline
float4 CubeMapUtils::calcVector(int faceIdx, float x, float y)
{
	const int axis = faceIdx/2;
	const float side = (faceIdx%2 == 0)? -1.f:1.f;

	// [0,1] -> [-1,1]
	x = x*2.f-1.f;
	y = y*2.f-1.f;

	switch( axis )
	{
	case 0:
		return make_float4(side, x, y);
	case 1:
		return make_float4(y,side,x);
	default:
		return make_float4(x,y,side);
	}
}

View File

@@ -0,0 +1,699 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "CustomCollisionDispatcher.h"
#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "CustomConvexShape.h"
#include "CustomConvexPairCollision.h"
#include "LinearMath/btQuickprof.h"
#ifdef CL_PLATFORM_AMD
#include "Adl/Adl.h"
#include "Stubs/AdlMath.h"
#include "Stubs/AdlContact4.h"
#include "Stubs/AdlQuaternion.h"
#include "Stubs/ChNarrowPhase.h"
#include "Stubs/Solver.h"
// Private state of CustomCollisionDispatcher for the OpenCL path: the two Adl
// devices plus every host/GPU buffer the narrowphase and solver operate on.
// The constructor zero-fills an instance with memset before assigning fields.
struct CustomDispatchData
{
// OpenCL device wrapper built around the caller-supplied context/queue.
adl::DeviceCL* m_ddcl;
// Host (CPU) device used for the *CPU buffers.
adl::Device* m_deviceHost;
// Shape storage filled through ChNarrowphase::setShape.
ShapeDataType m_ShapeBuffer;
// Broadphase pairs as (body index, body index), host and GPU copies.
adl::HostBuffer<int2>* m_pBufPairsCPU;
adl::Buffer<int2>* m_pBufPairsGPU;
// Narrowphase contact output, GPU and host copies.
adl::Buffer<Contact4>* m_pBufContactOutGPU;
adl::HostBuffer<Contact4>* m_pBufContactOutCPU;
// Narrowphase working data returned by ChNarrowphase::allocate.
adl::ChNarrowphase<adl::TYPE_CL>::Data* m_Data;
// Rigid body state (pose, velocities, inverse mass), indexed by companion id.
adl::HostBuffer<RigidBodyBase::Body>* m_pBufRBodiesCPU;
adl::Buffer<RigidBodyBase::Body>* m_pBufRBodiesGPU;
// Per-body inertia information, indexed by companion id.
// Note: the CPU copy is declared as adl::Buffer (not HostBuffer) but is
// allocated on m_deviceHost.
adl::Buffer<RigidBodyBase::Shape>* m_bodyInfoBufferCPU;
adl::Buffer<RigidBodyBase::Shape>* m_bodyInfoBufferGPU;
// GPU solver state and its constraint storage.
adl::Solver<adl::TYPE_CL>::Data* m_solverDataGPU;
SolverData m_contactCGPU;
void* m_frictionCGPU;
// Number of shapes already uploaded to m_ShapeBuffer; also the next free slot.
int m_numAcceleratedShapes;
};
#endif //CL_PLATFORM_AMD
// Constructs the dispatcher. Without CL_PLATFORM_AMD, or when no context/queue
// is supplied, m_internalData stays 0 and dispatchAllCollisionPairs falls back
// to btCollisionDispatcher. Otherwise all host and GPU buffers used by the
// OpenCL narrowphase and solver are allocated up front at their maximum sizes.
CustomCollisionDispatcher::CustomCollisionDispatcher(btCollisionConfiguration* collisionConfiguration
#ifdef CL_PLATFORM_AMD
, cl_context context,cl_device_id device,cl_command_queue queue
#endif //CL_PLATFORM_AMD
):btCollisionDispatcher(collisionConfiguration),
m_internalData(0)
{
#ifdef CL_PLATFORM_AMD
if (context && queue)
{
m_internalData = new CustomDispatchData();
// Start from an all-zero struct so every pointer is null until assigned below.
memset(m_internalData,0,sizeof(CustomDispatchData));
adl::DeviceUtils::Config cfg;
// Wrap the caller-owned OpenCL handles in an Adl device instead of letting
// Adl create its own context.
m_internalData->m_ddcl = new adl::DeviceCL();
m_internalData->m_ddcl->m_deviceIdx = device;
m_internalData->m_ddcl->m_context = context;
m_internalData->m_ddcl->m_commandQueue = queue;
m_internalData->m_ddcl->m_kernelManager = new adl::KernelManager;
// Host-side device and the CPU staging buffers.
m_internalData->m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg );
m_internalData->m_pBufPairsCPU = new adl::HostBuffer<int2>(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_pBufContactOutCPU = new adl::HostBuffer<Contact4>(m_internalData->m_deviceHost, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_pBufRBodiesCPU = new adl::HostBuffer<RigidBodyBase::Body>(m_internalData->m_deviceHost, MAX_CONVEX_BODIES_CL);
m_internalData->m_bodyInfoBufferCPU = new adl::Buffer<RigidBodyBase::Shape>(m_internalData->m_deviceHost,MAX_CONVEX_BODIES_CL);
// GPU-side buffers, narrowphase data and solver data.
m_internalData->m_pBufContactOutGPU = new adl::Buffer<Contact4>(m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_bodyInfoBufferGPU = new adl::Buffer<RigidBodyBase::Shape>(m_internalData->m_ddcl,MAX_CONVEX_BODIES_CL);
m_internalData->m_pBufPairsGPU = new adl::Buffer<int2>(m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_pBufRBodiesGPU = new adl::Buffer<RigidBodyBase::Body>(m_internalData->m_ddcl, MAX_CONVEX_BODIES_CL);
m_internalData->m_Data = adl::ChNarrowphase<adl::TYPE_CL>::allocate(m_internalData->m_ddcl);
m_internalData->m_ShapeBuffer = adl::ChNarrowphase<adl::TYPE_CL>::allocateShapeBuffer(m_internalData->m_ddcl, MAX_CONVEX_SHAPES_CL);
// No shapes uploaded yet; slots are handed out lazily during dispatch.
m_internalData->m_numAcceleratedShapes = 0;
m_internalData->m_contactCGPU = adl::Solver<adl::TYPE_CL>::allocateConstraint4( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL);
m_internalData->m_frictionCGPU = adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_internalData->m_ddcl, MAX_BROADPHASE_COLLISION_CL);
}
#endif //CL_PLATFORM_AMD
}
// Releases everything the constructor allocated for the OpenCL path.
// Buffers are destroyed before the devices they were created on.
CustomCollisionDispatcher::~CustomCollisionDispatcher(void)
{
#ifdef CL_PLATFORM_AMD
	if (m_internalData)
	{
		// Buffers created with new in the constructor. The rigid-body and
		// body-info buffers were previously never freed.
		delete m_internalData->m_pBufPairsCPU;
		delete m_internalData->m_pBufPairsGPU;
		delete m_internalData->m_pBufContactOutGPU;
		delete m_internalData->m_pBufContactOutCPU;
		delete m_internalData->m_pBufRBodiesCPU;
		delete m_internalData->m_pBufRBodiesGPU;
		delete m_internalData->m_bodyInfoBufferCPU;
		delete m_internalData->m_bodyInfoBufferGPU;

		// Solver storage obtained through the Solver allocate* functions.
		adl::Solver<adl::TYPE_CL>::deallocateConstraint4( m_internalData->m_contactCGPU );
		adl::Solver<adl::TYPE_CL>::deallocateFrictionConstraint( m_internalData->m_frictionCGPU );
		adl::Solver<adl::TYPE_CL>::deallocate(m_internalData->m_solverDataGPU);

		// NOTE(review): m_Data (ChNarrowphase::allocate), m_ShapeBuffer
		// (ChNarrowphase::allocateShapeBuffer) and m_ddcl->m_kernelManager
		// (new adl::KernelManager) are also allocated in the constructor and
		// have no matching release here. Their deallocation functions are not
		// visible in this file — confirm the ChNarrowphase/DeviceCL API and
		// release them as well.

		adl::DeviceUtils::deallocate(m_internalData->m_deviceHost);
		delete m_internalData->m_ddcl;
		delete m_internalData;
		m_internalData = 0;
	}
#endif //CL_PLATFORM_AMD
}
#ifdef CL_PLATFORM_AMD
#include "BulletDynamics/Dynamics/btRigidBody.h"
// Builds the per-body inertia record for a Bullet collision object.
// Static/kinematic objects and objects that are not btRigidBody get zero
// inverse inertia. Dynamic bodies get their local inverse inertia diagonal in
// m_initInvInertia and the world-space tensor R * I^-1 * R^T in m_invInertia,
// where R is the rotation of the object's world transform.
RigidBodyBase::Shape CreateBodyInfo(const btCollisionObject& colObj)
{
	RigidBodyBase::Shape info;
	const btRigidBody* rigidBody = btRigidBody::upcast(&colObj);

	// Both cases start from a zero local inverse inertia.
	info.m_initInvInertia = mtZero();

	if( colObj.isStaticOrKinematicObject() || !rigidBody)
	{
		info.m_invInertia = mtZero();
		return info;
	}

	// Local-space inverse inertia is diagonal.
	btVector3 invDiag = rigidBody->getInvInertiaDiagLocal();
	info.m_initInvInertia.m_row[0].x = invDiag.x();
	info.m_initInvInertia.m_row[1].y = invDiag.y();
	info.m_initInvInertia.m_row[2].z = invDiag.z();

	// Convert the Bullet orientation into an Adl quaternion.
	btQuaternion orientation = colObj.getWorldTransform().getRotation();
	Quaternion qAdl;
	qAdl.x = orientation.getX();
	qAdl.y = orientation.getY();
	qAdl.z = orientation.getZ();
	qAdl.w = orientation.getW();

	// Rotate the local tensor into world space.
	Matrix3x3 rot = qtGetRotationMatrix( qAdl);
	Matrix3x3 rotT = mtTranspose( rot );
	info.m_invInertia = mtMul( mtMul( rot, info.m_initInvInertia ), rotT );
	return info;
}
// Converts a Bullet collision object into the Adl rigid body record used by
// the OpenCL pipeline: world position and orientation, velocities, inverse
// mass, material coefficients and the index of its accelerated shape.
// Static/kinematic objects and non-btRigidBody objects get zero velocities and
// zero inverse mass.
RigidBodyBase::Body CreateRBodyCL(const btCollisionObject& colObj, int shapeIdx)
{
	RigidBodyBase::Body out;

	// Pose from the world transform.
	const btVector3& origin = colObj.getWorldTransform().getOrigin();
	out.m_pos.x = origin.getX();
	out.m_pos.y = origin.getY();
	out.m_pos.z = origin.getZ();
	out.m_pos.w = 0.0f;

	btQuaternion rotation = colObj.getWorldTransform().getRotation();
	out.m_quat.x = rotation.getX();
	out.m_quat.y = rotation.getY();
	out.m_quat.z = rotation.getZ();
	out.m_quat.w = rotation.getW();

	// Shape slot and material properties do not depend on the body type.
	out.m_shapeIdx = shapeIdx;
	out.m_restituitionCoeff = colObj.getRestitution();
	out.m_frictionCoeff = colObj.getFriction();

	// Motion state: only dynamic rigid bodies carry velocity and mass.
	const btRigidBody* rigidBody = btRigidBody::upcast(&colObj);
	const bool isDynamic = rigidBody && !colObj.isStaticOrKinematicObject();
	if( isDynamic )
	{
		const btVector3& linear = rigidBody->getLinearVelocity();
		const btVector3& angular = rigidBody->getAngularVelocity();
		out.m_linVel = make_float4(linear.x(),linear.y(),linear.z(),0.0f);
		out.m_angVel = make_float4(angular.x(),angular.y(),angular.z(),0.0f);
		out.m_invMass = rigidBody->getInvMass();
	}
	else
	{
		out.m_linVel = make_float4(0.0f, 0.0f, 0.0f);
		out.m_angVel = make_float4(0.0f, 0.0f, 0.0f);
		out.m_invMass = 0.f;
	}
	return out;
}
#endif //CL_PLATFORM_AMD
// Processes every broadphase pair.
//
// Without OpenCL data (m_internalData == 0, or CL_PLATFORM_AMD undefined) this
// simply forwards to btCollisionDispatcher. Otherwise it:
//   1. refreshes the existing manifolds,
//   2. mirrors each pair and both of its bodies into the host staging buffers,
//      lazily uploading any convex height field shape seen for the first time,
//   3. uploads bodies, pairs and inertia data to the GPU,
//   4. runs the GPU narrowphase,
//   5. either runs the contact solver and copies the resulting velocities back
//      into the btRigidBody objects (useSolver), or converts the contacts into
//      btPersistentManifold points.
//
// Body indices are the companion ids of the collision objects; pairs whose
// objects have negative companion ids are not supported (asserted).
//
// Fixes relative to the previous revision:
//   * body 1 is now created with its own shape index (was body 0's),
//   * bodies/pairs are uploaded before the narrowphase consumes them,
//   * body indices beyond MAX_CONVEX_BODIES_CL no longer write past the host
//     buffers,
//   * the temporary constraint copy on the CPU debug path is released.
void CustomCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher)
{
	BT_PROFILE("CustomCollisionDispatcher::dispatchAllCollisionPairs");
	{
		btBroadphasePairArray& overlappingPairArray = pairCache->getOverlappingPairArray();
		bool bGPU = (m_internalData != 0);
#ifdef CL_PLATFORM_AMD
		if ( !bGPU )
#endif //CL_PLATFORM_AMD
		{
			BT_PROFILE("btCollisionDispatcher::dispatchAllCollisionPairs");
			btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher);
		}
#ifdef CL_PLATFORM_AMD
		else
		{
			{
				BT_PROFILE("refreshContactPoints");
				//----------------------------------------------------------------
				// GPU version of convex heightmap narrowphase collision detection
				//----------------------------------------------------------------
				for ( int i = 0; i < getNumManifolds(); i++ )
				{
					btPersistentManifold* manifold = getManifoldByIndexInternal(i);
					btCollisionObject* body0 = (btCollisionObject*)manifold->getBody0();
					btCollisionObject* body1 = (btCollisionObject*)manifold->getBody1();
					manifold->refreshContactPoints(body0->getWorldTransform(),body1->getWorldTransform());
				}
			}

			// OpenCL
			int nColPairsFromBP = overlappingPairArray.size();
			btAssert(MAX_BROADPHASE_COLLISION_CL >= nColPairsFromBP);
			int maxBodyIndex = -1;
			{
				BT_PROFILE("CreateRBodyCL and GPU pairs");
				for ( int i=0; i<overlappingPairArray.size(); i++)
				{
					btAssert(i<MAX_BROADPHASE_COLLISION_CL);
					btBroadphasePair* pair = &overlappingPairArray[i];
					btCollisionObject* colObj0 = (btCollisionObject*)pair->m_pProxy0->m_clientObject;
					btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject;
					int bodyIndex0 = colObj0->getCompanionId();
					int bodyIndex1 = colObj1->getCompanionId();

					//keep a one-to-one mapping between Bullet and Adl broadphase pairs
					(*m_internalData->m_pBufPairsCPU)[i].x = bodyIndex0;
					(*m_internalData->m_pBufPairsCPU)[i].y = bodyIndex1;

					if (bodyIndex0>=0 && bodyIndex1>=0)
					{
						// Reject indices that do not fit the body buffers before
						// anything is written through them.
						// NOTE(review): the pair entry itself still carries the
						// out-of-range index; confirm how the narrowphase kernel
						// should be told to ignore such a pair.
						const int pairMaxIndex = (bodyIndex0>bodyIndex1) ? bodyIndex0 : bodyIndex1;
						btAssert(pairMaxIndex<MAX_CONVEX_BODIES_CL);
						if (pairMaxIndex>=MAX_CONVEX_BODIES_CL)
						{
							printf("error: maxBodyIndex(%d)>MAX_CONVEX_BODIES_CL(%d)\n",pairMaxIndex,MAX_CONVEX_BODIES_CL);
							continue;
						}

						//create companion shapes (if necessary)
						btAssert(colObj0->getCollisionShape()->getShapeType() == CUSTOM_POLYHEDRAL_SHAPE_TYPE);
						btAssert(colObj1->getCollisionShape()->getShapeType() == CUSTOM_POLYHEDRAL_SHAPE_TYPE);
						CustomConvexShape* convexShape0 = (CustomConvexShape*)colObj0->getCollisionShape();
						CustomConvexShape* convexShape1 = (CustomConvexShape*)colObj1->getCollisionShape();

						// A negative companion index means the shape has not been
						// uploaded yet: take the next free slot and upload it.
						if (convexShape0->m_acceleratedCompanionShapeIndex<0)
						{
							convexShape0->m_acceleratedCompanionShapeIndex = m_internalData->m_numAcceleratedShapes;
							adl::ChNarrowphase<adl::TYPE_CL>::setShape(m_internalData->m_ShapeBuffer, convexShape0->m_ConvexHeightField, convexShape0->m_acceleratedCompanionShapeIndex, 0.0f);
							m_internalData->m_numAcceleratedShapes++;
						}
						if (convexShape1->m_acceleratedCompanionShapeIndex<0)
						{
							convexShape1->m_acceleratedCompanionShapeIndex = m_internalData->m_numAcceleratedShapes;
							adl::ChNarrowphase<adl::TYPE_CL>::setShape(m_internalData->m_ShapeBuffer, convexShape1->m_ConvexHeightField, convexShape1->m_acceleratedCompanionShapeIndex, 0.0f);
							m_internalData->m_numAcceleratedShapes++;
						}
						btAssert(m_internalData->m_numAcceleratedShapes<MAX_CONVEX_SHAPES_CL);

						if (pairMaxIndex>maxBodyIndex)
							maxBodyIndex = pairMaxIndex;

						// Each body must reference its OWN shape slot.
						(*m_internalData->m_pBufRBodiesCPU)[bodyIndex0] = CreateRBodyCL(*colObj0, convexShape0->m_acceleratedCompanionShapeIndex);
						m_internalData->m_bodyInfoBufferCPU->m_ptr[bodyIndex0] = CreateBodyInfo(*colObj0);
						(*m_internalData->m_pBufRBodiesCPU)[bodyIndex1] = CreateRBodyCL(*colObj1, convexShape1->m_acceleratedCompanionShapeIndex);
						m_internalData->m_bodyInfoBufferCPU->m_ptr[bodyIndex1] = CreateBodyInfo(*colObj1);
					} else
					{
						//TODO: dispatch using default dispatcher
						btAssert(0);
					}
				}
			}

			if (maxBodyIndex>=0)
			{
				int numOfConvexRBodies = maxBodyIndex+1;

				{
					BT_PROFILE("rigid body data to GPU buffer");
					// Upload this frame's bodies, pairs and inertia data BEFORE the
					// narrowphase runs; it reads the GPU copies of all of them.
					// Only the pairs that were actually filled in are transferred.
					m_internalData->m_pBufRBodiesGPU->write(m_internalData->m_pBufRBodiesCPU->m_ptr, numOfConvexRBodies);
					m_internalData->m_pBufPairsGPU->write(m_internalData->m_pBufPairsCPU->m_ptr, nColPairsFromBP);
					//write body with current linear/angluar velocities to GPU
					m_internalData->m_bodyInfoBufferGPU->write(m_internalData->m_bodyInfoBufferCPU->m_ptr,numOfConvexRBodies);
					adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);
				}

				adl::ChNarrowphaseBase::Config cfgNP;
				cfgNP.m_collisionMargin = 0.01f;
				int nContactOut = 0;
				{
					BT_PROFILE("ChNarrowphase::execute");
					adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, m_internalData->m_pBufPairsGPU, nColPairsFromBP, m_internalData->m_pBufRBodiesGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
					adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);
				}

				// Hard-coded pipeline switches (debugging aids).
				bool useCpu = false;//true;
				bool useSolver = true;//true;//false;

				if (useSolver)
				{
					float dt=1./60.;
					adl::SolverBase::ConstraintCfg csCfg( dt );
					csCfg.m_enableParallelSolve = true;
					csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
					csCfg.m_staticIdx = -1;//numOfConvexRBodies-1;//m_nBodies-1;

					if (useCpu)
					{
						{
							BT_PROFILE("read m_pBufContactOutGPU");
							m_internalData->m_pBufContactOutGPU->read(m_internalData->m_pBufContactOutCPU->m_ptr, nContactOut);//MAX_BROADPHASE_COLLISION_CL);
							adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);
						}
						BT_PROFILE("CPU stuff");
						adl::Solver<adl::TYPE_HOST>::Data* solverData = adl::Solver<adl::TYPE_HOST>::allocate( m_internalData->m_deviceHost, nContactOut);
						SolverData contactCPU = adl::Solver<adl::TYPE_HOST>::allocateConstraint4(
							m_internalData->m_deviceHost,
							numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL );
						void* frictionCPU = adl::Solver<adl::TYPE_HOST>::allocateFrictionConstraint(
							m_internalData->m_deviceHost,
							numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL );

						if (nContactOut)
						{
							reorderConvertToConstraints2(
								solverData,
								m_internalData->m_pBufRBodiesCPU,
								m_internalData->m_bodyInfoBufferCPU,
								m_internalData->m_pBufContactOutCPU,
								contactCPU,
								frictionCPU,
								nContactOut,
								csCfg );

							bool forceGPU = true;
							if (forceGPU)
							{
								// Host-side copy of the GPU constraints, kept only so
								// they can be inspected while debugging.
								SolverData contactCPUcopy = adl::Solver<adl::TYPE_HOST>::allocateConstraint4(
									m_internalData->m_deviceHost,
									numOfConvexRBodies*MAX_PAIRS_PER_BODY_CL );

								adl::Solver<adl::TYPE_CL>::reorderConvertToConstraints(
									m_internalData->m_solverDataGPU,
									m_internalData->m_pBufRBodiesGPU,
									m_internalData->m_bodyInfoBufferGPU,
									m_internalData->m_pBufContactOutGPU,
									m_internalData->m_contactCGPU,
									m_internalData->m_frictionCGPU,
									nContactOut,
									csCfg );
								adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);

								m_internalData->m_contactCGPU->read(contactCPUcopy->m_ptr,nContactOut);
								adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);
								//m_internalData->m_contactCGPU->write(contactCPU->m_ptr,nContactOut);
								adl::DeviceUtils::waitForCompletion(m_internalData->m_ddcl);

								m_internalData->m_solverDataGPU->m_nIterations = 4;
								adl::Solver<adl::TYPE_CL>::solveContactConstraint( m_internalData->m_solverDataGPU,
									m_internalData->m_pBufRBodiesGPU,
									m_internalData->m_bodyInfoBufferGPU,
									m_internalData->m_contactCGPU,
									0,
									nContactOut );
								adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl );

								//read body updated linear/angular velocities back to CPU
								m_internalData->m_pBufRBodiesGPU->read(
									m_internalData->m_pBufRBodiesCPU->m_ptr,numOfConvexRBodies);
								adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl );

								// Release the debug copy (it used to leak every frame).
								adl::Solver<adl::TYPE_HOST>::deallocateConstraint4( contactCPUcopy );
							} else
							{
								solverData->m_nIterations = 4;
								adl::Solver<adl::TYPE_HOST>::solveContactConstraint( solverData,
									m_internalData->m_pBufRBodiesCPU,
									m_internalData->m_bodyInfoBufferCPU,
									contactCPU,
									0,
									nContactOut );
							}
						}

						adl::Solver<adl::TYPE_HOST>::deallocateConstraint4( contactCPU );
						adl::Solver<adl::TYPE_HOST>::deallocateFrictionConstraint( frictionCPU );
						adl::Solver<adl::TYPE_HOST>::deallocate( solverData );
					}
					else
					{
						{
							BT_PROFILE("GPU reorderConvertToConstraints");
							adl::Solver<adl::TYPE_CL>::reorderConvertToConstraints(
								m_internalData->m_solverDataGPU,
								m_internalData->m_pBufRBodiesGPU,
								m_internalData->m_bodyInfoBufferGPU,
								m_internalData->m_pBufContactOutGPU,
								m_internalData->m_contactCGPU,
								m_internalData->m_frictionCGPU,
								nContactOut,
								csCfg );
						}
						{
							BT_PROFILE("GPU solveContactConstraint");
							m_internalData->m_solverDataGPU->m_nIterations = 4;
							adl::Solver<adl::TYPE_CL>::solveContactConstraint( m_internalData->m_solverDataGPU,
								m_internalData->m_pBufRBodiesGPU,
								m_internalData->m_bodyInfoBufferGPU,
								m_internalData->m_contactCGPU,
								0,
								nContactOut );
							adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl );
						}
						{
							BT_PROFILE("read body velocities back to CPU");
							//read body updated linear/angular velocities back to CPU
							m_internalData->m_pBufRBodiesGPU->read(
								m_internalData->m_pBufRBodiesCPU->m_ptr,numOfConvexRBodies);
							adl::DeviceUtils::waitForCompletion( m_internalData->m_ddcl );
						}
					}
				}

				//if we ran the solver, it will overwrite the batchIdx so we cannot write back the results
				//try to make it work by writing velocity back to rigid body
				if (useSolver)
				{
					BT_PROFILE("writing velocity back to btRigidBody");
					for ( int i=0; i<overlappingPairArray.size(); i++)
					{
						btAssert(i<MAX_BROADPHASE_COLLISION_CL);
						btBroadphasePair* pair = &overlappingPairArray[i];
						btCollisionObject* colObj0 = (btCollisionObject*)pair->m_pProxy0->m_clientObject;
						btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject;
						int bodyIndex0 = colObj0->getCompanionId();
						int bodyIndex1 = colObj1->getCompanionId();

						// Only bodies that were actually mirrored into the buffers
						// have a solved velocity to read back.
						const bool valid0 = (bodyIndex0>=0) && (bodyIndex0<numOfConvexRBodies);
						const bool valid1 = (bodyIndex1>=0) && (bodyIndex1<numOfConvexRBodies);

						btRigidBody* bodyA = btRigidBody::upcast(colObj0);
						if (valid0 && bodyA && !bodyA->isStaticOrKinematicObject())
						{
							RigidBodyBase::Body* bA = &m_internalData->m_pBufRBodiesCPU->m_ptr[bodyIndex0];
							bodyA->setLinearVelocity(btVector3(
								bA->m_linVel.x,
								bA->m_linVel.y,
								bA->m_linVel.z));
							bodyA->setAngularVelocity(btVector3(
								bA->m_angVel.x,
								bA->m_angVel.y,
								bA->m_angVel.z));
						}

						btRigidBody* bodyB = btRigidBody::upcast(colObj1);
						if (valid1 && bodyB && !bodyB->isStaticOrKinematicObject())
						{
							RigidBodyBase::Body* bB = &m_internalData->m_pBufRBodiesCPU->m_ptr[bodyIndex1];
							bodyB->setLinearVelocity(btVector3(
								bB->m_linVel.x,
								bB->m_linVel.y,
								bB->m_linVel.z));
							bodyB->setAngularVelocity(btVector3(
								bB->m_angVel.x,
								bB->m_angVel.y,
								bB->m_angVel.z));
						}
					}
				} else
				{
					BT_PROFILE("copy Contact4 to btPersistentManifold");
					// Now we got the narrowphase info from GPU and need to update rigid bodies with the info and go back to the original pipeline in Bullet physics.
					// NOTE(review): this branch reads m_pBufContactOutCPU, which is
					// only filled from the GPU on the useCpu solver path above —
					// confirm a read-back is added before enabling useSolver=false.
					for ( int i = 0; i < nContactOut; i++ )
					{
						Contact4 contact = (*m_internalData->m_pBufContactOutCPU)[i];

						// m_batchIdx carries the index of the originating broadphase pair.
						btAssert(contact.m_batchIdx>=0);
						btAssert(contact.m_batchIdx<overlappingPairArray.size());
						btBroadphasePair* pair = &overlappingPairArray[contact.m_batchIdx];
						btCollisionObject* colObj0 = (btCollisionObject*)pair->m_pProxy0->m_clientObject;
						btCollisionObject* colObj1 = (btCollisionObject*)pair->m_pProxy1->m_clientObject;

						if (!pair->m_algorithm)
						{
							pair->m_algorithm = findAlgorithm(colObj0,colObj1,0);
						}

						btManifoldResult contactPointResult(colObj0, colObj1);
						CustomConvexConvexPairCollision* pairAlgo = (CustomConvexConvexPairCollision*) pair->m_algorithm;
						if (!pairAlgo->getManifoldPtr())
						{
							pairAlgo->createManifoldPtr(colObj0,colObj1,dispatchInfo);
						}
						contactPointResult.setPersistentManifold(pairAlgo->getManifoldPtr());
						contactPointResult.getPersistentManifold()->refreshContactPoints(colObj0->getWorldTransform(),colObj1->getWorldTransform());

						const btTransform& transA = colObj0->getWorldTransform();
						const btTransform& transB = colObj1->getWorldTransform();

						int numPoints = contact.getNPoints();
						for ( int k=0; k < numPoints; k++ )
						{
							btVector3 normalOnBInWorld(
								contact.m_worldNormal.x,
								contact.m_worldNormal.y,
								contact.m_worldNormal.z);
							btVector3 pointInWorldOnB(
								contact.m_worldPos[k].x,
								contact.m_worldPos[k].y,
								contact.m_worldPos[k].z);
							// Penetration depth is stored in w; only penetrating
							// points (depth < 0) are forwarded.
							btScalar depth = contact.m_worldPos[k].w;
							if (depth<0)
							{
								const btVector3 deltaC = transB.getOrigin() - transA.getOrigin();
								normalOnBInWorld.normalize();
								// Orient the normal relative to the A->B centre offset.
								if((deltaC.dot(normalOnBInWorld))>0.0f)
								{
									normalOnBInWorld= -normalOnBInWorld;
									contactPointResult.addContactPoint(normalOnBInWorld, pointInWorldOnB, depth);
								}
								else
								{
									contactPointResult.addContactPoint(normalOnBInWorld, pointInWorldOnB-normalOnBInWorld*depth, depth);
								}
							}
						}
					}
				}
			}
		}
#endif //CL_PLATFORM_AMD
	}
}

View File

@@ -0,0 +1,70 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef CUSTOM_COLLISION_DISPATCHER_H
#define CUSTOM_COLLISION_DISPATCHER_H
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
// Capacity limits of the OpenCL pipeline buffers.
// Every value is fully parenthesized so the macros expand safely inside any
// expression (e.g. `n / MAX_CONVEX_BODIES_CL` or `i % MAX_CONVEX_BODIES_CL`).
// Maximum number of rigid bodies (indexed by companion id).
#define MAX_CONVEX_BODIES_CL (64*1024)
// Assumed upper bound of broadphase pairs per body.
#define MAX_PAIRS_PER_BODY_CL (32)
// Maximum number of distinct accelerated shapes.
#define MAX_CONVEX_SHAPES_CL (8192)
// Maximum number of broadphase pairs / contacts handled per step.
#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL)
struct CustomDispatchData;
#ifdef CL_PLATFORM_AMD
#ifdef __APPLE__
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <OpenCL/cl.h>
#endif
#else //__APPLE__
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <CL/cl.h>
#endif
#endif //__APPLE__
#endif
// Collision dispatcher that can route all broadphase pairs through an OpenCL
// narrowphase/solver pipeline. When built without CL_PLATFORM_AMD, or when no
// OpenCL context and queue are supplied, it behaves like btCollisionDispatcher.
class CustomCollisionDispatcher : public btCollisionDispatcher
{
public:
// collisionConfiguration is forwarded to btCollisionDispatcher. The OpenCL
// handles are optional; the GPU path is only set up when both context and
// queue are non-null.
CustomCollisionDispatcher (btCollisionConfiguration* collisionConfiguration
#ifdef CL_PLATFORM_AMD
, cl_context context = NULL,cl_device_id device = NULL,cl_command_queue queue = NULL
#endif //CL_PLATFORM_AMD
);
virtual ~CustomCollisionDispatcher(void);
protected:
// Opaque OpenCL state (defined in the .cpp); 0 when the GPU path is disabled.
CustomDispatchData* m_internalData;
// NOTE(review): no definition of GetPair is visible alongside the other
// member definitions — confirm it is implemented before calling it.
btBroadphasePair* GetPair(btBroadphasePairArray& pairArray, int idxBodyA, int idxBodyB);
public:
// Overrides btCollisionDispatcher: runs the GPU pipeline when m_internalData
// is set, otherwise defers to the base class implementation.
virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher);
};
#endif //CUSTOM_COLLISION_DISPATCHER_H

View File

@@ -0,0 +1,409 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "CustomConvexPairCollision.h"
#include "ConvexHeightFieldShape.h"
#include "CustomConvexShape.h"
#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
#include "Stubs/AdlContact4.h"
#include "Stubs/AdlTransform.h"
// Constructor: forwards every argument unchanged to the btConvexConvexAlgorithm base class.
// This class adds no state of its own, so the body is intentionally empty.
CustomConvexConvexPairCollision::CustomConvexConvexPairCollision(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold)
:btConvexConvexAlgorithm(mf,ci,body0,body1,simplexSolver,pdSolver,numPerturbationIterations, minimumPointsPerturbationThreshold)
{
}
// Destructor: nothing to release here; any cleanup is left to the base class destructor.
CustomConvexConvexPairCollision::~CustomConvexConvexPairCollision()
{
}
#include <Windows.h>
template<typename T>
T atomAdd(const T* ptr, int value)
{
return (T)InterlockedExchangeAdd((LONG*)ptr, value);
}
// Helper macros used by extractManifold below. They are plain serial loops.
// NOTE(review): the names suggest they stand in for parallel (GPU) primitives - confirm.
//
// PARALLEL_SUM: accumulates v[1..n-1] into v[0].
#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];
// PARALLEL_DO: runs `execution` n times; the loop index is named `ie` and may be used inside `execution`.
#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}
// REDUCE_MAX: leaves in v[0] the element with the largest .y among v[0..n-1]
// (offset starts at 0, so the first step compares v[0] with itself).
#define REDUCE_MAX(v, n) {int i=0;\
for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }
// REDUCE_MIN: same as REDUCE_MAX but keeps the element with the smallest .y.
#define REDUCE_MIN(v, n) {int i=0;\
for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }
// Reduces a set of contact candidates to at most four representative point indices.
//
// p          : candidate points; x/y/z are the position, w is read as a per-point scalar
//              (the caller in this file stores the sampled distance there).
// nPoints    : number of candidates; only the first 64 are considered.
// nearNormal : direction used to build the two sampling axes (not modified here).
// centerOut  : receives the average of the considered points.
// contactIdx : receives the selected indices into p.
// Returns the number of indices written: 0 for no input, nPoints when fewer than four
// candidates exist, otherwise 4 (the four indices are sorted ascending and may repeat
// when one point is extreme along several directions).
int extractManifold(const float4* p, int nPoints, float4& nearNormal, float4& centerOut,
int contactIdx[4])
{
if( nPoints == 0 ) return 0;
// The fixed-size scratch arrays below hold 64 entries, so clamp the input count.
nPoints = min2( nPoints, 64 );
float4 center = make_float4(0.f);
{
// Average of all considered points: sum a private copy into v[0], then divide.
float4 v[64];
memcpy( v, p, nPoints*sizeof(float4) );
PARALLEL_SUM( v, nPoints );
center = v[0]/(float)nPoints;
}
centerOut = center;
{ // sample 4 directions
if( nPoints < 4 )
{
// Too few points to reduce: keep them all.
for(int i=0; i<nPoints; i++) contactIdx[i] = i;
return nPoints;
}
// Build two axes u, v perpendicular to nearNormal from the first point's offset.
// NOTE(review): if p[0] coincides with center or the offset is parallel to nearNormal,
// u is a zero vector before normalization - confirm how normalize3 handles that.
float4 aVector = p[0] - center;
float4 u = cross3( nearNormal, aVector );
float4 v = cross3( nearNormal, u );
u = normalize3( u );
v = normalize3( v );
int idx[4];
float2 max00 = make_float2(0,FLT_MAX);
{
float4 dir0 = u;
float4 dir1 = -u;
float4 dir2 = v;
float4 dir3 = -v;
// idx, distance
{
{
int4 a[64];
for(int ie = 0; ie<nPoints; ie++ )
{
// Project the point (relative to the center) onto the four directions.
float4 f;
float4 r = p[ie]-center;
f.x = dot3F4( dir0, r );
f.y = dot3F4( dir1, r );
f.z = dot3F4( dir2, r );
f.w = dot3F4( dir3, r );
// Pack each projection as a sortable key: the float's bit pattern with its low
// 8 bits cleared, and the point index stored in those 8 bits.
a[ie].x = ((*(u32*)&f.x) & 0xffffff00);
a[ie].x |= (0xff & ie);
a[ie].y = ((*(u32*)&f.y) & 0xffffff00);
a[ie].y |= (0xff & ie);
a[ie].z = ((*(u32*)&f.z) & 0xffffff00);
a[ie].z |= (0xff & ie);
a[ie].w = ((*(u32*)&f.w) & 0xffffff00);
a[ie].w |= (0xff & ie);
}
// Per-direction maximum over the packed keys, accumulated into a[0].
// NOTE(review): the keys are compared as integers, which orders non-negative floats
// correctly but not negative ones; assumes int4 components are signed ints - confirm.
for(int ie=0; ie<nPoints; ie++)
{
a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;
a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;
a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;
a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;
}
// The low 8 bits of each winning key are the index of the winning point.
idx[0] = (int)a[0].x & 0xff;
idx[1] = (int)a[0].y & 0xff;
idx[2] = (int)a[0].z & 0xff;
idx[3] = (int)a[0].w & 0xff;
}
}
{
// Finds the (index, w) pair with the smallest w. The result (max00) is only
// consumed by the commented-out code further down.
float2 h[64];
PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );
REDUCE_MIN( h, nPoints );
max00 = h[0];
}
}
contactIdx[0] = idx[0];
contactIdx[1] = idx[1];
contactIdx[2] = idx[2];
contactIdx[3] = idx[3];
// if( max00.y < 0.0f )
// contactIdx[0] = (int)max00.x;
std::sort( contactIdx, contactIdx+4 );
return 4;
}
}
// Remove the file-local helper macros defined for extractManifold so they do not leak
// into the rest of the translation unit.
#undef PARALLEL_SUM
#undef PARALLEL_DO
#undef REDUCE_MAX
#undef REDUCE_MIN
// Tests the sample points of shapeB against shapeA and, when any point is closer than
// collisionMargin, appends one ContactPoint4 (up to four points) to contactsOut.
//
// shapeA / shapeB          : the queried shape and the shape whose sample points are tested.
// bodyApos/bodyAquat, ...  : position and orientation of each body.
// contactsOut, numContacts : output array and its running element count; numContacts is
//                            advanced through atomAdd whenever hits were found.
// contactCapacity          : number of elements available in contactsOut.
// collisionMargin          : distance threshold below which a sample counts as a hit.
// Returns the number of manifold points extracted for this call (0 when nothing hit).
int collideStraight(const ConvexHeightField* shapeA,const ConvexHeightField* shapeB,
const float4& bodyApos, Quaternion& bodyAquat,const float4& bodyBpos,const Quaternion& bodyBquat,
ContactPoint4* contactsOut, int& numContacts, int contactCapacity,
float collisionMargin )
{
// Stopwatch sw;
Transform trA;
trA = trSetTransform(bodyApos,bodyAquat);
Transform trB;
trB = trSetTransform(bodyBpos, bodyBquat);
// B2A = inverse(trA) * trB; it is applied below to take B's sample points into A's frame.
Transform B2A;
{
Transform invTrA = trInvert( trA );
B2A = trMul( invTrA, trB );
}
int nContacts = 0;
{ // testB against A
// Hit buffer sized for the worst case: every sample of all six faces.
float4 p[ConvexHeightField::HEIGHT_RES*ConvexHeightField::HEIGHT_RES*6];
int nHits = 0;
const float4* pInB = shapeB->getSamplePoints();
// Offset from B to A rotated by the inverse of B's orientation; used to cull samples
// on the side of B that faces away from A.
float4 baInB = qtInvRotate( bodyBquat, bodyApos - bodyBpos );
// For a height field the culling direction is zeroed, so no sample is culled by it.
if( shapeA->m_type == CollisionShape::SHAPE_HEIGHT_FIELD )
baInB = make_float4(0,0,0,0);
// sw.start();
for(int iface=0; iface<6; iface++)
{
// Coarse rejection: skip the whole face if its transformed bounds miss A's bounds.
Aabb aabb = shapeB->m_faceAabbs[iface];
aabb.transform( B2A.m_translation, B2A.m_rotation );
if( !shapeA->m_aabb.overlaps( aabb ) ) continue;
for(int ip=0; ip<ConvexHeightField::HEIGHT_RES*ConvexHeightField::HEIGHT_RES; ip++)
{
int i = iface*ConvexHeightField::HEIGHT_RES*ConvexHeightField::HEIGHT_RES+ip;
if( dot3F4( baInB, pInB[i] ) < 0.f ) continue;
float4 pInA = trMul1( B2A, pInB[i] );
if( shapeA->m_aabb.overlaps( pInA ) )
{
// Stopwatch sw1;
// sw1.start();
float dist = shapeA->queryDistance( pInA );
// sw1.stop();
// m_times[TIME_SAMPLE] += sw1.getMs();
if( dist < collisionMargin )
{
// Record the hit in A's frame with the sampled distance in w.
p[nHits] = make_float4(pInA.x, pInA.y, pInA.z, dist);
nHits++;
}
}
}
}
// sw.stop();
// m_times[TIME_TEST] += sw.getMs();
// sw.start();
if( nHits )
{
// Offset from A to B rotated by the inverse of A's orientation; passed to
// extractManifold as the direction used to pick the four points.
float4 ab = bodyBpos - bodyApos;
ab = qtInvRotate( bodyAquat, ab );
if( shapeA->m_type == CollisionShape::SHAPE_HEIGHT_FIELD )
{
//todo. sample normal from height field but just fake here
ab = make_float4(0,1,0,0);
}
int cIdx[4];
float4 center;
nContacts = extractManifold( p, nHits, ab, center, cIdx );
// The contact normal is taken from shapeA at the center of the hit points.
float4 contactNormal;
{
shapeA->queryDistanceWithNormal( center, contactNormal );
contactNormal = normalize3( contactNormal );
// u32 cmp = u8vCompress( contactNormal );
// contactNormal = make_float4( u8vGetX(cmp), u8vGetY(cmp), u8vGetZ(cmp), 0 );
}
// Reserve an output slot. numContacts is advanced even if the capacity test below fails.
// NOTE(review): `writeIdx+1 < contactCapacity` never uses the last slot of contactsOut -
// confirm whether `writeIdx < contactCapacity` was intended.
int writeIdx = atomAdd( &numContacts, 1 );
if( writeIdx+1 < contactCapacity )
{
ContactPoint4& c = contactsOut[writeIdx];
nContacts = min2( nContacts, 4 );
for(int i=0; i<nContacts; i++)
{
// Position from the selected hit and A's pose; w holds the distance reduced by
// the margin, clamped from below at -2*collisionMargin.
c.m_worldPos[i] = transform( p[cIdx[i]], bodyApos, bodyAquat );
c.m_worldPos[i].w = max2( p[cIdx[i]].w - collisionMargin, -2*collisionMargin );
}
c.m_worldNormal = normalize3( qtRotate( bodyAquat, contactNormal ) );
// Fixed material coefficients for every generated contact.
c.m_restituitionCoeff = 0.f;
c.m_frictionCoeff = 0.7f;
//c.m_bodyAPtr = (void*)bodyAIdx;
//c.m_bodyBPtr = (void*)bodyBIdx;
c.getNPoints() = nContacts;
}
}
// sw.stop();
// m_times[TIME_MANIFOLD] += sw.getMs();
}
return nContacts;
}
// Asks the dispatcher for a new manifold for the two bodies, stores it in m_manifoldPtr
// and flags it as owned by this algorithm. dispatchInfo is not consulted.
void CustomConvexConvexPairCollision::createManifoldPtr(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo)
{
    btPersistentManifold* const freshManifold = m_dispatcher->getNewManifold(body0,body1);
    m_manifoldPtr = freshManifold;
    m_ownManifold = true;
}
void CustomConvexConvexPairCollision::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
{
#if 1
if (!m_manifoldPtr)
{
//swapped?
m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
m_ownManifold = true;
}
resultOut->setPersistentManifold(m_manifoldPtr);
CustomConvexShape* convex0 = (CustomConvexShape*)body0->getCollisionShape();
CustomConvexShape* convex1 = (CustomConvexShape*)body1->getCollisionShape();
float4 bodyApos;
float4 bodyBpos;
Quaternion bodyAquat;
Quaternion bodyBquat;
const btTransform& transA = body0->getWorldTransform();
const btTransform& transB = body1->getWorldTransform();
const btVector3& pA = body0->getWorldTransform().getOrigin();
const btVector3& pB = body1->getWorldTransform().getOrigin();
btQuaternion qA = body0->getWorldTransform().getRotation();
btQuaternion qB = body1->getWorldTransform().getRotation();
bodyApos.x = pA.getX();
bodyApos.y = pA.getY();
bodyApos.z = pA.getZ();
bodyApos.w = 0.f;
bodyBpos.x = pB.getX();
bodyBpos.y = pB.getY();
bodyBpos.z = pB.getZ();
bodyBpos.w = 0.f;
bodyAquat.x = qA.getX();
bodyAquat.y = qA.getY();
bodyAquat.z = qA.getZ();
bodyAquat.w = qA.getW();
bodyBquat.x = qB.getX();
bodyBquat.y = qB.getY();
bodyBquat.z = qB.getZ();
bodyBquat.w = qB.getW();
#define CAPACITY_CONTACTS 4
ContactPoint4 contactsOut[CAPACITY_CONTACTS];
int freeContactIndex = 0;
int contactCapacity = CAPACITY_CONTACTS;
float collisionMargin = 0.001f;
m_manifoldPtr->refreshContactPoints(body0->getWorldTransform(),body1->getWorldTransform());
collideStraight(convex0->m_ConvexHeightField,convex1->m_ConvexHeightField,
bodyApos, bodyAquat,bodyBpos,bodyBquat,
contactsOut, freeContactIndex, contactCapacity,
collisionMargin );
collideStraight(convex1->m_ConvexHeightField,convex0->m_ConvexHeightField,
bodyBpos, bodyBquat,bodyApos,bodyAquat,
contactsOut, freeContactIndex, contactCapacity,
collisionMargin );
//copy points into manifold
//refresh manifold
btAssert(freeContactIndex<3);
for (int j=0;j<freeContactIndex;j++)
{
int numPoints = contactsOut[j].getNPoints();
// printf("numPoints = %d\n",numPoints);
for (int i=0;i<numPoints;i++)
{
ContactPoint4& c = contactsOut[j];
btVector3 normalOnBInWorld(
c.m_worldNormal.x,
c.m_worldNormal.y,
c.m_worldNormal.z);
btVector3 pointInWorldOnB(
c.m_worldPos[i].x,
c.m_worldPos[i].y,
c.m_worldPos[i].z);
btScalar depth = c.m_worldPos[i].w;
if (depth<0)
{
const btVector3 deltaC = transB.getOrigin() - transA.getOrigin();
if((deltaC.dot(normalOnBInWorld))>0.0f)
{
normalOnBInWorld= -normalOnBInWorld;
}
normalOnBInWorld.normalize();
if (j)
{
resultOut->addContactPoint(normalOnBInWorld, pointInWorldOnB, depth);
} else
{
resultOut->addContactPoint(normalOnBInWorld, pointInWorldOnB-normalOnBInWorld*depth, depth);
}
}
}
}
#else
btConvexConvexAlgorithm::processCollision(body0,body1,dispatchInfo,resultOut);
#endif
}
// Constructor: passes both solvers straight through to btConvexConvexAlgorithm::CreateFunc.
CustomConvexConvexPairCollision::CreateFunc::CreateFunc(btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver)
:btConvexConvexAlgorithm::CreateFunc(simplexSolver,pdSolver)
{
}
// Destructor: intentionally empty, this type holds no resources of its own.
CustomConvexConvexPairCollision::CreateFunc::~CreateFunc()
{
}

View File

@@ -0,0 +1,56 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#ifndef CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H
#define CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H
#include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h"
// Convex-convex pair algorithm derived from btConvexConvexAlgorithm that declares its own
// processCollision and exposes the manifold pointer it works with.
class CustomConvexConvexPairCollision : public btConvexConvexAlgorithm
{
public:
// All arguments are the ones btConvexConvexAlgorithm takes.
CustomConvexConvexPairCollision(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
virtual ~CustomConvexConvexPairCollision();
// Computes contacts between body0 and body1 and reports them through resultOut.
virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
// Returns the manifold currently held by this algorithm.
btPersistentManifold* getManifoldPtr()
{
return m_manifoldPtr;
}
// Creates the manifold for the two bodies; defined out of line.
void createManifoldPtr(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo);
// Factory that builds CustomConvexConvexPairCollision instances for the dispatcher.
struct CreateFunc :public btConvexConvexAlgorithm::CreateFunc
{
CreateFunc(btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver);
virtual ~CreateFunc();
// Constructs the algorithm with placement new inside memory obtained from the dispatcher's
// allocateCollisionAlgorithm, forwarding the solver settings stored in this factory.
virtual btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
{
void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(CustomConvexConvexPairCollision));
return new(mem) CustomConvexConvexPairCollision(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
}
};
};
#endif //CUSTOM_CONVEX_CONVEX_PAIR_COLLISION_H

View File

@@ -0,0 +1,45 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "CustomConvexShape.h"
#include "ConvexHeightFieldShape.h"
#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h"
// Builds the convex hull from the given points, marks the shape as
// CUSTOM_POLYHEDRAL_SHAPE_TYPE, and creates the companion ConvexHeightField from the
// plane equation of every polyhedron face.
// NOTE(review): the temporary plane array is never freed in this constructor - confirm
// whether ConvexHeightField takes ownership of it.
// NOTE(review): m_polyhedron is dereferenced without a null check right after
// initializePolyhedralFeatures() - confirm that call cannot fail.
CustomConvexShape::CustomConvexShape(const btScalar* points,int numPoints, int stride)
:btConvexHullShape(points,numPoints,stride),
m_acceleratedCompanionShapeIndex(-1)
{
    m_shapeType = CUSTOM_POLYHEDRAL_SHAPE_TYPE;
    initializePolyhedralFeatures();

    const int planeCount = m_polyhedron->m_faces.size();
    float4* planeEquations = new float4[planeCount];
    for (int faceIdx = 0; faceIdx < planeCount; ++faceIdx)
    {
        // Each face stores its plane as four scalars; copy them component by component.
        const btScalar* plane = m_polyhedron->m_faces[faceIdx].m_plane;
        float4& equation = planeEquations[faceIdx];
        equation.x = plane[0];
        equation.y = plane[1];
        equation.z = plane[2];
        equation.w = plane[3];
    }
    m_ConvexHeightField = new ConvexHeightField(planeEquations,planeCount);
}
// Destructor: releases the ConvexHeightField allocated in the constructor.
CustomConvexShape::~CustomConvexShape()
{
delete m_ConvexHeightField;
}

View File

@@ -0,0 +1,35 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#ifndef CUSTOM_CONVEX_SHAPE_H
#define CUSTOM_CONVEX_SHAPE_H
#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
// Convex hull shape that additionally carries a ConvexHeightField representation of itself.
class CustomConvexShape : public btConvexHullShape
{
public:
// Height-field representation of this hull (declared here via an elaborated type specifier).
class ConvexHeightField* m_ConvexHeightField;
// NOTE(review): presumably the index of this shape in an accelerated (GPU) shape table;
// only its declaration is visible here - confirm against the users of this field.
int m_acceleratedCompanionShapeIndex;
// points/numPoints/stride are passed on to btConvexHullShape.
CustomConvexShape(const btScalar* points,int numPoints,int stride);
virtual ~CustomConvexShape();
};
#endif //CUSTOM_CONVEX_SHAPE_H

View File

@@ -0,0 +1,230 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef AABB_H
#define AABB_H
#include "Stubs/AdlMath.h"
#include "Stubs/AdlQuaternion.h"
// Shape type identifiers; numbering starts at 2 and continues consecutively
// (ADL_SHAPE_HEIGHT_FIELD == 3, SHAPE_CONVEX_HEIGHT_FIELD == 4).
// NOTE(review): the last enumerator lacks the ADL_ prefix used by the other two.
enum AdlCollisionShapeTypes
{
ADL_SHAPE_SPHERE=2,
ADL_SHAPE_HEIGHT_FIELD,
SHAPE_CONVEX_HEIGHT_FIELD,
};
// Axis-aligned bounding box stored as two float4 corners (m_min, m_max).
// Only the x/y/z components take part in the box tests below.
_MEM_CLASSALIGN16
struct Aabb
{
public:
_MEM_ALIGNED_ALLOCATOR16;
// Resets the box to an inverted state (max = -FLT_MAX, min = +FLT_MAX).
__inline
void setEmpty();
// Grows the box so that it also encloses aabb.
__inline
void includeVolume( const Aabb& aabb );
// Grows the box so that it also encloses p.
__inline
void includePoint( const float4& p );
// True when p lies inside the box (boundaries included).
__inline
bool overlaps( const float4& p ) const;
// True when this box and aabb overlap (touching counts as overlapping).
__inline
bool overlaps( const Aabb& aabb ) const;
// Midpoint of m_min and m_max.
__inline
float4 center() const;
// Index (0, 1 or 2) of the axis with the largest extent.
__inline
int getMajorAxis() const;
// m_max - m_min.
__inline
float4 getExtent() const;
// Moves m_max by +r and m_min by -r.
__inline
void expandBy( const float4& r );
// Static form of the box/box overlap test.
__inline
static bool overlaps( const Aabb& a, const Aabb& b );
// Segment/box test using the start point and a precomputed per-axis inverse direction.
__inline
bool intersect(const float4* from, const float4* to, const float4* invRay) const;
// Replaces this box by the bounds of itself after rotation and translation.
__inline
void transform(const float4& translation, const Quaternion& quat);
__inline
void transform(const float4& translation, const Matrix3x3& rot);
public:
float4 m_max;
float4 m_min;
};
void Aabb::setEmpty()
{
m_max = make_float4( -FLT_MAX );
m_min = make_float4( FLT_MAX );
}
void Aabb::includeVolume(const Aabb& aabb)
{
m_max.x = max2( m_max.x, aabb.m_max.x );
m_min.x = min2( m_min.x, aabb.m_min.x );
m_max.y = max2( m_max.y, aabb.m_max.y );
m_min.y = min2( m_min.y, aabb.m_min.y );
m_max.z = max2( m_max.z, aabb.m_max.z );
m_min.z = min2( m_min.z, aabb.m_min.z );
}
// Enlarges this box per axis (x, y, z) so that it also contains the point `p`.
void Aabb::includePoint( const float4& p )
{
    // Upper corner: component-wise maximum.
    m_max.x = max2( m_max.x, p.x );
    m_max.y = max2( m_max.y, p.y );
    m_max.z = max2( m_max.z, p.z );

    // Lower corner: component-wise minimum.
    m_min.x = min2( m_min.x, p.x );
    m_min.y = min2( m_min.y, p.y );
    m_min.z = min2( m_min.z, p.z );
}
bool Aabb::overlaps( const float4& p ) const
{
float4 dx = m_max-p;
float4 dm = p-m_min;
return (dx.x >= 0 && dx.y >= 0 && dx.z >= 0)
&& (dm.x >= 0 && dm.y >= 0 && dm.z >= 0);
}
bool Aabb::overlaps( const Aabb& in ) const
{
/*
if( m_max.x < in.m_min.x || m_min.x > in.m_max.x ) return false;
if( m_max.y < in.m_min.y || m_min.y > in.m_max.y ) return false;
if( m_max.z < in.m_min.z || m_min.z > in.m_max.z ) return false;
return true;
*/
return overlaps( *this, in );
}
// Separating-axis test on x, y and z: the boxes overlap unless they are disjoint on at
// least one axis. Boxes that merely touch count as overlapping.
bool Aabb::overlaps( const Aabb& a, const Aabb& b )
{
    const bool apartX = a.m_max.x < b.m_min.x || a.m_min.x > b.m_max.x;
    const bool apartY = a.m_max.y < b.m_min.y || a.m_min.y > b.m_max.y;
    const bool apartZ = a.m_max.z < b.m_min.z || a.m_min.z > b.m_max.z;
    return !(apartX || apartY || apartZ);
}
// Midpoint of the two corners.
float4 Aabb::center() const
{
    const float4 cornerSum = m_max+m_min;
    return 0.5f*cornerSum;
}
int Aabb::getMajorAxis() const
{
float4 extent = getExtent();
int majorAxis = 0;
if( extent.s[1] > extent.s[0] )
majorAxis = 1;
if( extent.s[2] > extent.s[majorAxis] )
majorAxis = 2;
return majorAxis;
}
// Size of the box: upper corner minus lower corner.
float4 Aabb::getExtent() const
{
    const float4 size = m_max-m_min;
    return size;
}
void Aabb::expandBy( const float4& r )
{
m_max += r;
m_min -= r;
}
bool Aabb::intersect(const float4* from, const float4* to, const float4* invRay) const
{
float4 dFar;
dFar = (m_max - *from);
dFar *= *invRay;
float4 dNear;
dNear = (m_min - *from);
dNear *= *invRay;
float4 tFar;
tFar = max2(dFar, dNear);
float4 tNear;
tNear = min2(dFar, dNear);
float farf[] = { tFar.x, tFar.y, tFar.z };
float nearf[] = { tNear.x, tNear.y, tNear.z };
float minFar = min2(farf[0], min2(farf[1], farf[2]));
float maxNear = max2(nearf[0], max2(nearf[1], nearf[2]));
minFar = min2(1.0f, minFar );
maxNear = max2(0.0f, maxNear);
return (minFar >= maxNear);
}
void Aabb::transform(const float4& translation, const Matrix3x3& m)
{
float4 c = center();
Aabb& ans = *this;
float4 e[] = { m.m_row[0]*m_min, m.m_row[1]*m_min, m.m_row[2]*m_min };
float4 f[] = { m.m_row[0]*m_max, m.m_row[1]*m_max, m.m_row[2]*m_max };
ans.m_max = ans.m_min = translation;
{ int j=0;
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
ans.m_min.x += mi.x+mi.y+mi.z;
ans.m_max.x += ma.x+ma.y+ma.z;
}
{ int j=1;
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
ans.m_min.y += mi.x+mi.y+mi.z;
ans.m_max.y += ma.x+ma.y+ma.z;
}
{ int j=2;
float4 mi = make_float4( min2( e[j].x, f[j].x ), min2( e[j].y, f[j].y ), min2( e[j].z, f[j].z ) );
float4 ma = make_float4( max2( e[j].x, f[j].x ), max2( e[j].y, f[j].y ), max2( e[j].z, f[j].z ) );
ans.m_min.z += mi.x+mi.y+mi.z;
ans.m_max.z += ma.x+ma.y+ma.z;
}
}
void Aabb::transform(const float4& translation, const Quaternion& quat)
{
Matrix3x3 m = qtGetRotationMatrix( quat );
transform( translation, m );
}
#endif

View File

@@ -0,0 +1,212 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ARRAY_H
#define ARRAY_H
#include <string.h>
#include <malloc.h>
#include <Common/Base/Error.h>
#include <new.h>
// Growable array that keeps its elements in 16-byte aligned storage
// (allocated with _aligned_malloc in the member definitions below).
// Copy construction is disabled through the private copy constructor.
// NOTE(review): no copy-assignment operator is declared, so the compiler-generated one
// would copy m_data shallowly - confirm that Array is never assigned.
template <class T>
class Array
{
public:
// Creates an empty array with DEFAULT_SIZE reserved slots.
__inline
Array();
// Creates an array holding `size` default-constructed elements.
__inline
Array(int size);
__inline
~Array();
// Element access; the index is checked with CLASSERT.
__inline
T& operator[] (int idx);
__inline
const T& operator[] (int idx) const;
// Appends a copy of elem, growing the storage by INCREASE_SIZE when it is full.
__inline
void pushBack(const T& elem);
// Drops the last element (size only).
__inline
void popBack();
// Sets the size to zero; the storage is kept.
__inline
void clear();
// Changes the size, reallocating when `size` exceeds the capacity.
__inline
void setSize(int size);
__inline
int getSize() const;
// Pointer to the first element.
__inline
T* begin();
__inline
const T* begin() const;
// Index of the first element equal to data, or -1.
__inline
int indexOf(const T& data) const;
// Removes element idx by overwriting it with the last element (order is not preserved).
__inline
void removeAt(int idx);
// Grows the array by one element and returns a reference to the new last element.
__inline
T& expandOne();
private:
// Not copyable.
Array(const Array& a){}
private:
enum
{
DEFAULT_SIZE = 128,
INCREASE_SIZE = 128,
};
T* m_data;
int m_size;
int m_capacity;
};
// Default constructor: no elements, DEFAULT_SIZE slots of 16-byte aligned storage,
// every slot default-constructed in place.
template<class T>
Array<T>::Array()
{
    m_size = 0;
    m_capacity = DEFAULT_SIZE;
    m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
    for(int slot=0; slot<m_capacity; ++slot)
    {
        new(m_data + slot)T;
    }
}
// Sized constructor: size and capacity both equal `size`; every slot of the 16-byte
// aligned storage is default-constructed in place.
template<class T>
Array<T>::Array(int size)
{
    m_size = size;
    m_capacity = size;
    m_data = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
    for(int slot=0; slot<m_capacity; ++slot)
    {
        new(m_data + slot)T;
    }
}
// Destructor: returns the aligned storage. Element destructors are not invoked.
template<class T>
Array<T>::~Array()
{
    if( !m_data )
    {
        return;
    }
    _aligned_free( m_data );
    m_data = NULL;
}
// Mutable element access. The index must lie in [0, getSize()); both bounds are
// asserted so that a negative index is caught as well.
template<class T>
T& Array<T>::operator[](int idx)
{
    CLASSERT(idx>=0 && idx<m_size);
    return m_data[idx];
}
// Read-only element access. The index must lie in [0, getSize()); both bounds are
// asserted so that a negative index is caught as well.
template<class T>
const T& Array<T>::operator[](int idx) const
{
    CLASSERT(idx>=0 && idx<m_size);
    return m_data[idx];
}
// Appends a copy of `elem`. When the storage is full it grows by INCREASE_SIZE slots:
// the existing elements are relocated bitwise into the new buffer and the added slots
// are default-constructed, matching what the constructors and setSize do.
// `elem` may refer to an element of this array: it is copied into the new buffer
// before the old buffer is released.
template<class T>
void Array<T>::pushBack(const T& elem)
{
    if( m_size == m_capacity )
    {
        const int oldCap = m_capacity;
        m_capacity += INCREASE_SIZE;
        T* s = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
        // Construct only the new tail; the head is filled by the memcpy below.
        for(int i=oldCap; i<m_capacity; i++) new(&s[i])T;
        memcpy( s, m_data, sizeof(T)*oldCap );
        // Copy while the old storage (which elem may point into) is still alive.
        s[ m_size ] = elem;
        _aligned_free( m_data );
        m_data = s;
        m_size++;
        return;
    }
    m_data[ m_size++ ] = elem;
}
// Removes the last element by shrinking the size; the array must not be empty.
template<class T>
void Array<T>::popBack()
{
    CLASSERT( m_size>0 );
    --m_size;
}
// Forgets all elements; capacity and storage stay as they are.
template<class T>
void Array<T>::clear()
{
    this->m_size = 0;
}
// Sets the element count. If `size` exceeds the capacity, a new buffer of exactly
// `size` slots is allocated, every slot is default-constructed, and the contents of
// the old buffer (its full old capacity) are copied over bitwise.
template<class T>
void Array<T>::setSize(int size)
{
    if( size <= m_capacity )
    {
        // Fits into the current storage: only the count changes.
        m_size = size;
        return;
    }

    const int previousCapacity = m_capacity;
    m_capacity = size;
    T* grown = (T*)_aligned_malloc(sizeof(T)*m_capacity, 16);
    for(int slot=0; slot<m_capacity; ++slot)
    {
        new(grown + slot)T;
    }
    memcpy( grown, m_data, sizeof(T)*previousCapacity );
    _aligned_free( m_data );
    m_data = grown;
    m_size = size;
}
// Number of elements currently stored.
template<class T>
int Array<T>::getSize() const
{
    return this->m_size;
}
// Read-only pointer to the first element of the storage.
template<class T>
const T* Array<T>::begin() const
{
    return this->m_data;
}
// Mutable pointer to the first element of the storage.
template<class T>
T* Array<T>::begin()
{
    return this->m_data;
}
// Linear search: index of the first element that compares equal to `data`, or -1 when
// no element matches.
template<class T>
int Array<T>::indexOf(const T& data) const
{
    int found = -1;
    for(int i=0; i<m_size && found < 0; ++i)
    {
        if( data == m_data[i] )
        {
            found = i;
        }
    }
    return found;
}
// Removes element `idx` in constant time by moving the last element into its place;
// element order is therefore not preserved. The index must lie in [0, getSize());
// both bounds are asserted so that a negative index is caught as well.
template<class T>
void Array<T>::removeAt(int idx)
{
    CLASSERT(idx>=0 && idx<m_size);
    m_data[idx] = m_data[--m_size];
}
// Grows the array by one element (through setSize) and returns a reference to the new
// last element.
template<class T>
T& Array<T>::expandOne()
{
    const int grownSize = m_size+1;
    setSize( grownSize );
    T& last = m_data[ m_size-1 ];
    return last;
}
#endif

View File

@@ -0,0 +1,111 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef COLLIDE_UTILS_H
#define COLLIDE_UTILS_H
#include "Stubs/AdlMath.h"
// Stateless collection of triangle tests.
class CollideUtils
{
public:
// Tests point p against triangle (a, b, c); on success normalOut receives the unit
// triangle normal (negated when FLIPSIGN is true) with a plane distance stored in w.
template<bool FLIPSIGN>
static bool collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin = 0.f);
// Intersects the segment rayFrom->rayTo with triangle (v0, v1, v2). Returns the
// parameter t of the hit, or -1 when the barycentric test fails; bCrdOut optionally
// receives the barycentric coordinates.
__inline
static float castRay(const float4& v0, const float4& v1, const float4& v2,
const float4& rayFrom, const float4& rayTo, float margin = 0.0f, float4* bCrdOut = NULL);
};
template<bool FLIPSIGN>
bool CollideUtils::collide(const float4& a, const float4& b, const float4& c, const float4& p, float4& normalOut, float margin)
{
float4 ab, bc, ca;
ab = b-a;
bc = c-b;
ca = a-c;
float4 ap, bp, cp;
ap = p-a;
bp = p-b;
cp = p-c;
float4 n;
n = cross3(ab, -1.f*ca);
float4 abp = cross3( ab, ap );
float4 bcp = cross3( bc, bp );
float4 cap = cross3( ca, cp );
float s0 = dot3F4(n,abp);
float s1 = dot3F4(n,bcp);
float s2 = dot3F4(n,cap);
// if(( s0<0.f && s1<0.f && s2<0.f ) || ( s0>0.f && s1>0.f && s2>0.f ))
if(( s0<margin && s1<margin && s2<margin ) || ( s0>-margin && s1>-margin && s2>-margin ))
{
n = normalize3( n );
n.w = dot3F4(n,ap);
normalOut = (FLIPSIGN)? -n : n;
return true;
}
return false;
}
// Intersects the segment rayFrom->rayTo with the triangle (v0, v1, v2).
//
// margin  : tolerance applied to each barycentric coordinate (accepted range is
//           [-margin, 1+margin]).
// bCrdOut : when non-NULL, receives the barycentric coordinates (u, v, w) in x, y, z.
// Returns the segment parameter t of the hit (0 at rayFrom, 1 at rayTo; t itself is not
// range-checked), or -1 when the hit lies outside the triangle or when the segment is
// parallel to the triangle plane / the triangle is degenerate.
__inline
float CollideUtils::castRay(const float4& v0, const float4& v1, const float4& v2,
const float4& rayFrom, const float4& rayTo, float margin, float4* bCrdOut)
{
    float t, v, w;
    float4 ab; ab = v1 - v0;
    float4 ac; ac = v2 - v0;
    float4 qp; qp = rayFrom - rayTo;
    float4 normal = cross3( ab, ac );
    float d = dot3F4( qp, normal );
    // d == 0 means there is no single intersection point; without this guard the
    // division below produces inf/NaN, which can slip through the range checks.
    if( d == 0.f ) return -1;
    float odd = 1.f/d;
    float4 ap; ap = rayFrom - v0;
    t = dot3F4( ap, normal );
    t *= odd;
    // if( t < 0.f || t > 1.f ) return -1;
    float4 e = cross3( qp, ap );
    v = dot3F4( ac, e );
    v *= odd;
    if( v < -margin || v > 1.f+margin ) return -1;
    w = -dot3F4( ab, e );
    w *= odd;
    // if( w < 0.f || w > 1.f ) return -1;
    if( w < -margin || w > 1.f+margin ) return -1;
    float u = 1.f-v-w;
    if( u < -margin || u > 1.f+margin ) return -1;
    if( bCrdOut )
    {
        bCrdOut->x = u;
        bCrdOut->y = v;
        bCrdOut->z = w;
    }
    return t;
}
#endif

View File

@@ -0,0 +1,49 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef COLLISION_SHAPE_H
#define COLLISION_SHAPE_H
#include "Stubs/AdlMath.h"
#include "Stubs/AdlAabb.h"
// Abstract base class for collision shapes that can be queried for a distance.
// Stores the shape type, a bounding box and a collision margin.
_MEM_CLASSALIGN16
class CollisionShape
{
public:
_MEM_ALIGNED_ALLOCATOR16;
// Shape kinds; MAX_NUM_SHAPE_TYPES is the number of kinds, not a kind itself.
enum Type
{
SHAPE_HEIGHT_FIELD,
SHAPE_CONVEX_HEIGHT_FIELD,
SHAPE_PLANE,
MAX_NUM_SHAPE_TYPES,
};
// Stores the type and the margin (default 0.0025f); m_aabb is not set up here.
CollisionShape( Type type, float collisionMargin = 0.0025f ) : m_type( type ){ m_collisionMargin = collisionMargin; }
virtual ~CollisionShape(){}
// Distance query for point p.
// NOTE(review): presumably a signed distance in the shape's local frame, negative inside -
// confirm against the implementations.
virtual float queryDistance(const float4& p) const = 0;
// Same query, additionally writing a normal into normalOut; the meaning of the bool
// result is defined by the implementations.
virtual bool queryDistanceWithNormal(const float4& p, float4& normalOut) const = 0;
public:
Type m_type;
Aabb m_aabb;
float m_collisionMargin;
};
#endif

View File

@@ -0,0 +1,49 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_CONSTRAINT4_H
#define ADL_CONSTRAINT4_H
// Solver-side record holding data for up to four contact points plus friction data.
// NOTE(review): the per-field meanings below are inferred from names only - confirm
// against the solver that fills and consumes this struct.
struct Constraint4
{
_MEM_ALIGNED_ALLOCATOR16;
// The w component doubles as the friction coefficient (see the accessors below).
float4 m_linear;
float4 m_worldPos[4];
float4 m_center; // friction
float m_jacCoeffInv[4];
float m_b[4];
float m_appliedRambdaDt[4];
float m_fJacCoeffInv[2]; // friction
float m_fAppliedRambdaDt[2]; // friction
u32 m_bodyA;
u32 m_bodyB;
u32 m_batchIdx;
u32 m_paddings[1];
// Stores the friction coefficient in m_linear.w.
__inline
void setFrictionCoeff(float value) { m_linear.w = value; }
// Reads the friction coefficient back from m_linear.w.
__inline
float getFrictionCoeff() const { return m_linear.w; }
};
#endif //ADL_CONSTRAINT4_H

View File

@@ -0,0 +1,102 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_CONTACT4_H
#define ADL_CONTACT4_H
// SolverData is a typed Adl buffer of Constraint4 when building for the AMD
// OpenCL platform, and an opaque pointer otherwise.
// NOTE(review): no header is included in the non-AMD branch, so float4/u16/u32
// used below must come from whoever includes this file - confirm.
#ifdef CL_PLATFORM_AMD
#include "AdlConstraint4.h"
#include "Adl/Adl.h"
typedef adl::Buffer<Constraint4>* SolverData;
#else
typedef void* SolverData;
#endif
// Opaque handle to shape data.
typedef void* ShapeDataType;
struct Contact4
{
_MEM_ALIGNED_ALLOCATOR16;
float4 m_worldPos[4];
float4 m_worldNormal;
// float m_restituitionCoeff;
// float m_frictionCoeff;
u16 m_restituitionCoeffCmp;
u16 m_frictionCoeffCmp;
int m_batchIdx;
u32 m_bodyAPtr;
u32 m_bodyBPtr;
// todo. make it safer
int& getBatchIdx() { return m_batchIdx; }
float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp/(float)0xffff); }
void setRestituitionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_restituitionCoeffCmp = (u16)(c*0xffff); }
float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp/(float)0xffff); }
void setFrictionCoeff( float c ) { ADLASSERT( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (u16)(c*0xffff); }
float& getNPoints() { return m_worldNormal.w; }
float getNPoints() const { return m_worldNormal.w; }
float getPenetration(int idx) const { return m_worldPos[idx].w; }
bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; }
};
/// Contact manifold of up to four points that references its bodies by
/// pointer and keeps the material coefficients as plain floats.
struct ContactPoint4
{
	float4 m_worldPos[4];	// .w of each entry is what getPenetration() returns

	union
	{
		float4 m_worldNormal;

		struct Data
		{
			int m_padding[3];
			float m_nPoints;	//	for cl
		}m_data;	// m_nPoints overlays m_worldNormal.w
	};

	float m_restituitionCoeff;
	float m_frictionCoeff;
//	int m_nPoints;
//	int m_padding0;

	void* m_bodyAPtr;
	void* m_bodyBPtr;
//	int m_padding1;
//	int m_padding2;

	float& getNPoints() { return m_data.m_nPoints; }
	float getNPoints() const { return m_data.m_nPoints; }

	float getPenetration(int idx) const { return m_worldPos[idx].w; }

//	__inline
//	void load(int idx, const ContactPoint& src);
//	__inline
//	void store(int idx, ContactPoint& dst) const;

	/// True only when both body pointers are null. The pointers are compared
	/// directly: casting them to a 32-bit integer drops the upper half of the
	/// address on 64-bit builds, and adding the two values can wrap to zero.
	bool isInvalid() const { return m_bodyAPtr == 0 && m_bodyBPtr == 0; }
};
#endif //ADL_CONTACT4_H

View File

@@ -0,0 +1,80 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef CL_ERROR_H
#define CL_ERROR_H
#ifdef DX11RENDER
#include <windows.h>
#endif
// Assertion macros. In _DEBUG builds a failed condition triggers
// __debugbreak(); otherwise the condition is still evaluated but ignored.
// NOTE(review): both forms expand to a bare `if`, so using them as the body of
// an unbraced if/else can bind a following `else` to the macro's `if`.
#ifdef _DEBUG
#include <assert.h>
#define CLASSERT(x) if(!(x)){__debugbreak(); }
#define ADLASSERT(x) if(!(x)){__debugbreak(); }
#else
#define CLASSERT(x) if(x){}
#define ADLASSERT(x) if(x){}
#endif
// Debug-only compile-time check: declares a local array of size x, so a zero
// or negative constant x is rejected by the compiler. Expands to nothing in
// non-debug builds.
#ifdef _DEBUG
#define COMPILE_TIME_ASSERT(x) {int compileTimeAssertFailed[x]; compileTimeAssertFailed[0];}
#else
#define COMPILE_TIME_ASSERT(x)
#endif
#ifdef _DEBUG
#include <stdarg.h>
#include <stdio.h>
/// printf-style debug output. With DX11RENDER defined the formatted text is
/// sent to OutputDebugString, otherwise it is printed to stdout.
/// Messages longer than 255 characters are truncated on the DX11RENDER path.
__inline
void debugPrintf(const char *fmt, ...)
{
	va_list arg;
	va_start(arg, fmt);
#ifdef DX11RENDER
	char buf[256];
	//	_TRUNCATE clips an over-long message; vsprintf_s would instead invoke
	//	the CRT invalid-parameter handler and abort the program.
	_vsnprintf_s( buf, 256, _TRUNCATE, fmt, arg );
#ifdef UNICODE
	WCHAR wbuf[256];
	//	First call only queries the required length (including the terminator).
	int sizeWide = MultiByteToWideChar(0,0,buf,-1,wbuf,0);
	MultiByteToWideChar(0,0,buf,-1,wbuf,sizeWide);
//	swprintf_s( wbuf, 256, L"%s", buf );
	OutputDebugString( wbuf );
#else
	OutputDebugString( buf );
#endif
#else
	vprintf(fmt, arg);
#endif
	va_end(arg);
}
#else
/// Non-debug builds: debug output is compiled out.
__inline
void debugPrintf(const char *fmt, ...)
{
	(void)fmt;	// intentionally unused
}
#endif
// Prints msg with a "WARNING: " prefix through debugPrintf. The expansion
// already ends in ';', so `WARN(x);` produces an extra empty statement.
#define WARN(msg) debugPrintf("WARNING: %s\n", msg);
#endif

View File

@@ -0,0 +1,216 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef CL_MATH_H
#define CL_MATH_H
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <xmmintrin.h>
#include "AdlError.h"
#include <algorithm>
// Sort routine used by the library; currently std::sort.
#define pxSort std::sort
// Single-precision pi.
#define PI 3.14159265358979323846f
// Rounds num up to the next multiple of alignment using integer arithmetic.
#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
// Marks the following type as 16-byte aligned (MSVC __declspec syntax).
#define _MEM_CLASSALIGN16 __declspec(align(16))
// Injects class-level operator new/delete (scalar and array) that allocate
// through _aligned_malloc/_aligned_free with 16-byte alignment, plus the
// matching placement new/delete pair.
#define _MEM_ALIGNED_ALLOCATOR16 void* operator new(size_t size) { return _aligned_malloc( size, 16 ); } \
void operator delete(void *p) { _aligned_free( p ); } \
void* operator new[](size_t size) { return _aligned_malloc( size, 16 ); } \
void operator delete[](void *p) { _aligned_free( p ); } \
void* operator new(size_t size, void* p) { return p; } \
void operator delete(void *p, void* pp) {}
/// Returns the smallest power of two that is >= n (n itself when it already
/// is a power of two). For n == 0, or when the result does not fit in T, the
/// value wraps to 0.
template<class T>
T nextPowerOf2(T n)
{
	n -= 1;
	//	Smear the highest set bit into every lower bit. Doubling the shift
	//	each round needs only log2(bits) steps and keeps the loop counter
	//	unsigned, avoiding the signed/unsigned comparison with sizeof.
	const unsigned int numBits = (unsigned int)(sizeof(T)*8);
	for(unsigned int shift=1; shift<numBits; shift<<=1)
		n = n | (n>>shift);
	return n+1;
}
// 16-byte aligned vector of four floats. The anonymous union exposes the same
// storage as named components (x,y,z,w), as an array (s[0..3]) and as one SSE
// register (m_quad).
_MEM_CLASSALIGN16
struct float4
{
_MEM_ALIGNED_ALLOCATOR16;
union
{
struct
{
float x,y,z,w;
};
struct
{
float s[4];
};
__m128 m_quad;
};
};
/// Returns 1 when all four components of a compare equal to zero, else 0.
__forceinline
unsigned int isZero(const float4& a)
{
	unsigned int allZero = (a.s[0] == 0.f);
	allZero &= (a.s[1] == 0.f);
	allZero &= (a.s[2] == 0.f);
	allZero &= (a.s[3] == 0.f);
	return allZero;
}
// 16-byte aligned vector of four ints, addressable as x,y,z,w or as s[0..3].
_MEM_CLASSALIGN16
struct int4
{
_MEM_ALIGNED_ALLOCATOR16;
union
{
struct
{
int x,y,z,w;
};
struct
{
int s[4];
};
};
};
// Pair of ints, addressable as x,y or as s[0..1].
struct int2
{
union
{
struct
{
int x,y;
};
struct
{
int s[2];
};
};
};
// Pair of floats, addressable as x,y or as s[0..1].
struct float2
{
union
{
struct
{
float x,y;
};
struct
{
float s[2];
};
};
};
// Short aliases for unsigned integer types; the names assume 32/16/8-bit
// widths for unsigned int/short/char on the target platform.
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
#include "Adlfloat4.inl"
//#include <Common/Math/float4SSE.inl>
/// Exchanges the values of a and b through a temporary copy.
template<typename T>
void swap2(T& a, T& b)
{
	T previousA = a;
	a = b;
	b = previousA;
}
/// Seeds the C runtime random number generator used by randRange.
__inline
void randSeed(int seed)
{
	const unsigned int crtSeed = seed;
	srand( crtSeed );
}
/// Returns a pseudo-random value in [minV, maxV) built from one rand() draw
/// quantized to steps of 1/10000 of the range.
template<typename T>
__inline
T randRange(const T& minV, const T& maxV)
{
	const float fraction = (rand()%10000)/10000.f;
	const T span = maxV - minV;
	return (T)(minV + fraction*span);
}
/// float4 specialization: every component gets its own rand() draw and is
/// scaled into [minV, maxV) component-wise.
template<>
__inline
float4 randRange(const float4& minV, const float4& maxV)
{
	//	Draw the four values in separate statements. As function arguments
	//	their evaluation order is unspecified, so which draw landed in which
	//	component used to depend on the compiler.
	const float rx = (rand()%10000)/10000.f;
	const float ry = (rand()%10000)/10000.f;
	const float rz = (rand()%10000)/10000.f;
	const float rw = (rand()%10000)/10000.f;
	float4 r = make_float4( rx, ry, rz, rw );
	float4 range = maxV - minV;
	return (minV + r*range);
}
// Key/value pair used for sorting. The 32-bit key can also be accessed as two
// 16-bit halves through m_key16. Ordering compares m_key only.
struct SortData
{
union
{
u32 m_key;
struct { u16 m_key16[2]; };
};
u32 m_value;
// Strict ordering by key; m_value does not take part in the comparison.
friend bool operator <(const SortData& a, const SortData& b)
{
return a.m_key < b.m_key;
}
};
/// Returns baseAddr advanced by offset bytes, reinterpreted as T*.
template<typename T>
T* addByteOffset(void* baseAddr, u32 offset)
{
	//	Byte-wise pointer arithmetic keeps the full address; converting the
	//	pointer to a 32-bit integer loses the upper bits on 64-bit builds.
	return (T*)((char*)baseAddr + offset);
}
/// Pair of 32-bit unsigned values.
struct Pair32
{
	/// Leaves both members uninitialized.
	Pair32(){}

	/// Stores a in m_a and b in m_b.
	Pair32(u32 a, u32 b)
	{
		m_a = a;
		m_b = b;
	}

	u32 m_a;
	u32 m_b;
};
/// Pair of untyped pointers.
struct PtrPair
{
	/// Leaves both members uninitialized.
	PtrPair(){}

	/// Stores the two pointers as given.
	PtrPair(void* a, void* b)
	{
		m_a = a;
		m_b = b;
	}

	/// Convenience overload: accepts two pointers of the same type and
	/// stores them as void*.
	template<typename T>
	PtrPair(T* a, T* b)
	{
		m_a = (void*)a;
		m_b = (void*)b;
	}

	void* m_a;
	void* m_b;
};
#endif

View File

@@ -0,0 +1,194 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef MATRIX3X3_H
#define MATRIX3X3_H
#include "AdlMath.h"
///////////////////////////////////////
// Matrix3x3
///////////////////////////////////////
/// 3x3 matrix stored as three 16-byte aligned float4 rows; the w lane of each
/// row is padding.
/// Declared as a named struct, like float4 and int4: an unnamed struct in a
/// typedef may not declare member functions (here the allocator operators)
/// under current C++ rules.
_MEM_CLASSALIGN16
struct Matrix3x3
{
	_MEM_ALIGNED_ALLOCATOR16;
	float4 m_row[3];
};
// Forward declarations of the matrix helpers defined below.
// All-zero matrix.
__inline
Matrix3x3 mtZero();
// Identity matrix.
__inline
Matrix3x3 mtIdentity();
// Diagonal matrix with a, b, c on the diagonal.
__inline
Matrix3x3 mtDiagonal(float a, float b, float c);
// Transpose of m.
__inline
Matrix3x3 mtTranspose(const Matrix3x3& m);
// Matrix product a*b.
__inline
Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b);
// Matrix times column vector: a*b.
__inline
float4 mtMul1(const Matrix3x3& a, const float4& b);
// Scalar times matrix.
__inline
Matrix3x3 mtMul2(float a, const Matrix3x3& b);
// Row vector times matrix: b*a.
__inline
float4 mtMul3(const float4& b, const Matrix3x3& a);
// Inverse of m.
__inline
Matrix3x3 mtInvert(const Matrix3x3& m);
/// Returns a matrix whose rows are all zero (including the w lanes).
__inline
Matrix3x3 mtZero()
{
	Matrix3x3 zero;
	for(int row=0; row<3; row++)
		zero.m_row[row] = make_float4(0.f);
	return zero;
}
/// Returns the identity matrix; the w lane of every row is zero.
__inline
Matrix3x3 mtIdentity()
{
	//	The identity is the diagonal matrix with ones on the diagonal.
	return mtDiagonal(1.f, 1.f, 1.f);
}
/// Returns the matrix with a, b, c on the diagonal and zeros elsewhere
/// (w lanes included).
__inline
Matrix3x3 mtDiagonal(float a, float b, float c)
{
	Matrix3x3 diag;
	diag.m_row[0] = make_float4(a,   0.f, 0.f, 0.f);
	diag.m_row[1] = make_float4(0.f, b,   0.f, 0.f);
	diag.m_row[2] = make_float4(0.f, 0.f, c,   0.f);
	return diag;
}
/// Returns the transpose of m; the w lane of every output row is zero.
__inline
Matrix3x3 mtTranspose(const Matrix3x3& m)
{
	Matrix3x3 transposed;
	//	Output row c is input column c.
	for(int c=0; c<3; c++)
		transposed.m_row[c] = make_float4(m.m_row[0].s[c], m.m_row[1].s[c], m.m_row[2].s[c], 0.f);
	return transposed;
}
/// Returns the matrix product a*b. The w lane of every result row is zero.
__inline
Matrix3x3 mtMul(const Matrix3x3& a, const Matrix3x3& b)
{
	//	Rows of the transpose are the columns of b, so each entry becomes a
	//	row-by-row dot product.
	Matrix3x3 transB;
	transB = mtTranspose( b );
	Matrix3x3 ans;
	for(int i=0; i<3; i++)
	{
		ans.m_row[i].s[0] = dot3F4(a.m_row[i],transB.m_row[0]);
		ans.m_row[i].s[1] = dot3F4(a.m_row[i],transB.m_row[1]);
		ans.m_row[i].s[2] = dot3F4(a.m_row[i],transB.m_row[2]);
		//	Clear the padding lane as mtTranspose/mtInvert do; it was left
		//	uninitialized and is read by the 4-lane float4 operators.
		ans.m_row[i].s[3] = 0.f;
	}
	return ans;
}
/// Returns a*b, treating b as a column vector (b.w is ignored).
/// The w component of the result is zero.
__inline
float4 mtMul1(const Matrix3x3& a, const float4& b)
{
	float4 ans;
	ans.s[0] = dot3F4( a.m_row[0], b );
	ans.s[1] = dot3F4( a.m_row[1], b );
	ans.s[2] = dot3F4( a.m_row[2], b );
	//	Previously left uninitialized although callers go on to use the
	//	result with 4-lane float4 operators.
	ans.s[3] = 0.f;
	return ans;
}
/// Returns b with every row multiplied by the scalar a.
__inline
Matrix3x3 mtMul2(float a, const Matrix3x3& b)
{
	Matrix3x3 scaled;
	for(int row=0; row<3; row++)
		scaled.m_row[row] = a*b.m_row[row];
	return scaled;
}
/// Returns a*b, treating a as a row vector (a.w is ignored).
/// The w component of the result is zero.
__inline
float4 mtMul3(const float4& a, const Matrix3x3& b)
{
	float4 ans;
	ans.x = a.x*b.m_row[0].x + a.y*b.m_row[1].x + a.z*b.m_row[2].x;
	ans.y = a.x*b.m_row[0].y + a.y*b.m_row[1].y + a.z*b.m_row[2].y;
	ans.z = a.x*b.m_row[0].z + a.y*b.m_row[1].z + a.z*b.m_row[2].z;
	//	Previously left uninitialized.
	ans.w = 0.f;
	return ans;
}
// Returns the inverse of m, computed as adjugate / determinant.
// A zero determinant trips CLASSERT; the division below is not otherwise guarded.
__inline
Matrix3x3 mtInvert(const Matrix3x3& m)
{
// Determinant: three positive and three negative triple products.
float det = m.m_row[0].s[0]*m.m_row[1].s[1]*m.m_row[2].s[2]+m.m_row[1].s[0]*m.m_row[2].s[1]*m.m_row[0].s[2]+m.m_row[2].s[0]*m.m_row[0].s[1]*m.m_row[1].s[2]
-m.m_row[0].s[0]*m.m_row[2].s[1]*m.m_row[1].s[2]-m.m_row[2].s[0]*m.m_row[1].s[1]*m.m_row[0].s[2]-m.m_row[1].s[0]*m.m_row[0].s[1]*m.m_row[2].s[2];
CLASSERT( det );
Matrix3x3 ans;
// Adjugate (transposed cofactor matrix), one row at a time; w lanes are cleared.
ans.m_row[0].s[0] = m.m_row[1].s[1]*m.m_row[2].s[2] - m.m_row[1].s[2]*m.m_row[2].s[1];
ans.m_row[0].s[1] = m.m_row[0].s[2]*m.m_row[2].s[1] - m.m_row[0].s[1]*m.m_row[2].s[2];
ans.m_row[0].s[2] = m.m_row[0].s[1]*m.m_row[1].s[2] - m.m_row[0].s[2]*m.m_row[1].s[1];
ans.m_row[0].w = 0.f;
ans.m_row[1].s[0] = m.m_row[1].s[2]*m.m_row[2].s[0] - m.m_row[1].s[0]*m.m_row[2].s[2];
ans.m_row[1].s[1] = m.m_row[0].s[0]*m.m_row[2].s[2] - m.m_row[0].s[2]*m.m_row[2].s[0];
ans.m_row[1].s[2] = m.m_row[0].s[2]*m.m_row[1].s[0] - m.m_row[0].s[0]*m.m_row[1].s[2];
ans.m_row[1].w = 0.f;
ans.m_row[2].s[0] = m.m_row[1].s[0]*m.m_row[2].s[1] - m.m_row[1].s[1]*m.m_row[2].s[0];
ans.m_row[2].s[1] = m.m_row[0].s[1]*m.m_row[2].s[0] - m.m_row[0].s[0]*m.m_row[2].s[1];
ans.m_row[2].s[2] = m.m_row[0].s[0]*m.m_row[1].s[1] - m.m_row[0].s[1]*m.m_row[1].s[0];
ans.m_row[2].w = 0.f;
// Scale the adjugate by 1/det to obtain the inverse.
ans = mtMul2((1.0f/det), ans);
return ans;
}
/// Builds a matrix whose rows are a, b and c (copied as-is, w included).
__inline
Matrix3x3 mtSet( const float4& a, const float4& b, const float4& c )
{
	const float4* rows[3] = { &a, &b, &c };
	Matrix3x3 result;
	for(int i=0; i<3; i++)
		result.m_row[i] = *rows[i];
	return result;
}
/// Element-wise sum of two matrices (performed row by row on all four lanes).
__inline
Matrix3x3 operator+(const Matrix3x3& a, const Matrix3x3& b)
{
	Matrix3x3 sum;
	for(int row=0; row<3; row++)
		sum.m_row[row] = a.m_row[row] + b.m_row[row];
	return sum;
}
#endif

View File

@@ -0,0 +1,155 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef QUATERNION_H
#define QUATERNION_H
#include "AdlMatrix3x3.h"
// A quaternion is stored in a float4: x,y,z hold the vector part and w the
// scalar part (see qtSet and qtGetIdentity below).
typedef float4 Quaternion;
// Forward declarations of the quaternion helpers defined below.
// Quaternion for a rotation of angle about axis.
__inline
Quaternion qtSet(const float4& axis, float angle);
// Quaternion product a*b.
__inline
Quaternion qtMul(const Quaternion& a, const Quaternion& b);
// Rotates vec by q.
__inline
float4 qtRotate(const Quaternion& q, const float4& vec);
// Rotates vec by the inverse of q.
__inline
float4 qtInvRotate(const Quaternion& q, const float4& vec);
// Conjugate of q.
__inline
Quaternion qtInvert(const Quaternion& q);
// Rotation matrix equivalent to quat.
__inline
Matrix3x3 qtGetRotationMatrix(const Quaternion& quat);
// q scaled to unit length.
__inline
Quaternion qtNormalize(const Quaternion& q);
/// Returns the identity quaternion (0,0,0,1).
__inline
Quaternion qtGetIdentity()
{
	return make_float4(0.f, 0.f, 0.f, 1.f);
}
/// Builds the quaternion for a rotation of angle (radians, as passed to
/// sin/cos) about axis. The axis is normalized first.
__inline
Quaternion qtSet(const float4& axis, float angle)
{
	const float4 unitAxis = normalize3( axis );
	Quaternion q;
	//	Vector part: unit axis scaled by sin(angle/2).
	for(int i=0; i<3; i++)
		q.s[i] = unitAxis.s[i]*sin(angle/2);
	//	Scalar part.
	q.w = cos(angle/2);
	return q;
}
/// Returns the quaternion product a*b.
__inline
Quaternion qtMul(const Quaternion& a, const Quaternion& b)
{
	//	Vector part: a.v x b.v + a.w*b.v + b.w*a.v
	Quaternion product = cross3( a, b );
	product += a.w*b + b.w*a;
	//	Scalar part: a.w*b.w - dot(a.v, b.v). This overwrites the w lane
	//	produced by the 4-lane arithmetic above.
	product.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
	return product;
}
/// Rotates vec by q, computed as q * (vec,0) * conjugate(q).
__inline
float4 qtRotate(const Quaternion& q, const float4& vec)
{
	//	Treat the vector as a quaternion with zero scalar part.
	Quaternion pureVec = vec;
	pureVec.w = 0.f;
	const Quaternion conjugate = qtInvert( q );
	const Quaternion halfway = qtMul( q, pureVec );
	float4 rotated = qtMul( halfway, conjugate );
	return rotated;
}
/// Rotates vec by the inverse (conjugate) of q.
__inline
float4 qtInvRotate(const Quaternion& q, const float4& vec)
{
	const Quaternion inverse = qtInvert( q );
	return qtRotate( inverse, vec );
}
/// Returns the conjugate of q: vector part negated, scalar part kept.
/// (This equals the inverse only for unit quaternions.)
__inline
Quaternion qtInvert(const Quaternion& q)
{
	Quaternion conjugate;
	for(int i=0; i<3; i++)
		conjugate.s[i] = -q.s[i];
	conjugate.w = q.w;
	return conjugate;
}
// Converts quat (x,y,z vector part, w scalar part) to the equivalent 3x3
// rotation matrix. The w lane of every row is set to zero.
// NOTE(review): the formula assumes quat has unit length - the code does not
// normalize it.
__inline
Matrix3x3 qtGetRotationMatrix(const Quaternion& quat)
{
// Squares of the vector components (x^2, y^2, z^2).
float4 quat2 = make_float4(quat.s[0]*quat.s[0], quat.s[1]*quat.s[1], quat.s[2]*quat.s[2], 0.f);
Matrix3x3 out;
// Row 0: 1-2(y^2+z^2), 2(xy-wz), 2(xz+wy)
out.m_row[0].s[0]=1-2*quat2.s[1]-2*quat2.s[2];
out.m_row[0].s[1]=2*quat.s[0]*quat.s[1]-2*quat.s[3]*quat.s[2];
out.m_row[0].s[2]=2*quat.s[0]*quat.s[2]+2*quat.s[3]*quat.s[1];
out.m_row[0].s[3] = 0.f;
// Row 1: 2(xy+wz), 1-2(x^2+z^2), 2(yz-wx)
out.m_row[1].s[0]=2*quat.s[0]*quat.s[1]+2*quat.s[3]*quat.s[2];
out.m_row[1].s[1]=1-2*quat2.s[0]-2*quat2.s[2];
out.m_row[1].s[2]=2*quat.s[1]*quat.s[2]-2*quat.s[3]*quat.s[0];
out.m_row[1].s[3] = 0.f;
// Row 2: 2(xz-wy), 2(yz+wx), 1-2(x^2+y^2)
out.m_row[2].s[0]=2*quat.s[0]*quat.s[2]-2*quat.s[3]*quat.s[1];
out.m_row[2].s[1]=2*quat.s[1]*quat.s[2]+2*quat.s[3]*quat.s[0];
out.m_row[2].s[2]=1-2*quat2.s[0]-2*quat2.s[1];
out.m_row[2].s[3] = 0.f;
return out;
}
/// Converts the rotation matrix *m (only the first element of the pointer is
/// read) to a quaternion with w >= 0. This is the inverse of
/// qtGetRotationMatrix for unit quaternions.
__inline
Quaternion qtGetQuaternion(const Matrix3x3* m)
{
	const float m00 = m[0].m_row[0].x, m01 = m[0].m_row[0].y, m02 = m[0].m_row[0].z;
	const float m10 = m[0].m_row[1].x, m11 = m[0].m_row[1].y, m12 = m[0].m_row[1].z;
	const float m20 = m[0].m_row[2].x, m21 = m[0].m_row[2].y, m22 = m[0].m_row[2].z;

	Quaternion q;
	if( m00 + m11 + m22 > 0.f )
	{
		//	Well-conditioned case: recover w from the trace, then x,y,z from
		//	the antisymmetric part (m21-m12 = 4wx, and so on).
		q.w = sqrtf( m00 + m11 + m22 + 1 ) * 0.5f;
		float inv4w = 1.f/(4.f*q.w);
		q.x = (m21-m12)*inv4w;
		q.y = (m02-m20)*inv4w;
		q.z = (m10-m01)*inv4w;
	}
	else
	{
		//	Near a 180 degree rotation w approaches zero and dividing by 4w
		//	yields inf/NaN. Recover the largest of x,y,z from the diagonal
		//	instead and derive the rest from it (m01+m10 = 4xy, and so on).
		if( m00 > m11 && m00 > m22 )
		{
			const float s = sqrtf( 1.f + m00 - m11 - m22 ) * 2.f;	// 4x
			const float invS = 1.f/s;
			q.x = 0.25f*s;
			q.y = (m01+m10)*invS;
			q.z = (m02+m20)*invS;
			q.w = (m21-m12)*invS;
		}
		else if( m11 > m22 )
		{
			const float s = sqrtf( 1.f + m11 - m00 - m22 ) * 2.f;	// 4y
			const float invS = 1.f/s;
			q.x = (m01+m10)*invS;
			q.y = 0.25f*s;
			q.z = (m12+m21)*invS;
			q.w = (m02-m20)*invS;
		}
		else
		{
			const float s = sqrtf( 1.f + m22 - m00 - m11 ) * 2.f;	// 4z
			const float invS = 1.f/s;
			q.x = (m02+m20)*invS;
			q.y = (m12+m21)*invS;
			q.z = 0.25f*s;
			q.w = (m10-m01)*invS;
		}
		//	q and -q describe the same rotation; keep the w >= 0 convention
		//	of the trace-based branch.
		if( q.w < 0.f )
		{
			q.x = -q.x; q.y = -q.y; q.z = -q.z; q.w = -q.w;
		}
	}
	return q;
}
/// Returns q scaled to unit length over all four components.
__inline
Quaternion qtNormalize(const Quaternion& q)
{
	const Quaternion unit = normalize4(q);
	return unit;
}
/// Applies a rigid transform to p: rotate by orientation, then add translation.
__inline
float4 transform(const float4& p, const float4& translation, const Quaternion& orientation)
{
	const float4 rotated = qtRotate( orientation, p );
	return rotated + translation;
}
/// Applies the inverse rigid transform to p: subtract translation, then
/// rotate by the inverse of orientation.
__inline
float4 invTransform(const float4& p, const float4& translation, const Quaternion& orientation)
{
	const float4 offset = p-translation;
	return qtInvRotate( orientation, offset );
}
#endif

View File

@@ -0,0 +1,59 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef ADL_RIGID_BODY_H
#define ADL_RIGID_BODY_H
#include "AdlQuaternion.h"
// Container for the plain data records that describe a rigid body.
class RigidBodyBase
{
public:
// Per-body state: pose, velocities, shape reference and material values.
// Declared 16-byte aligned.
_MEM_CLASSALIGN16
struct Body
{
_MEM_ALIGNED_ALLOCATOR16;
float4 m_pos;
Quaternion m_quat;
float4 m_linVel;
float4 m_angVel;
u32 m_shapeIdx;
u32 m_shapeType;
float m_invMass;
float m_restituitionCoeff;
float m_frictionCoeff;
};
// Inverse inertia tensors of a body.
// NOTE(review): presumably m_initInvInertia is the body-space value and
// m_invInertia the current world-space one - confirm against the integrator.
struct Inertia
{
/* u16 m_shapeType;
u16 m_shapeIdx;
float m_restituitionCoeff;
float m_frictionCoeff;
int m_padding;
*/
Matrix3x3 m_invInertia;
Matrix3x3 m_initInvInertia;
};
};
#endif// ADL_RIGID_BODY_H

View File

@@ -0,0 +1,61 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#ifndef _ADL_TRANSFORM_H
#define _ADL_TRANSFORM_H
#include "AdlMath.h"
#include "AdlQuaternion.h"
#include "AdlMatrix3x3.h"
// Rigid transform stored as a translation plus a 3x3 rotation matrix.
// NOTE(review): unlike float4/Matrix3x3 this struct does not declare the
// aligned allocator, so heap allocations rely on the default operator new -
// confirm whether that matters for callers.
struct Transform
{
float4 m_translation;
Matrix3x3 m_rotation;
};
/// Builds a Transform from a translation and an orientation quaternion.
/// Marked inline like the other header-defined helpers; a non-inline
/// definition in a header is emitted by every translation unit that includes
/// it and fails at link time.
__inline
Transform trSetTransform(const float4& translation, const Quaternion& quat)
{
	Transform tr;
	tr.m_translation = translation;
	tr.m_rotation = qtGetRotationMatrix( quat );
	return tr;
}
/// Returns the inverse of tr, using the transpose as the inverse rotation
/// (valid when m_rotation is orthonormal).
/// Marked inline so that including this header from several translation
/// units does not produce duplicate definitions at link time.
__inline
Transform trInvert( const Transform& tr )
{
	Transform ans;
	ans.m_rotation = mtTranspose( tr.m_rotation );
	//	Inverse translation is -(R^T * t).
	ans.m_translation = mtMul1( ans.m_rotation, -tr.m_translation );
	return ans;
}
/// Composes two transforms: the result applies trB first, then trA.
/// Marked inline so that including this header from several translation
/// units does not produce duplicate definitions at link time.
__inline
Transform trMul(const Transform& trA, const Transform& trB)
{
	Transform ans;
	ans.m_rotation = mtMul( trA.m_rotation, trB.m_rotation );
	ans.m_translation = mtMul1( trA.m_rotation, trB.m_translation ) + trA.m_translation;
	return ans;
}
/// Transforms point p by tr: rotate, then translate.
/// Marked inline so that including this header from several translation
/// units does not produce duplicate definitions at link time.
__inline
float4 trMul1(const Transform& tr, const float4& p)
{
	return mtMul1( tr.m_rotation, p ) + tr.m_translation;
}
#endif //_ADL_TRANSFORM_H

View File

@@ -0,0 +1,373 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
// Alignment checking is disabled: the active definition expands to the bare
// expression statement `a;`, which has no run-time effect. The commented-out
// variant would assert 16-byte alignment of the argument's address.
//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0);
#define CHECK_ALIGNMENT(a) a;
/// Builds a float4 from four components; w defaults to 0.
__inline
float4 make_float4(float x, float y, float z, float w = 0.f)
{
	float4 result;
	result.s[0] = x;
	result.s[1] = y;
	result.s[2] = z;
	result.s[3] = w;
	return result;
}
/// Builds a float4 with all four components set to x.
__inline
float4 make_float4(float x)
{
	float4 splat;
	splat.x = splat.y = splat.z = splat.w = x;
	return splat;
}
/// Converts each component of an int4 to float.
__inline
float4 make_float4(const int4& x)
{
	float4 converted;
	for(int i=0; i<4; i++)
		converted.s[i] = (float)x.s[i];
	return converted;
}
/// Builds a float2 from two components.
__inline
float2 make_float2(float x, float y)
{
	float2 result;
	result.x = x;
	result.y = y;
	return result;
}
/// Builds a float2 with both components set to x.
__inline
float2 make_float2(float x)
{
	float2 splat;
	splat.x = splat.y = x;
	return splat;
}
/// Converts each component of an int2 to float.
__inline
float2 make_float2(const int2& x)
{
	float2 converted;
	converted.x = (float)x.x;
	converted.y = (float)x.y;
	return converted;
}
/// Builds an int4 from four components; w defaults to 0.
__inline
int4 make_int4(int x, int y, int z, int w = 0)
{
	int4 result;
	result.x = x;
	result.y = y;
	result.z = z;
	result.w = w;
	return result;
}
/// Builds an int4 with all four components set to x.
__inline
int4 make_int4(int x)
{
	int4 splat;
	splat.x = splat.y = splat.z = splat.w = x;
	return splat;
}
/// Converts each component of a float4 to int (C-style truncating cast).
__inline
int4 make_int4(const float4& x)
{
	int4 converted;
	for(int i=0; i<4; i++)
		converted.s[i] = (int)x.s[i];
	return converted;
}
/// Builds an int2 from two components.
__inline
int2 make_int2(int a, int b)
{
	int2 result;
	result.s[0] = a;
	result.s[1] = b;
	return result;
}
/// Unary minus: negates all four components.
__inline
float4 operator-(const float4& a)
{
	float4 negated;
	for(int i=0; i<4; i++)
		negated.s[i] = -a.s[i];
	return negated;
}
/// Component-wise product of two vectors. Asserts (debug builds) that a is
/// 16-byte aligned.
__inline
float4 operator*(const float4& a, const float4& b)
{
	//	size_t holds a full address; converting the pointer to a 32-bit
	//	integer truncates it (and is rejected by some compilers) on 64-bit
	//	targets.
	CLASSERT(((size_t)(&a) & 0xf) == 0);

	float4 out;
	out.s[0] = a.s[0]*b.s[0];
	out.s[1] = a.s[1]*b.s[1];
	out.s[2] = a.s[2]*b.s[2];
	out.s[3] = a.s[3]*b.s[3];
	return out;
}
/// Scalar times vector: scales all four components of b by a.
__inline
float4 operator*(float a, const float4& b)
{
	return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
}
/// Vector times scalar: scales all four components of b by a.
__inline
float4 operator*(const float4& b, float a)
{
	CHECK_ALIGNMENT(b);

	return make_float4(a*b.s[0], a*b.s[1], a*b.s[2], a*b.s[3]);
}
/// In-place component-wise multiplication by another vector.
__inline
void operator*=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] *= b.s[i];
}
/// In-place multiplication of all four components by a scalar.
__inline
void operator*=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] *= b;
}
//
/// Component-wise quotient a/b.
__inline
float4 operator/(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 quotient;
	for(int i=0; i<4; i++)
		quotient.s[i] = a.s[i]/b.s[i];
	return quotient;
}
/// Divides all four components of b by the scalar a.
__inline
float4 operator/(const float4& b, float a)
{
	CHECK_ALIGNMENT(b);

	return make_float4(b.x/a, b.y/a, b.z/a, b.w/a);
}
/// In-place component-wise division by another vector.
__inline
void operator/=(float4& a, const float4& b)
{
	a.s[0]/=b.s[0];
	a.s[1]/=b.s[1];
	a.s[2]/=b.s[2];
	a.s[3]/=b.s[3];
}
/// In-place division of all four components by a scalar. Asserts (debug
/// builds) that a is 16-byte aligned.
__inline
void operator/=(float4& a, float b)
{
	//	size_t holds a full address; converting the pointer to a 32-bit
	//	integer truncates it (and is rejected by some compilers) on 64-bit
	//	targets.
	CLASSERT(((size_t)(&a) & 0xf) == 0);

	a.s[0]/=b;
	a.s[1]/=b;
	a.s[2]/=b;
	a.s[3]/=b;
}
//
/// Component-wise sum of two vectors.
__inline
float4 operator+(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 sum;
	for(int i=0; i<4; i++)
		sum.s[i] = a.s[i]+b.s[i];
	return sum;
}
/// Adds the scalar b to every component of a.
__inline
float4 operator+(const float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	float4 sum;
	for(int i=0; i<4; i++)
		sum.s[i] = a.s[i]+b;
	return sum;
}
/// Component-wise difference a-b.
__inline
float4 operator-(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 difference;
	for(int i=0; i<4; i++)
		difference.s[i] = a.s[i]-b.s[i];
	return difference;
}
/// Subtracts the scalar b from every component of a.
__inline
float4 operator-(const float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	float4 difference;
	for(int i=0; i<4; i++)
		difference.s[i] = a.s[i]-b;
	return difference;
}
/// In-place component-wise addition of another vector.
__inline
void operator+=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] += b.s[i];
}
/// In-place addition of a scalar to all four components.
__inline
void operator+=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] += b;
}
/// In-place component-wise subtraction of another vector.
__inline
void operator-=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] -= b.s[i];
}
/// In-place subtraction of a scalar from all four components.
__inline
void operator-=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	for(int i=0; i<4; i++)
		a.s[i] -= b;
}
/// 3D cross product of the xyz parts of a and b; the result's w is zero.
__inline
float4 cross3(const float4& a, const float4& b)
{
	const float cx = a.y*b.z-a.z*b.y;
	const float cy = a.z*b.x-a.x*b.z;
	const float cz = a.x*b.y-a.y*b.x;
	return make_float4(cx, cy, cz, 0);
}
__inline
float dot3F4(const float4& a, const float4& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z;
}
/// Euclidean length of the xyz part of a.
__inline
float length3(const float4& a)
{
	const float lengthSquared = dot3F4(a,a);
	return sqrtf(lengthSquared);
}
__inline
float dot4(const float4& a, const float4& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
}
// for height
__inline
float dot3w1(const float4& point, const float4& eqn)
{
return point.x*eqn.x+point.y*eqn.y+point.z*eqn.z+eqn.w;
}
/// Scales a by the reciprocal of its xyz length (w is scaled too).
/// A zero-length input is not guarded against.
__inline
float4 normalize3(const float4& a)
{
	const float len = length3(a);
	return 1.f/len * a;
}
/// Scales a by the reciprocal of its 4-component length.
/// A zero-length input is not guarded against.
__inline
float4 normalize4(const float4& a)
{
	const float lengthSquared = dot4(a, a);
	const float len = sqrtf(lengthSquared);
	return 1.f/len * a;
}
/// Builds the plane through points a, b, c: xyz is the unit normal
/// normalize(cross(b-a, c-a)) and w is -dot(normal, a).
__inline
float4 createEquation(const float4& a, const float4& b, const float4& c)
{
	const float4 edgeAB = b-a;
	const float4 edgeAC = c-a;
	float4 plane = normalize3( cross3(edgeAB, edgeAC) );
	plane.w = -dot3F4(plane,a);
	return plane;
}
/// Returns the larger of a and b; b when they compare equal.
template<typename T>
__inline
T max2(const T& a, const T& b)
{
	if( a>b )
		return a;
	return b;
}
/// Returns the smaller of a and b; b when they compare equal.
template<typename T>
__inline
T min2(const T& a, const T& b)
{
	if( a<b )
		return a;
	return b;
}
/// float4 specialization: component-wise maximum.
template<>
__inline
float4 max2(const float4& a, const float4& b)
{
	const float hx = max2(a.x,b.x);
	const float hy = max2(a.y,b.y);
	const float hz = max2(a.z,b.z);
	const float hw = max2(a.w,b.w);
	return make_float4( hx, hy, hz, hw );
}
/// float4 specialization: component-wise minimum.
template<>
__inline
float4 min2(const float4& a, const float4& b)
{
	const float lx = min2(a.x,b.x);
	const float ly = min2(a.y,b.y);
	const float lz = min2(a.z,b.z);
	const float lw = min2(a.w,b.w);
	return make_float4( lx, ly, lz, lw );
}

View File

@@ -0,0 +1,381 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
// Alignment checking is disabled: the active definition expands to the bare
// expression statement `a;`, which has no run-time effect. The commented-out
// variant would assert 16-byte alignment of the argument's address.
//#define CHECK_ALIGNMENT(a) CLASSERT((u32(&(a)) & 0xf) == 0);
#define CHECK_ALIGNMENT(a) a;
/// Builds a float4 from four components; w defaults to 0.
__inline
float4 make_float4(float x, float y, float z, float w = 0.f)
{
	float4 packed;
	//	_mm_setr_ps takes the lanes in memory order (x first).
	packed.m_quad = _mm_setr_ps(x, y, z, w);
	return packed;
}
/// Builds a float4 with all four components set to x.
__inline
float4 make_float4(float x)
{
	float4 splat;
	splat.m_quad = _mm_set1_ps(x);
	return splat;
}
/// Converts each component of an int4 to float.
__inline
float4 make_float4(const int4& x)
{
	return make_float4((float)x.x, (float)x.y, (float)x.z, (float)x.w);
}
/// Builds a float2 from two components.
__inline
float2 make_float2(float x, float y)
{
	float2 result;
	result.x = x;
	result.y = y;
	return result;
}
/// Builds a float2 with both components set to x.
__inline
float2 make_float2(float x)
{
	float2 splat;
	splat.x = splat.y = x;
	return splat;
}
/// Converts each component of an int2 to float.
__inline
float2 make_float2(const int2& x)
{
	float2 converted;
	converted.x = (float)x.x;
	converted.y = (float)x.y;
	return converted;
}
/// Builds an int4 from four components; w defaults to 0.
__inline
int4 make_int4(int x, int y, int z, int w = 0)
{
	int4 result;
	result.x = x;
	result.y = y;
	result.z = z;
	result.w = w;
	return result;
}
/// Builds an int4 with all four components set to x.
__inline
int4 make_int4(int x)
{
	int4 splat;
	splat.x = splat.y = splat.z = splat.w = x;
	return splat;
}
/// Converts each component of a float4 to int (C-style truncating cast).
__inline
int4 make_int4(const float4& x)
{
	int4 converted;
	for(int i=0; i<4; i++)
		converted.s[i] = (int)x.s[i];
	return converted;
}
/// Builds an int2 from two components.
__inline
int2 make_int2(int a, int b)
{
	int2 result;
	result.s[0] = a;
	result.s[1] = b;
	return result;
}
/// Unary minus, computed as (0 - a) on all four lanes.
__inline
float4 operator-(const float4& a)
{
	float4 negated;
	negated.m_quad = _mm_sub_ps( _mm_setzero_ps(), a.m_quad );
	return negated;
}
/// Component-wise product of two vectors.
__inline
float4 operator*(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 product;
	product.m_quad = _mm_mul_ps( a.m_quad, b.m_quad );
	return product;
}
/// Scalar times vector: the scalar is broadcast to all lanes first.
__inline
float4 operator*(float a, const float4& b)
{
	float4 product;
	product.m_quad = _mm_mul_ps( _mm_set1_ps( a ), b.m_quad );
	return product;
}
/// Vector times scalar: the scalar is broadcast to all lanes first.
__inline
float4 operator*(const float4& b, float a)
{
	CHECK_ALIGNMENT(b);

	float4 product;
	product.m_quad = _mm_mul_ps( _mm_set1_ps( a ), b.m_quad );
	return product;
}
/// In-place component-wise multiplication by another vector.
__inline
void operator*=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_mul_ps( a.m_quad, b.m_quad );
}
/// In-place multiplication of all four lanes by a scalar.
__inline
void operator*=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_mul_ps( a.m_quad, _mm_set1_ps( b ) );
}
//
/// Component-wise quotient a/b.
__inline
float4 operator/(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 quotient;
	quotient.m_quad = _mm_div_ps( a.m_quad, b.m_quad );
	return quotient;
}
/// Divides all four lanes of b by the scalar a.
__inline
float4 operator/(const float4& b, float a)
{
	CHECK_ALIGNMENT(b);

	float4 quotient;
	quotient.m_quad = _mm_div_ps( b.m_quad, _mm_set1_ps( a ) );
	return quotient;
}
/// In-place component-wise division by another vector.
__inline
void operator/=(float4& a, const float4& b)
{
	a = a/b;
}
/// In-place division of all four lanes by a scalar. Asserts (debug builds)
/// that a is 16-byte aligned.
__inline
void operator/=(float4& a, float b)
{
	//	size_t holds a full address; converting the pointer to a 32-bit
	//	integer truncates it (and is rejected by some compilers) on 64-bit
	//	targets.
	CLASSERT(((size_t)(&a) & 0xf) == 0);

	float4 bv; bv.m_quad = _mm_set1_ps( b );
	a = a/bv;
}
//
/// Component-wise sum of two vectors.
__inline
float4 operator+(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 sum;
	sum.m_quad = _mm_add_ps( a.m_quad, b.m_quad );
	return sum;
}
/// Adds the scalar b to every lane of a.
__inline
float4 operator+(const float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	float4 sum;
	sum.m_quad = _mm_add_ps( a.m_quad, _mm_set1_ps( b ) );
	return sum;
}
/// Component-wise difference a-b.
__inline
float4 operator-(const float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	float4 difference;
	difference.m_quad = _mm_sub_ps( a.m_quad, b.m_quad );
	return difference;
}
/// Subtracts the scalar b from every lane of a.
__inline
float4 operator-(const float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	float4 difference;
	difference.m_quad = _mm_sub_ps( a.m_quad, _mm_set1_ps( b ) );
	return difference;
}
/// In-place component-wise addition of another vector.
__inline
void operator+=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_add_ps( a.m_quad, b.m_quad );
}
/// In-place addition of a scalar to all four lanes.
__inline
void operator+=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_add_ps( a.m_quad, _mm_set1_ps( b ) );
}
/// In-place component-wise subtraction of another vector.
__inline
void operator-=(float4& a, const float4& b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_sub_ps( a.m_quad, b.m_quad );
}
/// In-place subtraction of a scalar from all four lanes.
__inline
void operator-=(float4& a, float b)
{
	CHECK_ALIGNMENT(a);

	a.m_quad = _mm_sub_ps( a.m_quad, _mm_set1_ps( b ) );
}
// 3D cross product of the xyz lanes of a and b; the w lane of the result is
// masked to zero. The shuffle sequence follows xnamathvector.inl.
__inline
float4 cross3(const float4& a, const float4& b)
{ // xnamathvector.inl
// Lets an integer bit mask be used as an SSE register.
union IntVec
{
unsigned int m_i[4];
__m128 m_v;
};
// Keeps x,y,z and clears w when AND-ed with a vector.
IntVec mask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000};
__m128 V1 = a.m_quad;
__m128 V2 = b.m_quad;
// y1,z1,x1,w1
__m128 vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
// z2,x2,y2,w2
__m128 vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
// Perform the left operation: (y1*z2, z1*x2, x1*y2, w1*w2)
__m128 vResult = _mm_mul_ps(vTemp1,vTemp2);
// z1,x1,y1,w1
vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
// y2,z2,x2,w2
vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
// Perform the right operation: (z1*y2, x1*z2, y1*x2, w1*w2)
vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
// Subtract the right from left, and return answer
vResult = _mm_sub_ps(vResult,vTemp1);
// Set w to zero
float4 ans; ans.m_quad = _mm_and_ps(vResult,mask3.m_v);
return ans;
}
__inline
float dot3F4(const float4& a, const float4& b)
{
// return a.x*b.x+a.y*b.y+a.z*b.z;
// Perform the dot product
__m128 V1 = a.m_quad;
__m128 V2 = b.m_quad;
__m128 vDot = _mm_mul_ps(V1,V2);
// x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
__m128 vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
// Result.vector4_f32[0] = x+y
vDot = _mm_add_ss(vDot,vTemp);
// x=Dot.vector4_f32[2]
vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
// Result.vector4_f32[0] = (x+y)+z
vDot = _mm_add_ss(vDot,vTemp);
// Splat x
float4 ans; ans.m_quad = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
return ans.x;
}
// Euclidean length of the xyz part of a (w is ignored by dot3F4).
__inline
float length3(const float4& a)
{
    const float lengthSquared = dot3F4(a,a);
    return sqrtf(lengthSquared);
}
// Four-component dot product, evaluated in scalar code from left to right.
__inline
float dot4(const float4& a, const float4& b)
{
    return ((a.x*b.x + a.y*b.y) + a.z*b.z) + a.w*b.w;
}
// for height
// Evaluates the plane equation eqn at point, treating point.w as 1:
// dot(point.xyz, eqn.xyz) + eqn.w.
__inline
float dot3w1(const float4& point, const float4& eqn)
{
    return ((point.x*eqn.x + point.y*eqn.y) + point.z*eqn.z) + eqn.w;
}
// Scales a by the reciprocal of its xyz length. All four lanes (including w)
// are multiplied by the same factor. No guard against a zero-length input.
__inline
float4 normalize3(const float4& a)
{
    const float len = sqrtf(dot3F4(a, a));
    const float invLen = 1.f/len;
    return invLen * a;
}
// Scales a by the reciprocal of its four-component length.
// No guard against a zero-length input.
__inline
float4 normalize4(const float4& a)
{
    const float len = sqrtf(dot4(a, a));
    const float invLen = 1.f/len;
    return invLen * a;
}
// Builds the plane equation through the points a, b and c.
// xyz = normalized cross(b-a, c-a); w = -dot(normal, a), so that
// dot3w1(p, eqn) is zero for points p on the plane.
__inline
float4 createEquation(const float4& a, const float4& b, const float4& c)
{
    const float4 edgeAB = b-a;
    const float4 edgeAC = c-a;
    float4 plane = normalize3( cross3(edgeAB, edgeAC) );
    // dot3F4 ignores w, so plane can be used before its w is set
    plane.w = -dot3F4(plane,a);
    return plane;
}
// Returns a when a > b, otherwise b (so b is returned when they compare equal).
template<typename T>
__inline
T max2(const T& a, const T& b)
{
    if( a>b )
    {
        return a;
    }
    return b;
}
// Returns a when a < b, otherwise b (so b is returned when they compare equal).
template<typename T>
__inline
T min2(const T& a, const T& b)
{
    if( a<b )
    {
        return a;
    }
    return b;
}
// float4 specialization: component-wise maximum over x, y, z and w.
template<>
__inline
float4 max2(const float4& a, const float4& b)
{
    const float mx = max2(a.x,b.x);
    const float my = max2(a.y,b.y);
    const float mz = max2(a.z,b.z);
    const float mw = max2(a.w,b.w);
    return make_float4( mx, my, mz, mw );
}
// float4 specialization: component-wise minimum over x, y, z and w.
template<>
__inline
float4 min2(const float4& a, const float4& b)
{
    const float mx = min2(a.x,b.x);
    const float my = min2(a.y,b.y);
    const float mz = min2(a.z,b.z);
    const float mw = min2(a.w,b.w);
    return make_float4( mx, my, mz, mw );
}

View File

@@ -0,0 +1,154 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#pragma once
#include <Adl/Adl.h>
//#include <Common/Base/SyncObjects.h>
#include "AdlMath.h"
#include "AdlContact4.h"
#include "AdlRigidBody.h"
#include "../ConvexHeightFieldShape.h"
//#include "TypeDefinition.h"
//#include "RigidBody.h"
//#include "ConvexHeightFieldShape.h"
namespace adl
{
class ShapeBase;
// Device-independent base of ChNarrowphase<TYPE>. It only declares the
// configuration type that callers pass to execute() and culling().
class ChNarrowphaseBase
{
public:
// Per-call tuning parameters.
struct Config
{
// Copied into ConstData::m_collisionMargin before each kernel launch.
float m_collisionMargin;
};
// Disabled earlier ShapeData declaration; the live definition is
// ChNarrowphase<TYPE>::ShapeData.
/*
typedef struct
{
// m_normal.w == height in u8
float4 m_normal[HEIGHT_RES*HEIGHT_RES*6];
u32 m_height4[HEIGHT_RES*HEIGHT_RES*6];
float m_scale;
float m_padding0;
float m_padding1;
float m_padding2;
} ShapeData;
*/
};
// Convex-height-field narrowphase for device type TYPE. All members are
// static; per-instance state lives in the Data object returned by allocate().
template<DeviceType TYPE>
class ChNarrowphase : public ChNarrowphaseBase
{
public:
typedef Launcher::BufferInfo BufferInfo;
// State created by allocate() and released by deallocate().
struct Data
{
// Device the kernels and buffers belong to (not owned).
const Device* m_device;
// Kernels obtained from the device in allocate().
Kernel* m_supportCullingKernel;
Kernel* m_narrowphaseKernel;
Kernel* m_narrowphaseWithPlaneKernel;
// One-element counter written before and read after each launch
// (number of contacts / surviving pairs produced so far).
Buffer<u32>* m_counterBuffer;
};
enum
{
N_TASKS = 4,
// Resolution of one height-field face, taken from ConvexHeightField.
HEIGHT_RES = ConvexHeightField::HEIGHT_RES,
};
// Per-shape record written to the shape buffer by setShape().
// NOTE(review): the member order and sizes presumably have to match the
// struct declared in the kernel source - confirm before changing anything.
struct ShapeData
{
float4 m_normal[HEIGHT_RES*HEIGHT_RES*6];
// setShape() packs four u8 heights per u32 and therefore fills only the
// first HEIGHT_RES*HEIGHT_RES*6/4 entries of the two arrays below.
u32 m_height4[HEIGHT_RES*HEIGHT_RES*6];
u32 m_supportHeight4[HEIGHT_RES*HEIGHT_RES*6];
float m_scale;
float m_padding0;
float m_padding1;
float m_padding2;
};
// Constants handed to every kernel launch through Launcher::setConst().
struct ConstData
{
// Number of input pairs.
int m_nPairs;
// Copy of Config::m_collisionMargin.
float m_collisionMargin;
// Remaining room in the output buffer.
int m_capacity;
int m_paddings[1];
};
// Creates the kernels and the counter buffer for the given device.
static
Data* allocate( const Device* device );
// Releases what allocate() created.
static
void deallocate( Data* data );
/*
static
Buffer<ShapeData>* allocateShapeBuffer( const Device* device, int capacity );
static
void deallocateShapeBuffer( Buffer<ShapeData>* shapeBuf );
static
void setShape( Buffer<ShapeData>* shapeBuf, ShapeBase* shape, int idx, float collisionMargin );
*/
// Allocates a Buffer<ShapeData> with room for capacity shapes and returns
// it as an opaque ShapeDataType handle.
static
ShapeDataType allocateShapeBuffer( const Device* device, int capacity );
// Deletes a buffer returned by allocateShapeBuffer().
static
void deallocateShapeBuffer( ShapeDataType shapeBuf );
// Builds a temporary ConvexHeightField from shape and writes it to slot idx.
static
void setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float collisionMargin = 0.f );
// Writes an existing ConvexHeightField to slot idx. Note that the
// implementation expands cvxShape->m_aabb by collisionMargin in place.
static
void setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin = 0.f );
// Run NarrowphaseKernel
// nContacts is in/out: contacts already stored on entry, total on return.
//template<bool USE_OMP>
static
void execute( Data* data, const Buffer<int2>* pairs, int nPairs,
const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg );
// Run NarrowphaseWithPlaneKernel
// NOTE(review): the implementation in ChNarrowphase.inl does not bind
// vtxBuf or idxBuf to the kernel - confirm whether that is intended.
//template<bool USE_OMP>
static
void execute( Data* data, const Buffer<int2>* pairs, int nPairs,
const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
const Buffer<float4>* vtxBuf, const Buffer<int4>* idxBuf,
Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg );
// Run SupportCullingKernel
// Returns the number of pairs written to pairsOut (clamped to its size).
// pairsOut is declared const but is the output of this call.
//template<bool USE_OMP>
static
int culling( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
const ShapeDataType shapeBuf, const Buffer<int2>* pairsOut, const Config& cfg );
};
//#include <AdlPhysics/Narrowphase/ChNarrowphase.inl>
//#include <AdlPhysics/Narrowphase/ChNarrowphaseHost.inl>
#include "ChNarrowphase.inl"
};

View File

@@ -0,0 +1,303 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
// Kernel file location and kernel entry-point names passed to
// Device::getKernel() in ChNarrowphase<TYPE>::allocate(). PATH is a relative,
// Windows-style path. All three macros are #undef'd at the end of this file.
//#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels"
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\ChNarrowphaseKernels"
#define KERNEL0 "SupportCullingKernel"
#define KERNEL1 "NarrowphaseKernel"
#include "ChNarrowphaseKernels.h"
// Helpers private to the narrowphase implementation.
class ChNarrowphaseImp
{
public:
    // Packs four bytes into one 32-bit word:
    // x -> bits 0-7, y -> bits 8-15, z -> bits 16-23, w -> bits 24-31.
    static
    __inline
    u32 u32Pack(u8 x, u8 y, u8 z, u8 w)
    {
        // Widen to u32 before shifting. Left as u8 the operands are promoted
        // to signed int, and w<<24 with w >= 0x80 shifts into the sign bit.
        return u32(x) | (u32(y)<<8) | (u32(z)<<16) | (u32(w)<<24);
    }
};
// Creates the narrowphase state for device: fetches the three kernels and
// allocates the one-element counter buffer. Release with deallocate().
template<DeviceType TYPE>
typename ChNarrowphase<TYPE>::Data* ChNarrowphase<TYPE>::allocate( const Device* device )
{
    // Embedded kernel source, indexed by TYPE. Only slot 0 is ever non-null,
    // and only when the kernels are compiled in as a string.
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
    const char* src[] = {narrowphaseKernelsCL, 0};
#else
    const char* src[] = {0,0};
#endif
    // Compiler options handed to every getKernel() call below.
    char options[100] = "-I .\\NarrowPhaseCL\\";
    const char* kernelSrc = src[TYPE];

    Data* state = new Data;
    state->m_device = device;
    state->m_supportCullingKernel = device->getKernel( PATH, KERNEL0, options, kernelSrc );
    state->m_narrowphaseKernel = device->getKernel( PATH, KERNEL1, options, kernelSrc );
    state->m_narrowphaseWithPlaneKernel = device->getKernel( PATH, "NarrowphaseWithPlaneKernel", options, kernelSrc );
    state->m_counterBuffer = new Buffer<u32>( device, 1 );
    return state;
}
// Frees the counter buffer and the Data object created by allocate().
// The kernels are not released here.
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::deallocate( Data* data )
{
    Buffer<u32>* counter = data->m_counterBuffer;
    delete counter;
    delete data;
}
// Allocates a device buffer holding capacity ShapeData records and returns
// it as an opaque handle. The device must be of type TYPE.
template<DeviceType TYPE>
ShapeDataType ChNarrowphase<TYPE>::allocateShapeBuffer( const Device* device, int capacity )
{
    ADLASSERT( device->m_type == TYPE );
    Buffer<ShapeData>* shapes = new Buffer<ShapeData>( device, capacity );
    return shapes;
}
// Destroys a shape buffer previously returned by allocateShapeBuffer().
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::deallocateShapeBuffer( ShapeDataType shapeBuf )
{
    // The handle is opaque; cast it back to its real type before deleting.
    delete (Buffer<ShapeData>*)shapeBuf;
}
// Converts shape into a temporary ConvexHeightField and uploads it to slot
// idx of the shape buffer.
//
// shapeBuf        - handle returned by allocateShapeBuffer()
// shape           - source shape; only read to build the height field
// idx             - destination element index inside the shape buffer
// collisionMargin - amount the height field's AABB is expanded by
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::setShape( ShapeDataType shapeBuf, ShapeBase* shape, int idx, float collisionMargin )
{
    ConvexHeightField* cvxShape = new ConvexHeightField( shape );
    // The packing and upload are exactly what the ConvexHeightField overload
    // does, so delegate instead of keeping a second copy of that code.
    ChNarrowphase<TYPE>::setShape( shapeBuf, cvxShape, idx, collisionMargin );
    delete cvxShape;
}
// Packs cvxShape into a ShapeData record and writes it to slot idx of the
// shape buffer, waiting for the write to complete before returning.
//
// shapeBuf        - handle returned by allocateShapeBuffer()
// cvxShape        - height field to upload; its m_aabb is expanded by
//                   collisionMargin IN PLACE, so repeated calls accumulate
// idx             - destination element index inside the shape buffer
// collisionMargin - amount the AABB is expanded by
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::setShape( ShapeDataType shapeBuf, ConvexHeightField* cvxShape, int idx, float collisionMargin )
{
    Buffer<ShapeData>* dst = (Buffer<ShapeData>*)shapeBuf;
    cvxShape->m_aabb.expandBy( make_float4( collisionMargin ) );

    const int nCells = HEIGHT_RES*HEIGHT_RES*6;
    // Four u8 heights are packed into each u32 word.
    const int nPackedWords = nCells/4;

    ShapeData s;
    for(int j=0; j<nCells; j++)
    {
        s.m_normal[j] = cvxShape->m_normal[j];
    }
    for(int j=0; j<nPackedWords; j++)
    {
        s.m_height4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_data[4*j], cvxShape->m_data[4*j+1], cvxShape->m_data[4*j+2], cvxShape->m_data[4*j+3] );
        s.m_supportHeight4[j] = ChNarrowphaseImp::u32Pack( cvxShape->m_supportHeight[4*j], cvxShape->m_supportHeight[4*j+1], cvxShape->m_supportHeight[4*j+2], cvxShape->m_supportHeight[4*j+3] );
    }
    // The arrays are declared with nCells entries but only nPackedWords are
    // used. Clear the tail and the paddings so the record written to the
    // device is deterministic instead of carrying leftover stack contents.
    for(int j=nPackedWords; j<nCells; j++)
    {
        s.m_height4[j] = 0;
        s.m_supportHeight4[j] = 0;
    }
    s.m_scale = cvxShape->m_scale;
    s.m_padding0 = 0.f;
    s.m_padding1 = 0.f;
    s.m_padding2 = 0.f;

    dst->write( &s, 1, idx );
    // s lives on the stack, so the write has to finish before we return.
    DeviceUtils::waitForCompletion( dst->m_device );
}
// Run NarrowphaseKernel
// Launches the narrowphase kernel for nPairs pairs and appends the generated
// contacts to contactOut. nContacts is in/out: on entry the number of
// contacts already stored, on return the new total (clamped to the size of
// contactOut). Does nothing when nPairs is 0.
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::execute( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
    const ShapeDataType shapeBuf,
    Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg )
{
    if( nPairs == 0 ) return;

    Buffer<ShapeData>* shapes = (Buffer<ShapeData>*)shapeBuf;
    ADLASSERT( shapes->getType() == TYPE );
    const Device* device = data->m_device;

    // Device-side views of the inputs; contactOut may already hold contacts,
    // so its contents are copied across as well.
    Buffer<int2>* pairsNative = BufferUtils::map<TYPE, true>( device, pairs );
    Buffer<RigidBodyBase::Body>* bodiesNative = BufferUtils::map<TYPE, true>( device, bodyBuf );
    Buffer<Contact4>* contactsNative = BufferUtils::map<TYPE, true>( device, contactOut );

    Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
    ConstData launchConsts;
    launchConsts.m_nPairs = nPairs;
    launchConsts.m_collisionMargin = cfg.m_collisionMargin;
    launchConsts.m_capacity = contactOut->getSize() - nContacts;

    // Seed the device-side counter with the current contact count.
    u32 counter = nContacts;
    data->m_counterBuffer->write( &counter, 1 );
    {
        BufferInfo args[] = {
            BufferInfo( pairsNative, true ),
            BufferInfo( shapes ),
            BufferInfo( bodiesNative ),
            BufferInfo( contactsNative ),
            BufferInfo( data->m_counterBuffer ) };
        Launcher launcher( device, data->m_narrowphaseKernel );
        launcher.setBuffers( args, sizeof(args)/sizeof(args[0]) );
        launcher.setConst( constBuffer, launchConsts );
        // presumably 64 work-items per pair, in groups of 64 - confirm against the kernel
        launcher.launch1D( nPairs*64, 64 );
    }
    data->m_counterBuffer->read( &counter, 1 );
    DeviceUtils::waitForCompletion( device );

    // Only the contact buffer is copied back.
    BufferUtils::unmap<false>( pairsNative, pairs );
    BufferUtils::unmap<false>( bodiesNative, bodyBuf );
    BufferUtils::unmap<true>( contactsNative, contactOut );

    nContacts = min2((int)counter, contactOut->getSize() );
}
// Run NarrowphaseWithPlaneKernel
// Same flow as the plain narrowphase execute(), but launches
// m_narrowphaseWithPlaneKernel. nContacts is in/out (count on entry, new
// total on return, clamped to the size of contactOut).
// NOTE(review): vtxBuf and idxBuf are accepted but never bound to the
// kernel - confirm whether the kernel is supposed to receive them.
template<DeviceType TYPE>
void ChNarrowphase<TYPE>::execute( Data* data, const Buffer<int2>* pairs, int nPairs,
    const Buffer<RigidBodyBase::Body>* bodyBuf, const ShapeDataType shapeBuf,
    const Buffer<float4>* vtxBuf, const Buffer<int4>* idxBuf,
    Buffer<Contact4>* contactOut, int& nContacts, const Config& cfg )
{
    if( nPairs == 0 ) return;

    Buffer<ShapeData>* shapes = (Buffer<ShapeData>*)shapeBuf;
    ADLASSERT( shapes->getType() == TYPE );
    const Device* device = data->m_device;

    // Device-side views of the inputs; contactOut may already hold contacts,
    // so its contents are copied across as well.
    Buffer<int2>* pairsNative = BufferUtils::map<TYPE, true>( device, pairs );
    Buffer<RigidBodyBase::Body>* bodiesNative = BufferUtils::map<TYPE, true>( device, bodyBuf );
    Buffer<Contact4>* contactsNative = BufferUtils::map<TYPE, true>( device, contactOut );

    Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
    ConstData launchConsts;
    launchConsts.m_nPairs = nPairs;
    launchConsts.m_collisionMargin = cfg.m_collisionMargin;
    launchConsts.m_capacity = contactOut->getSize() - nContacts;

    // Seed the device-side counter with the current contact count.
    u32 counter = nContacts;
    data->m_counterBuffer->write( &counter, 1 );
    {
        BufferInfo args[] = {
            BufferInfo( pairsNative, true ),
            BufferInfo( shapes ),
            BufferInfo( bodiesNative ),
            BufferInfo( contactsNative ),
            BufferInfo( data->m_counterBuffer ) };
        Launcher launcher( device, data->m_narrowphaseWithPlaneKernel );
        launcher.setBuffers( args, sizeof(args)/sizeof(args[0]) );
        launcher.setConst( constBuffer, launchConsts );
        launcher.launch1D( nPairs*64, 64 );
    }
    data->m_counterBuffer->read( &counter, 1 );
    DeviceUtils::waitForCompletion( device );

    // Only the contact buffer is copied back.
    BufferUtils::unmap<false>( pairsNative, pairs );
    BufferUtils::unmap<false>( bodiesNative, bodyBuf );
    BufferUtils::unmap<true>( contactsNative, contactOut );

    nContacts = min2((int)counter, contactOut->getSize() );
}
// Run SupportCullingKernel
// Filters the input pairs on the device and writes the survivors to
// pairsOut. Returns the number of pairs written, clamped to the size of
// pairsOut; returns 0 immediately when nPairs is 0.
// pairsOut is declared const but is the output of this call.
template<DeviceType TYPE>
int ChNarrowphase<TYPE>::culling( Data* data, const Buffer<int2>* pairs, int nPairs, const Buffer<RigidBodyBase::Body>* bodyBuf,
    const ShapeDataType shapeBuf, const Buffer<int2>* pairsOut, const Config& cfg )
{
    if( nPairs == 0 ) return 0;

    Buffer<ShapeData>* shapes = (Buffer<ShapeData>*)shapeBuf;
    ADLASSERT( shapes->getType() == TYPE );
    const Device* device = data->m_device;

    // Inputs are copied to the device; the output buffer is mapped without
    // copying its current contents.
    Buffer<int2>* pairsInNative = BufferUtils::map<TYPE, true>( device, pairs );
    Buffer<RigidBodyBase::Body>* bodiesNative = BufferUtils::map<TYPE, true>( device, bodyBuf );
    Buffer<int2>* pairsOutNative = BufferUtils::map<TYPE, false>( device, pairsOut );

    Buffer<ConstData> constBuffer( device, 1, BufferBase::BUFFER_CONST );
    ConstData launchConsts;
    launchConsts.m_nPairs = nPairs;
    launchConsts.m_collisionMargin = cfg.m_collisionMargin;
    launchConsts.m_capacity = pairsOut->getSize();

    // The output starts empty, so the device-side counter starts at zero.
    u32 counter = 0;
    data->m_counterBuffer->write( &counter, 1 );
    {
        BufferInfo args[] = {
            BufferInfo( pairsInNative, true ),
            BufferInfo( shapes ),
            BufferInfo( bodiesNative ),
            BufferInfo( pairsOutNative ),
            BufferInfo( data->m_counterBuffer ) };
        Launcher launcher( device, data->m_supportCullingKernel );
        launcher.setBuffers( args, sizeof(args)/sizeof(args[0]) );
        launcher.setConst( constBuffer, launchConsts );
        launcher.launch1D( nPairs, 64 );
    }
    data->m_counterBuffer->read( &counter, 1 );
    DeviceUtils::waitForCompletion( device );

    // Only the surviving-pair buffer is copied back.
    BufferUtils::unmap<false>( pairsInNative, pairs );
    BufferUtils::unmap<false>( bodiesNative, bodyBuf );
    BufferUtils::unmap<true>( pairsOutNative, pairsOut );

    return min2((int)counter, pairsOut->getSize() );
}
#undef PATH
#undef KERNEL0
#undef KERNEL1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,203 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
#pragma once
#ifndef __ADL_SOLVER_H
#define __ADL_SOLVER_H
#include <Adl/Adl.h>
#include <AdlPrimitives/Math/Math.h>
#include <AdlPrimitives/Search/BoundSearch.h>
#include <AdlPrimitives/Sort/RadixSort.h>
#include <AdlPrimitives/Scan/PrefixScan.h>
#include <AdlPrimitives/Sort/RadixSort32.h>
//#include <AdlPhysics/TypeDefinition.h>
#include "AdlRigidBody.h"
#include "AdlContact4.h"
//#include "AdlPhysics/Batching/Batching.h>
// Vector type / constructor aliases, defined just before AdlConstraint4.h is
// included (presumably that header is written in terms of them - confirm).
// Both macros are #undef'd at the end of this header. The commented-out pair
// is the float4sse alternative.
#define MYF4 float4
#define MAKE_MYF4 make_float4
//#define MYF4 float4sse
//#define MAKE_MYF4 make_float4sse
#include "AdlConstraint4.h"
namespace adl
{
class SolverBase
{
public:
struct ConstraintData
{
ConstraintData(): m_b(0.f), m_appliedRambdaDt(0.f) {}
float4 m_linear; // have to be normalized
float4 m_angular0;
float4 m_angular1;
float m_jacCoeffInv;
float m_b;
float m_appliedRambdaDt;
u32 m_bodyAPtr;
u32 m_bodyBPtr;
bool isInvalid() const { return ((u32)m_bodyAPtr+(u32)m_bodyBPtr) == 0; }
float getFrictionCoeff() const { return m_linear.w; }
void setFrictionCoeff(float coeff) { m_linear.w = coeff; }
};
struct ConstraintCfg
{
ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {}
float m_positionDrift;
float m_positionConstraintCoeff;
float m_dt;
bool m_enableParallelSolve;
float m_averageExtent;
int m_staticIdx;
};
static
__inline
Buffer<Contact4>* allocateContact4( const Device* device, int capacity )
{
return new Buffer<Contact4>( device, capacity );
}
static
__inline
void deallocateContact4( Buffer<Contact4>* data ) { delete data; }
static
__inline
SolverData allocateConstraint4( const Device* device, int capacity )
{
return new Buffer<Constraint4>( device, capacity );
}
static
__inline
void deallocateConstraint4( SolverData data ) { delete (Buffer<Constraint4>*)data; }
static
__inline
void* allocateFrictionConstraint( const Device* device, int capacity, u32 type = 0 )
{
return 0;
}
static
__inline
void deallocateFrictionConstraint( void* data )
{
}
enum
{
N_SPLIT = 16,
N_BATCHES = 4,
N_OBJ_PER_SPLIT = 10,
N_TASKS_PER_BATCH = N_SPLIT*N_SPLIT,
};
};
// Contact solver for device type TYPE. All members are static; per-instance
// state lives in the Data object returned by allocate().
template<DeviceType TYPE>
class Solver : public SolverBase
{
public:
typedef Launcher::BufferInfo BufferInfo;
// State created by allocate() and released by deallocate().
struct Data
{
// Only m_nIterations is initialized here; allocate() fills in the rest.
Data() : m_nIterations(4){}
// Device the kernels and buffers belong to (not owned).
const Device* m_device;
// Opaque pointer to the implementation's batching buffers
// (SolverDeviceInl::ParallelSolveData in Solver.inl).
void* m_parallelSolveData;
// Number of solver iterations run by solveContactConstraint().
int m_nIterations;
// Kernels obtained from the device in allocate().
Kernel* m_batchingKernel;
Kernel* m_batchSolveKernel;
Kernel* m_contactToConstraintKernel;
Kernel* m_setSortDataKernel;
Kernel* m_reorderContactKernel;
Kernel* m_copyConstraintKernel;
//typename RadixSort<TYPE>::Data* m_sort;
// Helper primitives allocated in allocate().
typename RadixSort32<TYPE>::Data* m_sort32;
typename BoundSearch<TYPE>::Data* m_search;
typename PrefixScan<TYPE>::Data* m_scan;
Buffer<SortData>* m_sortDataBuffer;
// Scratch contact buffer; may be 0 after allocate() and is (re)allocated
// on demand by reorderConvertToConstraints().
Buffer<Contact4>* m_contactBuffer;
};
enum
{
// allocate() pre-allocates m_contactBuffer only when the requested pair
// capacity is below this value.
DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000,
};
// Creates kernels, helper primitives and buffers sized for pairCapacity.
static
Data* allocate( const Device* device, int pairCapacity );
// Releases what allocate() created.
static
void deallocate( Data* data );
// Optionally sorts and batches the contacts (cfg.m_enableParallelSolve),
// then converts them into constraints written to contactCOut.
static
void reorderConvertToConstraints( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
int nContacts, const ConstraintCfg& cfg );
// Runs the batched solve kernel over the constraints for
// Data::m_nIterations iterations.
static
void solveContactConstraint( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* inertiaBuf,
SolverData constraint, void* additionalData, int n );
// static
// int createSolveTasks( int batchIdx, Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
// SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity );
//private:
// The three functions below are the individual stages called by
// reorderConvertToConstraints().
static
void convertToConstraints( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
int nContacts, const ConstraintCfg& cfg );
static
void sortContacts( Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
Buffer<Contact4>* contactsIn, void* additionalData,
int nContacts, const ConstraintCfg& cfg );
static
void batchContacts( Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx );
};
#include "Solver.inl"
#include "SolverHost.inl"
};
#undef MYF4
#undef MAKE_MYF4
#endif //__ADL_SOLVER_H

View File

@@ -0,0 +1,762 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
// Kernel file locations (relative, Windows-style paths) and kernel
// entry-point names passed to Device::getKernel() in Solver<TYPE>::allocate().
// NOTE(review): KERNEL1 is not referenced in allocate() - it may be unused.
#define PATH "..\\..\\dynamics\\basic_demo\\Stubs\\SolverKernels"
#define BATCHING_PATH "..\\..\\dynamics\\basic_demo\\Stubs\\batchingKernels"
#define KERNEL1 "SingleBatchSolveKernel"
#define KERNEL2 "BatchSolveKernel"
#define KERNEL3 "ContactToConstraintKernel"
#define KERNEL4 "SetSortDataKernel"
#define KERNEL5 "ReorderContactKernel"
#include "SolverKernels.h"
#include "batchingKernels.h"
// Per-work-item debug record. Only used when DEBUG_ME is defined: the solver
// then binds a buffer of these to the batch-solve kernel and prints
// m_valInt2 / m_valInt3 when they are positive.
// NOTE(review): the layout presumably has to match the kernel-side struct -
// confirm before reordering or resizing.
struct SolverDebugInfo
{
int m_valInt0;
int m_valInt1;
int m_valInt2;
int m_valInt3;
int m_valInt4;
int m_valInt5;
int m_valInt6;
int m_valInt7;
int m_valInt8;
int m_valInt9;
int m_valInt10;
int m_valInt11;
int m_valInt12;
int m_valInt13;
int m_valInt14;
int m_valInt15;
float m_val0;
float m_val1;
float m_val2;
float m_val3;
};
// Implementation-private types of the device solver.
class SolverDeviceInl
{
public:
// Concrete type behind Solver<TYPE>::Data::m_parallelSolveData.
// Both buffers are allocated with N_SPLIT*N_SPLIT entries in allocate(),
// filled by batchContacts() and bound to the batch-solve kernel.
struct ParallelSolveData
{
Buffer<u32>* m_numConstraints;
Buffer<u32>* m_offsets;
};
};
// Creates the solver state for device: fetches all kernels, allocates the
// batching buffers and the sort/search/scan helpers, and sizes the sort data
// for pairCapacity pairs. Release with deallocate().
template<DeviceType TYPE>
typename Solver<TYPE>::Data* Solver<TYPE>::allocate( const Device* device, int pairCapacity )
{
    // Embedded kernel sources, indexed by TYPE. Only slot 0 is ever non-null,
    // and only when the kernels are compiled in as strings.
#if defined(ADL_LOAD_KERNEL_FROM_STRING)
    const char* src[] = {solverKernelsCL, 0};
    const char* src2[] = {batchingKernelsCL, 0};
#else
    const char* src[] = {0,0};
    const char* src2[] = {0,0};
#endif
    const char* const buildOptions = "-I ..\\..\\ ";
    const char* solverSrc = src[TYPE];
    const char* batchingSrc = src2[TYPE];

    Data* state = new Data;
    state->m_device = device;

    // The first two kernels pass the cache flag explicitly; the remaining
    // ones rely on getKernel()'s default for that argument.
    const bool cacheBatchingKernel = true;
    state->m_batchingKernel = device->getKernel( BATCHING_PATH, "CreateBatches", buildOptions, batchingSrc, cacheBatchingKernel );
    const bool cacheSolverKernel = true;
    state->m_batchSolveKernel = device->getKernel( PATH, KERNEL2, buildOptions, solverSrc, cacheSolverKernel );
    state->m_contactToConstraintKernel = device->getKernel( PATH, KERNEL3, buildOptions, solverSrc );
    state->m_setSortDataKernel = device->getKernel( PATH, KERNEL4, buildOptions, solverSrc );
    state->m_reorderContactKernel = device->getKernel( PATH, KERNEL5, buildOptions, solverSrc );
    state->m_copyConstraintKernel = device->getKernel( PATH, "CopyConstraintKernel", buildOptions, solverSrc );

    // Per-cell constraint counts and offsets used by the batched solve.
    const int nCells = N_SPLIT*N_SPLIT;
    SolverDeviceInl::ParallelSolveData* solveData = new SolverDeviceInl::ParallelSolveData;
    state->m_parallelSolveData = solveData;
    solveData->m_numConstraints = new Buffer<u32>( device, nCells );
    solveData->m_offsets = new Buffer<u32>( device, nCells );

    // The sort capacity is rounded with NEXTMULTIPLEOF( pairCapacity, 512 ).
    const int sortSize = NEXTMULTIPLEOF( pairCapacity, 512 );
    state->m_sort32 = RadixSort32<TYPE>::allocate( state->m_device, sortSize );//todo. remove hardcode this
    state->m_search = BoundSearch<TYPE>::allocate( state->m_device, nCells );
    state->m_scan = PrefixScan<TYPE>::allocate( state->m_device, nCells );
    state->m_sortDataBuffer = new Buffer<SortData>( state->m_device, sortSize );

    // Large capacities defer the scratch contact buffer until it is needed
    // (see reorderConvertToConstraints).
    state->m_contactBuffer = ( pairCapacity < DYNAMIC_CONTACT_ALLOCATION_THRESHOLD )
        ? new Buffer<Contact4>( state->m_device, pairCapacity )
        : 0;
    return state;
}
// Releases everything allocate() created: the batching buffers, the
// sort/search/scan helpers, the sort data, the scratch contact buffer and
// finally the Data object itself. The kernels are not released here.
template<DeviceType TYPE>
void Solver<TYPE>::deallocate( Data* data )
{
    SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
    delete solveData->m_numConstraints;
    delete solveData->m_offsets;
    delete solveData;

    RadixSort32<TYPE>::deallocate( data->m_sort32 );
    BoundSearch<TYPE>::deallocate( data->m_search );
    PrefixScan<TYPE>::deallocate( data->m_scan );

    delete data->m_sortDataBuffer;
    // m_contactBuffer may be 0; deleting a null pointer is a no-op.
    delete data->m_contactBuffer;
    delete data;
}
// Prepares contacts for solving. When cfg.m_enableParallelSolve is set the
// contacts are sorted, copied back into the mapped input buffer and batched;
// in every case they are then converted into constraints (contactCOut).
// contactsIn is mapped to the device on entry and copied back on exit, so it
// may come back reordered.
template<DeviceType TYPE>
void Solver<TYPE>::reorderConvertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
{
// Make sure the scratch contact buffer exists and holds at least nContacts
// entries: drop it if it is too small, then (re)allocate if missing.
if( data->m_contactBuffer )
{
if( data->m_contactBuffer->getSize() < nContacts )
{
BT_PROFILE("delete data->m_contactBuffer;");
delete data->m_contactBuffer;
data->m_contactBuffer = 0;
}
}
if( data->m_contactBuffer == 0 )
{
BT_PROFILE("new data->m_contactBuffer;");
data->m_contactBuffer = new Buffer<Contact4>( data->m_device, nContacts );
}
Stopwatch sw;
// NOTE(review): the map uses TYPE_CL explicitly rather than TYPE; this
// function is effectively CL-only (convertToConstraints asserts TYPE_CL).
Buffer<Contact4>* contactNative = BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn, nContacts );
//DeviceUtils::Config dhCfg;
//Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
if( cfg.m_enableParallelSolve )
{
SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
DeviceUtils::waitForCompletion( data->m_device );
sw.start();
// contactsIn -> data->m_contactBuffer
{
BT_PROFILE("sortContacts");
Solver<TYPE>::sortContacts( data, bodyBuf, contactNative, additionalData, nContacts, cfg );
DeviceUtils::waitForCompletion( data->m_device );
}
sw.split();
// Copy the scratch buffer back into contactNative. The disabled branch
// does it through host memory; the live branch uses the copy kernel.
if(0)
{
Contact4* tmp = new Contact4[nContacts];
data->m_contactBuffer->read( tmp, nContacts );
DeviceUtils::waitForCompletion( data->m_contactBuffer->m_device );
contactNative->write( tmp, nContacts );
DeviceUtils::waitForCompletion( contactNative->m_device );
delete [] tmp;
}
else
{
BT_PROFILE("m_copyConstraintKernel");
Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
// Only cdata.x (the element count) is set; y, z and w stay uninitialized.
int4 cdata; cdata.x = nContacts;
BufferInfo bInfo[] = { BufferInfo( data->m_contactBuffer ), BufferInfo( contactNative ) };
// Launcher launcher( data->m_device, data->m_device->getKernel( PATH, "CopyConstraintKernel", "-I ..\\..\\ -Wf,--c++", 0 ) );
Launcher launcher( data->m_device, data->m_copyConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
launcher.setConst( constBuffer, cdata );
launcher.launch1D( nContacts, 64 );
DeviceUtils::waitForCompletion( data->m_device );
}
{
BT_PROFILE("batchContacts");
Solver<TYPE>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, cfg.m_staticIdx );
}
}
{
BT_PROFILE("waitForCompletion (batchContacts)");
DeviceUtils::waitForCompletion( data->m_device );
}
sw.split();
//================
// Contact -> constraint conversion; the host variant is disabled.
if(0)
{
// Solver<TYPE_HOST>::Data* solverHost = Solver<TYPE_HOST>::allocate( deviceHost, nContacts );
// Solver<TYPE_HOST>::convertToConstraints( solverHost, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
// Solver<TYPE_HOST>::deallocate( solverHost );
}
else
{
BT_PROFILE("convertToConstraints");
Solver<TYPE>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, cfg );
}
{
BT_PROFILE("convertToConstraints waitForCompletion");
DeviceUtils::waitForCompletion( data->m_device );
}
sw.stop();
{
// The timings are fetched but not used; the printf below is disabled.
BT_PROFILE("printf");
float t[5];
sw.getMs( t, 3 );
// printf("%3.2f, %3.2f, %3.2f, ", t[0], t[1], t[2]);
}
{
BT_PROFILE("deallocate and unmap");
//DeviceUtils::deallocate( deviceHost );
// Copies the (possibly reordered) contacts back into contactsIn.
BufferUtils::unmap<true>( contactNative, contactsIn, nContacts );
}
}
// Runs the batched constraint solver on the device.
//
// The batch-solve kernel is launched in two passes (cdata.x = 0, then
// cdata.x = 1). Each pass runs data->m_nIterations iterations, and each
// iteration launches N_BATCHES batches, one per value of cdata.z.
// NOTE(review): the meaning of the two passes and of cdata.y = 250 is
// defined by the kernel source - confirm there.
//
// data           - solver state from allocate(); must not be null
// bodyBuf        - rigid bodies; mapped to the device and copied back
// shapeBuf       - inertia data; mapped to the device, not copied back
// constraint     - opaque handle to a Buffer<Constraint4>; copied back
// additionalData - only forwarded to the disabled host fallback
// n              - only forwarded to the disabled host fallback; the device
//                  path overwrites the value stored in cdata.x
template<DeviceType TYPE>
void Solver<TYPE>::solveContactConstraint( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
    SolverData constraint, void* additionalData, int n )
{
    // Disabled host fallback, kept for debugging.
    if(0)
    {
        DeviceUtils::Config dhCfg;
        Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
        {
            Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
            Solver<TYPE_HOST>::solveContactConstraint( hostData, bodyBuf, shapeBuf, constraint, additionalData, n );
            Solver<TYPE_HOST>::deallocate( hostData );
        }
        DeviceUtils::deallocate( deviceHost );
        return;
    }

    ADLASSERT( data );

    Buffer<Constraint4>* cBuffer =0;
    Buffer<RigidBodyBase::Body>* gBodyNative=0;
    Buffer<RigidBodyBase::Inertia>* gShapeNative =0;
    Buffer<Constraint4>* gConstraintNative =0;
    {
        BT_PROFILE("map");
        cBuffer = (Buffer<Constraint4>*)constraint;
        gBodyNative= BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
        gShapeNative= BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
        gConstraintNative = BufferUtils::map<TYPE, true>( data->m_device, cBuffer );
        DeviceUtils::waitForCompletion( data->m_device );
    }

    // NOTE(review): unlike the other launch sites in this file, this const
    // buffer is default-constructed (no device, no storage) - confirm that
    // Launcher::setConst() does not need backing storage on this device type.
    Buffer<int4> constBuffer;
    int4 cdata = make_int4( n, 0, 0, 0 );
    {
        SolverDeviceInl::ParallelSolveData* solveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;
        const int nn = N_SPLIT*N_SPLIT;

        cdata.x = 0;
        cdata.y = 250;

#if 0
        //check how the cells are filled
        unsigned int* hostCounts = new unsigned int[N_SPLIT*N_SPLIT];
        solveData->m_numConstraints->read(hostCounts,N_SPLIT*N_SPLIT);
        DeviceUtils::waitForCompletion( data->m_device );
        for (int i=0;i<N_SPLIT*N_SPLIT;i++)
        {
            if (hostCounts[i])
            {
                printf("hostCounts[%d]=%d\n",i,hostCounts[i]);
            }
        }
        delete[] hostCounts;
#endif

        // Total work-items per launch, issued in groups of 64.
        int numWorkItems = 64*nn/N_BATCHES;
#ifdef DEBUG_ME
        SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
        adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
#endif

        // First pass (cdata.x == 0)
        {
            BT_PROFILE("m_batchSolveKernel iterations");
            for(int iter=0; iter<data->m_nIterations; iter++)
            {
                for(int ib=0; ib<N_BATCHES; ib++)
                {
#ifdef DEBUG_ME
                    memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
                    gpuDebugInfo.write(debugInfo,numWorkItems);
#endif
                    cdata.z = ib;
                    cdata.w = N_SPLIT;

                    BufferInfo bInfo[] = {
                        BufferInfo( gBodyNative ),
                        BufferInfo( gShapeNative ),
                        BufferInfo( gConstraintNative ),
                        BufferInfo( solveData->m_numConstraints ),
                        BufferInfo( solveData->m_offsets )
#ifdef DEBUG_ME
                        , BufferInfo(&gpuDebugInfo)
#endif
                    };
                    Launcher launcher( data->m_device, data->m_batchSolveKernel );
                    launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
                    launcher.setConst( constBuffer, cdata );
                    launcher.launch1D( numWorkItems, 64 );

#ifdef DEBUG_ME
                    DeviceUtils::waitForCompletion( data->m_device );
                    gpuDebugInfo.read(debugInfo,numWorkItems);
                    DeviceUtils::waitForCompletion( data->m_device );
                    for (int i=0;i<numWorkItems;i++)
                    {
                        // The format string carries the index as well as the
                        // value, so each argument has a matching %d.
                        if (debugInfo[i].m_valInt2>0)
                        {
                            printf("debugInfo[%d].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
                        }
                        if (debugInfo[i].m_valInt3>0)
                        {
                            printf("debugInfo[%d].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
                        }
                    }
#endif //DEBUG_ME
                }
            }
            DeviceUtils::waitForCompletion( data->m_device );
        }

        // Second pass (cdata.x == 1)
        cdata.x = 1;
        {
            BT_PROFILE("m_batchSolveKernel iterations2");
            for(int iter=0; iter<data->m_nIterations; iter++)
            {
                for(int ib=0; ib<N_BATCHES; ib++)
                {
                    cdata.z = ib;
                    cdata.w = N_SPLIT;

                    BufferInfo bInfo[] = {
                        BufferInfo( gBodyNative ),
                        BufferInfo( gShapeNative ),
                        BufferInfo( gConstraintNative ),
                        BufferInfo( solveData->m_numConstraints ),
                        BufferInfo( solveData->m_offsets )
#ifdef DEBUG_ME
                        ,BufferInfo(&gpuDebugInfo)
#endif //DEBUG_ME
                    };
                    Launcher launcher( data->m_device, data->m_batchSolveKernel );
                    launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
                    launcher.setConst( constBuffer, cdata );
                    launcher.launch1D( numWorkItems, 64 );
                }
            }
            DeviceUtils::waitForCompletion( data->m_device );
        }
#ifdef DEBUG_ME
        delete[] debugInfo;
#endif //DEBUG_ME
    }

    {
        BT_PROFILE("unmap");
        // Bodies and constraints are copied back; the inertia data is not.
        BufferUtils::unmap<true>( gBodyNative, bodyBuf );
        BufferUtils::unmap<false>( gShapeNative, shapeBuf );
        BufferUtils::unmap<true>( gConstraintNative, cBuffer );
        DeviceUtils::waitForCompletion( data->m_device );
    }
}
// Converts contacts into solver constraints on the device by launching
// m_contactToConstraintKernel with nContacts work items.
//   data           - solver state; supplies the device and the kernel handle
//   bodyBuf        - rigid body buffer passed to the kernel
//   shapeBuf       - inertia buffer passed to the kernel
//   contactsIn     - input contacts
//   contactCOut    - output buffer; reinterpreted here as Buffer<Constraint4>*
//   additionalData - not referenced by this function
//   nContacts      - number of contacts, also used as the 1D launch size
//   cfg            - supplies m_dt, m_positionDrift and m_positionConstraintCoeff
template<DeviceType TYPE>
void Solver<TYPE>::convertToConstraints( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
const Buffer<RigidBodyBase::Inertia>* shapeBuf,
Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
int nContacts, const ConstraintCfg& cfg )
{
ADLASSERT( data->m_device->m_type == TYPE_CL );
Buffer<RigidBodyBase::Body>* bodyNative =0;
Buffer<RigidBodyBase::Inertia>* shapeNative =0;
Buffer<Contact4>* contactNative =0;
Buffer<Constraint4>* constraintNative =0;
{
BT_PROFILE("map buffers");
// NOTE(review): the bool template argument of map/unmap presumably selects
// whether buffer contents are copied across - confirm against BufferUtils.
// Inputs are mapped with 'true', the output constraint buffer with 'false'.
bodyNative = BufferUtils::map<TYPE, true>( data->m_device, bodyBuf );
shapeNative = BufferUtils::map<TYPE, true>( data->m_device, shapeBuf );
contactNative= BufferUtils::map<TYPE, true>( data->m_device, contactsIn );
constraintNative = BufferUtils::map<TYPE, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );
}
// Constant block handed to the kernel through setConst().
struct CB
{
int m_nContacts;
float m_dt;
float m_positionDrift;
float m_positionConstraintCoeff;
};
{
BT_PROFILE("m_contactToConstraintKernel");
CB cdata;
cdata.m_nContacts = nContacts;
cdata.m_dt = cfg.m_dt;
cdata.m_positionDrift = cfg.m_positionDrift;
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
// Kernel buffer order: contacts, bodies, inertias, output constraints.
BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( shapeNative ),
BufferInfo( constraintNative )};
Launcher launcher( data->m_device, data->m_contactToConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
launcher.setConst( constBuffer, cdata );
// NOTE(review): second argument (64) is presumably the work-group size - confirm.
launcher.launch1D( nContacts, 64 );
// Block until the kernel has finished before the buffers are unmapped.
DeviceUtils::waitForCompletion( data->m_device );
}
{
BT_PROFILE("unmap");
// Mirror of the mapping above: inputs unmapped with 'false', the
// constraint output with 'true'.
BufferUtils::unmap<false>( bodyNative, bodyBuf );
BufferUtils::unmap<false>( shapeNative, shapeBuf );
BufferUtils::unmap<false>( contactNative, contactsIn );
BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
}
}
// Sorts contacts by spatial cell on the device so that each cell's contacts are
// contiguous, and fills the per-cell count / offset tables in the parallel
// solve data. The reordered contacts are written to data->m_contactBuffer.
//   data           - solver state (device, kernels, sort/search/scan helpers,
//                    m_sortDataBuffer, m_contactBuffer, m_parallelSolveData)
//   bodyBuf        - rigid bodies, read by the cell-index kernel
//   contactsIn     - contacts to sort
//   additionalData - not referenced by this function
//   nContacts      - number of valid contacts
//   cfg            - m_enableParallelSolve gates all work; m_staticIdx and
//                    m_averageExtent parameterise the cell-index kernel
// When cfg.m_enableParallelSolve is false the buffers are only mapped and unmapped.
template<DeviceType TYPE>
void Solver<TYPE>::sortContacts( typename Solver<TYPE>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
    Buffer<Contact4>* contactsIn, void* additionalData,
    int nContacts, const typename Solver<TYPE>::ConstraintCfg& cfg )
{
    ADLASSERT( data->m_device->m_type == TYPE_CL );

    Buffer<RigidBodyBase::Body>* bodyNative
        = BufferUtils::map<TYPE_CL, true>( data->m_device, bodyBuf );
    Buffer<Contact4>* contactNative
        = BufferUtils::map<TYPE_CL, true>( data->m_device, contactsIn );

    const int sortAlignment = 512; // todo. get this out of sort

    if( cfg.m_enableParallelSolve )
    {
        SolverDeviceInl::ParallelSolveData* nativeSolveData = (SolverDeviceInl::ParallelSolveData*)data->m_parallelSolveData;

        // The sort operates on a padded element count.
        int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );

        Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
        Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );

        { // 2. set cell idx
            struct CB
            {
                int m_nContacts;
                int m_staticIdx;
                float m_scale;
                int m_nSplit;
            };

            ADLASSERT( sortSize%64 == 0 );
            CB cdata;
            cdata.m_nContacts = nContacts;
            cdata.m_staticIdx = cfg.m_staticIdx;
            cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent);
            cdata.m_nSplit = N_SPLIT;

            Buffer<CB> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
            BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( bodyNative ), BufferInfo( data->m_sortDataBuffer ) };
            Launcher launcher( data->m_device, data->m_setSortDataKernel );
            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
            launcher.setConst( constBuffer, cdata );
            // Launched over the padded size so the padding entries get keys too.
            launcher.launch1D( sortSize, 64 );
        }

        { // 3. sort by cell idx
            // The full 32-bit key is sorted; keys are < N_SPLIT*N_SPLIT, so a
            // narrower sort would be possible if RadixSort32 exposed a bit count.
            RadixSort32<TYPE>::execute( data->m_sort32, *data->m_sortDataBuffer,sortSize);
        }

        { // 4. find entries
            BoundSearch<TYPE>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, N_SPLIT*N_SPLIT, BoundSearchBase::COUNT );
            PrefixScan<TYPE>::execute( data->m_scan, *countsNative, *offsetsNative, N_SPLIT*N_SPLIT );
        }

        { // 5. sort constraints by cellIdx
            // todo. preallocate this
            Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
            // Initialise every component: the whole int4 is uploaded, and the
            // original left y/z/w indeterminate.
            int4 cdata = make_int4( nContacts, 0, 0, 0 );
            BufferInfo bInfo[] = { BufferInfo( contactNative ), BufferInfo( data->m_contactBuffer ), BufferInfo( data->m_sortDataBuffer ) };
            Launcher launcher( data->m_device, data->m_reorderContactKernel );
            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
            launcher.setConst( constBuffer, cdata );
            launcher.launch1D( nContacts, 64 );
        }
    }

    BufferUtils::unmap<false>( bodyNative, bodyBuf );
    BufferUtils::unmap<false>( contactNative, contactsIn );
}
// Assigns batch indices to the (already cell-sorted) contacts on the device.
// Runs m_batchingKernel with 64*N_SPLIT*N_SPLIT work items, which reads
// data->m_contactBuffer-related buffers, then copies data->m_contactBuffer back
// into the caller's contact buffer.
//   data      - solver state (device, m_batchingKernel, m_contactBuffer)
//   contacts  - contact buffer; overwritten with the batched contacts
//   nContacts - number of contacts
//   n         - per-cell contact counts
//   offsets   - per-cell start offsets
//   staticIdx - body index passed to the kernel in cdata.z
// The if(0) sections are disabled diagnostics kept for reference.
template<DeviceType TYPE>
void Solver<TYPE>::batchContacts( typename Solver<TYPE>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
{
    ADLASSERT( data->m_device->m_type == TYPE_CL );

    if(0)
    {
        // Disabled: run the host implementation instead of the GPU kernel.
        BT_PROFILE("CPU classTestKernel/Kernel (batch generation?)");
        DeviceUtils::Config dhCfg;
        Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
        {
            Solver<TYPE_HOST>::Data* hostData = Solver<TYPE_HOST>::allocate( deviceHost, 0 );
            Solver<TYPE_HOST>::batchContacts( hostData, contacts, nContacts, n, offsets, staticIdx );
            Solver<TYPE_HOST>::deallocate( hostData );
        }
        DeviceUtils::deallocate( deviceHost );
        return;
    }

    Buffer<Contact4>* contactNative
        = BufferUtils::map<TYPE_CL, true>( data->m_device, contacts, nContacts );
    Buffer<u32>* nNative
        = BufferUtils::map<TYPE_CL, true>( data->m_device, n );
    Buffer<u32>* offsetsNative
        = BufferUtils::map<TYPE_CL, true>( data->m_device, offsets );

    {
        BT_PROFILE("GPU classTestKernel/Kernel (batch generation?)");
        Buffer<int4> constBuffer( data->m_device, 1, BufferBase::BUFFER_CONST );
        int4 cdata;
        cdata.x = nContacts;
        cdata.y = 0;
        cdata.z = staticIdx;
        cdata.w = 0; // the whole int4 is uploaded; do not leave w indeterminate

        int numWorkItems = 64*N_SPLIT*N_SPLIT;
#ifdef BATCH_DEBUG
        SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
        adl::Buffer<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
        memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
        gpuDebugInfo.write(debugInfo,numWorkItems);
#endif

        BufferInfo bInfo[] = {
            BufferInfo( contactNative ),
            BufferInfo( data->m_contactBuffer ),
            BufferInfo( nNative ),
            BufferInfo( offsetsNative )
#ifdef BATCH_DEBUG
            , BufferInfo(&gpuDebugInfo)
#endif
        };

        Launcher launcher( data->m_device, data->m_batchingKernel);
        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
        launcher.setConst( constBuffer, cdata );
        launcher.launch1D( numWorkItems, 64 );
        DeviceUtils::waitForCompletion( data->m_device );

#ifdef BATCH_DEBUG
        // Read back the contacts (for inspection in a debugger) and the
        // per-work-item debug counters written by the kernel.
        Contact4* hostContacts = new Contact4[nContacts];
        data->m_contactBuffer->read(hostContacts,nContacts);
        DeviceUtils::waitForCompletion( data->m_device );

        gpuDebugInfo.read(debugInfo,numWorkItems);
        DeviceUtils::waitForCompletion( data->m_device );

        for (int i=0;i<numWorkItems;i++)
        {
            if (debugInfo[i].m_valInt1>0)
            {
                printf("catch\n");
            }
            if (debugInfo[i].m_valInt2>0)
            {
                printf("catch22\n");
            }
            if (debugInfo[i].m_valInt3>0)
            {
                printf("catch666\n");
            }
            if (debugInfo[i].m_valInt4>0)
            {
                printf("catch777\n");
            }
        }
        delete[] hostContacts;
        delete[] debugInfo;
#endif //BATCH_DEBUG
    }

    if(0)
    {
        // Disabled: dump per-cell counts and the batch index sequence.
        u32* nhost = new u32[N_SPLIT*N_SPLIT];
        nNative->read( nhost, N_SPLIT*N_SPLIT );

        Contact4* chost = new Contact4[nContacts];
        data->m_contactBuffer->read( chost, nContacts );
        DeviceUtils::waitForCompletion( data->m_device );

        printf(">>");
        int nonzero = 0;
        u32 maxn = 0;
        for(int i=0; i<N_SPLIT*N_SPLIT; i++)
        {
            printf("%d-", nhost[i]);
            nonzero += (nhost[i]==0)? 0:1;
            maxn = max2( nhost[i], maxn );
        }
        printf("\nnonzero:zero = %d:%d (%d)\n", nonzero, N_SPLIT*N_SPLIT-nonzero, maxn);
        printf("\n\n");

        int prev = 0;
        int prevIdx = 0;
        int maxNBatches = 0;
        for(int i=0; i<nContacts; i++)
        {
            // printf("(%d, %d:%d),", chost[i].m_batchIdx, chost[i].m_bodyAPtr, chost[i].m_bodyBPtr);
            if( prev != 0 && chost[i].m_batchIdx == 0 )
            {
                maxNBatches = max2( maxNBatches, prev );
                printf("\n[%d]", prev);
                //for(int j=prevIdx; j<i; j++)
                //{
                //	printf("(%d:%d),", chost[j].m_bodyAPtr, chost[j].m_bodyBPtr);
                //}
                //printf("\n");
                prevIdx = i;
            }
            printf("%d,", chost[i].m_batchIdx);
            prev = chost[i].m_batchIdx;
        }
        printf("\n");
        printf("Max: %d\n", maxNBatches);

        delete [] chost;
        delete [] nhost;
    }

    // copy buffer to buffer
    contactNative->write( *data->m_contactBuffer, nContacts );
    DeviceUtils::waitForCompletion( data->m_device );

    if(0)
    {
        // Disabled: verify that no contact references a body past staticIdx.
        DeviceUtils::Config dhCfg;
        Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, dhCfg );
        {
            HostBuffer<Contact4> host( deviceHost, nContacts );
            contactNative->read( host.m_ptr, nContacts );
            DeviceUtils::waitForCompletion( data->m_device );

            for(int i=0; i<nContacts; i++)
            {
                ADLASSERT( host[i].m_bodyAPtr <= (u32)staticIdx );
                ADLASSERT( host[i].m_bodyBPtr <= (u32)staticIdx );
            }
        }
        DeviceUtils::deallocate( deviceHost );
    }

    BufferUtils::unmap<true>( contactNative, contacts );
    BufferUtils::unmap<false>( nNative, n );
    BufferUtils::unmap<false>( offsetsNative, offsets );
}
#undef PATH
#undef KERNEL1
#undef KERNEL2
#undef KERNEL3
#undef KERNEL4
#undef KERNEL5

View File

@@ -0,0 +1,848 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
class SolverInl
{
public:
typedef SolverBase::ConstraintData ConstraintData;
// Fills the Jacobian rows for a constraint direction n acting at lever arms
// r0 (body A) and r1 (body B):
//   linear   = -n
//   angular0 = -(r0 x n)
//   angular1 =  (r1 x n)
static
__forceinline
void setLinearAndAngular(const MYF4& n, const MYF4& r0, const MYF4& r1,
    MYF4& linear, MYF4& angular0, MYF4& angular1)
{
    // Assignment order is kept as-is in case an output aliases an input.
    linear   = -n;
    angular0 = -cross3( r0, n );
    angular1 = cross3( r1, n );
}
// Returns -1 / (J M^-1 J^T) for one constraint row.
// linear0 and linear1 are not read: they are taken to be normalized, so each
// linear term reduces to the corresponding inverse mass.
static
__forceinline
float calcJacCoeff(const MYF4& linear0, const MYF4& linear1, const MYF4& angular0, const MYF4& angular1,
    float invMass0, const Matrix3x3& invInertia0, float invMass1, const Matrix3x3& invInertia1)
{
    const float linTerm0 = invMass0; // dot3F4(linear0, linear0)*invMass0 with |linear0| == 1
    const float angTerm0 = dot3F4( mtMul3( angular0, invInertia0 ), angular0 );
    const float linTerm1 = invMass1; // dot3F4(linear1, linear1)*invMass1 with |linear1| == 1
    const float angTerm1 = dot3F4( mtMul3( angular1, invInertia1 ), angular1 );

    // Summation order matches the original expression.
    const float denom = linTerm0 + angTerm0 + linTerm1 + angTerm1;
    return -1.f/denom;
}
// Projects the velocities of both bodies onto a constraint row:
// l0.linVel0 + a0.angVel0 + l1.linVel1 + a1.angVel1.
static
__forceinline
float calcRelVel(const MYF4& l0, const MYF4& l1, const MYF4& a0, const MYF4& a1,
    const MYF4& linVel0, const MYF4& angVel0, const MYF4& linVel1, const MYF4& angVel1)
{
    // Accumulated left to right, in the same order as the single-expression form.
    float relVel = dot3F4( l0, linVel0 );
    relVel += dot3F4( a0, angVel0 );
    relVel += dot3F4( l1, linVel1 );
    relVel += dot3F4( a1, angVel1 );
    return relVel;
}
// Builds one Constraint4 (up to four contact points plus two friction rows)
// from a Contact4 and the state of the two bodies involved.
//   posA/linVelA/angVelA/invMassA/invInertiaA - state of body A
//   posB/linVelB/angVelB/invMassB/invInertiaB - state of body B
//   src  - source contact (normal, world positions, penetration, coefficients)
//   cfg  - supplies m_dt, m_positionDrift, m_positionConstraintCoeff
//   dstC - constraint to fill
static
__forceinline
void setConstraint4( const MYF4& posA, const MYF4& linVelA, const MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
const MYF4& posB, const MYF4& linVelB, const MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
const Contact4& src, const SolverBase::ConstraintCfg& cfg,
Constraint4& dstC )
{
dstC.m_bodyA = (u32)src.m_bodyAPtr;
dstC.m_bodyB = (u32)src.m_bodyBPtr;
float dtInv = 1.f/cfg.m_dt;
// Start with no accumulated normal impulse on any point.
for(int ic=0; ic<4; ic++)
{
dstC.m_appliedRambdaDt[ic] = 0.f;
}
// Zero friction coefficients mark "no friction rows"; solveFriction() tests these.
dstC.m_fJacCoeffInv[0] = dstC.m_fJacCoeffInv[1] = 0.f;
const MYF4& n = src.m_worldNormal;
dstC.m_linear = -n;
dstC.setFrictionCoeff( src.getFrictionCoeff() );
for(int ic=0; ic<4; ic++)
{
MYF4 r0 = src.m_worldPos[ic] - posA;
MYF4 r1 = src.m_worldPos[ic] - posB;
// Unused point slots get a zero coefficient, which solveContact() skips.
if( ic >= src.getNPoints() )
{
dstC.m_jacCoeffInv[ic] = 0.f;
continue;
}
float relVelN;
{
MYF4 linear, angular0, angular1;
setLinearAndAngular(n, r0, r1, linear, angular0, angular1);
dstC.m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, invInertiaA, invMassB, invInertiaB );
relVelN = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB);
// Restitution is dropped when the squared normal velocity is below 0.004.
float e = src.getRestituitionCoeff();
if( relVelN*relVelN < 0.004f ) e = 0.f;
dstC.m_b[ic] = e*relVelN;
// Positional correction term built from penetration, drift and coefficient, scaled by 1/dt.
dstC.m_b[ic] += (src.getPenetration(ic) + cfg.m_positionDrift)*cfg.m_positionConstraintCoeff*dtInv;
dstC.m_appliedRambdaDt[ic] = 0.f;
}
}
if( src.getNPoints() > 1 )
{ // prepare friction
// Friction acts at the average of the contact points.
MYF4 center = MAKE_MYF4(0.f);
for(int i=0; i<src.getNPoints(); i++) center += src.m_worldPos[i];
center /= (float)src.getNPoints();
// Two tangents derived from the normal and the offset of point 0 from the center.
// NOTE(review): if point 0 coincides with the center the cross product is zero
// before normalize3 - confirm how normalize3 handles that.
MYF4 tangent[2];
tangent[0] = cross3( src.m_worldNormal, src.m_worldPos[0]-center );
tangent[1] = cross3( tangent[0], src.m_worldNormal );
tangent[0] = normalize3( tangent[0] );
tangent[1] = normalize3( tangent[1] );
MYF4 r[2];
r[0] = center - posA;
r[1] = center - posB;
for(int i=0; i<2; i++)
{
MYF4 linear, angular0, angular1;
setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
dstC.m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, invInertiaA, invMassB, invInertiaB );
dstC.m_fAppliedRambdaDt[i] = 0.f;
}
dstC.m_center = center;
}
else
{
// single point constraint
// m_fAppliedRambdaDt and m_center are not written on this path.
}
// Copy the used world positions and zero the unused slots.
for(int i=0; i<4; i++)
{
if( i<src.getNPoints() )
{
dstC.m_worldPos[i] = src.m_worldPos[i];
}
else
{
dstC.m_worldPos[i] = MAKE_MYF4(0.f);
}
}
}
/*
struct Constraint4
{
float4 m_linear; X
float4 m_angular0[4]; X
float4 m_angular1[4]; center
float m_jacCoeffInv[4]; [0,1]
float m_b[4]; X
float m_appliedRambdaDt[4]; [0,1]
void* m_bodyAPtr; X
void* m_bodyBPtr; X
};
*/
// Applies one iteration of the two friction rows of a constraint, updating the
// velocities of both bodies in place, then damps spin about the contact normal
// under the condition in the last block.
//   cs                    - constraint; m_fAppliedRambdaDt is updated
//   posA.. / posB..       - body state; linVel*/angVel* are modified
//   maxRambdaDt/minRambdaDt - clamp limits for the accumulated impulse of
//                             each row (only entries 0 and 1 are read)
// Returns immediately when neither friction row was prepared.
static
__inline
void solveFriction(Constraint4& cs,
    const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
    const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
    float maxRambdaDt[4], float minRambdaDt[4])
{
    // Both rows must be unused to skip; the original tested index 0 twice.
    if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[1] == 0 ) return;

    const MYF4& center = cs.m_center;
    MYF4 n = -cs.m_linear;

    // Rebuild the same tangent frame that setConstraint4() used.
    MYF4 tangent[2];
    tangent[0] = cross3( n, cs.m_worldPos[0]-center );
    tangent[1] = cross3( tangent[0], n );
    tangent[0] = normalize3( tangent[0] );
    tangent[1] = normalize3( tangent[1] );

    MYF4 angular0, angular1, linear;
    MYF4 r0 = center - posA;
    MYF4 r1 = center - posB;

    for(int i=0; i<2; i++)
    {
        setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
        float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
            linVelA, angVelA, linVelB, angVelB );
        rambdaDt *= cs.m_fJacCoeffInv[i];

        {
            // Clamp the accumulated impulse, then apply only the difference.
            float prevSum = cs.m_fAppliedRambdaDt[i];
            float updated = prevSum;
            updated += rambdaDt;
            updated = max2( updated, minRambdaDt[i] );
            updated = min2( updated, maxRambdaDt[i] );
            rambdaDt = updated - prevSum;
            cs.m_fAppliedRambdaDt[i] = updated;
        }

        MYF4 linImp0 = invMassA*linear*rambdaDt;
        MYF4 linImp1 = invMassB*(-linear)*rambdaDt;
        MYF4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
        MYF4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;

        linVelA += linImp0;
        angVelA += angImp0;
        linVelB += linImp1;
        angVelB += angImp1;
    }

    { // angular damping for point constraint
        MYF4 ab = normalize3( posB - posA );
        MYF4 ac = normalize3( center - posA );
        // Applied when the contact center lies nearly on the A->B line, or when
        // either body has zero inverse mass.
        if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
        {
            float angNA = dot3F4( n, angVelA );
            float angNB = dot3F4( n, angVelB );

            // Remove 10% of the spin about the normal from each body.
            angVelA -= (angNA*0.1f)*n;
            angVelB -= (angNB*0.1f)*n;
        }
    }
}
// Applies one iteration of the normal (non-penetration) rows of a constraint.
//   JACOBI == false : velocities are updated after every contact point, so
//                     later points see the effect of earlier ones.
//   JACOBI == true  : impulses of all points are summed and applied once at
//                     the end.
//   cs                      - constraint; m_appliedRambdaDt is updated
//   posA.. / posB..         - body state; linVel*/angVel* are modified
//   maxRambdaDt/minRambdaDt - per-point clamp limits for the accumulated impulse
template<bool JACOBI>
static
__inline
void solveContact(Constraint4& cs,
    const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
    const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
    float maxRambdaDt[4], float minRambdaDt[4])
{
    // Deferred velocity changes, only consumed in Jacobi mode.
    MYF4 sumLinA = MAKE_MYF4(0.f);
    MYF4 sumAngA = MAKE_MYF4(0.f);
    MYF4 sumLinB = MAKE_MYF4(0.f);
    MYF4 sumAngB = MAKE_MYF4(0.f);

    for(int pt=0; pt<4; pt++)
    {
        // A zero coefficient marks an unused point; skipping is an optimisation
        // only, since its impulse would be scaled to zero anyway.
        if( cs.m_jacCoeffInv[pt] == 0.f ) continue;

        MYF4 angJacA, angJacB, linJac;
        MYF4 armA = cs.m_worldPos[pt] - posA;
        MYF4 armB = cs.m_worldPos[pt] - posB;
        setLinearAndAngular( -cs.m_linear, armA, armB, linJac, angJacA, angJacB );

        float impulse = calcRelVel(cs.m_linear, -cs.m_linear, angJacA, angJacB,
            linVelA, angVelA, linVelB, angVelB ) + cs.m_b[pt];
        impulse *= cs.m_jacCoeffInv[pt];

        // Clamp the accumulated impulse and keep only the applied difference.
        const float before = cs.m_appliedRambdaDt[pt];
        float after = before;
        after += impulse;
        after = max2( after, minRambdaDt[pt] );
        after = min2( after, maxRambdaDt[pt] );
        impulse = after - before;
        cs.m_appliedRambdaDt[pt] = after;

        MYF4 dLinA = invMassA*linJac*impulse;
        MYF4 dLinB = invMassB*(-linJac)*impulse;
        MYF4 dAngA = mtMul1(invInertiaA, angJacA)*impulse;
        MYF4 dAngB = mtMul1(invInertiaB, angJacB)*impulse;

        if( JACOBI )
        {
            sumLinA += dLinA;
            sumAngA += dAngA;
            sumLinB += dLinB;
            sumAngB += dAngB;
        }
        else
        {
            linVelA += dLinA;
            angVelA += dAngA;
            linVelB += dLinB;
            angVelB += dAngB;
        }
    }

    if( JACOBI )
    {
        linVelA += sumLinA;
        angVelA += sumAngA;
        linVelB += sumLinB;
        angVelB += sumAngB;
    }
}
// Local alias so the class can refer to the grid resolution as N_SPLIT.
enum
{
N_SPLIT = SolverBase::N_SPLIT,
};
// for parallel solve
// One entry per cell of the N_SPLIT x N_SPLIT grid.
struct ParallelSolveData
{
// Per-cell counts (filled via BoundSearch in sortContacts2).
u32 m_n[N_SPLIT*N_SPLIT];
// Per-cell start offsets (filled via PrefixScan in sortContacts2).
u32 m_offset[N_SPLIT*N_SPLIT];
};
// Greedily partitions n contacts into batches and reorders cs[] so that
// contacts are grouped by ascending batch index.
//   cs        - contacts; batch index is written via getBatchIdx(), then the
//               array is permuted in place
//   n         - number of contacts
//   ignoreIdx - body index that never blocks a contact from joining a batch
//   simdWidth - if positive, the body-usage flags are cleared every time this
//               many contacts have been accepted in the current pass; the
//               default -1 never matches, so no reset happens
// Returns the number of batches created.
static
__inline
int sortConstraintByBatch(Contact4* cs, int n, int ignoreIdx, int simdWidth = -1)
{
SortData* sortData;
{
BT_PROFILE("new");
sortData = new SortData[n];
}
u32* idxBuffer = new u32[n];
// Source and destination share one array: the deferred list is compacted in
// place, and a write index never passes the read index.
u32* idxSrc = idxBuffer;
u32* idxDst = idxBuffer;
int nIdxSrc, nIdxDst;
// 256-bit set of "body already used" flags, indexed by (body index & 255).
// Different bodies that share the low 8 bits share a flag.
const int N_FLG = 256;
const int FLG_MASK = N_FLG-1;
u32 flg[N_FLG/32];
#if defined(_DEBUG)
for(int i=0; i<n; i++) cs[i].getBatchIdx() = -1;
#endif
for(int i=0; i<n; i++) idxSrc[i] = i;
nIdxSrc = n;
int batchIdx = 0;
{
BT_PROFILE("batching");
// One pass per batch: accept every remaining contact whose bodies are still
// unflagged, defer the rest to the next pass.
while( nIdxSrc )
{
nIdxDst = 0;
int nCurrentBatch = 0;
// clear flag
for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
for(int i=0; i<nIdxSrc; i++)
{
int idx = idxSrc[i];
ADLASSERT( idx < n );
// check if it can go
int aIdx = cs[idx].m_bodyAPtr & FLG_MASK;
int bIdx = cs[idx].m_bodyBPtr & FLG_MASK;
u32 aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
u32 bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
// The ignored body is always treated as available.
aUnavailable = (ignoreIdx==cs[idx].m_bodyAPtr)? 0:aUnavailable;
bUnavailable = (ignoreIdx==cs[idx].m_bodyBPtr)? 0:bUnavailable;
if( aUnavailable==0 && bUnavailable==0 ) // ok
{
flg[ aIdx/32 ] |= (1<<(aIdx&31));
flg[ bIdx/32 ] |= (1<<(bIdx&31));
cs[idx].getBatchIdx() = batchIdx;
sortData[idx].m_key = batchIdx;
sortData[idx].m_value = idx;
{
nCurrentBatch++;
if( nCurrentBatch == simdWidth )
{
nCurrentBatch = 0;
// This inner 'i' shadows the loop index only inside this statement.
for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
}
}
}
else
{
idxDst[nIdxDst++] = idx;
}
}
swap2( idxSrc, idxDst );
swap2( nIdxSrc, nIdxDst );
batchIdx ++;
}
}
{
BT_PROFILE("radix sort data");
// sort SortData
// Sorts the (batch index, original index) pairs using a temporary host device.
Device::Config cfg;
Device* deviceHost = DeviceUtils::allocate( TYPE_HOST, cfg );
{
Buffer<SortData> sortBuffer; sortBuffer.setRawPtr( deviceHost, sortData, n );
RadixSort<TYPE_HOST>::Data* sort = RadixSort<TYPE_HOST>::allocate( deviceHost, n );
RadixSort<TYPE_HOST>::execute( sort, sortBuffer, n );
RadixSort<TYPE_HOST>::deallocate( sort );
}
DeviceUtils::deallocate( deviceHost );
}
{
BT_PROFILE("reorder");
// reorder
// Permute cs[] through a temporary copy, following the sorted order.
Contact4* old = new Contact4[n];
memcpy( old, cs, sizeof(Contact4)*n);
for(int i=0; i<n; i++)
{
int idx = sortData[i].m_value;
cs[i] = old[idx];
}
delete [] old;
}
{
BT_PROFILE("delete");
delete [] idxBuffer;
delete [] sortData;
}
#if defined(_DEBUG)
// debugPrintf( "nBatches: %d\n", batchIdx );
// Every contact must have been assigned to some batch.
for(int i=0; i<n; i++) ADLASSERT( cs[i].getBatchIdx() != -1 );
#endif
return batchIdx;
}
};
// Placeholder: both enumerators are commented out, so this enum currently
// declares nothing.
enum
{
// N_SPLIT = SOLVER_N_SPLIT,
// MAX_TASKS_PER_BATCH = N_SPLIT*N_SPLIT/4,
};
struct SolveTask// : public ThreadPool::Task
{
SolveTask(const Buffer<RigidBodyBase::Body>* bodies, const Buffer<RigidBodyBase::Inertia>* shapes, const Buffer<Constraint4>* constraints,
int start, int nConstraints)
: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
m_solveFriction( true ){}
u16 getType(){ return 0; }
void run(int tIdx)
{
HostBuffer<RigidBodyBase::Body>& hBody = *(HostBuffer<RigidBodyBase::Body>*)m_bodies;
HostBuffer<RigidBodyBase::Inertia>& hShape = *(HostBuffer<RigidBodyBase::Inertia>*)m_shapes;
HostBuffer<Constraint4>& hc = *(HostBuffer<Constraint4>*)m_constraints;
for(int ic=0; ic<m_nConstraints; ic++)
{
int i = m_start + ic;
float frictionCoeff = hc[i].getFrictionCoeff();
int aIdx = (int)hc[i].m_bodyA;
int bIdx = (int)hc[i].m_bodyB;
RigidBodyBase::Body& bodyA = hBody[aIdx];
RigidBodyBase::Body& bodyB = hBody[bIdx];
if( !m_solveFriction )
{
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
SolverInl::solveContact<false>( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia,
bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia,
maxRambdaDt, minRambdaDt );
}
else
{
float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
float sum = 0;
for(int j=0; j<4; j++)
{
sum +=hc[i].m_appliedRambdaDt[j];
}
frictionCoeff = 0.7f;
for(int j=0; j<4; j++)
{
maxRambdaDt[j] = frictionCoeff*sum;
minRambdaDt[j] = -maxRambdaDt[j];
}
SolverInl::solveFriction( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia,
bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia,
maxRambdaDt, minRambdaDt );
}
}
}
const Buffer<RigidBodyBase::Body>* m_bodies;
const Buffer<RigidBodyBase::Inertia>* m_shapes;
const Buffer<Constraint4>* m_constraints;
int m_start;
int m_nConstraints;
bool m_solveFriction;
};
// Host specialisation: creates the solver state for a host device.
//   device       - device stored in the returned Data
//   pairCapacity - not used by the host implementation
// Returns a heap-allocated Data; release it with Solver<TYPE_HOST>::deallocate().
// NOTE(review): m_nIterations is not set here - confirm Data gives it a default.
// 'static' is omitted: a storage-class specifier is not permitted on an explicit
// specialisation and is rejected by conforming compilers.
template<>
Solver<adl::TYPE_HOST>::Data* Solver<adl::TYPE_HOST>::allocate( const Device* device, int pairCapacity )
{
    Solver<adl::TYPE_HOST>::Data* data = new Data;
    data->m_device = device;
    data->m_parallelSolveData = 0; // allocated on demand by sortContacts2()
    return data;
}
// Host specialisation: frees a Data created by Solver<TYPE_HOST>::allocate(),
// including the parallel-solve tables if they were allocated.
// 'static' is omitted: a storage-class specifier is not permitted on an explicit
// specialisation and is rejected by conforming compilers.
template<>
void Solver<adl::TYPE_HOST>::deallocate( Solver<TYPE_HOST>::Data* data )
{
    // m_parallelSolveData is stored untyped, so cast back before deleting.
    if( data->m_parallelSolveData ) delete (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
    delete data;
}
// Host implementation of the contact sort: bins contacts into an
// N_SPLIT x N_SPLIT grid by the x/z position of one of their bodies, sorts them
// by cell, and fills the per-cell count/offset tables in
// data->m_parallelSolveData.
//   data           - host solver state; m_parallelSolveData is created on first use
//   bodyBuf        - rigid bodies (positions are read)
//   contactsIn     - contacts, reordered in place
//   additionalData - not referenced by this function
//   nContacts      - number of contacts
//   cfg            - m_enableParallelSolve gates all work; m_staticIdx and
//                    m_averageExtent control binning
void sortContacts2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
    Buffer<Contact4>* contactsIn, void* additionalData,
    int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
    ADLASSERT( data->m_device->m_type == TYPE_HOST );

    HostBuffer<RigidBodyBase::Body>* bodyNative
        = (HostBuffer<RigidBodyBase::Body>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
    HostBuffer<Contact4>* contactNative
        = (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contactsIn);

    if( cfg.m_enableParallelSolve )
    {
        // Reuse the tables from a previous call instead of allocating again:
        // both arrays are fully rewritten in step 4, and an unconditional 'new'
        // leaked the old block whenever the assert was compiled out.
        if( data->m_parallelSolveData == 0 )
        {
            data->m_parallelSolveData = new SolverInl::ParallelSolveData;
        }
        SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;

        HostBuffer<SortData> sortData( data->m_device, nContacts );

        { // 2. set cell idx
            float spacing = adl::SolverBase::N_OBJ_PER_SPLIT*cfg.m_averageExtent;
            float xScale = 1.f/spacing;

            for(int i=0; i<nContacts; i++)
            {
                // Bin by body A unless A is the static body, in which case use B.
                int idx = ((*contactNative)[i].m_bodyAPtr==cfg.m_staticIdx)? (*contactNative)[i].m_bodyBPtr:(*contactNative)[i].m_bodyAPtr;
                float4& p = (*bodyNative)[idx].m_pos;

                // Negative coordinates are shifted by one before truncation; the
                // mask wraps the cell index into [0, N_SPLIT).
                int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*xScale)&(adl::SolverBase::N_SPLIT-1);
                int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*xScale)&(adl::SolverBase::N_SPLIT-1);
                ADLASSERT( xIdx >= 0 && xIdx < adl::SolverBase::N_SPLIT );
                ADLASSERT( zIdx >= 0 && zIdx < adl::SolverBase::N_SPLIT );

                sortData[i].m_key = (xIdx+zIdx*adl::SolverBase::N_SPLIT);
                sortData[i].m_value = i;
            }
        }

        { // 3. sort by cell idx
            RadixSort<TYPE_HOST>::Data* sData = RadixSort<TYPE_HOST>::allocate( data->m_device, nContacts );
            RadixSort<TYPE_HOST>::execute( sData, sortData, nContacts );
            RadixSort<TYPE_HOST>::deallocate( sData );
        }

        { // 4. find entries
            // Wrap the raw tables so the search/scan helpers can write into them.
            HostBuffer<u32> counts; counts.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
            HostBuffer<u32> offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
            {
                BoundSearch<TYPE_HOST>::Data* sData = BoundSearch<TYPE_HOST>::allocate( data->m_device );
                PrefixScan<TYPE_HOST>::Data* pData = PrefixScan<TYPE_HOST>::allocate( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );

                BoundSearch<TYPE_HOST>::execute( sData, sortData, nContacts, counts, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, BoundSearchBase::COUNT );
                PrefixScan<TYPE_HOST>::execute( pData, counts, offsets, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );

                BoundSearch<TYPE_HOST>::deallocate( sData );
                PrefixScan<TYPE_HOST>::deallocate( pData );
            }
#if defined(_DEBUG)
            {
                // Cross-check the helpers against a direct histogram + exclusive scan.
                HostBuffer<u32> n0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
                HostBuffer<u32> offset0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
                for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
                {
                    n0[i] = 0;
                    offset0[i] = 0;
                }

                for(int i=0; i<nContacts; i++)
                {
                    int idx = sortData[i].m_key;
                    n0[idx]++;
                }

                // scan
                int sum = 0;
                for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
                {
                    offset0[i] = sum;
                    sum += n0[i];
                }

                for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
                {
                    ADLASSERT( n0[i] == counts[i] );
                    ADLASSERT( offset0[i] == offsets[i] );
                }
            }
#endif
        }

        { // 5. sort constraints by cellIdx
            // Permute the contacts through a temporary copy.
            Contact4* old = new Contact4[nContacts];
            memcpy( old, contactNative->m_ptr, sizeof(Contact4)*nContacts );
            for(int i=0; i<nContacts; i++)
            {
                int srcIdx = sortData[i].m_value;
                (*contactNative)[i] = old[srcIdx];
            }
            delete [] old;
        }
    }

    BufferUtils::unmap<false>( bodyNative, bodyBuf );
    BufferUtils::unmap<true>( contactNative, contactsIn );
}
// Host pipeline: sort contacts by cell, batch them, then convert them to
// constraints. Parameters are forwarded unchanged to the three stages.
//   contactsIn  - contacts; reordered by the sort and batch stages
//   contactCOut - receives the generated constraints
// NOTE(review): the batching stage dereferences data->m_parallelSolveData, which
// sortContacts2() only creates when cfg.m_enableParallelSolve is set - confirm
// callers always enable it.
static void reorderConvertToConstraints2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
    const Buffer<RigidBodyBase::Inertia>* shapeBuf,
    adl::Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
    int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
    sortContacts2( data, bodyBuf, contactsIn, additionalData, nContacts, cfg );

    {
        // Expose the per-cell tables as buffers for the batching stage.
        SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
        Buffer<u32> n; n.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
        Buffer<u32> offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
        Solver<TYPE_HOST>::batchContacts( data, contactsIn, nContacts, &n, &offsets, cfg.m_staticIdx );
        // The leftover printf("hello\n") debug trace that ran on every call was removed.
    }

    Solver<TYPE_HOST>::convertToConstraints( data, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
}
// Host solve: runs data->m_nIterations passes over the contact rows of all n
// constraints, followed by the same number of passes over the friction rows.
//   data           - host solver state (device, m_nIterations)
//   bodyBuf        - rigid bodies; velocities are updated
//   shapeBuf       - inertias
//   constraint     - constraint buffer, cast to Buffer<Constraint4>*
//   additionalData - not referenced by this function
//   n              - number of constraints
// TYPE does not appear in the parameter list, so callers must name it explicitly.
template<DeviceType TYPE>
static void solveContactConstraint( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
    SolverData constraint, void* additionalData, int n )
{
    Buffer<RigidBodyBase::Body>* bodies
        = BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
    Buffer<RigidBodyBase::Inertia>* inertias
        = BufferUtils::map<TYPE_HOST, true>( data->m_device, shapeBuf );
    Buffer<Constraint4>* constraints
        = BufferUtils::map<TYPE_HOST, true>( data->m_device, (const Buffer<Constraint4>*)constraint );

    // Phase 0 solves contacts, phase 1 solves friction.
    for(int phase=0; phase<2; phase++)
    {
        const bool frictionPhase = (phase == 1);
        for(int iter=0; iter<data->m_nIterations; iter++)
        {
            SolveTask task( bodies, inertias, constraints, 0, n );
            task.m_solveFriction = frictionPhase;
            task.run(0);
        }
    }

    BufferUtils::unmap<true>( bodies, bodyBuf );
    BufferUtils::unmap<false>( inertias, shapeBuf );
    BufferUtils::unmap<false>( constraints, (const Buffer<Constraint4>*)constraint );
}
// Disabled (#if 0): task-creation entry point for a thread-pool based solve.
// The former implementation is kept inside the block comment below; the live
// body only asserts and returns 0.
#if 0
static
int createSolveTasks( int batchIdx, Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf, const Buffer<RigidBodyBase::Inertia>* shapeBuf,
SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity )
{
/*
ADLASSERT( (N_SPLIT&1) == 0 );
ADLASSERT( batchIdx < N_BATCHES );
ADLASSERT( data->m_device->m_type == TYPE_HOST );
ADLASSERT( data->m_parallelSolveData );
SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData;
data->m_batchIdx = 0;
const int nx = N_SPLIT/2;
int nTasksCreated = 0;
// for(int ii=0; ii<2; ii++)
for(batchIdx=0; batchIdx<4; batchIdx++)
{
int2 offset = make_int2( batchIdx&1, batchIdx>>1 );
for(int ix=0; ix<nx; ix++) for(int iy=0; iy<nx; iy++)
{
int xIdx = ix*2 + offset.x;
int yIdx = iy*2 + offset.y;
int cellIdx = xIdx+yIdx*N_SPLIT;
int n = solveData->m_n[cellIdx];
int start = solveData->m_offset[cellIdx];
if( n == 0 ) continue;
SolveTask* task = new SolveTask( bodyBuf, shapeBuf, (const Buffer<Constraint4>*)constraint, start, n );
// task->m_solveFriction = (ii==0)? false:true;
tasksOut[nTasksCreated++] = task;
}
}
return nTasksCreated;
*/
ADLASSERT(0);
return 0;
}
#endif
// Host implementation of contact -> constraint conversion. For every valid
// contact i, fills constraint i via SolverInl::setConstraint4() and copies the
// contact's batch index. Invalid contacts leave their constraint slot untouched.
// A body whose linear and angular velocity are both zero is passed on with zero
// inverse mass and zero inverse inertia.
//   data           - host solver state (device)
//   bodyBuf        - rigid bodies
//   shapeBuf       - inertias
//   contactsIn     - input contacts
//   contactCOut    - output, cast to Buffer<Constraint4>*
//   additionalData - not referenced by this function
//   nContacts      - number of contacts
//   cfg            - forwarded to setConstraint4()
static void convertToConstraints2( Solver<TYPE_HOST>::Data* data, const Buffer<RigidBodyBase::Body>* bodyBuf,
    const Buffer<RigidBodyBase::Inertia>* shapeBuf,
    Buffer<Contact4>* contactsIn, SolverData contactCOut, void* additionalData,
    int nContacts, const Solver<TYPE_HOST>::ConstraintCfg& cfg )
{
    ADLASSERT( data->m_device->m_type == TYPE_HOST );

    HostBuffer<RigidBodyBase::Body>* bodyNative
        = (HostBuffer<RigidBodyBase::Body>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, bodyBuf );
    HostBuffer<RigidBodyBase::Inertia>* shapeNative
        = (HostBuffer<RigidBodyBase::Inertia>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, shapeBuf );
    HostBuffer<Contact4>* contactNative
        = (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contactsIn );
    HostBuffer<Constraint4>* constraintNative
        = (HostBuffer<Constraint4>*)BufferUtils::map<TYPE_HOST, false>( data->m_device, (Buffer<Constraint4>*)contactCOut );

    // Iterations are independent (each writes only constraint i), so release
    // builds run the loop in parallel.
#if !defined(_DEBUG)
#pragma omp parallel for
#endif
    for(int i=0; i<nContacts; i++)
    {
        Contact4& src = (*contactNative)[i];
        if( src.isInvalid() ) continue;

        const int idxA = (int)src.m_bodyAPtr;
        const int idxB = (int)src.m_bodyBPtr;

        const RigidBodyBase::Body& rbA = (*bodyNative)[idxA];
        const RigidBodyBase::Body& rbB = (*bodyNative)[idxB];

        MYF4 posA( rbA.m_pos );
        MYF4 linVelA( rbA.m_linVel );
        MYF4 angVelA( rbA.m_angVel );
        MYF4 posB( rbB.m_pos );
        MYF4 linVelB( rbB.m_linVel );
        MYF4 angVelB( rbB.m_angVel );

        const bool restingA = ( isZero( linVelA ) && isZero( angVelA ) );
        const bool restingB = ( isZero( linVelB ) && isZero( angVelB ) );

        SolverInl::setConstraint4( posA, linVelA, angVelA,
            (restingA)? 0.f : (*bodyNative)[idxA].m_invMass, (restingA)? mtZero() : (*shapeNative)[idxA].m_invInertia,
            posB, linVelB, angVelB,
            (restingB)? 0.f : (*bodyNative)[idxB].m_invMass, (restingB)? mtZero() : (*shapeNative)[idxB].m_invInertia,
            src, cfg,
            (*constraintNative)[i] );

        (*constraintNative)[i].m_batchIdx = src.getBatchIdx();
    }

    BufferUtils::unmap<false>( bodyNative, bodyBuf );
    BufferUtils::unmap<false>( shapeNative, shapeBuf );
    BufferUtils::unmap<false>( contactNative, contactsIn );
    BufferUtils::unmap<true>( constraintNative, (Buffer<Constraint4>*)contactCOut );
}
// Host (CPU) path: sorts the contacts of every non-empty grid cell into batches.
//
//  data      - solver data; its device must be of type TYPE_HOST (asserted).
//  contacts  - contact buffer; the first nContacts entries are mapped, reordered in
//              place by SolverInl::sortConstraintByBatch, and written back on unmap.
//  n         - per-cell contact counts, N_SPLIT*N_SPLIT entries.
//  offsets   - per-cell start offsets into contacts, N_SPLIT*N_SPLIT entries.
//  staticIdx - forwarded to SolverInl::sortConstraintByBatch.
//
// Prints the largest batch count found over all cells to stdout.
static void batchContacts2( Solver<TYPE_HOST>::Data* data, Buffer<Contact4>* contacts, int nContacts, Buffer<u32>* n, Buffer<u32>* offsets, int staticIdx )
{
	ADLASSERT( data->m_device->m_type == TYPE_HOST );

	HostBuffer<Contact4>* contactNative =0;
	HostBuffer<u32>* nNative =0;
	HostBuffer<u32>* offsetsNative =0;

	{
		BT_PROFILE("BufferUtils::map");
		contactNative = (HostBuffer<Contact4>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, contacts, nContacts );
	}
	{
		BT_PROFILE("BufferUtils::map2");
		nNative = (HostBuffer<u32>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, n );
		offsetsNative= (HostBuffer<u32>*)BufferUtils::map<TYPE_HOST, true>( data->m_device, offsets );
	}
	{
		BT_PROFILE("sortConstraintByBatch");
		int maxNumBatches = 0;
		for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
		{
			// Named cellCount/cellOffset so the locals no longer shadow the buffer parameter n.
			const int cellCount = (*nNative)[i];
			const int cellOffset = (*offsetsNative)[i];
			if( cellCount )
			{
				int numBatches = SolverInl::sortConstraintByBatch( contactNative->m_ptr+cellOffset, cellCount, staticIdx,-1 ); // on GPU
				maxNumBatches = max(numBatches,maxNumBatches);
				// SolverInl::sortConstraintByBatch( contactNative->m_ptr+cellOffset, cellCount, staticIdx ); // on CPU
			}
		}
		printf("maxNumBatches = %d\n", maxNumBatches);
	}
	{
		BT_PROFILE("BufferUtils::unmap");
		// <true>: the reordered contacts are written back.
		// NOTE(review): meaning of the bool inferred from usage - confirm in BufferUtils.
		BufferUtils::unmap<true>( contactNative, contacts, nContacts );
	}
	{
		BT_PROFILE("BufferUtils::unmap2");
		BufferUtils::unmap<false>( nNative, n );
		BufferUtils::unmap<false>( offsetsNative, offsets );
	}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,338 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
// Extensions used by this file: printf (AMD) and 32-bit local/global atomics.
#pragma OPENCL EXTENSION cl_amd_printf : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
// When hardware atomic counters are unavailable, counter32_t falls back to a
// plain global int pointer.
#ifdef cl_ext_atomic_counters_32
#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
#else
#define counter32_t volatile __global int*
#endif
// Short fixed-width integer aliases.
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
// Work-item / work-group queries, all along dimension 0.
#define GET_GROUP_IDX get_group_id(0)
#define GET_LOCAL_IDX get_local_id(0)
#define GET_GLOBAL_IDX get_global_id(0)
#define GET_GROUP_SIZE get_local_size(0)
#define GET_NUM_GROUPS get_num_groups(0)
// Local-memory barrier / fence.
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
// Atomic wrappers taking an lvalue. AtomInc1 stores the value returned by atom_inc
// (the value before the increment, per the OpenCL atomics spec) into out.
#define AtomInc(x) atom_inc(&(x))
#define AtomInc1(x, out) out = atom_inc(&(x))
#define AppendInc(x, out) out = atomic_inc(x)
#define AtomAdd(x, value) atom_add(&(x), value)
#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
#define AtomXhg(x, value) atom_xchg ( &(x), value )
#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
// make_* spell OpenCL vector literals as casts, e.g. make_int2(a,b) -> (int2)(a,b).
#define make_float4 (float4)
#define make_float2 (float2)
#define make_uint4 (uint4)
#define make_int4 (int4)
#define make_uint2 (uint2)
#define make_int2 (int2)
#define max2 max
#define min2 min
// Work-group size the kernels in this file are written for.
#define WG_SIZE 64
// One contact as seen by the batching kernel. CreateBatches reads m_bodyA/m_bodyB
// to detect pairs that share a body, copies the whole struct to the output buffer,
// and overwrites m_batchIdx there.
// NOTE(review): layout presumably has to match the host-side Contact4 - confirm.
typedef struct
{
float4 m_worldPos[4];
float4 m_worldNormal;
u32 m_coeffs;
int m_batchIdx;
u32 m_bodyA;
u32 m_bodyB;
}Contact4;
// Kernel constants passed by value. CreateBatches only reads m_staticIdx; the
// per-group count and start come from the gN/gStart buffers instead of m_n/m_start.
typedef struct
{
int m_n;
int m_start;
int m_staticIdx;
int m_paddings[1];
} ConstBuffer;
// Ring-buffer entry for one pending contact: the two body ids (CreateBatches stores
// the smaller one in m_a) and the contact's index relative to the group's start offset.
typedef struct
{
u32 m_a;
u32 m_b;
u32 m_idx;
}Elem;
// Capacity of the per-group stack of contacts accepted into the current batch.
#define STACK_SIZE (WG_SIZE*10)
//#define STACK_SIZE (WG_SIZE)
// Capacity of the per-group ring of pending contacts.
#define RING_SIZE 1024
#define RING_SIZE_MASK (RING_SIZE-1)
// Number of u32 words in each body-usage bit set (32 bits per word).
#define CHECK_SIZE (WG_SIZE)
// Both macros expand to names of __local variables declared inside CreateBatches,
// so they are only usable within that kernel.
#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)
#define RING_END ldsTmp
// Bit-set helpers over a __local array of CHECK_SIZE u32 words (32*CHECK_SIZE bits).
// An id is folded into the set with a modulo, so distinct ids can share a bit.
//
// The id is reinterpreted as unsigned before the modulo: with signed arithmetic a
// negative idx gave a negative remainder and therefore a negative word index
// (out-of-bounds LDS access). Results for non-negative ids are unchanged. All three
// helpers use the same mapping, so they always agree on which bit an id owns.

// Returns non-zero if the bit for idx is set in buff.
u32 readBuf(__local u32* buff, int idx)
{
	const u32 slot = ((u32)idx) % (32*CHECK_SIZE);
	const u32 bitIdx = slot%32;
	const u32 bufIdx = slot/32;
	return buff[bufIdx] & (1u<<bitIdx);
}

// Atomically sets the bit for idx in buff.
void writeBuf(__local u32* buff, int idx)
{
	const u32 slot = ((u32)idx) % (32*CHECK_SIZE);
	const u32 bitIdx = slot%32;
	const u32 bufIdx = slot/32;
//	buff[bufIdx] |= (1<<bitIdx);
	atom_or( &buff[bufIdx], (1u<<bitIdx) );
}

// Atomically sets the bit for idx in buff.
// Returns 1 if the bit was clear before the call, 0 if it was already set.
u32 tryWrite(__local u32* buff, int idx)
{
	const u32 slot = ((u32)idx) % (32*CHECK_SIZE);
	const u32 bitIdx = slot%32;
	const u32 bufIdx = slot/32;
	u32 ans = (u32)atom_or( &buff[bufIdx], (1u<<bitIdx) );
	return ((ans >> bitIdx)&1) == 0;
}
// batching on the GPU
//
// Each work-group handles one range of contacts: gN[group] contacts starting at
// gStart[group] in gConstraints. The contacts are copied to gConstraintsOut
// (starting at the same offset) with m_batchIdx set so that contacts placed in the
// same batch did not collide in the body-usage bit set.
//
//  gConstraints    - input contacts.
//  gConstraintsOut - output contacts, reordered and tagged with m_batchIdx.
//  gN, gStart      - per-work-group contact count and start offset.
//  cb              - only cb.m_staticIdx is read, and it is currently unused
//                    (its only consumers are commented out).
//
// Outer loop: one iteration per batch (at most 250). Inner loop (4 rounds):
//   1. refill the ring of pending pairs from global memory,
//   2. try to claim both bodies of every ring entry; winners go onto the stack,
//      everything else is compacted back to the front of the ring.
// After the rounds the stack is flushed to the output as batch `ie`.
//
// NOTE(review): several steps read __local counters written by other work-items
// without an intervening barrier (e.g. the lIdx==0 initialisation below). This
// presumably relies on the work-group executing in lock-step - confirm for the
// target hardware.
__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut,
		__global u32* gN, __global u32* gStart,
		ConstBuffer cb )
{
	__local u32 ldsStackIdx[STACK_SIZE];	// contact indices accepted into the current batch
	__local u32 ldsStackEnd;		// number of entries in ldsStackIdx
	__local Elem ldsRingElem[RING_SIZE];	// pending pairs
	__local u32 ldsRingEnd;			// number of entries in ldsRingElem
	__local u32 ldsTmp;			// RING_END: write cursor while compacting the ring
	__local u32 ldsCheckBuffer[CHECK_SIZE];	// bodies claimed during the current pass
	__local u32 ldsFixedBuffer[CHECK_SIZE];	// bodies already used by the current batch
	__local u32 ldsGEnd;			// number of contacts fetched from global memory
	__local u32 ldsDstEnd;			// next free slot in gConstraintsOut

	int wgIdx = GET_GROUP_IDX;
	int lIdx = GET_LOCAL_IDX;

	const int m_n = gN[wgIdx];
	const int m_start = gStart[wgIdx];
	const int m_staticIdx = cb.m_staticIdx;

	if( lIdx == 0 )
	{
		ldsRingEnd = 0;
		ldsGEnd = 0;
		ldsStackEnd = 0;
		ldsDstEnd = m_start;
	}

//	while(1)
	for(int ie=0; ie<250; ie++)
	{
		// New batch: no body is used yet. Strided so every word is cleared even if
		// CHECK_SIZE is ever made larger than WG_SIZE (identical while they are equal).
		for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsFixedBuffer[i] = 0;

		for(int giter=0; giter<4; giter++)
		{
			int ringCap = GET_RING_CAPACITY;

			// 1. fill ring
			if( ldsGEnd < m_n )
			{
				while( ringCap > WG_SIZE )
				{
					if( ldsGEnd >= m_n ) break;
					if( lIdx < ringCap - WG_SIZE )
					{
						int srcIdx;
						AtomInc1( ldsGEnd, srcIdx );
						if( srcIdx < m_n )
						{
							int dstIdx;
							AtomInc1( ldsRingEnd, dstIdx );

							// Store the pair with the smaller body id first.
							int a = gConstraints[m_start+srcIdx].m_bodyA;
							int b = gConstraints[m_start+srcIdx].m_bodyB;
							ldsRingElem[dstIdx].m_a = (a>b)? b:a;
							ldsRingElem[dstIdx].m_b = (a>b)? a:b;
							ldsRingElem[dstIdx].m_idx = srcIdx;
						}
					}
					ringCap = GET_RING_CAPACITY;
				}
			}

			GROUP_LDS_BARRIER;

			// 2. fill stack
			__local Elem* dst = ldsRingElem;
			if( lIdx == 0 ) RING_END = 0;

			int srcIdx=lIdx;
			int end = ldsRingEnd;

			{
				for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)
				{
					Elem e;
					if(srcIdx<end) e = ldsRingElem[srcIdx];
					// Work-items past the end of the ring have nothing to do this pass.
					bool done = (srcIdx<end)?false:true;

					// Fixed: the original wrote ldsCheckBuffer[lIdx] inside this loop over i,
					// which only clears the whole buffer while CHECK_SIZE == WG_SIZE.
					for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[i] = 0;

					if( !done )
					{
						int aUsed = readBuf( ldsFixedBuffer, e.m_a);
						int bUsed = readBuf( ldsFixedBuffer, e.m_b);

						if( aUsed==0 && bUsed==0 )
						{
							int aAvailable;
							int bAvailable;

							// Claim both bodies for this pass; only the first claimant of a bit wins.
							aAvailable = tryWrite( ldsCheckBuffer, e.m_a );
							bAvailable = tryWrite( ldsCheckBuffer, e.m_b );

							//aAvailable = (m_staticIdx == e.m_a)? 1: aAvailable;
							//bAvailable = (m_staticIdx == e.m_b)? 1: bAvailable;

							bool success = (aAvailable && bAvailable);
							if(success)
							{
								writeBuf( ldsFixedBuffer, e.m_a );
								writeBuf( ldsFixedBuffer, e.m_b );
							}
							done = success;
						}
					}

					// put it aside
					if(srcIdx<end)
					{
						if( done )
						{
							int dstIdx; AtomInc1( ldsStackEnd, dstIdx );
							if( dstIdx < STACK_SIZE )
								ldsStackIdx[dstIdx] = e.m_idx;
							else{
								// Stack is full: undo the reservation and keep the pair pending.
								done = false;
								AtomAdd( ldsStackEnd, -1 );
							}
						}
						if( !done )
						{
							// Compact the still-pending pair to the front of the ring.
							int dstIdx; AtomInc1( RING_END, dstIdx );
							dst[dstIdx] = e;
						}
					}

					// if filled, flush
					if( ldsStackEnd == STACK_SIZE )
					{
						for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)
						{
							int idx = m_start + ldsStackIdx[i];
							int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
							gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
							gConstraintsOut[ dstIdx ].m_batchIdx = ie;
						}
						if( lIdx == 0 ) ldsStackEnd = 0;

						for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE)
							ldsFixedBuffer[i] = 0;
					}
				}
			}

			if( lIdx == 0 ) ldsRingEnd = RING_END;
		}

		GROUP_LDS_BARRIER;

		// Write out everything accepted into this batch.
		for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)
		{
			int idx = m_start + ldsStackIdx[i];
			int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
			gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
			gConstraintsOut[ dstIdx ].m_batchIdx = ie;
		}

		// in case it couldn't consume any pair. Flush them
		// todo. Serial batch worth while?
		if( ldsStackEnd == 0 )
		{
			for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)
			{
				int idx = m_start + ldsRingElem[i].m_idx;
				int dstIdx; AtomInc1( ldsDstEnd, dstIdx );
				gConstraintsOut[ dstIdx ] = gConstraints[ idx ];
				// NOTE(review): 100+i can coincide with a regular batch index (ie runs
				// up to 249) - confirm the consumer tolerates that.
				gConstraintsOut[ dstIdx ].m_batchIdx = 100+i;
			}
			GROUP_LDS_BARRIER;
			if( lIdx == 0 ) ldsRingEnd = 0;
		}

		if( lIdx == 0 ) ldsStackEnd = 0;

		GROUP_LDS_BARRIER;

		// termination
		if( ldsGEnd == m_n && ldsRingEnd == 0 )
			break;
	}
}

View File

@@ -0,0 +1,371 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
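//Originally written by Takahiro Harada
//NOTE: this string literal appears to be generated from batchingKernels.cl by stringify.py; prefer editing the .cl file and regenerating over hand-editing the text below.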
static const char* batchingKernelsCL= \
"\n"
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
"\n"
"#ifdef cl_ext_atomic_counters_32\n"
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
"#else\n"
"#define counter32_t volatile __global int*\n"
"#endif\n"
"\n"
"\n"
"typedef unsigned int u32;\n"
"typedef unsigned short u16;\n"
"typedef unsigned char u8;\n"
"\n"
"#define GET_GROUP_IDX get_group_id(0)\n"
"#define GET_LOCAL_IDX get_local_id(0)\n"
"#define GET_GLOBAL_IDX get_global_id(0)\n"
"#define GET_GROUP_SIZE get_local_size(0)\n"
"#define GET_NUM_GROUPS get_num_groups(0)\n"
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
"#define AtomInc(x) atom_inc(&(x))\n"
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
"#define AppendInc(x, out) out = atomic_inc(x)\n"
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
"\n"
"\n"
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
"\n"
"#define make_float4 (float4)\n"
"#define make_float2 (float2)\n"
"#define make_uint4 (uint4)\n"
"#define make_int4 (int4)\n"
"#define make_uint2 (uint2)\n"
"#define make_int2 (int2)\n"
"\n"
"\n"
"#define max2 max\n"
"#define min2 min\n"
"\n"
"\n"
"#define WG_SIZE 64\n"
"\n"
"\n"
"\n"
"typedef struct \n"
"{\n"
" float4 m_worldPos[4];\n"
" float4 m_worldNormal;\n"
" u32 m_coeffs;\n"
" int m_batchIdx;\n"
"\n"
" u32 m_bodyA;\n"
" u32 m_bodyB;\n"
"}Contact4;\n"
"\n"
"typedef struct \n"
"{\n"
" int m_n;\n"
" int m_start;\n"
" int m_staticIdx;\n"
" int m_paddings[1];\n"
"} ConstBuffer;\n"
"\n"
"typedef struct \n"
"{\n"
" u32 m_a;\n"
" u32 m_b;\n"
" u32 m_idx;\n"
"}Elem;\n"
"\n"
"#define STACK_SIZE (WG_SIZE*10)\n"
"//#define STACK_SIZE (WG_SIZE)\n"
"#define RING_SIZE 1024\n"
"#define RING_SIZE_MASK (RING_SIZE-1)\n"
"#define CHECK_SIZE (WG_SIZE)\n"
"\n"
"\n"
"#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n"
"#define RING_END ldsTmp\n"
"\n"
"u32 readBuf(__local u32* buff, int idx)\n"
"{\n"
" idx = idx % (32*CHECK_SIZE);\n"
" int bitIdx = idx%32;\n"
" int bufIdx = idx/32;\n"
" return buff[bufIdx] & (1<<bitIdx);\n"
"}\n"
"\n"
"void writeBuf(__local u32* buff, int idx)\n"
"{\n"
" idx = idx % (32*CHECK_SIZE);\n"
" int bitIdx = idx%32;\n"
" int bufIdx = idx/32;\n"
"// buff[bufIdx] |= (1<<bitIdx);\n"
" atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
"}\n"
"\n"
"u32 tryWrite(__local u32* buff, int idx)\n"
"{\n"
" idx = idx % (32*CHECK_SIZE);\n"
" int bitIdx = idx%32;\n"
" int bufIdx = idx/32;\n"
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
" return ((ans >> bitIdx)&1) == 0;\n"
"}\n"
"\n"
"typedef struct \n"
"{\n"
" int m_valInt0;\n"
" int m_valInt1;\n"
" int m_valInt2;\n"
" int m_valInt3;\n"
"\n"
" int m_valInt4;\n"
" int m_valInt5;\n"
" int m_valInt6;\n"
" int m_valInt7;\n"
"\n"
" int m_valInt8;\n"
" int m_valInt9;\n"
" int m_valInt10;\n"
" int m_valInt11;\n"
" \n"
" int m_valInt12;\n"
" int m_valInt13;\n"
" int m_valInt14;\n"
" int m_valInt15;\n"
"\n"
"\n"
" float m_fval0;\n"
" float m_fval1;\n"
" float m_fval2;\n"
" float m_fval3;\n"
"} SolverDebugInfo;\n"
"\n"
"// batching on the GPU\n"
"__kernel void CreateBatches( __global Contact4* gConstraints, __global Contact4* gConstraintsOut, //__global u32* gRes, \n"
" __global u32* gN, __global u32* gStart, \n"
"// __global SolverDebugInfo* debugInfo, \n"
" ConstBuffer cb )\n"
"{\n"
" __local u32 ldsStackIdx[STACK_SIZE];\n"
" __local u32 ldsStackEnd;\n"
" __local Elem ldsRingElem[RING_SIZE];\n"
" __local u32 ldsRingEnd;\n"
" __local u32 ldsTmp;\n"
" __local u32 ldsCheckBuffer[CHECK_SIZE];\n"
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
" __local u32 ldsGEnd;\n"
" __local u32 ldsDstEnd;\n"
"\n"
" int wgIdx = GET_GROUP_IDX;\n"
" int lIdx = GET_LOCAL_IDX;\n"
" \n"
" const int m_n = gN[wgIdx];\n"
" const int m_start = gStart[wgIdx];\n"
" const int m_staticIdx = cb.m_staticIdx;\n"
" \n"
" if( lIdx == 0 )\n"
" {\n"
" ldsRingEnd = 0;\n"
" ldsGEnd = 0;\n"
" ldsStackEnd = 0;\n"
" ldsDstEnd = m_start;\n"
" }\n"
" \n"
"// while(1)\n"
" for(int ie=0; ie<250; ie++)\n"
" {\n"
" ldsFixedBuffer[lIdx] = 0;\n"
"\n"
" for(int giter=0; giter<4; giter++)\n"
" {\n"
" int ringCap = GET_RING_CAPACITY;\n"
" \n"
" // 1. fill ring\n"
" if( ldsGEnd < m_n )\n"
" {\n"
" while( ringCap > WG_SIZE )\n"
" {\n"
" if( ldsGEnd >= m_n ) break;\n"
" if( lIdx < ringCap - WG_SIZE )\n"
" {\n"
" int srcIdx;\n"
" AtomInc1( ldsGEnd, srcIdx );\n"
" if( srcIdx < m_n )\n"
" {\n"
" int dstIdx;\n"
" AtomInc1( ldsRingEnd, dstIdx );\n"
" \n"
" int a = gConstraints[m_start+srcIdx].m_bodyA;\n"
" int b = gConstraints[m_start+srcIdx].m_bodyB;\n"
" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n"
" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n"
" ldsRingElem[dstIdx].m_idx = srcIdx;\n"
" }\n"
" }\n"
" ringCap = GET_RING_CAPACITY;\n"
" }\n"
" }\n"
"\n"
" GROUP_LDS_BARRIER;\n"
" \n"
" // 2. fill stack\n"
" __local Elem* dst = ldsRingElem;\n"
" if( lIdx == 0 ) RING_END = 0;\n"
"\n"
" int srcIdx=lIdx;\n"
" int end = ldsRingEnd;\n"
"\n"
" {\n"
" for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n"
" {\n"
" Elem e;\n"
" if(srcIdx<end) e = ldsRingElem[srcIdx];\n"
" bool done = (srcIdx<end)?false:true;\n"
"\n"
" for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n"
" \n"
" if( !done )\n"
" {\n"
" int aUsed = readBuf( ldsFixedBuffer, e.m_a);\n"
" int bUsed = readBuf( ldsFixedBuffer, e.m_b);\n"
"\n"
" if( aUsed==0 && bUsed==0 )\n"
" {\n"
" int aAvailable;\n"
" int bAvailable;\n"
"\n"
" aAvailable = tryWrite( ldsCheckBuffer, e.m_a );\n"
" bAvailable = tryWrite( ldsCheckBuffer, e.m_b );\n"
"\n"
" //aAvailable = (m_staticIdx == e.m_a)? 1: aAvailable;\n"
" //bAvailable = (m_staticIdx == e.m_b)? 1: bAvailable;\n"
"\n"
" bool success = (aAvailable && bAvailable);\n"
" if(success)\n"
" {\n"
" writeBuf( ldsFixedBuffer, e.m_a );\n"
" writeBuf( ldsFixedBuffer, e.m_b );\n"
" }\n"
" done = success;\n"
" }\n"
" }\n"
"\n"
" // put it aside\n"
" if(srcIdx<end)\n"
" {\n"
" if( done )\n"
" {\n"
" int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n"
" if( dstIdx < STACK_SIZE )\n"
" ldsStackIdx[dstIdx] = e.m_idx;\n"
" else{\n"
" done = false;\n"
" AtomAdd( ldsStackEnd, -1 );\n"
" }\n"
" }\n"
" if( !done )\n"
" {\n"
" int dstIdx; AtomInc1( RING_END, dstIdx );\n"
" dst[dstIdx] = e;\n"
" }\n"
" }\n"
"\n"
" // if filled, flush\n"
" if( ldsStackEnd == STACK_SIZE )\n"
" {\n"
" for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n"
" {\n"
" int idx = m_start + ldsStackIdx[i];\n"
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
" }\n"
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
"\n"
" //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n"
" ldsFixedBuffer[lIdx] = 0;\n"
" }\n"
" }\n"
" }\n"
"\n"
" if( lIdx == 0 ) ldsRingEnd = RING_END;\n"
" }\n"
"\n"
" GROUP_LDS_BARRIER;\n"
"\n"
" for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n"
" {\n"
" int idx = m_start + ldsStackIdx[i];\n"
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n"
" }\n"
"\n"
" // in case it couldn't consume any pair. Flush them\n"
" // todo. Serial batch worth while?\n"
" if( ldsStackEnd == 0 )\n"
" {\n"
" for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n"
" {\n"
" int idx = m_start + ldsRingElem[i].m_idx;\n"
" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n"
" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n"
" gConstraintsOut[ dstIdx ].m_batchIdx = 100+i;\n"
" }\n"
" GROUP_LDS_BARRIER;\n"
" if( lIdx == 0 ) ldsRingEnd = 0;\n"
" }\n"
"\n"
" if( lIdx == 0 ) ldsStackEnd = 0;\n"
"\n"
" GROUP_LDS_BARRIER;\n"
"\n"
" // termination\n"
" if( ldsGEnd == m_n && ldsRingEnd == 0 )\n"
" break;\n"
" }\n"
"\n"
"\n"
"}\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
"\n"
;

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env python
import sys
import os
import shutil
def stringify_lines(lines, var_name):
    """Return the lines of a C declaration holding `lines` as one string literal.

    The result starts with ``static const char* <var_name>= \\``, followed by
    one quoted fragment per input line (each ending in an escaped newline),
    and a closing ``;``.

    Backslashes and double quotes in the input are escaped so the output is
    always a valid C string literal. Without this, kernel sources that contain
    macro line continuations or quoted strings needed manual fix-ups.
    """
    out = ['static const char* ' + var_name + '= \\']
    for line in lines:
        # Drop the line terminator (including a stray CR from CRLF files).
        text = line.rstrip('\r\n')
        # Escape backslashes first so the quotes' escapes are not doubled.
        text = text.replace('\\', '\\\\').replace('"', '\\"')
        out.append('"' + text + '\\n"')
    out.append(';')
    return out


def main(argv):
    """Usage: stringify.py <input-file> <variable-name>  (writes to stdout)."""
    if len(argv) < 3:
        sys.stderr.write('usage: stringify.py <input-file> <variable-name>\n')
        return 1
    # The with-block closes the input file; the original never closed it.
    with open(argv[1]) as fh:
        lines = fh.readlines()
    # sys.stdout.write works on both Python 2 and Python 3.
    sys.stdout.write('\n'.join(stringify_lines(lines, argv[2])) + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

View File

@@ -0,0 +1,6 @@
:: Regenerates ChNarrowphaseKernels.h: stringify.py wraps ChNarrowphaseKernels.cl
:: in a C string variable named narrowphaseKernelsCL.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
@echo Warning:
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
:: Keep the console window open so the warning can be read.
pause

View File

@@ -0,0 +1,10 @@
:: Regenerates all three kernel headers: each stringify.py call wraps one .cl file
:: in a C string variable with the given name.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h
stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h
@echo Warning:
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
:: Keep the console window open so the warning can be read.
pause

View File

@@ -0,0 +1,8 @@
:: Regenerates batchingKernels.h: stringify.py wraps batchingKernels.cl
:: in a C string variable named batchingKernelsCL.
stringify.py batchingKernels.cl batchingKernelsCL >batchingKernels.h
@echo Warning:
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
:: Keep the console window open so the warning can be read.
pause

View File

@@ -0,0 +1,8 @@
:: Regenerates ChNarrowphaseKernels.h: stringify.py wraps ChNarrowphaseKernels.cl
:: in a C string variable named narrowphaseKernelsCL.
stringify.py ChNarrowphaseKernels.cl narrowphaseKernelsCL >ChNarrowphaseKernels.h
@echo Warning:
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
:: Keep the console window open so the warning can be read.
pause

View File

@@ -0,0 +1,8 @@
:: Regenerates SolverKernels.h: stringify.py wraps SolverKernels.cl
:: in a C string variable named solverKernelsCL.
stringify.py SolverKernels.cl solverKernelsCL >SolverKernels.h
@echo Warning:
@echo You might still need to find/replace for \\n (due to macros) and replace #include statements by their content
:: Keep the console window open so the warning can be read.
pause

View File

@@ -0,0 +1,77 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "BasicDemo.h"
#include "GlutStuff.h"
#include "btBulletDynamicsCommon.h"
#include "LinearMath/btHashMap.h"
#ifdef CL_PLATFORM_AMD
#include "../../opencl/basic_initialize/btOpenCLUtils.h"
extern cl_context g_cxMainContext;
extern cl_command_queue g_cqCommandQue;
extern cl_device_id g_clDevice;
#endif
// Demo entry point. When built with CL_PLATFORM_AMD it first creates the global
// OpenCL context, device and command queue, then sets up BasicDemo and hands
// control to GLUT.
int main(int argc,char** argv)
{
#ifdef CL_PLATFORM_AMD
	int clStatus = 0;

	printf("This program was compiled using the %s OpenCL SDK\n",btOpenCLUtils::getSdkVendorName());

	// Request a GPU context; no OpenGL context/DC is shared with it.
	cl_device_type requestedType = CL_DEVICE_TYPE_GPU;//CPU;//GPU;
	void* sharedGlContext = 0;
	void* sharedGlDC = 0;
	g_cxMainContext = btOpenCLUtils::createContextFromType(requestedType, &clStatus, sharedGlContext, sharedGlDC);
	oclCHECKERROR(clStatus, CL_SUCCESS);

	// NOTE(review): when the context has no device, g_clDevice and g_cqCommandQue
	// are left unset and the demo still starts - confirm this is intended.
	if (btOpenCLUtils::getNumDevices(g_cxMainContext) > 0)
	{
		const int firstDevice = 0;
		g_clDevice = btOpenCLUtils::getDevice(g_cxMainContext,firstDevice);

		btOpenCLDeviceInfo deviceInfo;
		btOpenCLUtils::getDeviceInfo(g_clDevice,deviceInfo);
		btOpenCLUtils::printDeviceInfo(g_clDevice);

		// create a command-queue
		g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_clDevice, 0, &clStatus);
		oclCHECKERROR(clStatus, CL_SUCCESS);
	}
#endif //#ifdef CL_PLATFORM_AMD

	BasicDemo ccdDemo;
	ccdDemo.initPhysics();

#ifdef CHECK_MEMORY_LEAKS
	// Leak-check builds tear the physics down again instead of opening the GLUT window.
	ccdDemo.exitPhysics();
#else
	glutmain(argc, argv,1024,600,"Bullet Physics Demo. http://bulletphysics.org",&ccdDemo);
#endif

	//setupGUI(1024,768);
	glutMainLoop();

	//default glut doesn't return from mainloop
	return 0;
}

View File

@@ -0,0 +1,34 @@
-- include "AMD"
-- premake4 project for the basic Bullet 2 demo; only generated on Windows.
if os.is("Windows") then
	project("basic_bullet2_demo")

	language("C++")
	kind("ConsoleApp")
	targetdir("../../bin")

	-- Header search paths, relative to this script.
	includedirs({ ".", "../../bullet2", "../testbed", "../../rendering/Gwen" })

	-- Libraries linked into the demo.
	links({ "testbed", "bullet2", "gwen" })

	-- Helpers defined elsewhere in the build scripts.
	initOpenGL()
	initGlut()

	-- Every source and header under this directory, recursively.
	files({ "**.cpp", "**.h" })
end

View File

@@ -0,0 +1,88 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEBUG_CAST_RESULT_H
#define DEBUG_CAST_RESULT_H
#include "BulletCollision/NarrowPhaseCollision/btConvexCast.h"
#include "LinearMath/btTransform.h"
#include "GL_ShapeDrawer.h"
#include "GlutStuff.h"
#ifdef WIN32
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#endif
struct btDebugCastResult : public btConvexCast::CastResult
{
btTransform m_fromTrans;
const btPolyhedralConvexShape* m_shape;
btVector3 m_linVel;
btVector3 m_angVel;
GL_ShapeDrawer* m_shapeDrawer;
btDebugCastResult(const btTransform& fromTrans,const btPolyhedralConvexShape* shape,
const btVector3& linVel,const btVector3& angVel,GL_ShapeDrawer* drawer)
:m_fromTrans(fromTrans),
m_shape(shape),
m_linVel(linVel),
m_angVel(angVel),
m_shapeDrawer(drawer)
{
}
virtual void drawCoordSystem(const btTransform& tr)
{
btScalar m[16];
tr.getOpenGLMatrix(m);
glPushMatrix();
btglLoadMatrix(m);
glBegin(GL_LINES);
btglColor3(1, 0, 0);
btglVertex3(0, 0, 0);
btglVertex3(1, 0, 0);
btglColor3(0, 1, 0);
btglVertex3(0, 0, 0);
btglVertex3(0, 1, 0);
btglColor3(0, 0, 1);
btglVertex3(0, 0, 0);
btglVertex3(0, 0, 1);
glEnd();
glPopMatrix();
}
virtual void DebugDraw(btScalar fraction)
{
btVector3 worldBoundsMin(-1000,-1000,-1000);
btVector3 worldBoundsMax(1000,1000,1000);
btScalar m[16];
btTransform hitTrans;
btTransformUtil::integrateTransform(m_fromTrans,m_linVel,m_angVel,fraction,hitTrans);
hitTrans.getOpenGLMatrix(m);
if (m_shapeDrawer)
m_shapeDrawer->drawOpenGL(m,m_shape,btVector3(1,0,0),btIDebugDraw::DBG_NoDebug,worldBoundsMin,worldBoundsMax);
}
};
#endif //DEBUG_CAST_RESULT_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,257 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef DEMO_APPLICATION_H
#define DEMO_APPLICATION_H
#include "GlutStuff.h"
#include "GL_ShapeDrawer.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "LinearMath/btVector3.h"
#include "LinearMath/btMatrix3x3.h"
#include "LinearMath/btTransform.h"
#include "LinearMath/btQuickprof.h"
#include "LinearMath/btAlignedObjectArray.h"
class btCollisionShape;
class btDynamicsWorld;
class btRigidBody;
class btTypedConstraint;
// Abstract base class for the demos: holds the dynamics world, camera and input
// state, and declares the callbacks the windowing layer invokes. Concrete demos
// must implement initPhysics(), clientMoveAndDisplay(), swapBuffers() and
// updateModifierKeys().
class DemoApplication
{
protected:
void displayProfileString(int xOffset,int yStart,char* message);
class CProfileIterator* m_profileIterator;
protected:
#ifdef USE_BT_CLOCK
// Only present when USE_BT_CLOCK is defined; read and reset by getDeltaTimeMicroseconds().
btClock m_clock;
#endif //USE_BT_CLOCK
///this is the most important class
btDynamicsWorld* m_dynamicsWorld;
///constraint for mouse picking
btTypedConstraint* m_pickConstraint;
virtual void removePickingConstraint();
btCollisionShape* m_shootBoxShape;
float m_cameraDistance;
int m_debugMode;
// presumably camera elevation / azimuth angles - TODO confirm units
float m_ele;
float m_azi;
btVector3 m_cameraPosition;
btVector3 m_cameraTargetPosition;//look at
int m_mouseOldX;
int m_mouseOldY;
int m_mouseButtons;
public:
// The only public data member.
int m_modifierKeys;
protected:
float m_scaleBottom;
float m_scaleFactor;
btVector3 m_cameraUp;
int m_forwardAxis;
int m_glutScreenWidth;
int m_glutScreenHeight;
float m_frustumZNear;
float m_frustumZFar;
int m_ortho;
float m_ShootBoxInitialSpeed;
bool m_stepping;
bool m_singleStep;
bool m_idle;
int m_lastKey;
virtual float showProfileInfo(int& xOffset,int& yStart, int yIncr);
void renderscene(int pass);
GL_ShapeDrawer* m_shapeDrawer;
bool m_enableshadows;
btVector3 m_sundirection;
btScalar m_defaultContactProcessingThreshold;
public:
DemoApplication();
virtual ~DemoApplication();
btDynamicsWorld* getDynamicsWorld()
{
return m_dynamicsWorld;
}
// Must be implemented by each demo.
virtual void initPhysics() = 0;
// Default implementation does nothing.
virtual void setDrawClusters(bool drawClusters)
{
}
void overrideGLShapeDrawer (GL_ShapeDrawer* shapeDrawer);
void setOrthographicProjection();
void resetPerspectiveProjection();
// Forwards to the shape drawer and returns whatever enableTexture() returns.
bool setTexturing(bool enable) { return(m_shapeDrawer->enableTexture(enable)); }
// Sets the shadow flag and returns its previous value.
bool setShadows(bool enable) { bool p=m_enableshadows;m_enableshadows=enable;return(p); }
bool getTexturing() const
{
return m_shapeDrawer->hasTextureEnabled();
}
bool getShadows() const
{
return m_enableshadows;
}
int getDebugMode()
{
return m_debugMode ;
}
void setDebugMode(int mode);
void setAzi(float azi)
{
m_azi = azi;
}
void setCameraUp(const btVector3& camUp)
{
m_cameraUp = camUp;
}
void setCameraForwardAxis(int axis)
{
m_forwardAxis = axis;
}
virtual void myinit();
void toggleIdle();
virtual void updateCamera();
btVector3 getCameraPosition()
{
return m_cameraPosition;
}
btVector3 getCameraTargetPosition()
{
return m_cameraTargetPosition;
}
// With USE_BT_CLOCK: returns the clock's microsecond reading and resets the clock,
// so each call yields the time since the previous call.
// Without it: always returns the constant 16666.
btScalar getDeltaTimeMicroseconds()
{
#ifdef USE_BT_CLOCK
btScalar dt = (btScalar)m_clock.getTimeMicroseconds();
m_clock.reset();
return dt;
#else
return btScalar(16666.);
#endif
}
void setFrustumZPlanes(float zNear, float zFar)
{
m_frustumZNear = zNear;
m_frustumZFar = zFar;
}
///glut callbacks
float getCameraDistance();
void setCameraDistance(float dist);
void moveAndDisplay();
// Must be implemented by each demo.
virtual void clientMoveAndDisplay() = 0;
virtual void clientResetScene();
///Demo functions
virtual void setShootBoxShape ();
virtual void shootBox(const btVector3& destination);
btVector3 getRayTo(int x,int y);
btRigidBody* localCreateRigidBody(float mass, const btTransform& startTransform,btCollisionShape* shape);
///callback methods by glut
virtual void keyboardCallback(unsigned char key, int x, int y);
// The next three callbacks default to doing nothing.
virtual void keyboardUpCallback(unsigned char key, int x, int y) {}
virtual void specialKeyboard(int key, int x, int y){}
virtual void specialKeyboardUp(int key, int x, int y){}
virtual void reshape(int w, int h);
virtual void mouseFunc(int button, int state, int x, int y);
virtual void mouseMotionFunc(int x,int y);
virtual void displayCallback();
virtual void renderme();
// Must be implemented by the windowing-specific subclass.
virtual void swapBuffers() = 0;
virtual void updateModifierKeys() = 0;
void stepLeft();
void stepRight();
void stepFront();
void stepBack();
void zoomIn();
void zoomOut();
bool isIdle() const
{
return m_idle;
}
void setIdle(bool idle)
{
m_idle = idle;
}
};
#endif //DEMO_APPLICATION_H

View File

@@ -0,0 +1,139 @@
#include "GLDebugDrawer.h"
#include "GLDebugFont.h"
#include "GlutStuff.h"
#include <stdio.h> //printf debugging
// Creates a drawer whose debug mode starts at 0.
GLDebugDrawer::GLDebugDrawer()
	: m_debugMode(0)
{
}
void GLDebugDrawer::drawLine(const btVector3& from,const btVector3& to,const btVector3& fromColor, const btVector3& toColor)
{
glBegin(GL_LINES);
glColor3f(fromColor.getX(), fromColor.getY(), fromColor.getZ());
glVertex3d(from.getX(), from.getY(), from.getZ());
glColor3f(toColor.getX(), toColor.getY(), toColor.getZ());
glVertex3d(to.getX(), to.getY(), to.getZ());
glEnd();
}
void GLDebugDrawer::drawLine(const btVector3& from,const btVector3& to,const btVector3& color)
{
drawLine(from,to,color,color);
}
void GLDebugDrawer::drawSphere (const btVector3& p, btScalar radius, const btVector3& color)
{
glColor4f (color.getX(), color.getY(), color.getZ(), btScalar(1.0f));
glPushMatrix ();
glTranslatef (p.getX(), p.getY(), p.getZ());
int lats = 5;
int longs = 5;
int i, j;
for(i = 0; i <= lats; i++) {
btScalar lat0 = SIMD_PI * (-btScalar(0.5) + (btScalar) (i - 1) / lats);
btScalar z0 = radius*sin(lat0);
btScalar zr0 = radius*cos(lat0);
btScalar lat1 = SIMD_PI * (-btScalar(0.5) + (btScalar) i / lats);
btScalar z1 = radius*sin(lat1);
btScalar zr1 = radius*cos(lat1);
glBegin(GL_QUAD_STRIP);
for(j = 0; j <= longs; j++) {
btScalar lng = 2 * SIMD_PI * (btScalar) (j - 1) / longs;
btScalar x = cos(lng);
btScalar y = sin(lng);
glNormal3f(x * zr0, y * zr0, z0);
glVertex3f(x * zr0, y * zr0, z0);
glNormal3f(x * zr1, y * zr1, z1);
glVertex3f(x * zr1, y * zr1, z1);
}
glEnd();
}
glPopMatrix();
}
void GLDebugDrawer::drawBox (const btVector3& boxMin, const btVector3& boxMax, const btVector3& color, btScalar alpha)
{
btVector3 halfExtent = (boxMax - boxMin) * btScalar(0.5f);
btVector3 center = (boxMax + boxMin) * btScalar(0.5f);
//glEnable(GL_BLEND); // Turn blending On
//glBlendFunc(GL_SRC_ALPHA, GL_ONE);
glColor4f (color.getX(), color.getY(), color.getZ(), alpha);
glPushMatrix ();
glTranslatef (center.getX(), center.getY(), center.getZ());
glScaled(2*halfExtent[0], 2*halfExtent[1], 2*halfExtent[2]);
// glutSolidCube(1.0);
glPopMatrix ();
//glDisable(GL_BLEND);
}
void GLDebugDrawer::drawTriangle(const btVector3& a,const btVector3& b,const btVector3& c,const btVector3& color,btScalar alpha)
{
// if (m_debugMode > 0)
{
const btVector3 n=btCross(b-a,c-a).normalized();
glBegin(GL_TRIANGLES);
glColor4f(color.getX(), color.getY(), color.getZ(),alpha);
glNormal3d(n.getX(),n.getY(),n.getZ());
glVertex3d(a.getX(),a.getY(),a.getZ());
glVertex3d(b.getX(),b.getY(),b.getZ());
glVertex3d(c.getX(),c.getY(),c.getZ());
glEnd();
}
}
/// Stores `debugMode`; the value is returned unchanged by getDebugMode().
void GLDebugDrawer::setDebugMode(int debugMode)
{
m_debugMode = debugMode;
}
/// Positions the GL raster cursor at `location`.
/// The string draw call is commented out, so `textString` is currently not rendered.
void GLDebugDrawer::draw3dText(const btVector3& location,const char* textString)
{
glRasterPos3f(location.x(), location.y(), location.z());
//BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),textString);
}
/// Prints `warningString` followed by a newline to standard output.
void GLDebugDrawer::reportErrorWarning(const char* warningString)
{
printf("%s\n",warningString);
}
void GLDebugDrawer::drawContactPoint(const btVector3& pointOnB,const btVector3& normalOnB,btScalar distance,int lifeTime,const btVector3& color)
{
{
btVector3 to=pointOnB+normalOnB*1;//distance;
const btVector3&from = pointOnB;
glColor4f(color.getX(), color.getY(), color.getZ(),1.f);
//glColor4f(0,0,0,1.f);
glBegin(GL_LINES);
glVertex3d(from.getX(), from.getY(), from.getZ());
glVertex3d(to.getX(), to.getY(), to.getZ());
glEnd();
// glRasterPos3f(from.x(), from.y(), from.z());
// char buf[12];
// sprintf(buf," %d",lifeTime);
//BMF_DrawString(BMF_GetFont(BMF_kHelvetica10),buf);
}
}

View File

@@ -0,0 +1,38 @@
#ifndef GL_DEBUG_DRAWER_H
#define GL_DEBUG_DRAWER_H
#include "LinearMath/btIDebugDraw.h"
/// btIDebugDraw implementation that declares the drawing and reporting overrides listed below.
/// Only getDebugMode() is defined inline; the other members are defined outside this header.
class GLDebugDrawer : public btIDebugDraw
{
// value stored by setDebugMode() and returned by getDebugMode()
int m_debugMode;
public:
GLDebugDrawer();
/// Line with a separate colour per endpoint.
virtual void drawLine(const btVector3& from,const btVector3& to,const btVector3& fromColor, const btVector3& toColor);
/// Line with a single colour.
virtual void drawLine(const btVector3& from,const btVector3& to,const btVector3& color);
virtual void drawSphere (const btVector3& p, btScalar radius, const btVector3& color);
virtual void drawBox (const btVector3& boxMin, const btVector3& boxMax, const btVector3& color, btScalar alpha);
virtual void drawTriangle(const btVector3& a,const btVector3& b,const btVector3& c,const btVector3& color,btScalar alpha);
virtual void drawContactPoint(const btVector3& PointOnB,const btVector3& normalOnB,btScalar distance,int lifeTime,const btVector3& color);
virtual void reportErrorWarning(const char* warningString);
virtual void draw3dText(const btVector3& location,const char* textString);
virtual void setDebugMode(int debugMode);
/// Returns the stored debug mode.
virtual int getDebugMode() const { return m_debugMode;}
};
#endif//GL_DEBUG_DRAWER_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_DEBUG_FONT_H
#define BT_DEBUG_FONT_H
#include "LinearMath/btVector3.h"
// Text-drawing entry points. Only the declarations are visible here; the definitions live elsewhere.
// NOTE(review): x/y are presumably screen-space coordinates and rgb a text colour — confirm against the implementation.
void GLDebugDrawStringInternal(int x,int y,const char* string,const btVector3& rgb, bool enableBlend, int spacing);
void GLDebugDrawStringInternal(int x,int y,const char* string,const btVector3& rgb);
void GLDebugDrawString(int x,int y,const char* string);
// NOTE(review): presumably called with the new window size on resize — confirm against callers.
void GLDebugResetFont(int screenWidth,int screenHeight);
#endif //BT_DEBUG_FONT_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,70 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef GL_SHAPE_DRAWER_H
#define GL_SHAPE_DRAWER_H
class btCollisionShape;
class btShapeHull;
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btVector3.h"
#include "BulletCollision/CollisionShapes/btShapeHull.h"
/// OpenGL shape drawing
class GL_ShapeDrawer
{
protected:
/// Per-shape cache entry: a btShapeHull constructed from the convex shape plus an edge list.
struct ShapeCache
{
/// Edge record holding two normals and two indices.
struct Edge { btVector3 n[2];int v[2]; };
ShapeCache(btConvexShape* s) : m_shapehull(s) {}
btShapeHull m_shapehull;
btAlignedObjectArray<Edge> m_edges;
};
//clean-up memory of dynamically created shape hulls
btAlignedObjectArray<ShapeCache*> m_shapecaches;
unsigned int m_texturehandle;
// flag toggled by enableTexture() and reported by hasTextureEnabled()
bool m_textureenabled;
bool m_textureinitialized;
// NOTE(review): presumably returns the cache entry for the given shape, creating it on demand — definition not visible here.
ShapeCache* cache(btConvexShape*);
public:
GL_ShapeDrawer();
virtual ~GL_ShapeDrawer();
///drawOpenGL might allocate temporary memory, stores pointer in shape userpointer
virtual void drawOpenGL(btScalar* m, const btCollisionShape* shape, const btVector3& color,int debugMode,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax);
virtual void drawShadow(btScalar* m, const btVector3& extrusion,const btCollisionShape* shape,const btVector3& worldBoundsMin,const btVector3& worldBoundsMax);
/// Sets the texture-enabled flag to `enable` and returns the previous value of the flag.
bool enableTexture(bool enable) { bool p=m_textureenabled;m_textureenabled=enable;return(p); }
/// Returns the current value of the texture-enabled flag.
bool hasTextureEnabled() const
{
return m_textureenabled;
}
static void drawCylinder(float radius,float halfHeight, int upAxis);
void drawSphere(btScalar r, int lats, int longs);
static void drawCoordSystem();
};
// Free functions declared for display-list handling; definitions are not visible in this header.
// NOTE(review): names suggest register/cleanup of per-shape display lists — confirm against the implementation.
void OGL_displaylist_register_shape(btCollisionShape * shape);
void OGL_displaylist_clean();
#endif //GL_SHAPE_DRAWER_H

View File

@@ -0,0 +1,76 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "GL_Simplex1to4.h"
#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
#include "GL_ShapeDrawer.h"
#ifdef _WIN32
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#endif
#include "GlutStuff.h"
#include "LinearMath/btTransform.h"
/// Constructs the debug simplex with no simplex solver attached (m_simplexSolver is null).
GL_Simplex1to4::GL_Simplex1to4()
:m_simplexSolver(0)
{
}
///
/// Debugging method calcClosest calculates the closest point to the origin, using m_simplexSolver
///
/// @param m OpenGL-layout matrix; loaded into a btTransform that is applied to every simplex vertex
///          before the vertex is handed to the solver.
///
/// The coordinate system is always drawn. When a solver is attached, it is reset, fed every transformed
/// vertex, and a red line is drawn from the origin to the final value of `v`.
///
void GL_Simplex1to4::calcClosest(btScalar* m)
{
	btTransform tr;
	tr.setFromOpenGLMatrix(m);

	GL_ShapeDrawer::drawCoordSystem();

	if (!m_simplexSolver)
		return;

	m_simplexSolver->reset();

	// Start at the origin so the line drawn below is well defined (zero length)
	// even when the simplex holds no vertices and the loop never assigns v.
	btVector3 v(btScalar(0.),btScalar(0.),btScalar(0.));

	for (int i=0;i<m_numVertices;i++)
	{
		v = tr(m_vertices[i]);
		m_simplexSolver->addVertex(v,v,btVector3(0.f,0.f,0.f));
		// The return value is not needed here.
		// NOTE(review): closest() presumably writes the closest point back into v — confirm against the solver interface.
		m_simplexSolver->closest(v);
	}

	//draw v?
	glDisable(GL_LIGHTING);
	glBegin(GL_LINES);
	btglColor3(1.f, 0.f, 0.f);
	btglVertex3(0.f, 0.f, 0.f);
	btglVertex3(v.x(),v.y(),v.z());
	glEnd();
	glEnable(GL_LIGHTING);
}

View File

@@ -0,0 +1,40 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef GL_SIMPLEX_1TO4_H
#define GL_SIMPLEX_1TO4_H
#include "BulletCollision/CollisionShapes/btTetrahedronShape.h"
#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
///GL_Simplex1to4 is a class to debug a Simplex Solver with 1 to 4 points.
///Can be used by GJK.
class GL_Simplex1to4 : public btBU_Simplex1to4
{
// solver used by calcClosest(); stored as passed to setSimplexSolver(), never deleted in the code visible here
btSimplexSolverInterface* m_simplexSolver;
public:
GL_Simplex1to4();
/// Debug-draws using the matrix `m`; defined in the .cpp file.
void calcClosest(btScalar* m);
/// Stores the solver pointer for later use by calcClosest().
void setSimplexSolver(btSimplexSolverInterface* simplexSolver) {
m_simplexSolver = simplexSolver;
}
};
#endif //GL_SIMPLEX_1TO4_H

View File

@@ -0,0 +1,87 @@
#ifndef _WINDOWS
#include "GlutDemoApplication.h"
#include "GlutStuff.h"
#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h"
#include "BulletDynamics/Dynamics/btRigidBody.h"
void GlutDemoApplication::updateModifierKeys()
{
m_modifierKeys = 0;
if (glutGetModifiers() & GLUT_ACTIVE_ALT)
m_modifierKeys |= BT_ACTIVE_ALT;
if (glutGetModifiers() & GLUT_ACTIVE_CTRL)
m_modifierKeys |= BT_ACTIVE_CTRL;
if (glutGetModifiers() & GLUT_ACTIVE_SHIFT)
m_modifierKeys |= BT_ACTIVE_SHIFT;
}
void GlutDemoApplication::specialKeyboard(int key, int x, int y)
{
(void)x;
(void)y;
switch (key)
{
case GLUT_KEY_F1:
{
break;
}
case GLUT_KEY_F2:
{
break;
}
case GLUT_KEY_END:
{
int numObj = getDynamicsWorld()->getNumCollisionObjects();
if (numObj)
{
btCollisionObject* obj = getDynamicsWorld()->getCollisionObjectArray()[numObj-1];
getDynamicsWorld()->removeCollisionObject(obj);
btRigidBody* body = btRigidBody::upcast(obj);
if (body && body->getMotionState())
{
delete body->getMotionState();
}
delete obj;
}
break;
}
case GLUT_KEY_LEFT : stepLeft(); break;
case GLUT_KEY_RIGHT : stepRight(); break;
case GLUT_KEY_UP : stepFront(); break;
case GLUT_KEY_DOWN : stepBack(); break;
case GLUT_KEY_PAGE_UP : zoomIn(); break;
case GLUT_KEY_PAGE_DOWN : zoomOut(); break;
case GLUT_KEY_HOME : toggleIdle(); break;
default:
// std::cout << "unused (special) key : " << key << std::endl;
break;
}
glutPostRedisplay();
}
/// Swaps the buffers of the current GLUT window by forwarding to glutSwapBuffers().
void GlutDemoApplication::swapBuffers()
{
glutSwapBuffers();
}
#endif //_WINDOWS

View File

@@ -0,0 +1,34 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef GLUT_DEMO_APPLICATION_H
#define GLUT_DEMO_APPLICATION_H
#include "DemoApplication.h"
/// DemoApplication specialisation that declares the GLUT-backed versions of special-key handling,
/// buffer swapping and modifier-key polling. The definitions live in the matching .cpp file.
class GlutDemoApplication : public DemoApplication
{
public:
void specialKeyboard(int key, int x, int y);
virtual void swapBuffers();
virtual void updateModifierKeys();
};
#endif //GLUT_DEMO_APPLICATION_H

View File

@@ -0,0 +1,119 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _WINDOWS
#include "DemoApplication.h"
//glut is C code, this global gDemoApplication links glut to the C++ demo
static DemoApplication* gDemoApplication = 0;
#include "GlutStuff.h"
// C-style trampolines registered with GLUT. Each one forwards its arguments unchanged to the
// corresponding member function of the global gDemoApplication. None of them checks
// gDemoApplication for null, so it must be set before any callback can fire.

/// Forwards an ASCII key press.
static void glutKeyboardCallback(unsigned char key, int x, int y)
{
gDemoApplication->keyboardCallback(key,x,y);
}
/// Forwards an ASCII key release.
static void glutKeyboardUpCallback(unsigned char key, int x, int y)
{
gDemoApplication->keyboardUpCallback(key,x,y);
}
/// Forwards a special key press.
static void glutSpecialKeyboardCallback(int key, int x, int y)
{
gDemoApplication->specialKeyboard(key,x,y);
}
/// Forwards a special key release.
static void glutSpecialKeyboardUpCallback(int key, int x, int y)
{
gDemoApplication->specialKeyboardUp(key,x,y);
}
/// Forwards a window resize.
static void glutReshapeCallback(int w, int h)
{
gDemoApplication->reshape(w,h);
}
/// Forwards to moveAndDisplay(); registered as the idle function in glutmain.
static void glutMoveAndDisplayCallback()
{
gDemoApplication->moveAndDisplay();
}
/// Forwards a mouse button event.
static void glutMouseFuncCallback(int button, int state, int x, int y)
{
gDemoApplication->mouseFunc(button,state,x,y);
}
/// Forwards mouse motion; registered for both passive and button-down motion in glutmain.
static void glutMotionFuncCallback(int x,int y)
{
gDemoApplication->mouseMotionFunc(x,y);
}
/// Forwards a display request.
static void glutDisplayCallback(void)
{
gDemoApplication->displayCallback();
}
/// Initialises GLUT, creates a double-buffered RGBA window with depth and stencil buffers,
/// and wires all GLUT callbacks to `demoApp`.
///
/// @param argc,argv  forwarded to glutInit
/// @param width,height  initial window size
/// @param title  window title
/// @param demoApp  application that receives the callbacks; stored in the file-level gDemoApplication
/// @return always 0
///
/// NOTE(review): the code shown here never calls glutMainLoop(); presumably the caller enters the
/// main loop after this function returns — confirm against the demos that call glutmain.
int glutmain(int argc, char **argv,int width,int height,const char* title,DemoApplication* demoApp) {
gDemoApplication = demoApp;
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA | GLUT_DEPTH | GLUT_STENCIL);
glutInitWindowPosition(0, 0);
glutInitWindowSize(width, height);
glutCreateWindow(title);
#ifdef BT_USE_FREEGLUT
// freeglut only: make glutMainLoop() return when the window is closed
glutSetOption (GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_GLUTMAINLOOP_RETURNS);
#endif
// application-side initialisation runs after the window has been created
gDemoApplication->myinit();
glutKeyboardFunc(glutKeyboardCallback);
glutKeyboardUpFunc(glutKeyboardUpCallback);
glutSpecialFunc(glutSpecialKeyboardCallback);
glutSpecialUpFunc(glutSpecialKeyboardUpCallback);
glutReshapeFunc(glutReshapeCallback);
//createMenu();
glutIdleFunc(glutMoveAndDisplayCallback);
glutMouseFunc(glutMouseFuncCallback);
// the same handler receives motion with and without a button held
glutPassiveMotionFunc(glutMotionFuncCallback);
glutMotionFunc(glutMotionFuncCallback);
glutDisplayFunc( glutDisplayCallback );
// run one move-and-display step immediately
glutMoveAndDisplayCallback();
//enable vsync to avoid tearing on Apple (todo: for Windows)
#if defined(__APPLE__) && !defined (VMDMESA)
int swap_interval = 1;
CGLContextObj cgl_context = CGLGetCurrentContext();
CGLSetParameter(cgl_context, kCGLCPSwapInterval, &swap_interval);
#endif
return 0;
}
#endif //_WINDOWS

View File

@@ -0,0 +1,86 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2012 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef GLUT_STUFF_H
#define GLUT_STUFF_H
#ifdef _WIN32//for glut.h
#include <windows.h>
#endif
//think different
#if defined(__APPLE__) && !defined (VMDMESA)
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#include <GLUT/glut.h>
#else
#ifdef _WINDOWS
#include <windows.h>
#include <GL/gl.h>
#include <GL/glu.h>
#else
#include <GL/gl.h>
#include <GL/glut.h>
#endif //_WINDOWS
#endif //APPLE
// Framework-level key and modifier constants.
// With _WINDOWS defined only BT_ACTIVE_ALT is provided (mapped to VK_LMENU);
// otherwise every constant maps onto its GLUT counterpart.
#ifdef _WINDOWS
#define BT_ACTIVE_ALT VK_LMENU
#else
#define BT_KEY_K 'k'
#define BT_KEY_LEFT GLUT_KEY_LEFT
#define BT_KEY_RIGHT GLUT_KEY_RIGHT
#define BT_KEY_UP GLUT_KEY_UP
#define BT_KEY_DOWN GLUT_KEY_DOWN
#define BT_KEY_F1 GLUT_KEY_F1
#define BT_KEY_F2 GLUT_KEY_F2
#define BT_KEY_F3 GLUT_KEY_F3
#define BT_KEY_F4 GLUT_KEY_F4
#define BT_KEY_F5 GLUT_KEY_F5
#define BT_KEY_PAGEUP GLUT_KEY_PAGE_UP
#define BT_KEY_PAGEDOWN GLUT_KEY_PAGE_DOWN
#define BT_KEY_END GLUT_KEY_END
#define BT_KEY_HOME GLUT_KEY_HOME
#define BT_ACTIVE_ALT GLUT_ACTIVE_ALT
// Ctrl must map to GLUT's Ctrl bit; aliasing it to the Alt bit made Ctrl indistinguishable from Alt.
#define BT_ACTIVE_CTRL GLUT_ACTIVE_CTRL
#define BT_ACTIVE_SHIFT GLUT_ACTIVE_SHIFT
#endif
#if BT_USE_FREEGLUT
#include "GL/freeglut_ext.h" //to be able to return from glutMainLoop()
#endif
class DemoApplication;
int glutmain(int argc, char **argv,int width,int height,const char* title,DemoApplication* demoApp);
// Precision-dependent aliases for GL entry points: the 'd' (double) variants are selected when
// BT_USE_DOUBLE_PRECISION is defined, otherwise the aliases below are used.
#if defined(BT_USE_DOUBLE_PRECISION)
#define btglLoadMatrix glLoadMatrixd
#define btglMultMatrix glMultMatrixd
#define btglColor3 glColor3d
#define btglVertex3 glVertex3d
#else
#define btglLoadMatrix glLoadMatrixf
#define btglMultMatrix glMultMatrixf
#define btglColor3 glColor3f
// NOTE(review): this branch still maps btglVertex3 to glVertex3d, unlike its sibling 'f' aliases —
// confirm whether glVertex3f was intended.
#define btglVertex3 glVertex3d
#endif
#endif //GLUT_STUFF_H

View File

@@ -0,0 +1,86 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "RenderTexture.h"
#include <memory.h>
/// Allocates a width*height buffer with 4 bytes per pixel, zeroes it, and then overwrites every
/// pixel via setPixel() with the colour (x, y, 0, 1) built from its own coordinates.
renderTexture::renderTexture(int width,int height)
:m_height(height),m_width(width)
{
	m_buffer = new unsigned char[m_width*m_height*4];

	//clear screen
	memset(m_buffer,0,m_width*m_height*4);

	//clear screen version 2
	// Each pixel is written exactly once and independently, so the traversal order is free.
	for (int row=0;row<m_height;row++)
	{
		for (int col=0;col<m_width;col++)
		{
			setPixel(col,row,btVector4(float(col),float(row),0.f,1.f));
		}
	}
}
void renderTexture::grapicalPrintf(char* str, void* fontData, int rasterposx,int rasterposy)
{
unsigned char c;
int x=0;
int xx=0;
while ((c = (unsigned char) *str++)) {
x=xx;
unsigned char* fontPtr = (unsigned char*) fontData;
char ch = c-32;
int sx=ch%16;
int sy=ch/16;
for (int i=sx*16;i<(sx*16+16);i++)
{
int y=0;
for (int j=sy*16;j<(sy*16+16);j++)
{
unsigned char packedColor = (fontPtr[i*3+255*256*3-(256*j)*3]);
//float colorf = packedColor ? 0.f : 1.f;
float colorf = packedColor/255.f;// ? 0.f : 1.f;
btVector4 rgba(colorf,colorf,colorf,1.f);
//if (colorf)
{
//setPixel(rasterposx+x,rasterposy+y,rgba);
addPixel(rasterposx+x,rasterposy+y,rgba);
}
//bit >>=1;
y++;
}
x++;
}
//xx+=16;
xx+=10;
}
}
/// Releases the pixel buffer allocated with new[] in the constructor.
renderTexture::~renderTexture()
{
delete [] m_buffer;
}

View File

@@ -0,0 +1,73 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef RENDER_TEXTURE_H
#define RENDER_TEXTURE_H
#include "LinearMath/btVector3.h"
#include "GLDebugFont.h"
///
///renderTexture provides a software-render context (setpixel/printf)
///
/// Pixels are stored in m_buffer as 4 consecutive bytes each, at byte offset (x + y*m_width)*4.
/// None of the pixel accessors checks x/y against the texture size.
/// NOTE(review): the class owns m_buffer but declares no copy constructor or assignment operator;
/// copying an instance would leave two objects deleting the same buffer.
class renderTexture
{
int m_height;
int m_width;
unsigned char* m_buffer;
public:
renderTexture(int width,int height);
~renderTexture();
///rgba input is in range [0..1] for each component
/// Overwrites the pixel at (x,y); each channel is stored as (unsigned char)(255*component).
inline void setPixel(int x,int y,const btVector4& rgba)
{
unsigned char* pixel = &m_buffer[ (x+y*m_width) * 4];
pixel[0] = (unsigned char)(255.*rgba.getX());
pixel[1] = (unsigned char)(255.*rgba.getY());
pixel[2] = (unsigned char)(255.*rgba.getZ());
pixel[3] = (unsigned char)(255.*rgba.getW());
}
/// Adds 255*component to the first three channels of the pixel at (x,y), capping each at 255.
/// The fourth (alpha) channel is left untouched.
inline void addPixel(int x,int y,const btVector4& rgba)
{
unsigned char* pixel = &m_buffer[ (x+y*m_width) * 4];
pixel[0] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[0] + btScalar(255.f)*rgba.getX()));
pixel[1] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[1] + btScalar(255.f)*rgba.getY()));
pixel[2] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[2] + btScalar(255.f)*rgba.getZ()));
// pixel[3] = (unsigned char)btMin(btScalar(255.f),((btScalar)pixel[3] + btScalar(255.f)*rgba.getW()));
}
/// Returns the pixel at (x,y) with every channel divided by 255.
inline btVector4 getPixel(int x,int y)
{
unsigned char* pixel = &m_buffer[ (x+y*m_width) * 4];
return btVector4(pixel[0]*1.f/255.f,
pixel[1]*1.f/255.f,
pixel[2]*1.f/255.f,
pixel[3]*1.f/255.f);
}
/// Read-only access to the raw pixel bytes.
const unsigned char* getBuffer() const { return m_buffer;}
int getWidth() const { return m_width;}
int getHeight() const { return m_height;}
/// Draws `str` into the texture using the bitmap in `fontData`; defined in the .cpp file.
void grapicalPrintf(char* str, void* fontData, int startx = 0,int starty=0);
};
#endif //RENDER_TEXTURE_H

View File

@@ -0,0 +1,405 @@
#ifdef _WINDOWS
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2010 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <windows.h>
#include <gl/gl.h>
#include "DemoApplication.h"
#include "GLDebugDrawer.h"
#include "GLDebugFont.h"
#include "BulletDynamics/Dynamics/btDynamicsWorld.h"
/// This Win32AppMain is shared code between all demos.
/// The actual demo, derived from DemoApplication is created using 'createDemo', in a separate .cpp file
// The running demo; assigned from createDemo() in WinMain and used by WndProc.
DemoApplication* gDemoApplication = 0;
// Factory supplied by each demo (see the comment above).
DemoApplication* createDemo();
// Function Declarations
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
void EnableOpenGL(HWND hWnd, HDC * hDC, HGLRC * hRC);
void DisableOpenGL(HWND hWnd, HDC hDC, HGLRC hRC);
// set by EnableOpenGL, cleared by DisableOpenGL; WndProc only forwards resizes while it is true
static bool sOpenGLInitialized = false;
// last client size reported through WM_SIZE
static int sWidth = 0;
static int sHeight =0;
// set when Q or Escape is pressed; suppresses further WM_CHAR forwarding
static int quitRequest = 0;
// WinMain
/// Win32 entry point shared by all demos: creates the demo, registers and opens the window,
/// enables OpenGL, initialises the demo and its physics, then runs the render loop until
/// WM_QUIT is received. Returns the wParam of the last message retrieved.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
	LPSTR lpCmdLine, int iCmdShow)
{
	gDemoApplication = createDemo();

	// register window class
	WNDCLASS windowClass;
	windowClass.style = CS_OWNDC;
	windowClass.lpfnWndProc = WndProc;
	windowClass.cbClsExtra = 0;
	windowClass.cbWndExtra = 0;
	windowClass.hInstance = hInstance;
	windowClass.hIcon = LoadIcon( NULL, IDI_APPLICATION );
	windowClass.hCursor = LoadCursor( NULL, IDC_ARROW );
	windowClass.hbrBackground = (HBRUSH)GetStockObject( BLACK_BRUSH );
	windowClass.lpszMenuName = NULL;
	windowClass.lpszClassName = "BulletPhysics";
	RegisterClass( &windowClass );

	// create main window
	HWND hWnd = CreateWindow(
		"BulletPhysics", "Bullet Physics Sample. http://bulletphysics.org",
		WS_CAPTION | WS_VISIBLE | WS_OVERLAPPEDWINDOW,
		// 0, 0, 640, 480,
		0, 0, 1024, 768,
		NULL, NULL, hInstance, NULL );

	// enable OpenGL for the window
	HDC hDC;
	HGLRC hRC;
	EnableOpenGL( hWnd, &hDC, &hRC );

	GLDebugDrawer debugDraw;
	gDemoApplication->myinit();
	//gDemoApplication->reshape(1024, 768);
	gDemoApplication->initPhysics();
	if (gDemoApplication->getDynamicsWorld())
	{
		gDemoApplication->getDynamicsWorld()->setDebugDrawer(&debugDraw);
	}
	gDemoApplication->reshape(sWidth,sHeight);

	// program main loop: at most one message is handled per rendered frame
	MSG msg;
	for (bool running = true; running; )
	{
		if ( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) )
		{
			if ( msg.message == WM_QUIT )
			{
				// the current frame is still rendered before the loop exits
				running = false;
			}
			else
			{
				TranslateMessage( &msg );
				DispatchMessage( &msg );
			}
			// gDemoApplication->displayCallback();
		}

		// OpenGL animation code goes here
		glClearColor( .7f, 0.7f, 0.7f, 1.f );
		gDemoApplication->moveAndDisplay();
		SwapBuffers( hDC );
	}

	// shutdown OpenGL
	DisableOpenGL( hWnd, hDC, hRC );

	// destroy the window explicitly
	DestroyWindow( hWnd );

	delete gDemoApplication;
	return msg.wParam;
}
// Window Procedure
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
switch (message)
{
case WM_SYSKEYDOWN:
{
if (lParam & 1<<29)
{
gDemoApplication->m_modifierKeys = VK_LMENU;
}
break;
}
case WM_SYSKEYUP:
{
if (lParam & 1<<29)
{
gDemoApplication->m_modifierKeys = VK_LMENU;
} else
{
gDemoApplication->m_modifierKeys = 0;
}
break;
}
case WM_SIZE: // Size Action Has Taken Place
switch (wParam) // Evaluate Size Action
{
case SIZE_MINIMIZED: // Was Window Minimized?
return 0; // Return
case SIZE_MAXIMIZED: // Was Window Maximized?
sWidth = LOWORD (lParam);
sHeight = HIWORD (lParam);
if (sOpenGLInitialized)
{
gDemoApplication->reshape(sWidth,sHeight);
}
return 0; // Return
case SIZE_RESTORED: // Was Window Restored?
sWidth = LOWORD (lParam);
sHeight = HIWORD (lParam);
if (sOpenGLInitialized)
{
gDemoApplication->reshape(sWidth,sHeight);
}
return 0; // Return
}
break;
case WM_CREATE:
return 0;
case WM_MBUTTONUP:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(1,1,xPos,yPos);
break;
}
case WM_MBUTTONDOWN:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(1,0,xPos,yPos);
break;
}
case WM_LBUTTONUP:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(0,1,xPos,yPos);
break;
}
case 0x020A://WM_MOUSEWHEEL:
{
int zDelta = (short)HIWORD(wParam);
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
if (zDelta>0)
gDemoApplication->zoomIn();
else
gDemoApplication->zoomOut();
break;
}
case WM_MOUSEMOVE:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseMotionFunc(xPos,yPos);
break;
}
case WM_RBUTTONUP:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(2,1,xPos,yPos);
break;
}
case WM_RBUTTONDOWN:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(2,0,xPos,yPos);
break;
}
case WM_LBUTTONDOWN:
{
int xPos = LOWORD(lParam);
int yPos = HIWORD(lParam);
gDemoApplication->mouseFunc(0,0,xPos,yPos);
break;
}
/*#define WM_LBUTTONUP 0x0202
#define WM_LBUTTONDBLCLK 0x0203
#define WM_RBUTTONDOWN 0x0204
#define WM_RBUTTONUP 0x0205
#define WM_RBUTTONDBLCLK 0x0206
#define WM_MBUTTONDOWN 0x0207
#define WM_MBUTTONUP 0x0208
#define WM_MBUTTONDBLCLK 0x0209
*/
case WM_CLOSE:
PostQuitMessage( 0 );
return 0;
case WM_DESTROY:
return 0;
case WM_KEYUP:
switch ( wParam )
{
case VK_PRIOR:
case VK_NEXT:
case VK_END:
case VK_HOME:
case VK_LEFT:
case VK_UP:
case VK_RIGHT:
case VK_DOWN:
{
if (gDemoApplication)
gDemoApplication->specialKeyboardUp(wParam,0,0);
return 0;
}
default:
{
gDemoApplication->keyboardUpCallback(tolower(wParam),0,0);
}
return DefWindowProc( hWnd, message, wParam, lParam );
}
case WM_KEYDOWN:
printf("bla\n");
switch ( wParam )
{
case VK_CONTROL:
case VK_PRIOR:
case VK_NEXT:
case VK_END:
case VK_HOME:
case VK_LEFT:
case VK_UP:
case VK_RIGHT:
case VK_DOWN:
{
if (gDemoApplication)
gDemoApplication->specialKeyboard(wParam,0,0);
break;
}
case ' ':
{
if (gDemoApplication)
gDemoApplication->clientResetScene();
break;
}
case 'Q':
case VK_ESCAPE:
{
quitRequest = 1;
PostQuitMessage(0);
}
return 0;
}
return 0;
case WM_CHAR:
if (!quitRequest)
gDemoApplication->keyboardCallback(wParam,0,0);
break;
default:
return DefWindowProc( hWnd, message, wParam, lParam );
}
return 0;
}
// Enable OpenGL
/// Obtains the window's device context, selects a double-buffered RGBA pixel format
/// (24 colour bits, 16 depth bits, 1 stencil bit), creates a render context and makes it current.
/// The device and render contexts are returned through `hDC` and `hRC`; sOpenGLInitialized is set to true.
void EnableOpenGL(HWND hWnd, HDC * hDC, HGLRC * hRC)
{
	// get the device context (DC)
	*hDC = GetDC( hWnd );

	// describe the requested pixel format; all other fields stay zero
	PIXELFORMATDESCRIPTOR formatDesc;
	ZeroMemory( &formatDesc, sizeof( formatDesc ) );
	formatDesc.nSize = sizeof( formatDesc );
	formatDesc.nVersion = 1;
	formatDesc.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
	formatDesc.iPixelType = PFD_TYPE_RGBA;
	formatDesc.cColorBits = 24;
	formatDesc.cDepthBits = 16;
	formatDesc.cStencilBits = 1;
	formatDesc.iLayerType = PFD_MAIN_PLANE;

	// set the pixel format for the DC
	const int chosenFormat = ChoosePixelFormat( *hDC, &formatDesc );
	SetPixelFormat( *hDC, chosenFormat, &formatDesc );

	// create and enable the render context (RC)
	*hRC = wglCreateContext( *hDC );
	wglMakeCurrent( *hDC, *hRC );

	sOpenGLInitialized = true;
}
// Tear down what EnableOpenGL set up: clear the initialized flag, unbind
// and delete the rendering context, then release the device context.
//   hWnd : window the device context was obtained from
//   hDC  : device context to release
//   hRC  : rendering context to delete
void DisableOpenGL(HWND hWnd, HDC hDC, HGLRC hRC)
{
	// Flag is cleared first, before the context is touched.
	sOpenGLInitialized = false;

	// The context is unbound before it is deleted.
	wglMakeCurrent( NULL, NULL );
	wglDeleteContext( hRC );

	ReleaseDC( hWnd, hDC );
}
#endif //_WINDOWS

View File

@@ -0,0 +1,79 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifdef _WINDOWS
#include "Win32DemoApplication.h"
// The overrides below are compiled out by "#if 0". They are empty
// placeholders and have no effect on the build.
#if 0
void Win32DemoApplication::renderme()
{
}
void Win32DemoApplication::setTexturing(bool useTexture)
{
}
void Win32DemoApplication::setShadows(bool useShadows)
{
}
void Win32DemoApplication::setCameraDistance(float camDist)
{
}
void Win32DemoApplication::clientResetScene()
{
}
#endif
// Placeholder: modifier-key handling is not implemented yet, so this
// function intentionally does nothing.
void Win32DemoApplication::updateModifierKeys()
{
//not yet
}
// Handle a "special" (non-character) key press.
//   key  : Win32 virtual-key code
//   x, y : unused
// Only the four arrow keys are acted upon; every other key is ignored
// (page up/down and home handling is not implemented here).
void Win32DemoApplication::specialKeyboard(int key, int x, int y)
{
	(void)x;
	(void)y;

	if (key == VK_LEFT)
	{
		stepLeft();
	}
	else if (key == VK_RIGHT)
	{
		stepRight();
	}
	else if (key == VK_UP)
	{
		stepFront();
	}
	else if (key == VK_DOWN)
	{
		stepBack();
	}
	// any other special key: nothing to do
}
// Intentionally empty: this class performs no buffer swap itself.
void Win32DemoApplication::swapBuffers()
{
}
#endif

View File

@@ -0,0 +1,40 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef WIN32_DEMO_APPLICATION_H
#define WIN32_DEMO_APPLICATION_H
#include "DemoApplication.h"
// DemoApplication specialization used by the Win32 window code.
// It declares three hooks: buffer swap, special-key handling and
// modifier-key update.
class Win32DemoApplication : public DemoApplication
{
protected:
public:
// Buffer-swap hook.
virtual void swapBuffers();
// Special (non-character) key handler; key is a virtual-key code,
// x and y are ignored by the implementation.
void specialKeyboard(int key, int x, int y);
// Modifier-key update hook.
virtual void updateModifierKeys();
};
#endif //WIN32_DEMO_APPLICATION_H

View File

@@ -0,0 +1,18 @@
-- Premake project "testbed": a static library built from every .cpp/.h
-- file matched by the "**" patterns in the files list below.
project "testbed"
kind "StaticLib"
-- Where the resulting library is written.
targetdir "../../build/lib"
-- Include paths used by all configurations.
includedirs {
".",
"../../bullet2"
}
-- Extra include path that only applies to the "Windows" configuration.
configuration {"Windows"}
includedirs {
"../../rendering/GlutGlewWindows"
}
-- Reset the configuration filter so the settings below apply everywhere.
configuration{}
files {
"**.cpp",
"**.h"
}

View File

@@ -0,0 +1,29 @@
-- Only define this project when the AMD OpenCL SDK is detected
-- (findOpenCL_AMD checks the AMDAPPSDKROOT environment variable).
-- NOTE(review): hasCL is a global; other build scripts may read or
-- overwrite it - confirm before making it local.
hasCL = findOpenCL_AMD()
if (hasCL) then
project "OpenCL_bt3dGridBroadphase_AMD"
-- Adds the AMD OpenCL defines, include/library paths and the OpenCL link.
initOpenCL_AMD()
language "C++"
kind "StaticLib"
targetdir "../../../bin"
libdirs {"../../../rendering/GlutGlewWindows"}
includedirs {
-- "../../../rendering/GlutGlewWindows",
"../../../opencl/3dGridBroadphase/Shared",
"../../../../../src",
"../../primitives"
}
-- The sources live in the sibling Shared directory.
files {
"../Shared/*.cpp",
"../Shared/*.h"
}
end

View File

@@ -0,0 +1,23 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <MiniCL/cl_MiniCL_Defs.h>
// Compile the OpenCL kernel file directly as host code.
// The .cl file wraps its whole content in MSTRINGIFY( ... ); defining the
// macro to expand to its argument unchanged turns that content into
// ordinary declarations, given C linkage by the extern "C" block.
extern "C"
{
#define MSTRINGIFY(A) A
#include "bt3dGridBroadphaseOCL.cl"
#undef MSTRINGIFY
}

View File

@@ -0,0 +1,349 @@
MSTRINGIFY(
// Convert an integer cell coordinate into a linear cell index.
// The grid dimensions are read as an int4 stored right after the first
// float4 of pParams. Each coordinate is wrapped with a bitwise AND against
// (dimension - 1) - presumably the dimensions are powers of two; TODO confirm.
int getPosHash(int4 gridPos, __global float4* pParams)
{
	int4 dims = *((__global int4*)(pParams + 1));
	int cx = gridPos.x & (dims.x - 1);
	int cy = gridPos.y & (dims.y - 1);
	int cz = gridPos.z & (dims.z - 1);
	// x varies fastest, then y, then z
	return cz * dims.y * dims.x + cy * dims.x + cx;
}
// Convert a world-space position into an integer cell coordinate.
// pParams[0] holds the per-axis scale applied to the position (the host
// fills it with the inverse cell size) and the following int4 holds the grid
// dimensions. Each axis is scaled, floored and wrapped with
// (dimension - 1). The w component of the result is left unset.
int4 getGridPos(float4 worldPos, __global float4* pParams)
{
	float4 invCellSize = pParams[0];
	int4 dims = *((__global int4*)(pParams + 1));
	int4 cell;
	cell.x = (int)floor(worldPos.x * invCellSize.x) & (dims.x - 1);
	cell.y = (int)floor(worldPos.y * invCellSize.y) & (dims.y - 1);
	cell.z = (int)floor(worldPos.z * invCellSize.z) & (dims.z - 1);
	return cell;
}
// One work-item per body: compute the grid cell of the AABB centre and
// store the pair (cell hash - body index) in pHash[index].
// pAABB holds two float4 per body: the minimum corner, then the maximum.
// Work-items with an index of numObjects or more do nothing.
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams GUID_ARG)
{
	int bodyIdx = get_global_id(0);
	if(bodyIdx < numObjects)
	{
		float4 lo = pAABB[bodyIdx*2];
		float4 hi = pAABB[bodyIdx*2 + 1];

		// centre of the box
		float4 center;
		center.x = (lo.x + hi.x) * 0.5f;
		center.y = (lo.y + hi.y) * 0.5f;
		center.z = (lo.z + hi.z) * 0.5f;
		center.w = 0.f;

		// x = hash of the cell that contains the centre, y = body index
		int2 entry;
		entry.x = getPosHash(getGridPos(center, pParams), pParams);
		entry.y = bodyIdx;
		pHash[bodyIdx] = entry;
	}
}
// One work-item per cell: mark the cell as empty by writing -1 as its
// start index. Work-items past numCells do nothing.
__kernel void kClearCellStart( int numCells,
	__global int* pCellStart GUID_ARG)
{
	int cellIdx = get_global_id(0);
	if(cellIdx < numCells)
	{
		pCellStart[cellIdx] = -1;
	}
}
// One work-item per entry of pHash: record in cellStart the index of the
// first entry that carries each hash value. An entry starts a cell when it
// is entry 0 or when its hash differs from the hash of the entry before it.
// NOTE(review): this only yields cell starts if pHash is sorted by hash -
// presumably guaranteed by the sort step that runs before; confirm.
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart GUID_ARG)
{
// Slot 0 holds the hash of the entry preceding this work-group; slot
// local_id + 1 holds the hash of the work-item itself.
// NOTE(review): 513 slots fit work-groups of up to 512 items - confirm the
// launch never uses a larger local size.
__local int sharedHash[513];
int index = get_global_id(0);
int2 sortedData;
if(index < numObjects)
{
sortedData = pHash[index];
// Load hash data into shared memory so that we can look
// at neighboring body's hash value without loading
// two hash values per thread
sharedHash[get_local_id(0) + 1] = sortedData.x;
if((index > 0) && (get_local_id(0) == 0))
{
// first thread in block must load neighbor body hash
sharedHash[0] = pHash[index-1].x;
}
}
// The barrier sits outside the range checks, so every work-item of the
// group reaches it, including those past numObjects.
barrier(CLK_LOCAL_MEM_FENCE);
if(index < numObjects)
{
// sharedHash[local_id] is the hash of the previous entry.
if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))
{
cellStart[sortedData.x] = index;
}
}
}
// Return non-zero when two axis-aligned boxes overlap on x, y and z.
// Boxes that merely touch count as overlapping; w components are ignored.
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
	int overlapX = (min0.x <= max1.x) && (min1.x <= max0.x);
	int overlapY = (min0.y <= max1.y) && (min1.y <= max0.y);
	int overlapZ = (min0.z <= max1.z) && (min1.z <= max0.z);
	return overlapX && overlapY && overlapZ;
}
// Test the body referenced by pHash[index] against the bodies stored in one
// grid cell and merge every overlap into that body's pair list.
//   numObjects         : number of entries in pHash
//   gridPos            : cell to examine
//   index              : position of the current body in pHash
//   pHash              : (cell hash - body index) entries
//   pCellStart         : first pHash index of each cell, -1 when empty
//   pAABB              : two float4 per body (min, max); min.w carries the
//                        handle index as raw integer bits
//   pPairBuff          : pair lists of all handles
//   pPairBuffStartCurr : per handle, x = list start, y = current list length
//   pParams            : grid parameters; the w of the int4 after the first
//                        float4 is the maximum number of bodies per cell
// Pair-list entries are handle indices combined with flag bits:
// 0x20000000 marks an entry appended by this pass, 0x40000000 marks an
// existing entry that was found again.
void findPairsInCell( int numObjects,
int4 gridPos,
int index,
__global int2* pHash,
__global int* pCellStart,
__global float4* pAABB,
__global int* pPairBuff,
__global int2* pPairBuffStartCurr,
__global float4* pParams)
{
int4 pGridDim = *((__global int4*)(pParams + 1));
int maxBodiesPerCell = pGridDim.w;
int gridHash = getPosHash(gridPos, pParams);
// get start of bucket for this cell
int bucketStart = pCellStart[gridHash];
if (bucketStart == -1)
{
return; // cell empty
}
// iterate over bodies in this cell
int2 sortedData = pHash[index];
int unsorted_indx = sortedData.y;
float4 min0 = pAABB[unsorted_indx*2 + 0];
float4 max0 = pAABB[unsorted_indx*2 + 1];
int handleIndex = as_int(min0.w);
int2 start_curr = pPairBuffStartCurr[handleIndex];
int start = start_curr.x;
int curr = start_curr.y;
// The list of this handle ends where the list of the next handle starts;
// one slot is kept in reserve (hence the - 1).
int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
int curr_max = start_curr_next.x - start - 1;
// Never scan more than maxBodiesPerCell entries, nor past the end of pHash.
int bucketEnd = bucketStart + maxBodiesPerCell;
bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;
for(int index2 = bucketStart; index2 < bucketEnd; index2++)
{
int2 cellData = pHash[index2];
if (cellData.x != gridHash)
{
break; // no longer in same bucket
}
int unsorted_indx2 = cellData.y;
// Only bodies with a smaller index are considered: this skips the body
// itself and handles each pair from one side only.
if (unsorted_indx2 < unsorted_indx) // check not colliding with self
{
float4 min1 = pAABB[unsorted_indx2*2 + 0];
float4 max1 = pAABB[unsorted_indx2*2 + 1];
if(testAABBOverlap(min0, max0, min1, max1))
{
int handleIndex2 = as_int(min1.w);
int k;
// Look for the pair in the existing list, ignoring the flag bits.
for(k = 0; k < curr; k++)
{
int old_pair = pPairBuff[start+k] & (~0x60000000);
if(old_pair == handleIndex2)
{
pPairBuff[start+k] |= 0x40000000;
break;
}
}
// Not found: append it as a new pair if there is room left.
if(k == curr)
{
if(curr >= curr_max)
{ // not a good solution, but let's avoid crash
break;
}
pPairBuff[start+curr] = handleIndex2 | 0x20000000;
curr++;
}
}
}
}
// Write back the (possibly increased) list length.
int2 newStartCurr;
newStartCurr.x = start;
newStartCurr.y = curr;
pPairBuffStartCurr[handleIndex] = newStartCurr;
return;
}
// One work-item per entry of pHash: locate the cell that contains the centre
// of the body AABB and run findPairsInCell on that cell and on its 26
// neighbours (offsets -1..1 on every axis).
// Work-items with an index of numObjects or more do nothing.
__kernel void kFindOverlappingPairs( int numObjects,
	__global float4* pAABB,
	__global int2* pHash,
	__global int* pCellStart,
	__global int* pPairBuff,
	__global int2* pPairBuffStartCurr,
	__global float4* pParams GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		// the y component of the hash entry is the body index into pAABB
		int bodyIdx = pHash[index].y;
		float4 lo = pAABB[bodyIdx*2 + 0];
		float4 hi = pAABB[bodyIdx*2 + 1];

		// only x, y and z of the centre are consumed by getGridPos
		float4 center;
		center.x = (lo.x + hi.x) * 0.5f;
		center.y = (lo.y + hi.y) * 0.5f;
		center.z = (lo.z + hi.z) * 0.5f;

		int4 homeCell = getGridPos(center, pParams);
		int4 cell;
		for(int dz = -1; dz <= 1; dz++)
		{
			cell.z = homeCell.z + dz;
			for(int dy = -1; dy <= 1; dy++)
			{
				cell.y = homeCell.y + dy;
				for(int dx = -1; dx <= 1; dx++)
				{
					cell.x = homeCell.x + dx;
					findPairsInCell(numObjects, cell, index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, pParams);
				}
			}
		}
	}
}
// One work-item per entry of pHash: test the body against every large body
// and merge the overlaps into the pair list of the body.
//   numObjects         : number of entries in pHash (small bodies)
//   pAABB              : two float4 per body (min, max); the large bodies
//                        follow the numObjects small ones; min.w carries the
//                        handle index as raw integer bits
//   pHash              : (cell hash - body index) entries
//   pCellStart         : unused here, kept for the kernel argument layout
//   pPairBuff          : pair lists of all handles
//   pPairBuffStartCurr : per handle, x = list start, y = current list length
//   numLarge           : number of large bodies
// Pair-list entries are handle indices combined with flag bits:
// 0x20000000 marks an entry appended by this pass, 0x40000000 marks an
// existing entry that was found again.
__kernel void kFindPairsLarge( int numObjects,
	__global float4* pAABB,
	__global int2* pHash,
	__global int* pCellStart,
	__global int* pPairBuff,
	__global int2* pPairBuffStartCurr,
	uint numLarge GUID_ARG)
{
	int index = get_global_id(0);
	if(index >= numObjects)
	{
		return;
	}
	int2 sortedData = pHash[index];
	int unsorted_indx = sortedData.y;
	float4 min0 = pAABB[unsorted_indx*2 + 0];
	float4 max0 = pAABB[unsorted_indx*2 + 1];
	int handleIndex = as_int(min0.w);
	int2 start_curr = pPairBuffStartCurr[handleIndex];
	int start = start_curr.x;
	int curr = start_curr.y;
	// The list of this handle ends where the list of the next handle starts;
	// one slot is kept in reserve (hence the - 1).
	int2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	int curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		int indx2 = numObjects + i;
		float4 min1 = pAABB[indx2*2 + 0];
		float4 max1 = pAABB[indx2*2 + 1];
		if(testAABBOverlap(min0, max0, min1, max1))
		{
			int k;
			int handleIndex2 = as_int(min1.w);
			// Look for the pair in the existing list, ignoring the flag bits.
			for(k = 0; k < curr; k++)
			{
				int old_pair = pPairBuff[start+k] & (~0x60000000);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= 0x40000000;
					break;
				}
			}
			// Not found: append it as a new pair if there is room left.
			if(k == curr)
			{
				// Check the capacity before storing (same order as in
				// findPairsInCell) so that a full list is never written to.
				if(curr >= curr_max)
				{ // not a good solution, but avoids a crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | 0x20000000;
				curr++;
			}
		}
	}
	// Write back the (possibly increased) list length.
	int2 newStartCurr;
	newStartCurr.x = start;
	newStartCurr.y = curr;
	pPairBuffStartCurr[handleIndex] = newStartCurr;
	return;
}
// One work-item per body: count the entries of the pair list of the body
// that do not carry the 0x40000000 flag and store the count in
// pPairScan[index + 1].
// The handle index is read from the w component of the AABB minimum.
// Work-items with an index of numObjects or more do nothing.
__kernel void kComputePairCacheChanges( int numObjects,
	__global int* pPairBuff,
	__global int2* pPairBuffStartCurr,
	__global int* pPairScan,
	__global float4* pAABB GUID_ARG)
{
	int index = get_global_id(0);
	if(index < numObjects)
	{
		float4 boxMin = pAABB[index * 2];
		int handle = as_int(boxMin.w);
		int2 listInfo = pPairBuffStartCurr[handle];
		int listStart = listInfo.x;
		int listLen = listInfo.y;

		int changed = 0;
		for(int k = 0; k < listLen; k++)
		{
			if(!(pPairBuff[listStart + k] & 0x40000000))
			{
				changed++;
			}
		}
		pPairScan[index+1] = changed;
	}
}
// One work-item per body: export the changed pairs of the body and compact
// its pair list in place.
//  - every entry without the 0x40000000 flag is copied, flags included, to
//    pPairOut starting at offset pPairScan[index + 1]
//  - every entry that has 0x20000000 or 0x40000000 set is kept, with both
//    flags cleared, and packed to the front of the list; entries with
//    neither flag are dropped
//  - the list length stored in pPairBuffStartCurr becomes the number kept
// NOTE(review): pPairScan is presumably the prefix sum of the counts written
// by kComputePairCacheChanges - confirm against the host code.
__kernel void kSqueezeOverlappingPairBuff( int numObjects,
__global int* pPairBuff,
__global int2* pPairBuffStartCurr,
__global int* pPairScan,
__global int* pPairOut,
__global float4* pAABB GUID_ARG)
{
int index = get_global_id(0);
if(index >= numObjects)
{
return;
}
float4 bbMin = pAABB[index * 2];
// The handle index is stored as raw integer bits in the w component.
int handleIndex = as_int(bbMin.w);
int2 start_curr = pPairBuffStartCurr[handleIndex];
int start = start_curr.x;
int curr = start_curr.y;
__global int* pInp = pPairBuff + start;
__global int* pOut = pPairOut + pPairScan[index+1];
// pOut2 writes into the list being read; it never gets ahead of pInp.
__global int* pOut2 = pInp;
int num = 0;
for(int k = 0; k < curr; k++, pInp++)
{
if(!((*pInp) & 0x40000000))
{
*pOut = *pInp;
pOut++;
}
if((*pInp) & 0x60000000)
{
*pOut2 = (*pInp) & (~0x60000000);
pOut2++;
num++;
}
}
int2 newStartCurr;
newStartCurr.x = start;
newStartCurr.y = num;
pPairBuffStartCurr[handleIndex] = newStartCurr;
}
);

View File

@@ -0,0 +1,697 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "../basic_initialize/btOpenCLUtils.h"
#include "bt3dGridBroadphaseOCL.h"
#include <stdio.h>
#include <string.h>
#include "Adl/Adl.h"
#include <AdlPrimitives/Scan/PrefixScan.h>
#include <AdlPrimitives/Sort/RadixSort32.h>
#include <AdlPrimitives/Sort/RadixSort.h>
// When defined, the stages below call clFinish after enqueueing their work.
#define ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
// Path of the kernel source file, passed to the program compile helper.
#define GRID_OCL_PATH "..\\..\\opencl\\3dGridBroadphase\\Shared\\bt3dGridBroadphaseOCL.cl"
// The .cl file wraps its content in MSTRINGIFY( ... ); stringifying the
// argument turns the whole kernel source into one string literal.
#define MSTRINGIFY(A) #A
static const char* spProgramSource =
#include "bt3dGridBroadphaseOCL.cl"
// File-level state allocated by the constructor and released by the
// destructor.
// NOTE(review): these are globals, not members, so two live instances would
// share and overwrite them - confirm only one instance is ever created.
adl::PrefixScan<adl::TYPE_CL>::Data* gData1=0;
adl::Buffer<unsigned int>* m_srcClBuffer=0;
// NOTE(review): not referenced in the code visible here.
struct MySortData
{
int key;
int value;
};
adl::RadixSort32<adl::TYPE_CL>::Data* dataC = 0;
adl::RadixSort<adl::TYPE_HOST>::Data* dataHost = 0;
// Values written into m_srcClBuffer by the constructor.
static unsigned int infElem = 0x2fffffff;
static unsigned int zeroEl = 0;
// NOTE(review): not referenced in the code visible here.
static unsigned int minusOne= -1;
// Build the OpenCL grid broadphase.
// The grid and proxy limits are forwarded unchanged to btGpu3DGridBroadphase.
//   context, device, queue : OpenCL objects used for all buffers and kernels
//   deviceCL               : Adl device to use; when null a new one is
//                            created around context/device/queue and owned
//                            (and later deleted) by this object
bt3dGridBroadphaseOCL::bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
btScalar maxSmallProxySize,
int maxSmallProxiesPerCell,
cl_context context, cl_device_id device, cl_command_queue queue,
adl::DeviceCL* deviceCL
) :
btGpu3DGridBroadphase(overlappingPairCache, cellSize, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize, maxSmallProxiesPerCell)
{
// Order matters: the program must be built and the buffers created before
// initKernels binds the buffers as kernel arguments.
initCL(context, device, queue);
allocateBuffers();
prefillBuffers();
initKernels();
//create an Adl device host and OpenCL device
adl::DeviceUtils::Config cfg;
m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg );
m_ownsDevice = false;
if (!deviceCL)
{
// No Adl device supplied: wrap the given OpenCL objects in a new one.
m_ownsDevice = true;
deviceCL = new adl::DeviceCL;
deviceCL->m_context = context;
deviceCL->m_deviceIdx = device;
deviceCL->m_commandQueue = queue;
deviceCL->m_kernelManager = new adl::KernelManager;
}
m_deviceCL = deviceCL;
// The scan and sort helpers are sized for at least 256K elements.
int minSize = 256*1024;
int maxSortBuffer = maxSmallProxies < minSize ? minSize :maxSmallProxies;
// Source buffer for the prefix scan: element 0 and the last element are
// zero, element maxSmallProxies is infElem; the rest is left unwritten here.
m_srcClBuffer = new adl::Buffer<unsigned int> (m_deviceCL,maxSmallProxies+2);
m_srcClBuffer->write(&zeroEl,1,0);
//m_srcClBuffer->write(&infElem,maxSmallProxies,0);
m_srcClBuffer->write(&infElem,1,maxSmallProxies);
m_srcClBuffer->write(&zeroEl,1,maxSmallProxies+1);
m_deviceCL->waitForCompletion();
gData1 = adl::PrefixScan<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2,adl::PrefixScanBase::EXCLUSIVE );
dataHost = adl::RadixSort<adl::TYPE_HOST>::allocate( m_deviceHost, maxSmallProxies+2 );
dataC = adl::RadixSort32<adl::TYPE_CL>::allocate( m_deviceCL, maxSortBuffer+2 );
}
// Release the Adl helpers created by the constructor and, when this object
// created the Adl OpenCL device itself, that device and its kernel manager.
// NOTE(review): the cl_mem buffers, kernels and program created by this class
// are not released here - confirm whether they are freed elsewhere.
bt3dGridBroadphaseOCL::~bt3dGridBroadphaseOCL()
{
//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
assert(m_bInitialized);
adl::RadixSort<adl::TYPE_HOST>::deallocate(dataHost);
adl::PrefixScan<adl::TYPE_CL>::deallocate(gData1);
adl::RadixSort32<adl::TYPE_CL>::deallocate(dataC);
adl::DeviceUtils::deallocate(m_deviceHost);
delete m_srcClBuffer;
if (m_ownsDevice)
{
// The kernel manager is deleted before the device that refers to it.
delete m_deviceCL->m_kernelManager;
delete m_deviceCL;
}
}
#ifdef CL_PLATFORM_MINI_CL
// Known problem with MSVC9: static constructors are not run for variables
// that are defined in a library but never referenced, apparently because of
// optimization. Other compilers will probably behave the same way.
// To be robust, the kernels are registered again in initCL (this is safe);
// the forward declarations below make the kernel functions visible for that.
#define MINICL_DECLARE(a) extern "C" void a();
MINICL_DECLARE(kCalcHashAABB)
MINICL_DECLARE(kClearCellStart)
MINICL_DECLARE(kFindCellStart)
MINICL_DECLARE(kFindOverlappingPairs)
MINICL_DECLARE(kFindPairsLarge)
MINICL_DECLARE(kComputePairCacheChanges)
MINICL_DECLARE(kSqueezeOverlappingPairBuff)
#undef MINICL_DECLARE
#endif
// Store the OpenCL context, device and queue and build the kernel program.
//   context, device, queue : must all be non-null (asserted)
// The program is compiled from the embedded source string with GUID_ARG
// defined as empty. A reported build error is checked with
// GRID3DOCL_CHECKERROR, and "OK" is printed only when the build succeeded.
void bt3dGridBroadphaseOCL::initCL(cl_context context, cl_device_id device, cl_command_queue queue)
{
#ifdef CL_PLATFORM_MINI_CL
	// call constructors here
	MINICL_REGISTER(kCalcHashAABB)
	MINICL_REGISTER(kClearCellStart)
	MINICL_REGISTER(kFindCellStart)
	MINICL_REGISTER(kFindOverlappingPairs)
	MINICL_REGISTER(kFindPairsLarge)
	MINICL_REGISTER(kComputePairCacheChanges)
	MINICL_REGISTER(kSqueezeOverlappingPairBuff)
#endif
	btAssert(context);
	m_cxMainContext = context;
	btAssert(device);
	m_cdDevice = device;
	btAssert(queue);
	m_cqCommandQue = queue;
	//adl::Kernel kern = m_deviceCL->getKernel(fileName,funcName,options,src);

	// Start from CL_SUCCESS so that the check below only fires when the
	// compile helper actually reported an error.
	cl_int ciErrNum = CL_SUCCESS;
	m_cpProgram = btOpenCLUtils::compileCLProgramFromString(m_cxMainContext,m_cdDevice,spProgramSource, &ciErrNum,"-DGUID_ARG=""""",GRID_OCL_PATH);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
	btAssert(m_cpProgram);
	if ((ciErrNum == CL_SUCCESS) && m_cpProgram)
	{
		printf("OK\n");
	}
	else
	{
		printf("bt3dGridBroadphaseOCL: building the OpenCL program failed (error %d)\n", (int)ciErrNum);
	}
}
// Create every kernel of the broadphase and bind its buffer arguments.
// Argument 0 of each kernel (the element count) is not bound here; it is
// set at launch time by runKernelWithWorkgroupSize.
void bt3dGridBroadphaseOCL::initKernels()
{
	// every argument bound below is a cl_mem handle
	const int memArg = sizeof(cl_mem);

	// kCalcHashAABB(n, pAABB, pHash, pParams)
	initKernel(GRID3DOCL_KERNEL_CALC_HASH_AABB, "kCalcHashAABB");
	setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 1, memArg, &m_dAABB);
	setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 2, memArg, &m_dBodiesHash);
	setKernelArg(GRID3DOCL_KERNEL_CALC_HASH_AABB, 3, memArg, &m_dBpParams);

	// kClearCellStart(n, pCellStart)
	initKernel(GRID3DOCL_KERNEL_CLEAR_CELL_START, "kClearCellStart");
	setKernelArg(GRID3DOCL_KERNEL_CLEAR_CELL_START, 1, memArg, &m_dCellStart);

	// kFindCellStart(n, pHash, cellStart)
	initKernel(GRID3DOCL_KERNEL_FIND_CELL_START, "kFindCellStart");
	setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 1, memArg, &m_dBodiesHash);
	setKernelArg(GRID3DOCL_KERNEL_FIND_CELL_START, 2, memArg, &m_dCellStart);

	// kFindOverlappingPairs(n, pAABB, pHash, pCellStart, pPairBuff, pPairBuffStartCurr, pParams)
	initKernel(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, "kFindOverlappingPairs");
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 1, memArg, &m_dAABB);
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 2, memArg, &m_dBodiesHash);
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 3, memArg, &m_dCellStart);
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 4, memArg, &m_dPairBuff);
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 5, memArg, &m_dPairBuffStartCurr);
	setKernelArg(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, 6, memArg, &m_dBpParams);

	// kFindPairsLarge(n, pAABB, pHash, pCellStart, pPairBuff, pPairBuffStartCurr, numLarge)
	// argument 6 (numLarge) is bound in findPairsLarge
	initKernel(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, "kFindPairsLarge");
	setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 1, memArg, &m_dAABB);
	setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 2, memArg, &m_dBodiesHash);
	setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 3, memArg, &m_dCellStart);
	setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 4, memArg, &m_dPairBuff);
	setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 5, memArg, &m_dPairBuffStartCurr);

	// kComputePairCacheChanges(n, pPairBuff, pPairBuffStartCurr, pPairScan, pAABB)
	initKernel(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, "kComputePairCacheChanges");
	setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 1, memArg, &m_dPairBuff);
	setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 2, memArg, &m_dPairBuffStartCurr);
	setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 3, memArg, &m_dPairScanChanged);
	setKernelArg(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, 4, memArg, &m_dAABB);

	// kSqueezeOverlappingPairBuff(n, pPairBuff, pPairBuffStartCurr, pPairScan, pPairOut, pAABB)
	initKernel(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, "kSqueezeOverlappingPairBuff");
	setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 1, memArg, &m_dPairBuff);
	setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 2, memArg, &m_dPairBuffStartCurr);
	setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 3, memArg, &m_dPairScanChanged);
	setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 4, memArg, &m_dPairsChanged);
	setKernelArg(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, 5, memArg, &m_dAABB);
}
// Create all device buffers used by the broadphase kernels.
// Every clCreateBuffer result is checked with GRID3DOCL_CHECKERROR.
void bt3dGridBroadphaseOCL::allocateBuffers()
{
	cl_int ciErrNum;
	unsigned int memSize;

	// current version of bitonic sort works for power of 2 arrays only, so
	// round the hash size up to the next power of two >= m_maxHandles
	m_hashSize = 1;
	for(int bit = 1; bit < 32; bit++)
	{
		if(m_hashSize >= m_maxHandles)
		{
			break;
		}
		m_hashSize <<= 1;
	}

	// (hash, body index) pairs; never smaller than 1 MB
	memSize = m_hashSize * 2 * sizeof(unsigned int);
	if (memSize < 1024*1024)
		memSize = 1024*1024;
	m_dBodiesHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// first hash-table index of every grid cell
	memSize = m_numCells * sizeof(unsigned int);
	m_dCellStart = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// pair lists of all handles
	memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
	m_dPairBuff = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// (start, current length) per handle plus one terminating entry.
	// The kernels load element [handleIndex + 1] as a whole int2, so the
	// terminating entry needs two ints: 2 * m_maxHandles + 2 in total.
	// Host copies of (2 * m_maxHandles + 1) ints still fit.
	memSize = (m_maxHandles * 2 + 2) * sizeof(unsigned int);
	m_dPairBuffStartCurr = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// two float4 (min, max) per small and large handle
	unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
	memSize = numAABB * sizeof(float) * 4 * 2;
	m_dAABB = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// per-body change counts / scan result
	memSize = (m_maxHandles + 2) * sizeof(unsigned int);
	m_dPairScanChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// changed pairs and contiguous pairs share the same size
	memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
	m_dPairsChanged = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
	m_dPairsContiguous = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);

	// grid parameters read by the kernels
	memSize = 3 * 4 * sizeof(float);
	m_dBpParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
	GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
// Upload the initial content of the hash, pair-start and pair buffers.
// The hash table is filled with 0xFF bytes, including the padding up to
// m_hashSize entries; the pair buffer is zeroed.
void bt3dGridBroadphaseOCL::prefillBuffers()
{
	const size_t hashBytes = m_maxHandles*2*sizeof(unsigned int);

	// every hash entry starts as all ones
	memset(m_hBodiesHash, 0xFF, hashBytes);
	copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, hashBytes);

	// now fill the rest (bitonic sorting works with size == pow of 2):
	// the padding after the first m_maxHandles entries is written from the
	// start of the same host array
	int remainder = m_hashSize - m_maxHandles;
	if(remainder)
	{
		copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, remainder * 2 * sizeof(unsigned int), hashBytes, 0);
	}

	copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));

	const size_t pairBytes = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int);
	memset(m_hPairBuff, 0x00, pairBytes);
	copyArrayToDevice(m_dPairBuff, m_hPairBuff, pairBytes);
}
void bt3dGridBroadphaseOCL::initKernel(int kernelId, char* pName)
{
cl_int ciErrNum;
cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
size_t wgSize;
ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
m_kernels[kernelId].m_Id = kernelId;
m_kernels[kernelId].m_kernel = kernel;
m_kernels[kernelId].m_name = pName;
m_kernels[kernelId].m_workgroupSize = (int)wgSize;
return;
}
void bt3dGridBroadphaseOCL::runKernelWithWorkgroupSize(int kernelId, int globalSize)
{
if(globalSize <= 0)
{
return;
}
cl_kernel kernelFunc = m_kernels[kernelId].m_kernel;
cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
int workgroupSize = btMin(64,m_kernels[kernelId].m_workgroupSize);
if(workgroupSize <= 0)
{ // let OpenCL library calculate workgroup size
size_t globalWorkSize[2];
globalWorkSize[0] = globalSize;
globalWorkSize[1] = 1;
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 );
}
else
{
size_t localWorkSize[2], globalWorkSize[2];
//workgroupSize = btMin(workgroupSize, globalSize);
int num_t = globalSize / workgroupSize;
int num_g = num_t * workgroupSize;
if(num_g < globalSize)
{
num_t++;
}
localWorkSize[0] = workgroupSize;
globalWorkSize[0] = num_t * workgroupSize;
localWorkSize[1] = 1;
globalWorkSize[1] = 1;
ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 );
}
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
ciErrNum = clFlush(m_cqCommandQue);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
void bt3dGridBroadphaseOCL::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr)
{
cl_int ciErrNum;
ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
// Blocking copy of size bytes from host memory into a device buffer.
//   devOffs  : byte offset into the device buffer
//   hostOffs : byte offset added to the host pointer
// A size of zero is a no-op.
void bt3dGridBroadphaseOCL::copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs, int hostOffs)
{
	if (!size)
	{
		return;
	}
	const char* src = static_cast<const char*>(host) + hostOffs;
	cl_int status = clEnqueueWriteBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, src, 0, NULL, NULL);
	GRID3DOCL_CHECKERROR(status, CL_SUCCESS);
}
void bt3dGridBroadphaseOCL::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs)
{
if (size)
{
cl_int ciErrNum;
char* pHost = (char*)host + hostOffs;
ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL);
GRID3DOCL_CHECKERROR(ciErrNum, CL_SUCCESS);
}
}
//
// overrides
//
// Let the base class fill the host AABB array, then upload the entries of
// all current small and large handles (two per handle) to the device.
void bt3dGridBroadphaseOCL::prepareAABB()
{
	btGpu3DGridBroadphase::prepareAABB();

	const size_t aabbBytes = sizeof(bt3DGrid3F1U) * 2 * (m_numHandles + m_numLargeHandles);
	copyArrayToDevice(m_dAABB, m_hAABB, aabbBytes);
}
// Forward the parameters to the base class, then upload the subset the
// kernels read to the device parameter buffer.
void bt3dGridBroadphaseOCL::setParameters(bt3DGridBroadphaseParams* hostParams)
{
btGpu3DGridBroadphase::setParameters(hostParams);
// Device-side layout: the kernels read pParams[0] as a float4 (inverse cell
// size) and the following 16 bytes as an int4 (grid size in x, y, z and the
// maximum number of bodies per cell in w).
struct btParamsBpOCL
{
float m_invCellSize[4];
int m_gridSize[4];
};
btParamsBpOCL hParams;
hParams.m_invCellSize[0] = m_params.m_invCellSizeX;
hParams.m_invCellSize[1] = m_params.m_invCellSizeY;
hParams.m_invCellSize[2] = m_params.m_invCellSizeZ;
hParams.m_invCellSize[3] = 0.f;
hParams.m_gridSize[0] = m_params.m_gridSizeX;
hParams.m_gridSize[1] = m_params.m_gridSizeY;
hParams.m_gridSize[2] = m_params.m_gridSizeZ;
hParams.m_gridSize[3] = m_params.m_maxBodiesPerCell;
copyArrayToDevice(m_dBpParams, &hParams, sizeof(btParamsBpOCL));
return;
}
// Compute the grid hash of every current handle on the device.
// The "#if 1" selects the OpenCL kernel; the disabled branch would call the
// base-class implementation instead.
void bt3dGridBroadphaseOCL::calcHashAABB()
{
BT_PROFILE("calcHashAABB");
#if 1
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CALC_HASH_AABB, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
// Wait for the kernel so that the profile zone covers its execution.
clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
#else
btGpu3DGridBroadphase::calcHashAABB();
#endif
return;
}
// Sort the (hash, body index) table.
// Three variants are selected at compile time:
//  - CL_PLATFORM_MINI_CL: base-class sort on the host, result uploaded
//  - USE_HOST (disabled): Adl host radix sort with download and upload
//  - default: Adl RadixSort32 directly on the device buffer
void bt3dGridBroadphaseOCL::sortHash()
{
BT_PROFILE("sortHash");
#ifdef CL_PLATFORM_MINI_CL
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
btGpu3DGridBroadphase::sortHash();
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#else
//#define USE_HOST
#ifdef USE_HOST
copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
//adl::Buffer<unsigned int> keysIn,keysOut,valuesIn,valuesOut;
///adl::RadixSort32<adl::TYPE_CL>::execute(dataC,keysIn,keysOut,valuesIn,valuesOut,m_numHandles);
adl::HostBuffer<adl::SortData> inoutHost;
inoutHost.m_device = m_deviceHost;
inoutHost.m_ptr = (adl::SortData*)m_hBodiesHash;
inoutHost.m_size = m_numHandles;
adl::RadixSort<adl::TYPE_HOST>::execute(dataHost, inoutHost,m_numHandles);
copyArrayToDevice(m_dBodiesHash, m_hBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#else
{
clFinish(m_cqCommandQue);
BT_PROFILE("RadixSort32::execute");
// Wrap the existing cl_mem in an Adl buffer without copying.
// NOTE(review): presumably adl::Buffer does not release m_ptr when it goes
// out of scope here - confirm.
adl::Buffer<adl::SortData> inout;
inout.m_device = this->m_deviceCL;
inout.m_size = m_numHandles;
inout.m_ptr = (adl::SortData*)m_dBodiesHash;
// The element count handed to the sort is rounded up to a multiple of
// DATA_ALIGNMENT, so entries beyond m_numHandles take part in the sort.
int actualHandles = m_numHandles;
int dataAlignment = adl::RadixSort32<adl::TYPE_CL>::DATA_ALIGNMENT;
if (actualHandles%dataAlignment)
{
actualHandles += dataAlignment-(actualHandles%dataAlignment);
}
adl::RadixSort32<adl::TYPE_CL>::execute(dataC,inout, actualHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
}
{
//BT_PROFILE("copyArrayFromDevice");
//copyArrayFromDevice(m_hBodiesHash, m_dBodiesHash, m_numHandles * 2 * sizeof(unsigned int));
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif //ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
}
#endif //USE_HOST
#endif
return;
}
// Build the table that maps every grid cell to its first hash-table entry.
// With MiniCL the base class computes it on the host and the result is
// uploaded; otherwise all cells are reset on the device and then filled
// from the hash table by a second kernel.
void bt3dGridBroadphaseOCL::findCellStart()
{
#if 1
BT_PROFILE("findCellStart");
#if defined(CL_PLATFORM_MINI_CL)
btGpu3DGridBroadphase::findCellStart();
copyArrayToDevice(m_dCellStart, m_hCellStart, m_numCells * sizeof(unsigned int));
#else
// One work-item per cell, then one per handle.
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_CLEAR_CELL_START, m_numCells);
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_CELL_START, m_numHandles);
#endif
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
#else
btGpu3DGridBroadphase::findCellStart();
#endif
return;
}
// Run the overlapping-pair kernel with one work-item per current handle.
// The disabled branch would compute the pairs with the base class on the
// host and upload the pair buffers instead.
void bt3dGridBroadphaseOCL::findOverlappingPairs()
{
#if 1
BT_PROFILE("findOverlappingPairs");
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
#else
btGpu3DGridBroadphase::findOverlappingPairs();
copyArrayToDevice(m_dPairBuffStartCurr, m_hPairBuffStartCurr, (m_maxHandles * 2 + 1) * sizeof(unsigned int));
copyArrayToDevice(m_dPairBuff, m_hPairBuff, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int));
#endif
return;
}
// Test every current small handle against all large handles on the device.
// The kernel is only launched when there is at least one large handle.
void bt3dGridBroadphaseOCL::findPairsLarge()
{
BT_PROFILE("findPairsLarge");
#if 1
if(m_numLargeHandles)
{
// Argument 6 is the number of large handles; it can change between calls,
// so it is bound here and not in initKernels.
setKernelArg(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, 6, sizeof(int),(void*)&m_numLargeHandles);
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_FIND_PAIRS_LARGE, m_numHandles);
}
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
#else
btGpu3DGridBroadphase::findPairsLarge();
#endif
return;
}
// Count the changed pairs of every current handle on the device, then copy
// the counts (m_numHandles + 2 values) back to the host array.
// The disabled branch would compute the counts on the host and upload them.
void bt3dGridBroadphaseOCL::computePairCacheChanges()
{
BT_PROFILE("computePairCacheChanges");
#if 1
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES, m_numHandles);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
copyArrayFromDevice( m_hPairScanChanged,m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#else
btGpu3DGridBroadphase::computePairCacheChanges();
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#endif
return;
}
extern cl_device_type deviceType;
// Runs a prefix scan over the per-body "changed pair" counters on the device.
// Active path ("#else" branch):
//   1. wraps m_dPairScanChanged in an adl::Buffer and copies into it from m_srcClBuffer,
//   2. runs adl::PrefixScan over m_numHandles + 2 elements, which also writes the
//      total into m_numPrefixSum,
//   3. if copyToCpu is true, reads the scanned array back into m_hPairScanChanged.
// The "#if 0" branch is a disabled CPU fallback (download, scan on host, upload).
// NOTE(review): m_srcClBuffer and gData1 are declared outside this view; their
// exact contents and lifetime should be confirmed against the rest of the file.
void bt3dGridBroadphaseOCL::scanOverlappingPairBuff(bool copyToCpu)
{
//Intel/CPU version doesn't handle Adl scan well
#if 0
{
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
btGpu3DGridBroadphase::scanOverlappingPairBuff();
copyArrayToDevice(m_dPairScanChanged, m_hPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
m_numPrefixSum = m_hPairScanChanged[m_numHandles+1];
clFinish(m_cqCommandQue);
//memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
}
#else
{
// copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
// btGpu3DGridBroadphase::scanOverlappingPairBuff();
// Non-owning view of the existing cl_mem so the adl routines can operate on it.
adl::Buffer<unsigned int> destBuffer;
{
BT_PROFILE("copy GPU->GPU");
destBuffer.m_ptr = (unsigned int*)m_dPairScanChanged;
destBuffer.m_device = m_deviceCL;
destBuffer.m_size = sizeof(unsigned int)*(m_numHandles+2);
// NOTE(review): the element count passed here is m_numHandles while the scan
// below covers m_numHandles+2 elements - confirm the copy size is intended.
m_deviceCL->copy(m_srcClBuffer, &destBuffer,m_numHandles,1,1);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
}
{
BT_PROFILE("PrefixScan");
adl::PrefixScan<adl::TYPE_CL>::execute(gData1,*m_srcClBuffer,destBuffer, m_numHandles+2,&m_numPrefixSum);
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
//if (m_numPrefixSum>0x1000)
// {
// printf("error m_numPrefixSum==%d\n",m_numPrefixSum);
// }
}
// Disabled verification code that compares the device result with the host array.
#if 0
unsigned int* verifyhPairScanChanged = new unsigned int[m_maxHandles + 2];
memset(verifyhPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
copyArrayFromDevice(verifyhPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
clFinish(m_cqCommandQue);
/*for (int i=0;i<m_numHandles+2;i++)
{
if (verifyhPairScanChanged[i] != m_hPairScanChanged[i])
{
printf("hello!\n");
}
}
*/
#endif
if (1)
{
//the data
// Only download the scanned array when the caller needs it on the host.
if (copyToCpu)
{
BT_PROFILE("copy GPU -> CPU");
copyArrayFromDevice(m_hPairScanChanged, m_dPairScanChanged, sizeof(unsigned int)*(m_numHandles + 2));
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
}
}
}
#endif
}
// Launches the squeeze-pair-buffer kernel (global size m_numHandles).
// The result is left on the device: the read-back of m_dPairsChanged is commented out.
// The disabled "#else" branch falls back to the CPU base-class implementation.
void bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff()
{
BT_PROFILE("btCuda_squeezeOverlappingPairBuff");
#if 1
runKernelWithWorkgroupSize(GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF, m_numHandles);
// btCuda_squeezeOverlappingPairBuff(m_dPairBuff, m_dPairBuffStartCurr, m_dPairScanChanged, m_dPairsChanged, m_dAABB, m_numHandles);
//copyArrayFromDevice(m_hPairsChanged, m_dPairsChanged, sizeof(unsigned int) * m_numPrefixSum);//m_hPairScanChanged[m_numHandles+1]); //gSum
// Optional blocking sync so that profiling attributes the time to this stage.
#ifdef ADD_BLOCKING_CL_FINISH_FOR_BENCHMARK
clFinish(m_cqCommandQue);
#endif
#else
btGpu3DGridBroadphase::squeezeOverlappingPairBuff();
#endif
return;
}
// Resets the host-side pair-buffer bookkeeping through the base class and then
// calls prefillBuffers().
// NOTE(review): prefillBuffers() is defined outside this view; presumably it
// re-uploads the reset host buffers to the device - confirm.
void bt3dGridBroadphaseOCL::resetPool(btDispatcher* dispatcher)
{
btGpu3DGridBroadphase::resetPool(dispatcher);
prefillBuffers();
}

View File

@@ -0,0 +1,146 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT3DGRIDBROADPHASEOCL_H
#define BT3DGRIDBROADPHASEOCL_H
// Select the OpenCL header: MiniCL (Bullet's CPU emulation) when USE_MINICL is
// defined, otherwise the platform's native OpenCL header.
#ifdef __APPLE__
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
// On macOS the native OpenCL headers live inside the OpenCL framework.
// (Previously this branch also included MiniCL, making USE_MINICL a no-op on Apple.)
#include <OpenCL/cl.h>
#endif
//CL_PLATFORM_MINI_CL could be defined in build system
#else
//#include <GL/glew.h>
// standard utility and system includes
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Extra CL/GL include
//#include <CL/cl_gl.h>
#endif //__APPLE__
// Forward declarations of the ADL device types, so this header only needs
// pointers to them and does not have to include the ADL headers.
namespace adl
{
struct Device;
struct DeviceCL;
};
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "btGpu3DGridBroadphaseSharedTypes.h"
#include "btGpu3DGridBroadphase.h"
// Compares an OpenCL status code 'a' with the expected value 'b'; on mismatch it
// prints the code and triggers btAssert.
// NOTE(review): the expansion is a bare 'if' statement and evaluates 'a' more than
// once - avoid using it directly before an 'else' or with side-effecting arguments.
#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); btAssert((a) == (b)); }
// Indices of the OpenCL kernels used by bt3dGridBroadphaseOCL. The values index
// the m_kernels array; GRID3DOCL_KERNEL_TOTAL is the number of kernels.
enum
{
GRID3DOCL_KERNEL_CALC_HASH_AABB = 0,
GRID3DOCL_KERNEL_CLEAR_CELL_START,
GRID3DOCL_KERNEL_FIND_CELL_START,
GRID3DOCL_KERNEL_FIND_OVERLAPPING_PAIRS,
GRID3DOCL_KERNEL_FIND_PAIRS_LARGE,
GRID3DOCL_KERNEL_COMPUTE_CACHE_CHANGES,
GRID3DOCL_KERNEL_SQUEEZE_PAIR_BUFF,
GRID3DOCL_KERNEL_TOTAL
};
// Bookkeeping record for one OpenCL kernel of the broadphase.
struct bt3dGridOCLKernelInfo
{
// presumably one of the GRID3DOCL_KERNEL_* values - confirm against initKernel()
int m_Id;
// OpenCL kernel object
cl_kernel m_kernel;
// kernel name string (not owned as far as this header shows)
char* m_name;
// work-group size used when launching this kernel
int m_workgroupSize;
};
///The bt3dGridBroadphaseOCL uses OpenCL-capable GPU to compute overlapping pairs
///It derives from btGpu3DGridBroadphase and overrides the individual pipeline
///stages (see "overrides" below) with versions that launch OpenCL kernels.
class bt3dGridBroadphaseOCL : public btGpu3DGridBroadphase
{
protected:
int m_hashSize;
// OpenCL context, device, command queue and program used by all kernels
cl_context m_cxMainContext;
cl_device_id m_cdDevice;
cl_command_queue m_cqCommandQue;
cl_program m_cpProgram;
// one entry per GRID3DOCL_KERNEL_* id
bt3dGridOCLKernelInfo m_kernels[GRID3DOCL_KERNEL_TOTAL];
// data buffers
// device-side buffers; the m_d* names mirror the m_h* host arrays of the base class
cl_mem m_dBodiesHash;
cl_mem m_dCellStart;
cl_mem m_dPairBuff;
cl_mem m_dPairBuffStartCurr;
public:
// device AABB buffer, public so that code outside the class can access it
cl_mem m_dAABB;
protected:
cl_mem m_dPairScanChanged;
cl_mem m_dPairsChanged;
cl_mem m_dPairsContiguous;
cl_mem m_dBpParams;
// ADL device wrappers used by the prefix scan
adl::Device* m_deviceHost;
adl::DeviceCL* m_deviceCL;
// NOTE(review): presumably true when this object created m_deviceCL itself and
// must release it - confirm in the constructor/destructor.
bool m_ownsDevice;
public:
// total written by the device prefix scan in scanOverlappingPairBuff()
unsigned int m_numPrefixSum;
// Parameters mirror the btGpu3DGridBroadphase constructor; context, device,
// queue and deviceCL optionally supply existing OpenCL/ADL objects.
bt3dGridBroadphaseOCL( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
btScalar maxSmallProxySize,
int maxSmallProxiesPerCell = 8,
cl_context context = NULL,
cl_device_id device = NULL,
cl_command_queue queue = NULL,
adl::DeviceCL* deviceCL = 0
);
virtual ~bt3dGridBroadphaseOCL();
protected:
// OpenCL setup helpers
void initCL(cl_context context, cl_device_id device, cl_command_queue queue);
void initKernels();
void allocateBuffers();
void prefillBuffers();
void initKernel(int kernelId, char* pName);
void allocateArray(void** devPtr, unsigned int size);
void freeArray(void* devPtr);
// kernel launch and host<->device transfer helpers
void runKernelWithWorkgroupSize(int kernelId, int globalSize);
void setKernelArg(int kernelId, int argNum, int argSize, void* argPtr);
void copyArrayToDevice(cl_mem device, const void* host, unsigned int size, int devOffs = 0, int hostOffs = 0);
void copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs = 0, int devOffs = 0);
// overrides
virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
virtual void prepareAABB();
virtual void calcHashAABB();
virtual void sortHash();
virtual void findCellStart();
virtual void findOverlappingPairs();
virtual void findPairsLarge();
virtual void computePairCacheChanges();
virtual void scanOverlappingPairBuff(bool copyToCpu=true);
virtual void squeezeOverlappingPairBuff();
virtual void resetPool(btDispatcher* dispatcher);
};
#endif //BT3DGRIDBROADPHASEOCL_H

View File

@@ -0,0 +1,626 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
#include "btGpuDefines.h"
#include "btGpuUtilsSharedDefs.h"
#include "btGpuUtilsSharedCode.h"
#include "LinearMath/btAlignedAllocator.h"
#include "LinearMath/btQuickprof.h"
#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
#include "btGpuDefines.h"
#include "btGpuUtilsSharedDefs.h"
#include "btGpu3DGridBroadphaseSharedDefs.h"
#include "btGpu3DGridBroadphase.h"
#include <string.h> //for memset
#include <stdio.h>
static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
// Constructs the broadphase with its own pair cache: a btHashedOverlappingPairCache
// is placement-constructed in 16-byte aligned memory and handed to btSimpleBroadphase.
// All grid parameters are forwarded unchanged to _initialize().
btGpu3DGridBroadphase::btGpu3DGridBroadphase( const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell) :
btSimpleBroadphase(maxSmallProxies,
// new (btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16)) btSortedOverlappingPairCache),
new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
m_bInitialized(false),
m_numBodies(0)
{
_initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ,
maxSmallProxies, maxLargeProxies, maxPairsPerBody,
maxSmallProxySize, maxBodiesPerCell);
}
// Constructs the broadphase around a caller-supplied pair cache.
// All grid parameters are forwarded unchanged to _initialize().
// NOTE(review): _initialize() sets m_ownsPairCache to true, so the supplied cache
// appears to become owned by this object - confirm that callers expect this.
btGpu3DGridBroadphase::btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache,
const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell) :
btSimpleBroadphase(maxSmallProxies, overlappingPairCache),
m_bInitialized(false),
m_numBodies(0)
{
_initialize(cellSize, gridSizeX, gridSizeY, gridSizeZ,
maxSmallProxies, maxLargeProxies, maxPairsPerBody,
maxSmallProxySize, maxBodiesPerCell);
}
// Releases every host buffer allocated by _initialize() via _finalize().
btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
{
//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
assert(m_bInitialized);
_finalize();
}
// Returns the largest power of two that does not exceed val,
// i.e. 2^n such that 2^(n+1) > val >= 2^n.
// Bits 30 down to 1 are probed from the top; if none of them is set in val the
// probe ends on bit 0, so the result is 1 (this includes val == 0 and val == 1).
int btGpu3DGridBroadphase::getFloorPowOfTwo(int val)
{
    int probe = 0x40000000;
    int shiftsRemaining = 30;
    while ((shiftsRemaining > 0) && ((probe & val) == 0))
    {
        probe >>= 1;
        --shiftsRemaining;
    }
    return probe;
}
void btGpu3DGridBroadphase::_initialize( const btVector3& cellSize,
int gridSizeX, int gridSizeY, int gridSizeZ,
int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
btScalar maxSmallProxySize,
int maxBodiesPerCell)
{
// set various paramerers
m_ownsPairCache = true;
m_params.m_gridSizeX = getFloorPowOfTwo(gridSizeX);
m_params.m_gridSizeY = getFloorPowOfTwo(gridSizeY);
m_params.m_gridSizeZ = getFloorPowOfTwo(gridSizeZ);
m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
m_numCells = m_params.m_numCells;
m_params.m_invCellSizeX = btScalar(1.f) / cellSize[0];
m_params.m_invCellSizeY = btScalar(1.f) / cellSize[1];
m_params.m_invCellSizeZ = btScalar(1.f) / cellSize[2];
m_maxRadius = maxSmallProxySize * btScalar(0.5f);
m_params.m_numBodies = m_numBodies;
m_params.m_maxBodiesPerCell = maxBodiesPerCell;
m_numLargeHandles = 0;
m_maxLargeHandles = maxLargeProxies;
m_maxPairsPerBody = maxPairsPerBody;
m_LastLargeHandleIndex = -1;
assert(!m_bInitialized);
// allocate host storage
m_hBodiesHash = new unsigned int[m_maxHandles * 2];
memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
m_hCellStart = new unsigned int[m_params.m_numCells];
memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
// --------------- for now, init with m_maxPairsPerBody for each body
m_hPairBuffStartCurr[0] = 0;
m_hPairBuffStartCurr[1] = 0;
for(int i = 1; i <= m_maxHandles; i++)
{
m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
m_hPairBuffStartCurr[i * 2 + 1] = 0;
}
//----------------
unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
m_hPairScanChanged = new unsigned int[m_maxHandles + 2];
memset(m_hPairScanChanged,0,sizeof(int)*m_maxHandles + 2);
m_hPairsChanged = new unsigned int[m_maxHandles * m_maxPairsPerBody];
memset(m_hPairsChanged,0,sizeof(int)*(m_maxHandles * m_maxPairsPerBody));
m_hAllOverlappingPairs= new MyUint2[m_maxHandles * m_maxPairsPerBody];
memset(m_hAllOverlappingPairs,0,sizeof(MyUint2)*(m_maxHandles * m_maxPairsPerBody));
// large proxies
// allocate handles buffer and put all handles on free list
m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
m_firstFreeLargeHandle = 0;
{
for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
{
m_pLargeHandles[i].SetNextFree(i + 1);
m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
}
m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
}
// debug data
m_numPairsAdded = 0;
m_numOverflows = 0;
m_bInitialized = true;
}
// Frees every host buffer allocated in _initialize() and marks the object as
// uninitialised. The large-handle pool is released through btAlignedFree because
// it was obtained with btAlignedAlloc and filled by placement new.
void btGpu3DGridBroadphase::_finalize()
{
assert(m_bInitialized);
delete [] m_hBodiesHash;
delete [] m_hCellStart;
delete [] m_hPairBuffStartCurr;
delete [] m_hAABB;
delete [] m_hPairBuff;
delete [] m_hPairScanChanged;
delete [] m_hPairsChanged;
delete [] m_hAllOverlappingPairs;
btAlignedFree(m_pLargeHandlesRawPtr);
m_bInitialized = false;
}
// Runs the complete broadphase update for one step.
// After the btSimpleBroadphase update, the virtual pipeline stages are called in a
// fixed order (parameters, AABBs, hash, sort, cell start, small/small pairs,
// small/large pairs, cache changes, scan, squeeze) and the resulting changes are
// applied to the pair cache. Large/large pairs are always handled on the CPU.
// When there are no small handles only the large/large pass runs.
// Each stage sits in its own scope so its BT_PROFILE sample covers just that stage.
void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
{
btSimpleBroadphase::calculateOverlappingPairs(dispatcher);
if(m_numHandles <= 0)
{
BT_PROFILE("addLarge2LargePairsToCache");
addLarge2LargePairsToCache(dispatcher);
return;
}
// update constants
{
BT_PROFILE("setParameters");
setParameters(&m_params);
}
// prepare AABB array
{
BT_PROFILE("prepareAABB");
prepareAABB();
}
// calculate hash
{
BT_PROFILE("calcHashAABB");
calcHashAABB();
}
{
BT_PROFILE("sortHash");
// sort bodies based on hash
sortHash();
}
// find start of each cell
{
BT_PROFILE("findCellStart");
findCellStart();
}
{
BT_PROFILE("findOverlappingPairs");
// findOverlappingPairs (small/small)
findOverlappingPairs();
}
// findOverlappingPairs (small/large)
{
BT_PROFILE("findPairsLarge");
findPairsLarge();
}
// add pairs to CPU cache
{
BT_PROFILE("computePairCacheChanges");
computePairCacheChanges();
}
{
BT_PROFILE("scanOverlappingPairBuff");
scanOverlappingPairBuff();
}
{
BT_PROFILE("squeezeOverlappingPairBuff");
squeezeOverlappingPairBuff();
}
{
BT_PROFILE("addPairsToCache");
addPairsToCache(dispatcher);
}
// find and add large/large pairs to CPU cache
{
BT_PROFILE("addLarge2LargePairsToCache");
addLarge2LargePairsToCache(dispatcher);
}
return;
}
// Applies the changes recorded in m_hPairsChanged to the pair cache.
// For body i the scanned offsets m_hPairScanChanged[i+1] .. m_hPairScanChanged[i+2]
// delimit its run of changed entries. Each entry carries the other proxy's index
// plus flag bits: BT_3DGRID_PAIR_NEW_FLG means "add the pair", otherwise the pair
// is removed. Indices at or above m_maxHandles refer to the large-proxy pool.
// The debug counters m_numPairsAdded / m_numPairsRemoved are restarted on each call.
void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
{
    m_numPairsAdded = 0;
    m_numPairsRemoved = 0;
    for (int body = 0; body < m_numHandles; ++body)
    {
        const unsigned int changeCount = m_hPairScanChanged[body + 2] - m_hPairScanChanged[body + 1];
        if (changeCount == 0)
        {
            continue;
        }
        const unsigned int* changes = m_hPairsChanged + m_hPairScanChanged[body + 1];
        // the min-AABB entry stores the handle index of this body
        const unsigned int handle0 = m_hAABB[body * 2].uw;
        btSimpleBroadphaseProxy* proxy0 = &m_pHandles[handle0];
        for (unsigned int c = 0; c < changeCount; ++c)
        {
            const unsigned int packed = changes[c];
            const unsigned int handle1 = packed & (~BT_3DGRID_PAIR_ANY_FLG);
            btSimpleBroadphaseProxy* proxy1;
            if (handle1 >= (unsigned int)m_maxHandles)
            {
                // large proxies are numbered after all small handles
                const unsigned int largeIndex = handle1 - m_maxHandles;
                btAssert(largeIndex < (unsigned int)m_maxLargeHandles);
                proxy1 = &m_pLargeHandles[largeIndex];
            }
            else
            {
                proxy1 = &m_pHandles[handle1];
            }
            const bool isNewPair = (packed & BT_3DGRID_PAIR_NEW_FLG) != 0;
            if (isNewPair)
            {
                m_pairCache->addOverlappingPair(proxy0, proxy1);
                m_numPairsAdded++;
            }
            else
            {
                m_pairCache->removeOverlappingPair(proxy0, proxy1, dispatcher);
                m_numPairsRemoved++;
            }
        }
    }
}
// Creates a broadphase proxy for the given AABB.
// Proxies that fit the small-proxy radius go through btSimpleBroadphase; larger
// ones are constructed in place in the large-handle pool.
// Returns 0 (after asserting) when the large-handle pool is already full.
btBroadphaseProxy* btGpu3DGridBroadphase::createProxy( const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
{
    if (!isLargeProxy(aabbMin, aabbMax))
    {
        return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
    }

    if (m_numLargeHandles >= m_maxLargeHandles)
    {
        ///you have to increase the cell size, so 'large' proxies become 'small' proxies (fitting a cell)
        btAssert(0);
        return 0; //should never happen, but don't let the game crash ;-)
    }
    btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));

    const int slot = allocLargeHandle();
    btBroadphaseProxy* largeProxy = new (&m_pLargeHandles[slot]) btSimpleBroadphaseProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, multiSapProxy);
    return largeProxy;
}
// Destroys a proxy created by createProxy().
// Small proxies are handed back to btSimpleBroadphase. A large proxy is returned
// to the large-handle free list and every cached pair that contains it is removed.
void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
{
    if (!isLargeProxy(proxy))
    {
        btSimpleBroadphase::destroyProxy(proxy, dispatcher);
        return;
    }

    btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
    freeLargeHandle(largeProxy);
    m_pairCache->removeOverlappingPairsContainingProxy(proxy, dispatcher);
}
// Restores the pair-buffer bookkeeping to its initial layout: body i starts at
// offset i * m_maxPairsPerBody (even entries) and every odd entry is reset to 0.
// The dispatcher argument is not used by this implementation.
void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
{
    unsigned int startOffset = 0;
    m_hPairBuffStartCurr[0] = startOffset;
    m_hPairBuffStartCurr[1] = 0;
    for (int body = 1; body <= m_maxHandles; ++body)
    {
        startOffset += m_maxPairsPerBody;
        m_hPairBuffStartCurr[body * 2] = startOffset;
        m_hPairBuffStartCurr[body * 2 + 1] = 0;
    }
}
bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax)
{
btVector3 diag = aabbMax - aabbMin;
///use the bounding sphere radius of this bounding box, to include rotation
btScalar radius = diag.length() * btScalar(0.5f);
return (radius > m_maxRadius);
}
// Returns true when the proxy lives in the large-handle pool.
// _initialize() gives large proxies unique ids starting at m_maxHandles + 2,
// so the id alone identifies the pool.
bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
{
    const int firstLargeUid = m_maxHandles + 2;
    return proxy->getUid() >= firstLargeUid;
}
// Brute-force test of every large proxy against every other large proxy.
// Overlapping pairs missing from the cache are added; cached pairs that no longer
// overlap are removed. Does nothing when there are no large handles.
// m_LastLargeHandleIndex is rewritten with the last slot index that was visited.
void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
{
    if (m_numLargeHandles <= 0)
    {
        return;
    }

    int lastVisited = -1;
    for (int a = 0; a <= m_LastLargeHandleIndex; ++a)
    {
        btSimpleBroadphaseProxy* proxyA = &m_pLargeHandles[a];
        lastVisited = a;
        for (int b = a + 1; b <= m_LastLargeHandleIndex; ++b)
        {
            btSimpleBroadphaseProxy* proxyB = &m_pLargeHandles[b];
            btAssert(proxyA != proxyB);

            btSimpleBroadphaseProxy* simpleA = getSimpleProxyFromProxy(proxyA);
            btSimpleBroadphaseProxy* simpleB = getSimpleProxyFromProxy(proxyB);
            const bool overlapping = aabbOverlap(simpleA, simpleB);
            const bool alreadyCached = (m_pairCache->findPair(proxyA, proxyB) != 0);

            if (overlapping && !alreadyCached)
            {
                m_pairCache->addOverlappingPair(proxyA, proxyB);
            }
            else if (!overlapping && alreadyCached)
            {
                m_pairCache->removeOverlappingPair(proxyA, proxyB, dispatcher);
            }
        }
    }
    m_LastLargeHandleIndex = lastVisited;
}
// Ray query: lets btSimpleBroadphase report the small proxies, then passes every
// large-pool slot up to m_LastLargeHandleIndex to the callback without any culling.
void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
{
    btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);

    int slot = 0;
    while (slot <= m_LastLargeHandleIndex)
    {
        rayCallback.process(&m_pLargeHandles[slot]);
        ++slot;
    }
}
//
// overrides for CPU version
//
void btGpu3DGridBroadphase::prepareAABB()
{
BT_PROFILE("prepareAABB");
bt3DGrid3F1U* pBB = m_hAABB;
int i;
int new_largest_index = -1;
unsigned int num_small = 0;
for(i = 0; i <= m_LastHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_small;
pBB++;
num_small++;
}
m_LastHandleIndex = new_largest_index;
new_largest_index = -1;
unsigned int num_large = 0;
for(i = 0; i <= m_LastLargeHandleIndex; i++)
{
btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
new_largest_index = i;
pBB->fx = proxy0->m_aabbMin.getX();
pBB->fy = proxy0->m_aabbMin.getY();
pBB->fz = proxy0->m_aabbMin.getZ();
pBB->uw = i + m_maxHandles;
pBB++;
pBB->fx = proxy0->m_aabbMax.getX();
pBB->fy = proxy0->m_aabbMax.getY();
pBB->fz = proxy0->m_aabbMax.getZ();
pBB->uw = num_large + m_maxHandles;
pBB++;
num_large++;
}
m_LastLargeHandleIndex = new_largest_index;
// paranoid checks
btAssert(num_small == m_numHandles);
btAssert(num_large == m_numLargeHandles);
return;
}
// CPU version: copies the parameters into the file-static s3DGridBroadphaseParams.
// NOTE(review): because the copy is a single static, all instances in this
// translation unit share it.
void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
{
s3DGridBroadphaseParams = *hostParams;
return;
}
// CPU version: runs btGpu_calcHashAABB on the host arrays, passing m_hAABB,
// m_hBodiesHash and the small-handle count m_numHandles.
void btGpu3DGridBroadphase::calcHashAABB()
{
BT_PROFILE("bt3DGrid_calcHashAABB");
btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
return;
}
// CPU version: sorts the (hash, index) records in m_hBodiesHash by hash value.
// The array is reinterpreted as m_numHandles records of two unsigned ints each.
// The partition comparisons place larger hashes first, i.e. the result is in
// descending hash order.
void btGpu3DGridBroadphase::sortHash()
{
    class bt3DGridHashKey
    {
    public:
        unsigned int hash;
        unsigned int index;
        // In-place recursive quicksort of pData[lo..hi] (inclusive bounds),
        // using the middle element as pivot. Requires lo <= hi.
        void quickSort(bt3DGridHashKey* pData, int lo, int hi)
        {
            int i=lo, j=hi;
            bt3DGridHashKey x = pData[(lo+hi)/2];
            do
            {
                while(pData[i].hash > x.hash) i++;
                while(x.hash > pData[j].hash) j--;
                if(i <= j)
                {
                    bt3DGridHashKey t = pData[i];
                    pData[i] = pData[j];
                    pData[j] = t;
                    i++; j--;
                }
            } while(i <= j);
            if(lo < j) pData->quickSort(pData, lo, j);
            if(i < hi) pData->quickSort(pData, i, hi);
        }
    };
    BT_PROFILE("bt3DGrid_sortHash");
    // Nothing to sort for fewer than two records. With zero records the range
    // would be [0, -1] and quickSort would read in front of the array.
    if(m_numHandles < 2)
    {
        return;
    }
    bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash;
    pHash->quickSort(pHash, 0, m_numHandles - 1);
    return;
}
// CPU version: runs btGpu_findCellStart on the sorted hash array to fill
// m_hCellStart (m_params.m_numCells entries) for m_numHandles bodies.
void btGpu3DGridBroadphase::findCellStart()
{
BT_PROFILE("bt3DGrid_findCellStart");
btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
return;
}
// CPU version of the small/small stage: runs btGpu_findOverlappingPairs on the
// host arrays for m_numHandles bodies.
void btGpu3DGridBroadphase::findOverlappingPairs()
{
BT_PROFILE("bt3DGrid_findOverlappingPairs");
btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
return;
}
// CPU version of the small/large stage: runs btGpu_findPairsLarge on the host
// arrays, passing both the small and the large handle counts.
void btGpu3DGridBroadphase::findPairsLarge()
{
BT_PROFILE("bt3DGrid_findPairsLarge");
btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles, m_numLargeHandles);
return;
}
// CPU version: runs btGpu_computePairCacheChanges, which fills m_hPairScanChanged
// from the pair buffer; the array is turned into offsets by scanOverlappingPairBuff().
void btGpu3DGridBroadphase::computePairCacheChanges()
{
BT_PROFILE("bt3DGrid_computePairCacheChanges");
btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hAABB, m_numHandles);
return;
}
// CPU version: in-place exclusive prefix sum over m_hPairScanChanged[0 .. m_numHandles+1].
// Element 0 is forced to 0 first; afterwards each element holds the sum of all
// counts that preceded it, so the last element holds the grand total.
// copyToCpu is ignored here because the data already lives on the host.
void btGpu3DGridBroadphase::scanOverlappingPairBuff(bool copyToCpu)
{
    BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
    m_hPairScanChanged[0] = 0;

    const int lastSlot = m_numHandles + 1;
    unsigned int runningTotal = 0;
    for (int slot = 0; slot <= lastSlot; ++slot)
    {
        const unsigned int count = m_hPairScanChanged[slot];
        m_hPairScanChanged[slot] = runningTotal;
        runningTotal += count;
    }
}
// CPU version: runs btGpu_squeezeOverlappingPairBuff with m_hAllOverlappingPairs
// (viewed as an unsigned int array) as the output buffer.
// NOTE(review): the commented-out call wrote to m_hPairsChanged, which is the
// array addPairsToCache() reads - confirm which output buffer is intended.
void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
{
BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
//btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, m_hPairsChanged, m_hAABB, m_numHandles);
btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScanChanged, (unsigned int*)m_hAllOverlappingPairs, m_hAABB, m_numHandles);
return;
}
#include "btGpu3DGridBroadphaseSharedCode.h"

View File

@@ -0,0 +1,154 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASE_H
#define BTGPU3DGRIDBROADPHASE_H
//----------------------------------------------------------------------------------------
#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
#include "btGpu3DGridBroadphaseSharedTypes.h"
// Pair of two integers; used as the element type of the overlapping-pair array
// returned by btGpu3DGridBroadphase::getOverlap().
// Note: despite the name, both members are signed ints.
struct MyUint2
{
int x;
int y;
};
//----------------------------------------------------------------------------------------
///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
///Small proxies are managed by btSimpleBroadphase and processed through a uniform
///grid; proxies that are too large for a cell live in a separate "large handle"
///pool and are tested brute-force. The protected virtual stage functions are the
///CPU implementations; GPU back-ends override them.
class btGpu3DGridBroadphase : public btSimpleBroadphase
{
protected:
    bool m_bInitialized;
    unsigned int m_numBodies;
    unsigned int m_numCells;
    unsigned int m_maxPairsPerBody;
    unsigned int m_maxBodiesPerCell;
    bt3DGridBroadphaseParams m_params;
    // proxies whose bounding-sphere radius exceeds this are treated as large
    btScalar m_maxRadius;
    // CPU data
    unsigned int* m_hBodiesHash;          // two unsigned ints (hash, index) per body
    unsigned int* m_hCellStart;           // one entry per grid cell
    unsigned int* m_hPairBuffStartCurr;   // two entries per body (start offset, current)
    bt3DGrid3F1U* m_hAABB;                // (min, max) entry pair per proxy, small first
    unsigned int* m_hPairBuff;            // m_maxPairsPerBody slots per body
    unsigned int* m_hPairScanChanged;     // change counts, later scanned into offsets
    unsigned int* m_hPairsChanged;
    MyUint2* m_hAllOverlappingPairs;
    // large proxies
    int m_numLargeHandles;
    int m_maxLargeHandles;
    int m_LastLargeHandleIndex;
    btSimpleBroadphaseProxy* m_pLargeHandles;
    void* m_pLargeHandlesRawPtr;          // raw allocation backing m_pLargeHandles
    int m_firstFreeLargeHandle;           // head of the free list inside the pool

    // Pops the head of the large-handle free list and returns its index.
    // The caller must make sure the pool is not full.
    int allocLargeHandle()
    {
        btAssert(m_numLargeHandles < m_maxLargeHandles);
        int freeLargeHandle = m_firstFreeLargeHandle;
        m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
        m_numLargeHandles++;
        if(freeLargeHandle > m_LastLargeHandleIndex)
        {
            m_LastLargeHandleIndex = freeLargeHandle;
        }
        return freeLargeHandle;
    }

    // Pushes a large proxy back onto the free list and clears its client object.
    void freeLargeHandle(btSimpleBroadphaseProxy* proxy)
    {
        int handle = int(proxy - m_pLargeHandles);
        // The index refers to the large-handle pool, so it is bounded by the
        // pool's own capacity (the check previously used m_maxHandles).
        btAssert((handle >= 0) && (handle < m_maxLargeHandles));
        if(handle == m_LastLargeHandleIndex)
        {
            m_LastLargeHandleIndex--;
        }
        proxy->SetNextFree(m_firstFreeLargeHandle);
        m_firstFreeLargeHandle = handle;
        proxy->m_clientObject = 0;
        m_numLargeHandles--;
    }

    // Classifies by AABB size (before creation) or by unique id (existing proxy).
    bool isLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax);
    bool isLargeProxy(btBroadphaseProxy* proxy);
    // debug
    unsigned int m_numPairsAdded;
    unsigned int m_numPairsRemoved;
    unsigned int m_numOverflows;
    //
public:
    // Returns the last element of the scanned change array, i.e. the total
    // accumulated by scanOverlappingPairBuff().
    virtual int getNumOverlap()
    {
        return m_hPairScanChanged[m_numHandles+1];
    }
    // Returns the host array filled by squeezeOverlappingPairBuff().
    virtual MyUint2* getOverlap()
    {
        return m_hAllOverlappingPairs;
    }
    // NOTE : for better results gridSizeX, gridSizeY and gridSizeZ should be powers of 2
    // (other values are rounded down to a power of two).
    // This overload creates and owns its pair cache.
    btGpu3DGridBroadphase(const btVector3& cellSize,
                          int gridSizeX, int gridSizeY, int gridSizeZ,
                          int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
                          btScalar maxSmallProxySize,
                          int maxBodiesPerCell = 8);
    // This overload uses the pair cache supplied by the caller.
    btGpu3DGridBroadphase( btOverlappingPairCache* overlappingPairCache,
                           const btVector3& cellSize,
                           int gridSizeX, int gridSizeY, int gridSizeZ,
                           int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
                           btScalar maxSmallProxySize,
                           int maxBodiesPerCell = 8);
    virtual ~btGpu3DGridBroadphase();
    virtual void calculateOverlappingPairs(btDispatcher* dispatcher);
    virtual btBroadphaseProxy* createProxy(const btVector3& aabbMin, const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
    virtual void destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
    virtual void rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
    virtual void resetPool(btDispatcher* dispatcher);
    static int getFloorPowOfTwo(int val); // returns 2^n : 2^(n+1) > val >= 2^n
protected:
    // Allocates / releases all host buffers; called by the constructors / destructor.
    void _initialize( const btVector3& cellSize,
                      int gridSizeX, int gridSizeY, int gridSizeZ,
                      int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
                      btScalar maxSmallProxySize,
                      int maxBodiesPerCell);
    void _finalize();
    // Apply the computed changes to the pair cache.
    void addPairsToCache(btDispatcher* dispatcher);
    void addLarge2LargePairsToCache(btDispatcher* dispatcher);
    // overrides for CPU version
    // (pipeline stages, invoked in this order by calculateOverlappingPairs)
    virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
    virtual void prepareAABB();
    virtual void calcHashAABB();
    virtual void sortHash();
    virtual void findCellStart();
    virtual void findOverlappingPairs();
    virtual void findPairsLarge();
    virtual void computePairCacheChanges();
    virtual void scanOverlappingPairBuff(bool copyToCpu=true);
    virtual void squeezeOverlappingPairBuff();
};
//----------------------------------------------------------------------------------------
#endif //BTGPU3DGRIDBROADPHASE_H
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------

View File

@@ -0,0 +1,428 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// calculate position in uniform grid
// Each coordinate of p is scaled by the inverse cell size, floored to an integer
// cell index and masked with (gridSize - 1). The mask wraps the index into the
// grid when the grid size is a power of two. Only p.x, p.y and p.z are read.
BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
{
int3 gridPos;
gridPos.x = (int)floor(p.x * BT_GPU_params.m_invCellSizeX) & (BT_GPU_params.m_gridSizeX - 1);
gridPos.y = (int)floor(p.y * BT_GPU_params.m_invCellSizeY) & (BT_GPU_params.m_gridSizeY - 1);
gridPos.z = (int)floor(p.z * BT_GPU_params.m_invCellSizeZ) & (BT_GPU_params.m_gridSizeZ - 1);
return gridPos;
} // bt3DGrid_calcGridPos()
//----------------------------------------------------------------------------------------
// calculate address in grid from position (wrapping at the edges)
// Each coordinate is masked with (gridSize - 1), which wraps rather than clamps,
// and the three indices are combined into the linear cell index
// (z * gridSizeY * gridSizeX) + (y * gridSizeX) + x.
BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
{
gridPos.x &= (BT_GPU_params.m_gridSizeX - 1);
gridPos.y &= (BT_GPU_params.m_gridSizeY - 1);
gridPos.z &= (BT_GPU_params.m_gridSizeZ - 1);
return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
} // bt3DGrid_calcGridHash()
//----------------------------------------------------------------------------------------
// Kernel, one thread per body: hash the centre of the body's AABB into the grid.
// pAABB stores two entries per body (min at 2*i, max at 2*i + 1).
// pHash[i] receives the pair (cell hash, body index).
BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
{
	const int bodyIdx = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(bodyIdx < (int)numBodies)
	{
		const bt3DGrid3F1U lo = pAABB[bodyIdx*2];
		const bt3DGrid3F1U hi = pAABB[bodyIdx*2 + 1];
		// centre of the box; w stays unset because the grid lookup only reads x, y, z
		float4 centre;
		centre.x = (lo.fx + hi.fx) * 0.5f;
		centre.y = (lo.fy + hi.fy) * 0.5f;
		centre.z = (lo.fz + hi.fz) * 0.5f;
		// cell coordinates -> linear cell address
		const int3 cell = bt3DGrid_calcGridPos(centre);
		const uint cellHash = bt3DGrid_calcGridHash(cell);
		// remember which body produced this hash
		pHash[bodyIdx] = BT_GPU_make_uint2(cellHash, bodyIdx);
	}
} // calcHashAABBD()
//----------------------------------------------------------------------------------------
// Kernel, one thread per pHash entry: record in cellStart[hash] the index of the
// first pHash entry that carries that hash.
//   pHash      - (hash, body index) pairs; presumably already sorted by hash before
//                this kernel runs -- TODO confirm against the caller
//   cellStart  - output, indexed by cell hash; only cells that own at least one
//                entry are written here
//   numBodies  - number of valid entries in pHash
BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
// NOTE(review): threads past the end return before reaching the barrier below
if(index >= (int)numBodies)
{
return;
}
uint2 sortedData = pHash[index];
// Load hash data into shared memory so that we can look
// at neighboring body's hash value without loading
// two hash values per thread
// Slot [t+1] holds the hash of thread t, slot [0] the hash of the entry just before
// this block; 257 = 256 threads (the block size requested by the host wrapper) + 1.
BT_GPU___shared__ uint sharedHash[257];
sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
if((index > 0) && (BT_GPU_threadIdx.x == 0))
{
// first thread in block must load neighbor body hash
volatile uint2 prevData = pHash[index-1];
sharedHash[0] = prevData.x;
}
BT_GPU___syncthreads();
// sharedHash[threadIdx.x] is the hash of the previous entry: a difference (or being
// the very first entry) means this entry starts a new cell
if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x]))
{
cellStart[sortedData.x] = index;
}
} // findCellStartD()
//----------------------------------------------------------------------------------------
// Axis-aligned box overlap test on the fx/fy/fz members of the min/max corners.
// Returns non-zero when the two boxes overlap (touching counts as overlap) on all
// three axes, zero otherwise.
BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
{
	const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
	const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
	const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
	return overlapX && overlapY && overlapZ;
} // cudaTestAABBOverlap()
//----------------------------------------------------------------------------------------
// Test one body against all bodies stored in one grid cell and update that body's
// section of the pair buffer.
//   gridPos             - cell to examine (wrapped by bt3DGrid_calcGridHash)
//   index               - position of the current body in pHash
//   pHash               - (cell hash, body index) pairs
//   pCellStart          - first pHash index of every cell, 0xffffffff for an empty cell
//   pAABB               - two entries per body: min at 2*i, max at 2*i + 1; the uw
//                         member of the min entry is used as the body's handle index
//   pPairBuff           - per-handle lists of partner handle indices plus flag bits
//   pPairBuffStartCurr  - per handle: x = start of its list in pPairBuff, y = number
//                         of entries currently used
//   numBodies           - number of entries in pHash
BT_GPU___device__ void findPairsInCell(	int3 gridPos,
uint index,
uint2* pHash,
uint* pCellStart,
bt3DGrid3F1U* pAABB,
uint* pPairBuff,
uint2* pPairBuffStartCurr,
uint numBodies)
{
uint gridHash = bt3DGrid_calcGridHash(gridPos);
// get start of bucket for this cell
uint bucketStart = pCellStart[gridHash];
if (bucketStart == 0xffffffff)
{
return; // cell empty
}
// iterate over bodies in this cell
uint2 sortedData = pHash[index];
uint unsorted_indx = sortedData.y;
bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
uint handleIndex = min0.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
// the list of this handle ends where the list of the next handle starts
uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
uint curr_max = start_curr_next.x - start - 1;
// never scan more than m_maxBodiesPerCell entries and never run past the end of pHash
uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
for(uint index2 = bucketStart; index2 < bucketEnd; index2++)
{
uint2 cellData = pHash[index2];
if (cellData.x != gridHash)
{
break; // no longer in same bucket
}
uint unsorted_indx2 = cellData.y;
// only bodies with a smaller index are tested: this skips the body itself and
// makes each pair get handled from one side only
if (unsorted_indx2 < unsorted_indx) // check not colliding with self
{
bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
if(cudaTestAABBOverlap(min0, max0, min1, max1))
{
uint handleIndex2 = min1.uw;
uint k;
// look for the partner among the pairs already stored for this handle
for(k = 0; k < curr; k++)
{
uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
if(old_pair == handleIndex2)
{
pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
break;
}
}
// k == curr means the search above did not find the partner: append it as new
if(k == curr)
{
if(curr >= curr_max)
{ // not a good solution, but let's avoid crash
break;
}
pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
curr++;
}
}
}
}
// write back the (possibly grown) entry count of this handle
pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
return;
} // findPairsInCell()
//----------------------------------------------------------------------------------------
// Kernel, one thread per pHash entry: locate the grid cell containing the centre of
// the body's AABB and run findPairsInCell() on that cell and its 26 neighbours.
// Cells are visited with z as the outermost and x as the innermost offset.
BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart,
												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
{
	const int sortedIdx = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(sortedIdx >= (int)numBodies)
	{
		return;
	}
	// pHash[].y is the index of the body inside pAABB
	const uint bodyIdx = pHash[sortedIdx].y;
	const bt3DGrid3F1U lo = BT_GPU_FETCH(pAABB, bodyIdx*2);
	const bt3DGrid3F1U hi = BT_GPU_FETCH(pAABB, bodyIdx*2 + 1);
	// box centre; w is not needed by the grid lookup
	float4 centre;
	centre.x = (lo.fx + hi.fx) * 0.5f;
	centre.y = (lo.fy + hi.fy) * 0.5f;
	centre.z = (lo.fz + hi.fz) * 0.5f;
	const int3 homeCell = bt3DGrid_calcGridPos(centre);
	// walk the 3x3x3 block of cells around the home cell
	for(int dz = -1; dz <= 1; dz++)
	{
		for(int dy = -1; dy <= 1; dy++)
		{
			for(int dx = -1; dx <= 1; dx++)
			{
				findPairsInCell(homeCell + BT_GPU_make_int3(dx, dy, dz), sortedIdx, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
			}
		}
	}
} // findOverlappingPairsD()
//----------------------------------------------------------------------------------------
// Kernel, one thread per pHash entry: test the body against every "large" body and
// update the body's section of the pair buffer.
//   pAABB               - two entries per body (min at 2*i, max at 2*i + 1); the AABBs of
//                         the large bodies follow the numBodies regular ones
//   pHash               - (cell hash, body index) pairs
//   pCellStart          - not used by this kernel
//   pPairBuff           - per-handle lists of partner handle indices plus flag bits
//   pPairBuffStartCurr  - per handle: x = start of its list, y = entries in use
//   numBodies           - number of regular bodies (entries in pHash)
//   numLarge            - number of large bodies
BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
	int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(index >= (int)numBodies)
	{
		return;
	}
	uint2 sortedData = pHash[index];
	uint unsorted_indx = sortedData.y;
	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
	uint handleIndex = min0.uw;
	uint2 start_curr = pPairBuffStartCurr[handleIndex];
	uint start = start_curr.x;
	uint curr = start_curr.y;
	// the list of this handle ends where the list of the next handle starts
	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
	uint curr_max = start_curr_next.x - start - 1;
	for(uint i = 0; i < numLarge; i++)
	{
		uint indx2 = numBodies + i;
		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
		if(cudaTestAABBOverlap(min0, max0, min1, max1))
		{
			uint k;
			uint handleIndex2 = min1.uw;
			// look for the partner among the pairs already stored for this handle
			for(k = 0; k < curr; k++)
			{
				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
				if(old_pair == handleIndex2)
				{
					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
					break;
				}
			}
			// k == curr means the partner is not stored yet: append it as new
			if(k == curr)
			{
				// check the capacity BEFORE storing (same order as findPairsInCell), so a
				// full list is never written beyond its counted entries
				if(curr >= curr_max)
				{ // not a good solution, but let's avoid crash
					break;
				}
				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
				curr++;
			}
		}
	}
	// write back the (possibly grown) entry count of this handle
	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
	return;
} // findPairsLargeD()
//----------------------------------------------------------------------------------------
// Kernel, one thread per body: count the entries in the body's pair list that carry
// at least one of the FOUND / NEW flag bits and store the count in pPairScan[i + 1].
// The handle index of body i is the uw member of its min AABB entry (pAABB[2*i]).
BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
{
	const int bodyIdx = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
	if(bodyIdx >= (int)numBodies)
	{
		return;
	}
	const uint handle = pAABB[bodyIdx * 2].uw;
	// x = first entry of this handle's list, y = number of entries in use
	const uint2 listRange = pPairBuffStartCurr[handle];
	const uint* pList = pPairBuff + listRange.x;
	uint flagged = 0;
	for(uint k = 0; k < listRange.y; k++)
	{
		if(pList[k] & BT_3DGRID_PAIR_ANY_FLG)
		{
			flagged++;
		}
	}
	pPairScan[bodyIdx+1] = flagged;
} // computePairCacheChangesD()
//----------------------------------------------------------------------------------------
// Kernel, one thread per body: walk the body's pair list once and
//   (1) copy every flagged entry (FOUND and/or NEW bit set) to pPairOut as
//       (this handle, partner handle), starting at offset pPairScan[index + 1];
//   (2) compact the list in place so it keeps only the flagged entries, with the
//       flag bits cleared, and store the new entry count.
// Entries without any flag are dropped from the list.
// NOTE(review): pPairScan is presumably turned into output offsets (prefix sum) by
// the host between computePairCacheChangesD and this kernel -- confirm.
BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
uint2* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
{
int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
if(index >= (int)numBodies)
{
return;
}
// the uw member of the min AABB entry is the handle index of this body
bt3DGrid3F1U bbMin = pAABB[index * 2];
uint handleIndex = bbMin.uw;
uint2 start_curr = pPairBuffStartCurr[handleIndex];
uint start = start_curr.x;
uint curr = start_curr.y;
// pInp reads the list, pOut2 rewrites the same list from its beginning;
// pOut2 never gets ahead of pInp
uint* pInp = pPairBuff + start;
uint2* pOut = pPairOut + pPairScan[index+1];
uint* pOut2 = pInp;
uint num = 0;
for(uint k = 0; k < curr; k++, pInp++)
{
// (1) report the flagged pair
if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
//if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
{
pOut->x = handleIndex;
pOut->y = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
pOut++;
}
// (2) keep the flagged pair in the compacted list, flags removed
if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
{
*pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
pOut2++;
num++;
}
}
// num = number of entries that survived the compaction
pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);
} // squeezeOverlappingPairBuffD()
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
// E N D O F K E R N E L F U N C T I O N S
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
extern "C"
{
//----------------------------------------------------------------------------------------
// Host-side launcher for calcHashAABBD: one thread per body, blocks of up to 256 threads.
// 'hash' is handed to the kernel as an array of uint2 (cell hash, body index).
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	uint2* pHashPairs = (uint2*)hash;
	// run the kernel, then let the back-end report a failed launch
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, calcHashAABBD, (pAABB, pHashPairs, numBodies));
	BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
} // calcHashAABB()
//----------------------------------------------------------------------------------------
// Host-side launcher for findCellStartD. All numCells entries of cellStart are first
// filled with 0xff bytes (0xffffffff per entry, the "empty cell" marker), then the
// kernel records the first hash entry of every occupied cell.
void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
{
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	// mark every cell as empty
	BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
	uint2* pHashPairs = (uint2*)hash;
	uint* pCellStart = (uint*)cellStart;
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findCellStartD, (pHashPairs, pCellStart, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
} // findCellStart()
//----------------------------------------------------------------------------------------
// Host-side launcher for findOverlappingPairsD: one thread per body, blocks of up to
// 64 threads. When B_CUDA_USE_TEX is enabled the AABB array (2 entries per body) is
// bound to the texture pAABBTex for the duration of the launch.
void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies))
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findOverlappingPairsD,
		(pAABB, (uint2*)pHash, (uint*)pCellStart, (uint*)pPairBuff, (uint2*)pPairBuffStartCurr, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findOverlappingPairs()
//----------------------------------------------------------------------------------------
// Host-side launcher for findPairsLargeD: one thread per regular body, blocks of up
// to 64 threads. When B_CUDA_USE_TEX is enabled the AABBs of the regular and the
// large bodies (2 entries each) are bound to the texture pAABBTex for the launch.
void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
{
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
#endif
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findPairsLargeD,
		(pAABB, (uint2*)pHash, (uint*)pCellStart, (uint*)pPairBuff, (uint2*)pPairBuffStartCurr, numBodies, numLarge));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
	BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findPairsLarge()
//----------------------------------------------------------------------------------------
// Host-side launcher for computePairCacheChangesD: one thread per body, blocks of up
// to 256 threads.
void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
{
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, computePairCacheChangesD,
		((uint*)pPairBuff, (uint2*)pPairBuffStartCurr, (uint*)pPairScan, pAABB, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // computePairCacheChanges()
//----------------------------------------------------------------------------------------
// Host-side launcher for squeezeOverlappingPairBuffD: one thread per body, blocks of
// up to 256 threads. pPairOut is handed to the kernel as an array of uint2 pairs.
void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
{
	int blockCount, threadsPerBlock;
	BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
	BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, squeezeOverlappingPairBuffD,
		((uint*)pPairBuff, (uint2*)pPairBuffStartCurr, (uint*)pPairScan, (uint2*)pPairOut, pAABB, numBodies));
	BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // squeezeOverlappingPairBuff()
//------------------------------------------------------------------------------------------------
} // extern "C"
//------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------

View File

@@ -0,0 +1,61 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared definitions for GPU-based 3D Grid collision detection broadphase
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
//----------------------------------------------------------------------------------------
#include "btGpu3DGridBroadphaseSharedTypes.h"
//----------------------------------------------------------------------------------------
// Host-callable entry points of the 3D grid broadphase. BT_GPU_PREF() adds the
// back-end specific name prefix. All buffers are passed as plain unsigned int
// pointers; the implementations cast them to uint2 where pairs are stored.
extern "C"
{
//----------------------------------------------------------------------------------------
// Compute (cell hash, body index) for every body from the centre of its AABB.
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies);
// Reset cellStart to 0xffffffff and record the first hash entry of every occupied cell.
void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
// Test every body against the bodies in its own and the 26 neighbouring cells.
void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies);
// Test every body against the numLarge bodies whose AABBs follow the regular ones in pAABB.
void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
// Count, per body, the pair entries that carry a FOUND or NEW flag (result in pPairScan[i + 1]).
void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
// Write the flagged pairs to pPairOut and compact every pair list in place.
void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
//----------------------------------------------------------------------------------------
} // extern "C"
//----------------------------------------------------------------------------------------
#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H

View File

@@ -0,0 +1,64 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared definitions for GPU-based 3D Grid collision detection broadphase
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
//----------------------------------------------------------------------------------------
// Flag bits stored in the upper part of a pair buffer entry, next to the partner's handle index.
// FOUND: an already stored pair was detected again by the current search.
#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000)
// NEW: the pair was appended by the current search.
#define BT_3DGRID_PAIR_NEW_FLG (0x20000000)
// Mask of both flags; clearing it from an entry leaves the plain handle index.
#define BT_3DGRID_PAIR_ANY_FLG (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)
//----------------------------------------------------------------------------------------
// Grid parameters shared by host and kernels (accessed through BT_GPU_params).
struct bt3DGridBroadphaseParams
{
// number of cells per axis; the kernels mask coordinates with (size - 1),
// which presumably requires power-of-two sizes -- TODO confirm
unsigned int m_gridSizeX;
unsigned int m_gridSizeY;
unsigned int m_gridSizeZ;
// presumably m_gridSizeX * m_gridSizeY * m_gridSizeZ -- not read by the shared kernels
unsigned int m_numCells;
// 1 / cell size per axis; positions are multiplied by these to get cell coordinates
float m_invCellSizeX;
float m_invCellSizeY;
float m_invCellSizeZ;
// not read by the shared kernels (they receive numBodies as an argument)
unsigned int m_numBodies;
// upper bound on the number of hash entries scanned per cell when searching pairs
unsigned int m_maxBodiesPerCell;
};
//----------------------------------------------------------------------------------------
// Three floats plus one unsigned int. The broadphase stores every AABB as two of
// these (min corner, then max corner); the kernels read uw of the min corner as the
// handle index of the body.
struct bt3DGrid3F1U
{
float fx;
float fy;
float fz;
unsigned int uw;
};
//----------------------------------------------------------------------------------------
#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H

View File

@@ -0,0 +1,211 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// definitions for "GPU on CPU" code
#ifndef BT_GPU_DEFINES_H
#define BT_GPU_DEFINES_H
// Plain C++ stand-ins for the GPU vector types, used when the shared "GPU" code is
// compiled for the CPU.
typedef unsigned int uint;
// two signed integers
struct int2
{
int x, y;
};
// two unsigned integers
struct uint2
{
unsigned int x, y;
};
// three signed integers
struct int3
{
int x, y, z;
};
// three unsigned integers
struct uint3
{
unsigned int x, y, z;
};
// four floats
struct float4
{
float x, y, z, w;
};
// three floats
struct float3
{
float x, y, z;
};
// CPU mapping of the GPU qualifiers and intrinsics used by the shared code.
// device functions become ordinary inline functions
#define BT_GPU___device__ inline
// device-data and constant qualifiers expand to nothing
#define BT_GPU___devdata__
#define BT_GPU___constant__
// NOTE: function-like macros, each argument may be evaluated twice
#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b))
#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b))
// name of the parameter block read by the kernels; it is declared outside this header
#define BT_GPU_params s3DGridBroadphaseParams
// 24-bit multiply becomes a plain multiplication
#define BT_GPU___mul24(a, b) ((a)*(b))
// kernels become ordinary inline functions
#define BT_GPU___global__ inline
// shared memory becomes a function-local static
#define BT_GPU___shared__ static
// no barrier is needed: the emulated threads run one after another
#define BT_GPU___syncthreads()
// SIMD_PI is not defined in this header -- presumably supplied by the including code
#define CUDART_PI_F SIMD_PI
// Constructor helpers for the vector stand-in types. Each BT_GPU_make_* macro maps
// the name used by the shared code onto the matching helper.

// uint2 from two components
static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
{
	uint2 result = { x, y };
	return result;
}
#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
// int3 from three components
static inline int3 bt3dGrid_make_int3(int x, int y, int z)
{
	int3 result = { x, y, z };
	return result;
}
#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
// float3 from three components
static inline float3 bt3dGrid_make_float3(float x, float y, float z)
{
	float3 result = { x, y, z };
	return result;
}
#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
// float3 from the x, y, z of a float4 (w is dropped)
static inline float3 bt3dGrid_make_float34(float4 f)
{
	float3 result = { f.x, f.y, f.z };
	return result;
}
#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
// float3 with all three components set to the same value
static inline float3 bt3dGrid_make_float31(float f)
{
	float3 result = { f, f, f };
	return result;
}
#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
// float4 from a float3 (x, y, z) and a separate w
static inline float4 bt3dGrid_make_float42(float3 v, float f)
{
	float4 result = { v.x, v.y, v.z, f };
	return result;
}
#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b)
// float4 from four components
static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
{
	float4 result = { a, b, c, d };
	return result;
}
#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d)
// Component-wise sum of two int3 values.
inline int3 operator+(int3 a, int3 b)
{
	return bt3dGrid_make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}
// Component-wise sum of two float4 values (w included).
inline float4 operator+(const float4& a, const float4& b)
{
	float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r;
}
// Scale all four components by fact.
inline float4 operator*(const float4& a, float fact)
{
	float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r;
}
// Scalar on the left-hand side. The vector is taken by const reference (it is only
// read), so const values and temporaries such as 2.0f * (a + b) are accepted too.
inline float4 operator*(float fact, const float4& a)
{
	return (a * fact);
}
// Scale a in place and return it.
inline float4& operator*=(float4& a, float fact)
{
	a = fact * a;
	return a;
}
// Add b to a in place and return a.
inline float4& operator+=(float4& a, const float4& b)
{
	a = a + b;
	return a;
}
inline float3 operator+(const float3& a, const float3& b)
{
float3 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; return r;
}
inline float3 operator-(const float3& a, const float3& b)
{
float3 r; r.x = a.x-b.x; r.y = a.y-b.y; r.z = a.z-b.z; return r;
}
static inline float bt3dGrid_dot(float3& a, float3& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z;
}
#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
static inline float bt3dGrid_dot4(float4& a, float4& b)
{
return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
}
#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
{
float3 r; r.x = a.y*b.z-a.z*b.y; r.y = -a.x*b.z+a.z*b.x; r.z = a.x*b.y-a.y*b.x; return r;
}
#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
inline float3 operator*(const float3& a, float fact)
{
float3 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; return r;
}
inline float3& operator+=(float3& a, const float3& b)
{
a = a + b;
return a;
}
inline float3& operator-=(float3& a, const float3& b)
{
a = a - b;
return a;
}
inline float3& operator*=(float3& a, float fact)
{
a = a * fact;
return a;
}
inline float3 operator-(const float3& v)
{
float3 r; r.x = -v.x; r.y = -v.y; r.z = -v.z; return r;
}
// "fetch" is a plain array read on the CPU
#define BT_GPU_FETCH(a, b) a[b]
#define BT_GPU_FETCH4(a, b) a[b]
// prefix given to every exported function of the CPU back-end
#define BT_GPU_PREF(func) btGpu_##func
// no error wrapping: the call is emitted as is
#define BT_GPU_SAFE_CALL(func) func
// device memory operations map onto the C library
#define BT_GPU_Memset memset
#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)
// texture binding expands to nothing
#define BT_GPU_BindTexture(a, b, c, d)
#define BT_GPU_UnbindTexture(a)
// emulated launch state; 'static' in a header, so every translation unit that
// includes this file gets its own copy
static uint2 s_blockIdx, s_blockDim, s_threadIdx;
#define BT_GPU_blockIdx s_blockIdx
#define BT_GPU_blockDim s_blockDim
#define BT_GPU_threadIdx s_threadIdx
// Emulated kernel launch: call kfunc once per (block, thread) combination, blocks in
// the outer loop and threads in the inner loop, setting the emulated indices before
// each call. 'args' is the parenthesised argument list of the kernel.
#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}
// error checking expands to nothing
#define BT_GPU_CHECK_ERROR(s)
#endif //BT_GPU_DEFINES_H

View File

@@ -0,0 +1,55 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2009 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//----------------------------------------------------------------------------------------
// Shared code for GPU-based utilities
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// will be compiled by both CPU and CUDA compilers
// file with definitions of BT_GPU_xxx should be included first
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//----------------------------------------------------------------------------------------
#include "btGpuUtilsSharedDefs.h"
//----------------------------------------------------------------------------------------
extern "C"
{
//----------------------------------------------------------------------------------------
//Divide a by b and add one to the quotient whenever the division leaves a remainder
//(b must not be zero)
int BT_GPU_PREF(iDivUp)(int a, int b)
{
	int quotient = a / b;
	if(a % b != 0)
	{
		quotient++;
	}
	return quotient;
} // iDivUp()
//----------------------------------------------------------------------------------------
// compute grid and thread block size for a given number of elements
//   n          - number of elements (one thread per element)
//   blockSize  - maximum number of threads per block
//   numBlocks  - out: number of blocks to launch
//   numThreads - out: threads per block, min(blockSize, n)
// When there is nothing to launch (n <= 0 or blockSize <= 0) both outputs are set to
// zero. Without this guard numThreads became 0 for n == 0 and iDivUp divided by zero.
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
{
	if((n <= 0) || (blockSize <= 0))
	{
		numThreads = 0;
		numBlocks = 0;
		return;
	}
	numThreads = BT_GPU_min(blockSize, n);
	numBlocks = BT_GPU_PREF(iDivUp)(n, numThreads);
} // computeGridSize()
//----------------------------------------------------------------------------------------
} // extern "C"

View File

@@ -0,0 +1,52 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// Shared definitions for GPU-based utilities
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Keep this file free from Bullet headers
// it is included into both CUDA and CPU code
// file with definitions of BT_GPU_xxx should be included first
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifndef BTGPUUTILSDHAREDDEFS_H
#define BTGPUUTILSDHAREDDEFS_H
// Host-callable utility functions. BT_GPU_PREF() adds the back-end specific name prefix.
extern "C"
{
//Round a / b to nearest higher integer value
int BT_GPU_PREF(iDivUp)(int a, int b);
// compute grid and thread block size for a given number of elements
void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
// The functions below are only declared here; going by their names they manage
// device arrays and GL buffer objects -- see the back-end implementation for details.
void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
void BT_GPU_PREF(freeArray)(void* devPtr);
void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
} // extern "C"
#endif // BTGPUUTILSDHAREDDEFS_H

View File

@@ -0,0 +1,5 @@
-- Per-vendor build scripts. Only the AMD one is active; the Intel and NVIDIA
-- ones are currently switched off.
include("AMD")
-- include("Intel")
-- include("NVIDIA")

View File

@@ -0,0 +1,23 @@
-- Console test program for OpenCL initialization, AMD flavour.
-- The project is only generated when findOpenCL_AMD() succeeds.
hasCL = findOpenCL_AMD()

if hasCL then
	project("OpenCL_intialize_AMD")

	-- defines, include/library paths and the OpenCL link dependency
	initOpenCL_AMD()

	language("C++")
	kind("ConsoleApp")
	targetdir("../../../bin")

	-- includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h",
	})
end

View File

@@ -0,0 +1,23 @@
-- Console test program for OpenCL initialization, Intel flavour.
-- The project is only generated when findOpenCL_Intel() succeeds.
hasCL = findOpenCL_Intel()

if hasCL then
	project("OpenCL_intialize_Intel")

	-- vendor specific build settings
	initOpenCL_Intel()

	language("C++")
	kind("ConsoleApp")
	targetdir("../../../bin")

	-- includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h",
	})
end

View File

@@ -0,0 +1,23 @@
-- Console test program for OpenCL initialization, NVIDIA flavour.
-- The project is only generated when findOpenCL_NVIDIA() succeeds.
hasCL = findOpenCL_NVIDIA()

if hasCL then
	project("OpenCL_intialize_NVIDIA")

	-- vendor specific build settings
	initOpenCL_NVIDIA()

	language("C++")
	kind("ConsoleApp")
	targetdir("../../../bin")

	-- includedirs {"..","../../../../include/gpu_research"}

	files({
		"../main.cpp",
		"../btOpenCLUtils.cpp",
		"../btOpenCLUtils.h",
	})
end

View File

@@ -0,0 +1,43 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#ifndef BT_OPENCL_INCLUDE_H
#define BT_OPENCL_INCLUDE_H
#ifdef __APPLE__
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <OpenCL/cl.h>
#endif
#else
#ifdef USE_MINICL
#include <MiniCL/cl.h>
#else
#include <CL/cl.h>
#ifdef _WIN32
#include "CL/cl_gl.h"
#endif //_WIN32
#endif
#endif //__APPLE__
#include <assert.h>
#include <stdio.h>
// Compare an OpenCL status 'a' with the expected value 'b'; on mismatch print 'a'
// and trigger an assert (which is a no-op when NDEBUG is defined).
// NOTE(review): expands to a bare if-block and evaluates 'a' up to three times, so
// pass a plain variable and avoid using it as the body of an unbraced if/else.
#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); }
#endif //BT_OPENCL_INCLUDE_H

View File

@@ -0,0 +1,731 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//original author: Roman Ponomarev
//cleanup by Erwin Coumans
#include <string.h>
#include "btOpenCLUtils.h"
#include <stdio.h>
#include <stdlib.h>
#define BT_MAX_CL_DEVICES 16 //who needs 16 devices?
#ifdef _WIN32
#include <Windows.h>
#include <assert.h>
#define btAssert assert
#endif
// Vendor string of the OpenCL SDK this file was compiled against, selected by
// the CL_PLATFORM_* define supplied by the build scripts. It is compared
// against CL_PLATFORM_VENDOR to pick a preferred platform at run time.
// Declared const because it points at a string literal.
static const char* spPlatformVendor =
#if defined(CL_PLATFORM_MINI_CL)
"MiniCL, SCEA";
#elif defined(CL_PLATFORM_AMD)
"Advanced Micro Devices, Inc.";
#elif defined(CL_PLATFORM_NVIDIA)
"NVIDIA Corporation";
#elif defined(CL_PLATFORM_INTEL)
"Intel(R) Corporation";
#else
"Unknown Vendor";
#endif
#ifndef CL_PLATFORM_MINI_CL
#ifdef _WIN32
#include "CL/cl_gl.h"
#endif //_WIN32
#endif
/// Returns the number of OpenCL platforms reported by clGetPlatformIDs.
/// @param pErrNum optional; written only when the query fails.
/// @return the platform count (0 when the query did not fill it in).
int btOpenCLUtils::getNumPlatforms(cl_int* pErrNum)
{
	cl_uint platformCount = 0;
	const cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
	const bool failed = (status != CL_SUCCESS);
	if (failed && pErrNum != NULL)
	{
		*pErrNum = status;
	}
	return platformCount;
}
/// Returns the vendor string of the OpenCL SDK selected at compile time
/// (see spPlatformVendor).
const char* btOpenCLUtils::getSdkVendorName()
{
	const char* sdkVendor = spPlatformVendor;
	return sdkVendor;
}
/// Returns the platform with the given index, or 0 when the index is out of
/// range or the platform list cannot be queried.
/// @param platformIndex index in the range [0..getNumPlatforms)
/// @param pErrNum optional; written only when an OpenCL call fails.
cl_platform_id btOpenCLUtils::getPlatform(int platformIndex, cl_int* pErrNum)
{
	cl_platform_id platform = 0;
	cl_uint numPlatforms = 0;
	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
	if (ciErrNum != CL_SUCCESS)
	{
		// without a valid count there is nothing to index into
		if (pErrNum != NULL)
			*pErrNum = ciErrNum;
		return platform;
	}
	if (platformIndex >= 0 && (cl_uint)platformIndex < numPlatforms)
	{
		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
		ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if (ciErrNum == CL_SUCCESS)
		{
			platform = platforms[platformIndex];
		}
		else if (pErrNum != NULL)
		{
			*pErrNum = ciErrNum;
		}
		// released on both the success and the failure path
		delete[] platforms;
	}
	return platform;
}
/// Fills platformInfo with the vendor, name and version strings of the given
/// platform. Every query is checked with oclCHECKERROR.
void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo& platformInfo)
{
	// the three string queries, in the same order as before: vendor, name, version
	const cl_platform_info queries[3] = { CL_PLATFORM_VENDOR, CL_PLATFORM_NAME, CL_PLATFORM_VERSION };
	char* const targets[3] = { platformInfo.m_platformVendor, platformInfo.m_platformName, platformInfo.m_platformVersion };
	for (int q = 0; q < 3; ++q)
	{
		cl_int status = clGetPlatformInfo(platform, queries[q], BT_MAX_STRING_LENGTH, targets[q], NULL);
		oclCHECKERROR(status,CL_SUCCESS);
	}
}
/// Creates an OpenCL context on the given platform.
/// @param platform platform to use; when NULL no context properties are passed
/// @param deviceType device type filter handed to clGetDeviceIDs
/// @param pErrNum optional; receives the last OpenCL status code
/// @param pGLContext,pGLDC optional GL context / device context for CL-GL sharing
/// @param preferredDeviceIndex when valid, a context is created for that device only;
///        otherwise the context contains all devices that were found
/// @param preferredPlatformIndex not used by this function
/// @return the new context, or 0 on failure
cl_context btOpenCLUtils::createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
{
	cl_context retContext = 0;
	cl_int ciErrNum=0;
	/*
	* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
	* implementation thinks we should be using.
	*/
	cl_context_properties cps[7] = {0,0,0,0,0,0,0};
	cps[0] = CL_CONTEXT_PLATFORM;
	cps[1] = (cl_context_properties)platform;
	if (pGLContext && pGLDC)
	{
		cps[2] = CL_GL_CONTEXT_KHR;
		cps[3] = (cl_context_properties)pGLContext;
		cps[4] = CL_WGL_HDC_KHR;
		cps[5] = (cl_context_properties)pGLDC;
	}
	cl_device_id devices[BT_MAX_CL_DEVICES];
	cl_uint num_devices = 0;
	ciErrNum = clGetDeviceIDs(
		platform,
		deviceType,
		BT_MAX_CL_DEVICES,
		devices,
		&num_devices);
	if (ciErrNum == CL_SUCCESS && num_devices == 0)
	{
		// report "nothing usable" as an error instead of returning success with no context
		ciErrNum = CL_DEVICE_NOT_FOUND;
	}
	if (ciErrNum != CL_SUCCESS)
	{
		// devices[] holds no valid entries, so do not try to create a context from it
		if (pErrNum != NULL)
			*pErrNum = ciErrNum;
		return 0;
	}
	// num_devices is the number of matching devices, which may exceed the array size
	if (num_devices > BT_MAX_CL_DEVICES)
		num_devices = BT_MAX_CL_DEVICES;
	cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
	if (pGLContext)
	{
		//search for the GPU that relates to the OpenGL context: take the first device
		//for which a sharing context can be created
		for (cl_uint i=0;i<num_devices;i++)
		{
			retContext = clCreateContext(cprops,1,&devices[i],NULL,NULL,&ciErrNum);
			if (ciErrNum==CL_SUCCESS)
				break;
		}
	}
	else
	{
		if (preferredDeviceIndex>=0 && (cl_uint)preferredDeviceIndex<num_devices)
		{
			//create a context of the preferred device index
			retContext = clCreateContext(cprops,1,&devices[preferredDeviceIndex],NULL,NULL,&ciErrNum);
		} else
		{
			//create a context of all devices
			retContext = clCreateContext(cprops,num_devices,devices,NULL,NULL,&ciErrNum);
		}
	}
	if(pErrNum != NULL)
	{
		*pErrNum = ciErrNum;
	}
	return retContext;
}
/// Creates an OpenCL context for the given device type, trying the available
/// platforms one after another until one yields a context.
/// The platform tried first is the one at preferredPlatformIndex when that
/// index is valid; otherwise it is the first platform whose vendor string
/// matches the SDK this file was compiled against (spPlatformVendor).
/// @param pErrNum optional; receives the status of the failing OpenCL call, or the
///        status reported by createContextFromPlatform
/// @return the new context, or 0 when no platform produced one
cl_context btOpenCLUtils::createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC , int preferredDeviceIndex, int preferredPlatformIndex)
{
	cl_uint numPlatforms = 0;
	cl_context retContext = 0;
	cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
	if(ciErrNum != CL_SUCCESS)
	{
		if(pErrNum != NULL) *pErrNum = ciErrNum;
		return NULL;
	}
	if(numPlatforms > 0)
	{
		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
		ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if(ciErrNum != CL_SUCCESS)
		{
			delete[] platforms;
			if(pErrNum != NULL) *pErrNum = ciErrNum;
			return NULL;
		}
		// decide which platform gets tried first
		cl_uint firstChoice = 0;
		if (preferredPlatformIndex>=0 && (cl_uint)preferredPlatformIndex<numPlatforms)
		{
			// an explicit, valid index always wins over the vendor match
			firstChoice = (cl_uint)preferredPlatformIndex;
		} else
		{
			for (cl_uint i = 0; i < numPlatforms; ++i)
			{
				char pbuf[128];
				ciErrNum = clGetPlatformInfo( platforms[i],
					CL_PLATFORM_VENDOR,
					sizeof(pbuf),
					pbuf,
					NULL);
				if(ciErrNum != CL_SUCCESS)
				{
					delete[] platforms;
					if(pErrNum != NULL) *pErrNum = ciErrNum;
					return NULL;
				}
				if(!strcmp(pbuf, spPlatformVendor))
				{
					firstChoice = i;
					break;
				}
			}
		}
		// move the chosen platform to the front; the others keep their slots
		cl_platform_id tmpPlatform = platforms[0];
		platforms[0] = platforms[firstChoice];
		platforms[firstChoice] = tmpPlatform;
		for (cl_uint i = 0; i < numPlatforms; ++i)
		{
			cl_platform_id platform = platforms[i];
			assert(platform);
			retContext = btOpenCLUtils::createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex);
			if (retContext)
			{
				// printf("OpenCL platform details:\n");
				btOpenCLPlatformInfo platformInfo;
				btOpenCLUtils::getPlatformInfo(platform, platformInfo);
				printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
				printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
				printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
				break;
			}
		}
		delete[] platforms;
	}
	return retContext;
}
//////////////////////////////////////////////////////////////////////////////
//! Gets the id of the nth device from the context
//!
//! @return the id, or (cl_device_id)-1 when the index is out of range or the
//!         device list cannot be queried
//! @param cxMainContext OpenCL context
//! @param deviceIndex index of the device of interest, in [0..getNumDevices)
//////////////////////////////////////////////////////////////////////////////
cl_device_id btOpenCLUtils::getDevice(cl_context cxMainContext, int deviceIndex)
{
	const cl_device_id invalidDevice = (cl_device_id)-1;
	size_t szParmDataBytes = 0;
	// get the size of the list of devices associated with context
	cl_int status = clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
	if (status != CL_SUCCESS)
	{
		return invalidDevice;
	}
	const size_t numDevices = szParmDataBytes / sizeof(cl_device_id);
	// valid indices are 0..numDevices-1; deviceIndex == numDevices is already out of range
	if (deviceIndex < 0 || (size_t)deviceIndex >= numDevices)
	{
		return invalidDevice;
	}
	cl_device_id* cdDevices = (cl_device_id*) malloc(szParmDataBytes);
	if (cdDevices == NULL)
	{
		return invalidDevice;
	}
	cl_device_id device = invalidDevice;
	status = clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
	if (status == CL_SUCCESS)
	{
		device = cdDevices[deviceIndex];
	}
	free(cdDevices);
	return device;
}
/// Returns the number of devices attached to the given context, or 0 when the
/// context cannot be queried (for example a NULL context).
int btOpenCLUtils::getNumDevices(cl_context cxMainContext)
{
	size_t szParamDataBytes = 0;
	cl_int status = clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
	if (status != CL_SUCCESS)
	{
		// the size was not written; do not derive a count from it
		return 0;
	}
	int device_count = (int) (szParamDataBytes / sizeof(cl_device_id));
	return device_count;
}
/// Queries the device with getDeviceInfo and prints every field to stdout.
void btOpenCLUtils::printDeviceInfo(cl_device_id device)
{
	btOpenCLDeviceInfo info;
	// start from a defined state so that a failed query prints zeros / empty strings
	memset(&info, 0, sizeof(info));
	getDeviceInfo(device,info);
	printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
	printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
	printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
	// the device type is a bit field, so more than one line may be printed
	if( info.m_deviceType & CL_DEVICE_TYPE_CPU )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
	if( info.m_deviceType & CL_DEVICE_TYPE_GPU )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
	if( info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
	if( info.m_deviceType & CL_DEVICE_TYPE_DEFAULT )
		printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
	// size_t fields are cast explicitly: "%u" expects unsigned int, and size_t is
	// wider than that on 64-bit targets
	printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
	printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", (unsigned int)info.m_workitemDims);
	printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", (unsigned int)info.m_workItemSize[0], (unsigned int)info.m_workItemSize[1], (unsigned int)info.m_workItemSize[2]);
	printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", (unsigned int)info.m_workgroupSize);
	printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
	printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
	printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize/ (1024 * 1024)));
	printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize/ (1024 * 1024)));
	printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport== CL_TRUE ? "yes" : "no");
	printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
	printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
	printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
	if( info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE )
		printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
	if( info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE )
		printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
	printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
	printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
	printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
	printf("\n CL_DEVICE_IMAGE <dim>");
	printf("\t\t\t2D_MAX_WIDTH\t %u\n", (unsigned int)info.m_image2dMaxWidth);
	printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", (unsigned int)info.m_image2dMaxHeight);
	printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", (unsigned int)info.m_image3dMaxWidth);
	printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", (unsigned int)info.m_image3dMaxHeight);
	printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", (unsigned int)info.m_image3dMaxDepth);
	// m_deviceExtensions is an array, so its address is never null; test the content instead
	if (info.m_deviceExtensions[0] != 0)
		printf("\n CL_DEVICE_EXTENSIONS:%s\n",info.m_deviceExtensions);
	else
		printf(" CL_DEVICE_EXTENSIONS: None\n");
	printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
	printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
		info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong,info.m_vecWidthFloat, info.m_vecWidthDouble);
}
/// Fills info with the properties of the given device, one clGetDeviceInfo
/// query per field. Return codes of the individual queries are not checked;
/// a field whose query fails is left zeroed / as an empty string.
void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info)
{
	// Clear everything first: failed queries then leave defined values, strings stay
	// terminated, and fields wider than the queried value have no stale bytes.
	memset(&info, 0, sizeof(info));
	// CL_DEVICE_NAME
	clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, &info.m_deviceName, NULL);
	// CL_DEVICE_VENDOR
	clGetDeviceInfo(device, CL_DEVICE_VENDOR, BT_MAX_STRING_LENGTH, &info.m_deviceVendor, NULL);
	// CL_DRIVER_VERSION
	clGetDeviceInfo(device, CL_DRIVER_VERSION, BT_MAX_STRING_LENGTH, &info.m_driverVersion, NULL);
	// CL_DEVICE_TYPE
	clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info.m_deviceType, NULL);
	// CL_DEVICE_MAX_COMPUTE_UNITS
	clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info.m_computeUnits), &info.m_computeUnits, NULL);
	// CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info.m_workitemDims), &info.m_workitemDims, NULL);
	// CL_DEVICE_MAX_WORK_ITEM_SIZES
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info.m_workItemSize), &info.m_workItemSize, NULL);
	// CL_DEVICE_MAX_WORK_GROUP_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info.m_workgroupSize), &info.m_workgroupSize, NULL);
	// CL_DEVICE_MAX_CLOCK_FREQUENCY
	clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info.m_clockFrequency), &info.m_clockFrequency, NULL);
	// CL_DEVICE_ADDRESS_BITS
	clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info.m_addressBits), &info.m_addressBits, NULL);
	// CL_DEVICE_MAX_MEM_ALLOC_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info.m_maxMemAllocSize), &info.m_maxMemAllocSize, NULL);
	// CL_DEVICE_GLOBAL_MEM_SIZE
	clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info.m_globalMemSize), &info.m_globalMemSize, NULL);
	// CL_DEVICE_ERROR_CORRECTION_SUPPORT
	clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info.m_errorCorrectionSupport), &info.m_errorCorrectionSupport, NULL);
	// CL_DEVICE_LOCAL_MEM_TYPE
	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info.m_localMemType), &info.m_localMemType, NULL);
	// CL_DEVICE_LOCAL_MEM_SIZE
	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info.m_localMemSize), &info.m_localMemSize, NULL);
	// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
	clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info.m_constantBufferSize), &info.m_constantBufferSize, NULL);
	// CL_DEVICE_QUEUE_PROPERTIES
	clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info.m_queueProperties), &info.m_queueProperties, NULL);
	// CL_DEVICE_IMAGE_SUPPORT
	clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info.m_imageSupport), &info.m_imageSupport, NULL);
	// CL_DEVICE_MAX_READ_IMAGE_ARGS
	clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info.m_maxReadImageArgs), &info.m_maxReadImageArgs, NULL);
	// CL_DEVICE_MAX_WRITE_IMAGE_ARGS
	clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info.m_maxWriteImageArgs), &info.m_maxWriteImageArgs, NULL);
	// CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info.m_image2dMaxWidth, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info.m_image2dMaxHeight, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info.m_image3dMaxWidth, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info.m_image3dMaxHeight, NULL);
	clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info.m_image3dMaxDepth, NULL);
	// CL_DEVICE_EXTENSIONS: the extension list as one string
	// NOTE(review): presumably the query fails and leaves the field empty when the list
	// does not fit into BT_MAX_STRING_LENGTH bytes; confirm against the OpenCL spec
	clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, BT_MAX_STRING_LENGTH, &info.m_deviceExtensions, NULL);
	// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info.m_vecWidthChar, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info.m_vecWidthShort, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info.m_vecWidthInt, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info.m_vecWidthLong, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info.m_vecWidthFloat, NULL);
	clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info.m_vecWidthDouble, NULL);
}
// Returns a pointer to the part of 'name' that follows the last occurrence of
// 'pattern' (for example the file name after the last path separator).
// When the pattern does not occur, or is empty, 'name' itself is returned.
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	if (patlen == 0)
	{
		// strstr matches an empty pattern at every position without advancing,
		// so the scan below would never terminate
		return name;
	}
	const char* tail = name;
	// skip past every occurrence; 'tail' ends up just behind the last one
	for (const char* hit = strstr(tail, pattern); hit != NULL; hit = strstr(tail, pattern))
	{
		tail = hit + patlen;
	}
	return tail;
}
/// Builds an OpenCL program for one device from source text.
/// When clFileNameForCaching is given, a device/driver specific binary is looked up in
/// the "cache" directory first (Windows only) and, after a fresh build from source,
/// the resulting binary is written back to that file.
/// @param kernelSource OpenCL C source text, zero terminated
/// @param pErrNum optional; receives the failing status code, or CL_SUCCESS
/// @param additionalMacros extra build options; NULL is treated like ""
/// @param clFileNameForCaching optional path of the source file, used to name the cached
///        binary and to compare time stamps
/// @return the built program, or 0 on failure
cl_program btOpenCLUtils::compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum, const char* additionalMacros , const char* clFileNameForCaching)
{
	cl_program m_cpProgram=0;
	cl_int status;
	char binaryFileName[522];
	// the flag string is measured with strlen further down, so never pass NULL on
	const char* macros = additionalMacros ? additionalMacros : "";
	if (clFileNameForCaching)
	{
		// zero-initialized so a failed query yields an empty name component
		char deviceName[256] = {0};
		char driverVersion[256] = {0};
		clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
		clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
		// drop the directory part, whichever separator was used
		const char* strippedName = strip2(clFileNameForCaching,"\\");
		strippedName = strip2(strippedName,"/");
#ifdef _WIN32
		sprintf_s(binaryFileName,"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion );
#else
		// sprintf_s is not available here; snprintf truncates instead of overflowing
		snprintf(binaryFileName,sizeof(binaryFileName),"cache/%s.%s.%s.bin",strippedName, deviceName,driverVersion );
#endif
		//printf("searching for %s\n", binaryFileName);
#ifdef _WIN32
		bool fileUpToDate = false;
		bool binaryFileValid=false;
		FILETIME modtimeBinary;
		CreateDirectory("cache",0);
		{
			HANDLE binaryFileHandle = CreateFile(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
			if (binaryFileHandle ==INVALID_HANDLE_VALUE)
			{
				DWORD errorCode;
				errorCode = GetLastError();
				switch (errorCode)
				{
				case ERROR_FILE_NOT_FOUND:
					{
						printf("\nCached file not found %s\n", binaryFileName);
						break;
					}
				case ERROR_PATH_NOT_FOUND:
					{
						printf("\nCached file path not found %s\n", binaryFileName);
						break;
					}
				default:
					{
						printf("\nFailed reading cached file with errorCode = %d\n", errorCode);
					}
				}
			} else
			{
				if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0)
				{
					DWORD errorCode;
					errorCode = GetLastError();
					printf("\nGetFileTime errorCode = %d\n", errorCode);
				} else
				{
					binaryFileValid = true;
				}
				CloseHandle(binaryFileHandle);
			}
			if (binaryFileValid)
			{
				HANDLE srcFileHandle = CreateFile(clFileNameForCaching,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
				if (srcFileHandle!=INVALID_HANDLE_VALUE)
				{
					FILETIME modtimeSrc;
					if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0)
					{
						// the source time stamp is unknown: treat the cache as stale
						// rather than comparing against an uninitialized value
						DWORD errorCode;
						errorCode = GetLastError();
						printf("\nGetFileTime errorCode = %d\n", errorCode);
					}
					else if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime)
						||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
					{
						fileUpToDate=true;
					} else
					{
						printf("\nCached binary file out-of-date (%s)\n",binaryFileName);
					}
					CloseHandle(srcFileHandle);
				}
				else
				{
#ifdef _DEBUG
					DWORD errorCode;
					errorCode = GetLastError();
					switch (errorCode)
					{
					case ERROR_FILE_NOT_FOUND:
						{
							printf("\nSrc file not found %s\n", clFileNameForCaching);
							break;
						}
					case ERROR_PATH_NOT_FOUND:
						{
							printf("\nSrc path not found %s\n", clFileNameForCaching);
							break;
						}
					default:
						{
							printf("\nnSrc file reading errorCode = %d\n", errorCode);
						}
					}
					//we should make sure the src file exists so we can verify the timestamp with binary
					assert(0);
#else
					//if we cannot find the source, assume it is OK in release builds
					fileUpToDate = true;
#endif
				}
			}
		}
		if( fileUpToDate)
		{
			FILE* file = fopen(binaryFileName, "rb");
			if (file)
			{
				fseek( file, 0L, SEEK_END );
				long fileSize = ftell( file );
				rewind( file );
				// an empty or unreadable cache file falls through to the build from source
				if (fileSize > 0)
				{
					size_t binarySize = (size_t)fileSize;
					char* binary = new char[binarySize];
					size_t bytesRead = fread( binary, sizeof(char), binarySize, file );
					if (bytesRead == binarySize)
					{
						m_cpProgram = clCreateProgramWithBinary( clContext, 1,&device, &binarySize, (const unsigned char**)&binary, 0, &status );
						assert( status == CL_SUCCESS );
						status = clBuildProgram( m_cpProgram, 1, &device, macros, 0, 0 );
						assert( status == CL_SUCCESS );
						if( status != CL_SUCCESS )
						{
							char *build_log;
							size_t ret_val_size = 0;
							clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
							build_log = new char[ret_val_size+1];
							clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
							build_log[ret_val_size] = '\0';
							printf("%s\n", build_log);
							delete[] build_log;
							assert(0);
							// give the broken program back before retrying from source
							if (m_cpProgram)
								clReleaseProgram(m_cpProgram);
							m_cpProgram = 0;
						}
					}
					delete[] binary;
				}
				fclose( file );
			}
		}
#endif //_WIN32
	}
	if (!m_cpProgram)
	{
		cl_int localErrNum;
		size_t program_length = strlen(kernelSource);
		m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
		if (localErrNum!= CL_SUCCESS)
		{
			if (pErrNum)
				*pErrNum = localErrNum;
			return 0;
		}
		// Build the program with 'mad' Optimization option
#ifdef MAC
		const char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
#else
		//const char* flags = "-DGUID_ARG= -fno-alias";
		const char* flags = "-DGUID_ARG= ";
#endif
		char* compileFlags = new char[strlen(macros) + strlen(flags) + 5];
		sprintf(compileFlags, "%s %s", flags, macros);
		localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
		if (localErrNum!= CL_SUCCESS)
		{
			char *build_log;
			size_t ret_val_size = 0;
			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
			build_log = new char[ret_val_size+1];
			clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
			// to be careful, terminate with \0
			// there's no information in the reference whether the string is 0 terminated or not
			build_log[ret_val_size] = '\0';
			printf("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
			delete[] build_log;
			// nothing below runs on this path, so clean up here
			delete[] compileFlags;
			clReleaseProgram(m_cpProgram);
			if (pErrNum)
				*pErrNum = localErrNum;
			return 0;
		}
		if( clFileNameForCaching )
		{ // write to binary
			cl_uint numAssociatedDevices = 0;
			status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 );
			assert( status == CL_SUCCESS );
			if (numAssociatedDevices==1)
			{
				size_t binarySize = 0;
				status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 );
				assert( status == CL_SUCCESS );
				char* binary = new char[binarySize];
				status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 );
				assert( status == CL_SUCCESS );
				{
					FILE* file = fopen(binaryFileName, "wb");
					if (file)
					{
						fwrite( binary, sizeof(char), binarySize, file );
						fclose( file );
					} else
					{
						printf("cannot write file %s\n", binaryFileName);
					}
				}
				delete [] binary;
			}
		}
		delete [] compileFlags;
	}
	// every failure path has returned above
	if (pErrNum)
		*pErrNum = CL_SUCCESS;
	return m_cpProgram;
}
/// Creates the kernel called kernelName.
/// When prog is given the kernel is taken from that program and kernelSource is not
/// used; otherwise the source is compiled into a temporary program first, and that
/// program is released again before returning.
/// @param pErrNum optional; receives the failing status code, or CL_SUCCESS
/// @return the kernel, or 0 on failure
cl_kernel btOpenCLUtils::compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros )
{
	printf("compiling kernel %s ",kernelName);
	cl_kernel kernel;
	cl_int localErrNum;
	cl_program m_cpProgram = prog;
	// true when the program is created here and therefore has to be released here
	const bool ownsProgram = (prog == 0);
	if (!m_cpProgram)
	{
		if (!kernelSource)
		{
			// neither a program nor source text to build one from
			if (pErrNum)
				*pErrNum = CL_INVALID_VALUE;
			return 0;
		}
		m_cpProgram = compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros);
		if (!m_cpProgram)
		{
			// the compile step has already reported the error through pErrNum
			return 0;
		}
	}
	// Create the kernel
	kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
	if (localErrNum != CL_SUCCESS)
	{
		printf("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
		if (ownsProgram)
		{
			clReleaseProgram(m_cpProgram);
		}
		if (pErrNum)
			*pErrNum = localErrNum;
		return 0;
	}
	if (ownsProgram)
	{
		clReleaseProgram(m_cpProgram);
	}
	printf("ready. \n");
	if (pErrNum)
		*pErrNum = CL_SUCCESS;
	return kernel;
}

View File

@@ -0,0 +1,104 @@
/*
Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//original author: Roman Ponomarev
//cleanup by Erwin Coumans
#ifndef BT_OPENCL_UTILS_H
#define BT_OPENCL_UTILS_H
#include "btOpenCLInclude.h"
#define BT_MAX_STRING_LENGTH 1024
/// Plain collection of device properties, filled by btOpenCLUtils::getDeviceInfo.
/// Each member receives the result of one clGetDeviceInfo query; the query name is
/// given next to the member.
struct btOpenCLDeviceInfo
{
// string properties, each stored in a fixed buffer of BT_MAX_STRING_LENGTH bytes
char m_deviceName[BT_MAX_STRING_LENGTH]; // CL_DEVICE_NAME
char m_deviceVendor[BT_MAX_STRING_LENGTH]; // CL_DEVICE_VENDOR
char m_driverVersion[BT_MAX_STRING_LENGTH]; // CL_DRIVER_VERSION
char m_deviceExtensions[BT_MAX_STRING_LENGTH]; // CL_DEVICE_EXTENSIONS
cl_device_type m_deviceType; // CL_DEVICE_TYPE, tested bit by bit in printDeviceInfo
cl_uint m_computeUnits; // CL_DEVICE_MAX_COMPUTE_UNITS
size_t m_workitemDims; // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
size_t m_workItemSize[3]; // CL_DEVICE_MAX_WORK_ITEM_SIZES
size_t m_image2dMaxWidth; // CL_DEVICE_IMAGE2D_MAX_WIDTH
size_t m_image2dMaxHeight; // CL_DEVICE_IMAGE2D_MAX_HEIGHT
size_t m_image3dMaxWidth; // CL_DEVICE_IMAGE3D_MAX_WIDTH
size_t m_image3dMaxHeight; // CL_DEVICE_IMAGE3D_MAX_HEIGHT
size_t m_image3dMaxDepth; // CL_DEVICE_IMAGE3D_MAX_DEPTH
size_t m_workgroupSize; // CL_DEVICE_MAX_WORK_GROUP_SIZE
cl_uint m_clockFrequency; // CL_DEVICE_MAX_CLOCK_FREQUENCY, printed as MHz
cl_ulong m_constantBufferSize; // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, printed divided by 1024 as KByte
cl_ulong m_localMemSize; // CL_DEVICE_LOCAL_MEM_SIZE, printed divided by 1024 as KByte
cl_ulong m_globalMemSize; // CL_DEVICE_GLOBAL_MEM_SIZE, printed divided by 1024*1024 as MByte
cl_bool m_errorCorrectionSupport; // CL_DEVICE_ERROR_CORRECTION_SUPPORT
cl_device_local_mem_type m_localMemType; // CL_DEVICE_LOCAL_MEM_TYPE
cl_uint m_maxReadImageArgs; // CL_DEVICE_MAX_READ_IMAGE_ARGS
cl_uint m_maxWriteImageArgs; // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
cl_uint m_addressBits; // CL_DEVICE_ADDRESS_BITS
cl_ulong m_maxMemAllocSize; // CL_DEVICE_MAX_MEM_ALLOC_SIZE, printed divided by 1024*1024 as MByte
cl_command_queue_properties m_queueProperties; // CL_DEVICE_QUEUE_PROPERTIES, tested bit by bit in printDeviceInfo
cl_bool m_imageSupport; // CL_DEVICE_IMAGE_SUPPORT
// CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>, one member per element type
cl_uint m_vecWidthChar;
cl_uint m_vecWidthShort;
cl_uint m_vecWidthInt;
cl_uint m_vecWidthLong;
cl_uint m_vecWidthFloat;
cl_uint m_vecWidthDouble;
};
/// Platform description strings, filled by btOpenCLUtils::getPlatformInfo.
/// Each buffer holds BT_MAX_STRING_LENGTH bytes.
struct btOpenCLPlatformInfo
{
char m_platformVendor[BT_MAX_STRING_LENGTH]; // CL_PLATFORM_VENDOR
char m_platformName[BT_MAX_STRING_LENGTH]; // CL_PLATFORM_NAME
char m_platformVersion[BT_MAX_STRING_LENGTH]; // CL_PLATFORM_VERSION
};
/// Static helper functions for setting up OpenCL: enumerating platforms and devices,
/// creating contexts and compiling programs and kernels from source strings.
class btOpenCLUtils
{
public:
/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
/// Tries the available platforms in turn and returns the first context that could be
/// created, or 0 when none could. pErrNum may be 0.
static cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1);
/// number of devices attached to the context
static int getNumDevices(cl_context cxMainContext);
/// get the nr'th device of the context; returns (cl_device_id)-1 when nr is out of range
static cl_device_id getDevice(cl_context cxMainContext, int nr);
/// fill 'info' with the properties of the device
static void getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo& info);
/// query the device properties and print them to stdout
static void printDeviceInfo(cl_device_id device);
/// create the kernel 'kernelName'; when 'prog' is 0 the source is compiled into a temporary program first.
/// Returns 0 on failure; pErrNum, when given, receives the status code.
static cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" );
//optional
/// build a program from source; when 'srcFileNameForCaching' is given, a compiled binary is
/// cached in the "cache" directory. Returns 0 on failure.
static cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0);
//the following optional APIs provide access using specific platform information
/// number of available platforms
static int getNumPlatforms(cl_int* pErrNum=0);
///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
static cl_platform_id getPlatform(int nr, cl_int* pErrNum=0);
/// fill 'platformInfo' with the vendor, name and version strings of the platform
static void getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo& platformInfo);
/// vendor string of the OpenCL SDK selected at compile time
static const char* getSdkVendorName();
/// create a context on one specific platform; a valid preferredDeviceIndex restricts the context
/// to that device. preferredPlatformIndex is not used by the implementation.
static cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1);
};
#endif // BT_OPENCL_UTILS_H

View File

@@ -0,0 +1,92 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///original author: Erwin Coumans
#include "btOpenCLUtils.h"
#include <stdio.h>
// Context and command queue used by the second half of main(): the context comes from
// createContextFromType, the queue is created and released once per device.
cl_context g_cxMainContext;
cl_command_queue g_cqCommandQue;
int main(int argc, char* argv[])
{
int ciErrNum = 0;
cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
const char* vendorSDK = btOpenCLUtils::getSdkVendorName();
printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
int numPlatforms = btOpenCLUtils::getNumPlatforms();
printf("Num Platforms = %d\n", numPlatforms);
for (int i=0;i<numPlatforms;i++)
{
cl_platform_id platform = btOpenCLUtils::getPlatform(i);
btOpenCLPlatformInfo platformInfo;
btOpenCLUtils::getPlatformInfo(platform,platformInfo);
printf("--------------------------------\n");
printf("Platform info for platform nr %d:\n",i);
printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
cl_context context = btOpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
int numDevices = btOpenCLUtils::getNumDevices(context);
printf("Num Devices = %d\n", numDevices);
for (int j=0;j<numDevices;j++)
{
cl_device_id dev = btOpenCLUtils::getDevice(context,j);
btOpenCLDeviceInfo devInfo;
btOpenCLUtils::getDeviceInfo(dev,devInfo);
btOpenCLUtils::printDeviceInfo(dev);
}
clReleaseContext(context);
}
///Easier method to initialize OpenCL using createContextFromType for a GPU
deviceType = CL_DEVICE_TYPE_GPU;
void* glCtx=0;
void* glDC = 0;
printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
for (int i=0;i<numDev;i++)
{
cl_device_id device;
device = btOpenCLUtils::getDevice(g_cxMainContext,i);
btOpenCLDeviceInfo clInfo;
btOpenCLUtils::getDeviceInfo(device,clInfo);
btOpenCLUtils::printDeviceInfo(device);
// create a command-queue
g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
oclCHECKERROR(ciErrNum, CL_SUCCESS);
//normally you would create and execute kernels using this command queue
clReleaseCommandQueue(g_cqCommandQue);
}
clReleaseContext(g_cxMainContext);
return 0;
}

View File

@@ -0,0 +1,4 @@
-- Pull in the build scripts of the three sub-directories.
-- NOTE(review): presumably one project definition per OpenCL vendor SDK; confirm against the directory contents.
include "AMD"
include "Intel"
include "NVIDIA"

View File

@@ -0,0 +1,49 @@
-- Premake project for the broadphase benchmark built against the AMD OpenCL SDK.
-- The project is only defined when findOpenCL_AMD() reports the SDK as available.
-- hasCL is local so the result does not leak into the other included build scripts.
local hasCL = findOpenCL_AMD()

if (hasCL) then

	project "OpenCL_broadphase_benchmark_AMD"

	initOpenCL_AMD()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	}

	files {
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end

View File

@@ -0,0 +1,49 @@
-- Premake project for the broadphase benchmark built against the Intel OpenCL SDK.
-- The project is only defined when findOpenCL_Intel() reports the SDK as available.
-- hasCL is local so the result does not leak into the other included build scripts.
local hasCL = findOpenCL_Intel()

if (hasCL) then

	project "OpenCL_broadphase_benchmark_Intel"

	initOpenCL_Intel()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	}

	files {
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end

View File

@@ -0,0 +1,49 @@
-- Premake project for the broadphase benchmark built against the NVIDIA OpenCL SDK.
-- The project is only defined when findOpenCL_NVIDIA() reports the SDK as available.
-- hasCL is local so the result does not leak into the other included build scripts.
local hasCL = findOpenCL_NVIDIA()

if (hasCL) then

	project "OpenCL_broadphase_benchmark_NVIDIA"

	initOpenCL_NVIDIA()

	language "C++"

	kind "ConsoleApp"
	targetdir "../../../bin"

	initOpenGL()
	initGlut()
	initGlew()

	includedirs {
		"../../../rendering/BulletMath",
		"../../primitives",
		"../../../../../src"
	}

	files {
		"../main.cpp",
		"../findPairsOpenCL.cpp",
		"../findPairsOpenCL.h",
		"../btGridBroadphaseCL.cpp",
		"../btGridBroadphaseCL.h",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
		"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
		"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
		"../../../../../src/LinearMath/btAlignedAllocator.cpp",
		"../../../../../src/LinearMath/btQuickprof.cpp",
		"../../../../../src/LinearMath/btQuickprof.h",
		"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
		"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
		"../../basic_initialize/btOpenCLUtils.cpp",
		"../../basic_initialize/btOpenCLUtils.h",
		"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
		"../../opengl_interop/btOpenCLGLInteropBuffer.h",
		"../../opengl_interop/btStopwatch.cpp",
		"../../opengl_interop/btStopwatch.h"
	}

end

Some files were not shown because too many files have changed in this diff Show More