Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

This commit is contained in:
erwin.coumans
2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
-- Premake project definition for the GPU rigid body pipeline demo (AMD OpenCL build).
-- The project is only declared when findOpenCL_AMD() returns a truthy value;
-- that helper is defined outside this script.
hasCL = findOpenCL_AMD()
if (hasCL) then
project "OpenCL_gpu_rigidbody_pipeline_AMD"
-- Helper defined outside this script; presumably adds the AMD OpenCL SDK
-- include/library settings to the project -- TODO confirm.
initOpenCL_AMD()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
-- Helpers defined outside this script; presumably add the OpenGL, GLUT and
-- GLEW build settings -- TODO confirm.
initOpenGL()
initGlut()
initGlew()
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Sources are listed explicitly: the demo's own files plus individual files
-- taken from sibling demo directories and from the Bullet src tree.
files {
"../main.cpp",
"../btConvexUtility.cpp",
"../btConvexUtility.h",
"../btGpuNarrowPhaseAndSolver.cpp",
"../btGpuNarrowPhaseAndSolver.h",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
"../../../../../src/LinearMath/btConvexHullComputer.cpp",
"../../../../../src/LinearMath/btConvexHullComputer.h",
"../../broadphase_benchmark/findPairsOpenCL.cpp",
"../../broadphase_benchmark/findPairsOpenCL.h",
"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
"../../broadphase_benchmark/btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,91 @@
#ifndef COMMAND_LINE_ARGS_H
#define COMMAND_LINE_ARGS_H
/******************************************************************************
* Command-line parsing
******************************************************************************/
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <map>
#include <sstream>
#include <string>
/**
 * Minimal parser for "--key=value" and "--flag" style command-line options.
 *
 * Only arguments that start with "--" are recorded; everything else
 * (including argv[0]) is ignored. Each recorded argument is stored in a
 * key -> value map. An argument without '=' is stored with an empty value.
 * When the same key is given more than once, the last occurrence wins.
 */
class CommandLineArgs
{
protected:
	// Parsed options: key is the text between "--" and '=' (or end of the
	// argument), value is the text after '=' (empty when there is no '=').
	std::map<std::string, std::string> pairs;

public:
	/**
	 * Parses argv[1] .. argv[argc-1].
	 * @param argc number of entries in argv
	 * @param argv argument vector; null entries are skipped
	 */
	CommandLineArgs(int argc, char **argv)
	{
		for (int i = 1; i < argc; i++)
		{
			if (argv[i] == 0)
				continue;

			const std::string arg = argv[i];

			// compare() clamps the range, so arguments shorter than two
			// characters are rejected without reading past the string.
			if (arg.compare(0, 2, "--") != 0)
				continue;

			const std::string::size_type pos = arg.find('=');
			if (pos == std::string::npos)
				pairs[arg.substr(2)] = "";
			else
				pairs[arg.substr(2, pos - 2)] = arg.substr(pos + 1);
		}
	}

	/**
	 * @param arg_name option name without the leading "--"
	 * @return true when the option was present on the command line
	 *         (with or without a value); false for a null name
	 */
	bool CheckCmdLineFlag(const char* arg_name) const
	{
		return arg_name != 0 && pairs.find(arg_name) != pairs.end();
	}

	/**
	 * Reads the value stored for arg_name into val; defined after the class.
	 */
	template <typename T>
	void GetCmdLineArgument(const char *arg_name, T &val);

	/** @return number of distinct "--" options that were parsed */
	int ParsedArgc() const
	{
		return static_cast<int>(pairs.size());
	}
};
/**
 * Generic value lookup: when arg_name was parsed, its stored text is
 * converted to T with stream extraction (operator>>) and written to val.
 * val is left untouched when the option is absent.
 */
template <typename T>
void CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
{
	const std::map<std::string, std::string>::iterator match = pairs.find(arg_name);
	if (match == pairs.end())
		return;

	std::istringstream parser(match->second);
	parser >> val;
}
/**
 * C-string specialization: hands back a malloc()-allocated copy of the
 * stored value. The caller owns the returned buffer and must release it
 * with free(). val is set to NULL when the option is absent or when the
 * allocation fails.
 *
 * Declared inline because an explicit (full) specialization is an ordinary
 * function: without inline, including this header from more than one
 * translation unit produces multiple definitions at link time.
 */
template <>
inline void CommandLineArgs::GetCmdLineArgument<char*>(const char* arg_name, char* &val)
{
	val = NULL;

	const std::map<std::string, std::string>::iterator itr = pairs.find(arg_name);
	if (itr == pairs.end())
		return;

	const std::string& s = itr->second;
	char* copy = static_cast<char*>(std::malloc(s.length() + 1));
	if (copy == NULL)
		return;

	std::strcpy(copy, s.c_str());
	val = copy;
}
#endif //COMMAND_LINE_ARGS_H

View File

@@ -0,0 +1,58 @@
-- Premake project definition for the GPU rigid body pipeline demo (Intel OpenCL build).
-- The project is only declared when findOpenCL_Intel() returns a truthy value;
-- that helper is defined outside this script.
hasCL = findOpenCL_Intel()
if (hasCL) then
project "OpenCL_gpu_rigidbody_pipeline_Intel"
-- Helper defined outside this script; presumably adds the Intel OpenCL SDK
-- include/library settings to the project -- TODO confirm.
initOpenCL_Intel()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
-- Helpers defined outside this script; presumably add the OpenGL, GLUT and
-- GLEW build settings -- TODO confirm.
initOpenGL()
initGlut()
initGlew()
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Sources are listed explicitly: the demo's own files plus individual files
-- taken from sibling demo directories and from the Bullet src tree.
files {
"../main.cpp",
"../btConvexUtility.cpp",
"../btConvexUtility.h",
"../btGpuNarrowPhaseAndSolver.cpp",
"../btGpuNarrowPhaseAndSolver.h",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
"../../../../../src/LinearMath/btConvexHullComputer.cpp",
"../../../../../src/LinearMath/btConvexHullComputer.h",
"../../broadphase_benchmark/findPairsOpenCL.cpp",
"../../broadphase_benchmark/findPairsOpenCL.h",
"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
"../../broadphase_benchmark/btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,57 @@
-- Premake project definition for the GPU rigid body pipeline demo (NVIDIA OpenCL build).
-- The project is only declared when findOpenCL_NVIDIA() returns a truthy value;
-- that helper is defined outside this script.
hasCL = findOpenCL_NVIDIA()
if (hasCL) then
project "OpenCL_gpu_rigidbody_pipeline_NVIDIA"
-- Helper defined outside this script; presumably adds the NVIDIA OpenCL SDK
-- include/library settings to the project -- TODO confirm.
initOpenCL_NVIDIA()
language "C++"
kind "ConsoleApp"
targetdir "../../../bin"
-- Helpers defined outside this script; presumably add the OpenGL, GLUT and
-- GLEW build settings -- TODO confirm.
initOpenGL()
initGlut()
initGlew()
includedirs {
"../../../rendering/BulletMath",
"../../primitives",
"../../../../../src"
}
-- Sources are listed explicitly: the demo's own files plus individual files
-- taken from sibling demo directories and from the Bullet src tree.
files {
"../main.cpp",
"../btConvexUtility.cpp",
"../btConvexUtility.h",
"../btGpuNarrowPhaseAndSolver.cpp",
"../btGpuNarrowPhaseAndSolver.h",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.cpp",
"../../../dynamics/basic_demo/ConvexHeightFieldShape.h",
"../../../../../src/LinearMath/btConvexHullComputer.cpp",
"../../../../../src/LinearMath/btConvexHullComputer.h",
"../../broadphase_benchmark/findPairsOpenCL.cpp",
"../../broadphase_benchmark/findPairsOpenCL.h",
"../../broadphase_benchmark/btGridBroadphaseCL.cpp",
"../../broadphase_benchmark/btGridBroadphaseCL.h",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.cpp",
"../../3dGridBroadphase/Shared/bt3dGridBroadphaseOCL.h",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.cpp",
"../../3dGridBroadphase/Shared/btGpu3DGridBroadphase.h",
"../../../../../src/LinearMath/btAlignedAllocator.cpp",
"../../../../../src/LinearMath/btQuickprof.cpp",
"../../../../../src/LinearMath/btQuickprof.h",
"../../../../../src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp",
"../../../../../src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp",
"../../basic_initialize/btOpenCLUtils.cpp",
"../../basic_initialize/btOpenCLUtils.h",
"../../opengl_interop/btOpenCLGLInteropBuffer.cpp",
"../../opengl_interop/btOpenCLGLInteropBuffer.h",
"../../opengl_interop/btStopwatch.cpp",
"../../opengl_interop/btStopwatch.h"
}
end

View File

@@ -0,0 +1,240 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "btConvexUtility.h"
#include "LinearMath/btConvexHullComputer.h"
#include "LinearMath/btGrahamScan2dConvexHull.h"
#include "LinearMath/btQuaternion.h"
bool btConvexUtility::initializePolyhedralFeatures(const btAlignedObjectArray<btVector3>& orgVertices, bool mergeCoplanarTriangles)
{
btConvexHullComputer conv;
conv.compute(&orgVertices[0].getX(), sizeof(btVector3),orgVertices.size(),0.f,0.f);
btAlignedObjectArray<btVector3> faceNormals;
int numFaces = conv.faces.size();
faceNormals.resize(numFaces);
btConvexHullComputer* convexUtil = &conv;
btAlignedObjectArray<btFace> tmpFaces;
tmpFaces.resize(numFaces);
int numVertices = convexUtil->vertices.size();
m_vertices.resize(numVertices);
for (int p=0;p<numVertices;p++)
{
m_vertices[p] = convexUtil->vertices[p];
}
for (int i=0;i<numFaces;i++)
{
int face = convexUtil->faces[i];
//printf("face=%d\n",face);
const btConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face];
const btConvexHullComputer::Edge* edge = firstEdge;
btVector3 edges[3];
int numEdges = 0;
//compute face normals
btScalar maxCross2 = 0.f;
int chosenEdge = -1;
do
{
int src = edge->getSourceVertex();
tmpFaces[i].m_indices.push_back(src);
int targ = edge->getTargetVertex();
btVector3 wa = convexUtil->vertices[src];
btVector3 wb = convexUtil->vertices[targ];
btVector3 newEdge = wb-wa;
newEdge.normalize();
if (numEdges<2)
edges[numEdges++] = newEdge;
edge = edge->getNextEdgeOfFace();
} while (edge!=firstEdge);
btScalar planeEq = 1e30f;
if (numEdges==2)
{
faceNormals[i] = edges[0].cross(edges[1]);
faceNormals[i].normalize();
tmpFaces[i].m_plane[0] = faceNormals[i].getX();
tmpFaces[i].m_plane[1] = faceNormals[i].getY();
tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
tmpFaces[i].m_plane[3] = planeEq;
}
else
{
btAssert(0);//degenerate?
faceNormals[i].setZero();
}
for (int v=0;v<tmpFaces[i].m_indices.size();v++)
{
btScalar eq = m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
if (planeEq>eq)
{
planeEq=eq;
}
}
tmpFaces[i].m_plane[3] = -planeEq;
}
//merge coplanar faces
btScalar faceWeldThreshold= 0.999f;
btAlignedObjectArray<int> todoFaces;
for (int i=0;i<tmpFaces.size();i++)
todoFaces.push_back(i);
while (todoFaces.size())
{
btAlignedObjectArray<int> coplanarFaceGroup;
int refFace = todoFaces[todoFaces.size()-1];
coplanarFaceGroup.push_back(refFace);
btFace& faceA = tmpFaces[refFace];
todoFaces.pop_back();
btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
for (int j=todoFaces.size()-1;j>=0;j--)
{
int i = todoFaces[j];
btFace& faceB = tmpFaces[i];
btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
{
coplanarFaceGroup.push_back(i);
todoFaces.remove(i);
}
}
bool did_merge = false;
if (mergeCoplanarTriangles && coplanarFaceGroup.size()>1)
{
//do the merge: use Graham Scan 2d convex hull
btAlignedObjectArray<GrahamVector2> orgpoints;
for (int i=0;i<coplanarFaceGroup.size();i++)
{
btFace& face = tmpFaces[coplanarFaceGroup[i]];
btVector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
btVector3 xyPlaneNormal(0,0,1);
btQuaternion rotationArc = shortestArcQuat(faceNormal,xyPlaneNormal);
for (int f=0;f<face.m_indices.size();f++)
{
int orgIndex = face.m_indices[f];
btVector3 pt = m_vertices[orgIndex];
btVector3 rotatedPt = quatRotate(rotationArc,pt);
rotatedPt.setZ(0);
bool found = false;
for (int i=0;i<orgpoints.size();i++)
{
//if ((orgpoints[i].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[i]).length2()<0.0001))
if (orgpoints[i].m_orgIndex == orgIndex)
{
found=true;
break;
}
}
if (!found)
orgpoints.push_back(GrahamVector2(rotatedPt,orgIndex));
}
}
btFace combinedFace;
for (int i=0;i<4;i++)
combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];
btAlignedObjectArray<GrahamVector2> hull;
GrahamScanConvexHull2D(orgpoints,hull);
for (int i=0;i<hull.size();i++)
{
combinedFace.m_indices.push_back(hull[i].m_orgIndex);
for(int k = 0; k < orgpoints.size(); k++) {
if(orgpoints[k].m_orgIndex == hull[i].m_orgIndex) {
orgpoints[k].m_orgIndex = -1; // invalidate...
break;
}
}
}
// are there rejected vertices?
bool reject_merge = false;
for(int i = 0; i < orgpoints.size(); i++) {
if(orgpoints[i].m_orgIndex == -1)
continue; // this is in the hull...
// this vertex is rejected -- is anybody else using this vertex?
for(int j = 0; j < tmpFaces.size(); j++) {
btFace& face = tmpFaces[j];
// is this a face of the current coplanar group?
bool is_in_current_group = false;
for(int k = 0; k < coplanarFaceGroup.size(); k++) {
if(coplanarFaceGroup[k] == j) {
is_in_current_group = true;
break;
}
}
if(is_in_current_group) // ignore this face...
continue;
// does this face use this rejected vertex?
for(int v = 0; v < face.m_indices.size(); v++) {
if(face.m_indices[v] == orgpoints[i].m_orgIndex) {
// this rejected vertex is used in another face -- reject merge
reject_merge = true;
break;
}
}
if(reject_merge)
break;
}
if(reject_merge)
break;
}
if(!reject_merge) {
// do this merge!
did_merge = true;
m_faces.push_back(combinedFace);
}
}
if(!did_merge)
{
for (int i=0;i<coplanarFaceGroup.size();i++)
{
m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
}
}
}
return true;
}

View File

@@ -0,0 +1,41 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#ifndef _BT_CONVEX_UTILITY_H
#define _BT_CONVEX_UTILITY_H
#include "LinearMath/btAlignedObjectArray.h"
#include "LinearMath/btVector3.h"
// One polygonal face of a convex hull, as produced by
// btConvexUtility::initializePolyhedralFeatures.
struct btFace
{
// Indices into btConvexUtility::m_vertices, one per corner of the face.
btAlignedObjectArray<int> m_indices;
// btAlignedObjectArray<int> m_connectedFaces;
// Plane of the face: [0..2] hold the face normal, [3] the plane constant.
// Not initialized by default construction.
btScalar m_plane[4];
};
// Helper that converts a point cloud into a convex polyhedron description
// (hull vertices plus faces with plane equations).
class btConvexUtility
{
public:
// Vertices of the computed convex hull; filled by initializePolyhedralFeatures.
btAlignedObjectArray<btVector3> m_vertices;
// Faces of the computed convex hull; their m_indices refer to m_vertices.
btAlignedObjectArray<btFace> m_faces;
// Computes the convex hull of orgVertices and fills m_vertices / m_faces.
// When mergeCoplanarTriangles is true, groups of faces with nearly parallel
// normals are merged into single polygons where possible.
bool initializePolyhedralFeatures(const btAlignedObjectArray<btVector3>& orgVertices, bool mergeCoplanarTriangles);
};
#endif

View File

@@ -0,0 +1,730 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "btGpuNarrowphaseAndSolver.h"
//#include "CustomConvexShape.h"
//#include "CustomConvexPairCollision.h"
#include "LinearMath/btQuickprof.h"
//#include "BulletDynamics/Dynamics/btRigidBody.h"
#include "Adl/Adl.h"
#include "../../dynamics/basic_demo/Stubs/AdlMath.h"
#include "../../dynamics/basic_demo/Stubs/AdlContact4.h"
#include "../../dynamics/basic_demo/Stubs/AdlQuaternion.h"
#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
#include "../../dynamics/basic_demo/Stubs/Solver.h"
#include <AdlPrimitives/Sort/RadixSort32.h>
// Global toggle, initialized to 1. It is not read in the part of this file
// under review.
// NOTE(review): presumably selects GPU-side contact batching -- confirm against its users.
int gpuBatchContacts = 1;
// Number of pairs returned by the most recent ChNarrowphase culling pass;
// written in computeContactsAndSolver.
int numPairsOut =0;
// View of the host solver's m_parallelSolveData pointer (it is cast to this
// type in computeContactsAndSolver). Both arrays have one entry per cell of
// an N_SPLIT x N_SPLIT grid.
// NOTE(review): the layout presumably has to match the structure the ADL host
// solver allocates internally -- confirm.
struct CPUSolveData
{
// Per-cell constraint counts; uploaded to ParallelSolveData::m_numConstraints.
u32 m_n[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT];
// Per-cell start offsets; uploaded to ParallelSolveData::m_offsets.
u32 m_offset[adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT];
};
// View of the CL solver's m_parallelSolveData pointer (it is cast to this
// type in computeContactsAndSolver).
// NOTE(review): the layout presumably has to match the structure the ADL CL
// solver allocates internally -- confirm.
struct ParallelSolveData
{
// Per-cell counts; receives the BoundSearch COUNT output.
adl::Buffer<u32>* m_numConstraints;
// Per-cell offsets; receives the PrefixScan output computed from the counts.
adl::Buffer<u32>* m_offsets;
};
// Private state of btGpuNarrowphaseAndSolver: devices, host/device buffers and
// ADL narrowphase/solver handles. Allocated in the constructor and released in
// the destructor.
struct CustomDispatchData
{
// OpenCL device passed to the constructor; not deallocated by this class.
adl::DeviceCL* m_deviceCL;
// Host device allocated in the constructor and deallocated in the destructor.
adl::Device* m_deviceHost;
// Shape storage allocated through ChNarrowphase::allocateShapeBuffer.
ShapeDataType m_ShapeBuffer;
// Host-side table of registered shapes, indexed by shape index.
adl::HostBuffer<ConvexHeightField*>* m_shapePointers;
// Host pair buffer; allocated and freed but not otherwise used in the code under review.
adl::HostBuffer<int2>* m_pBufPairsCPU;
// Output of the culling pass; input of the convex-versus-convex narrowphase.
adl::Buffer<int2>* m_convexPairsOutGPU;
// (plane body, other body) pairs used for the plane-versus-convex narrowphase.
adl::Buffer<int2>* m_planePairs;
// Contacts produced by the narrowphase on the CL device.
adl::Buffer<Contact4>* m_pBufContactOutGPU;
// Host contact buffer; allocated and freed but not otherwise used in the code under review.
adl::HostBuffer<Contact4>* m_pBufContactOutCPU;
// Narrowphase handle returned by ChNarrowphase::allocate.
adl::ChNarrowphase<adl::TYPE_CL>::Data* m_Data;
// Host copy of all registered bodies, indexed by body index.
adl::HostBuffer<RigidBodyBase::Body>* m_bodyBufferCPU;
// Device copy of the bodies.
adl::Buffer<RigidBodyBase::Body>* m_bodyBufferGPU;
// Host copy of the per-body inertia data (allocated on m_deviceHost).
adl::Buffer<RigidBodyBase::Inertia>* m_inertiaBufferCPU;
// Device copy of the per-body inertia data.
adl::Buffer<RigidBodyBase::Inertia>* m_inertiaBufferGPU;
// Solver handle returned by Solver::allocate.
adl::Solver<adl::TYPE_CL>::Data* m_solverDataGPU;
// Contact constraint storage returned by Solver::allocateConstraint4.
SolverData m_contactCGPU;
// Friction constraint storage returned by Solver::allocateFrictionConstraint.
void* m_frictionCGPU;
// Number of shapes registered so far; also the next shape index.
int m_numAcceleratedShapes;
// Number of bodies registered so far; also the next body index.
int m_numAcceleratedRigidBodies;
};
// Creates the narrowphase/solver front end. When deviceCL is null nothing is
// allocated and m_internalData stays 0; otherwise the host device, all
// host/device buffers and the ADL narrowphase and solver handles are created.
btGpuNarrowphaseAndSolver::btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL)
:m_internalData(0) ,m_planeBodyIndex(-1)
{
	if (!deviceCL)
		return;

	m_internalData = new CustomDispatchData();
	memset(m_internalData,0,sizeof(CustomDispatchData));
	CustomDispatchData& d = *m_internalData;

	// Devices: the CL device is supplied by the caller, the host device is ours.
	adl::DeviceUtils::Config cfg;
	d.m_deviceCL = deviceCL;
	d.m_deviceHost = adl::DeviceUtils::allocate( adl::TYPE_HOST, cfg );

	// Pair, contact, body and inertia buffers.
	d.m_pBufPairsCPU = new adl::HostBuffer<int2>(d.m_deviceHost, MAX_BROADPHASE_COLLISION_CL);
	d.m_convexPairsOutGPU = new adl::Buffer<int2>(d.m_deviceCL,MAX_BROADPHASE_COLLISION_CL);
	d.m_planePairs = new adl::Buffer<int2>(d.m_deviceCL,MAX_BROADPHASE_COLLISION_CL);
	d.m_pBufContactOutCPU = new adl::HostBuffer<Contact4>(d.m_deviceHost, MAX_BROADPHASE_COLLISION_CL);
	d.m_bodyBufferCPU = new adl::HostBuffer<RigidBodyBase::Body>(d.m_deviceHost, MAX_CONVEX_BODIES_CL);
	d.m_inertiaBufferCPU = new adl::Buffer<RigidBodyBase::Inertia>(d.m_deviceHost,MAX_CONVEX_BODIES_CL);
	d.m_pBufContactOutGPU = new adl::Buffer<Contact4>(d.m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	d.m_inertiaBufferGPU = new adl::Buffer<RigidBodyBase::Inertia>(d.m_deviceCL,MAX_CONVEX_BODIES_CL);

	// Solver and narrowphase handles on the CL device.
	d.m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate( d.m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	d.m_bodyBufferGPU = new adl::Buffer<RigidBodyBase::Body>(d.m_deviceCL, MAX_CONVEX_BODIES_CL);
	d.m_Data = adl::ChNarrowphase<adl::TYPE_CL>::allocate(d.m_deviceCL);
	// d.m_DataCPU = adl::ChNarrowphase<adl::TYPE_HOST>::allocate(d.m_deviceHost);

	// Shape storage and bookkeeping counters.
	d.m_ShapeBuffer = adl::ChNarrowphase<adl::TYPE_CL>::allocateShapeBuffer(d.m_deviceCL, MAX_CONVEX_SHAPES_CL);
	d.m_shapePointers = new adl::HostBuffer<ConvexHeightField*>(d.m_deviceHost,MAX_CONVEX_SHAPES_CL);
	d.m_numAcceleratedShapes = 0;
	d.m_numAcceleratedRigidBodies = 0;

	// Constraint storage used by the solver.
	d.m_contactCGPU = adl::Solver<adl::TYPE_CL>::allocateConstraint4( d.m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
	d.m_frictionCGPU = adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( d.m_deviceCL, MAX_BROADPHASE_COLLISION_CL);
}
// Registers a convex shape with the narrowphase and returns its shape index.
// The pointer is stored in the host-side shape table and the shape is written
// into the narrowphase shape buffer at the same index.
//
// Both stores are sized MAX_CONVEX_SHAPES_CL, so the capacity is asserted
// before writing, in the same way registerRigidBody asserts its body capacity.
// NOTE(review): 0.01f equals the collision margin used in
// computeContactsAndSolver; presumably the last setShape argument is that
// margin -- confirm.
int btGpuNarrowphaseAndSolver::registerShape(ConvexHeightField* convexShape)
{
	assert(m_internalData);
	assert(m_internalData->m_numAcceleratedShapes < MAX_CONVEX_SHAPES_CL);

	const int shapeIndex = m_internalData->m_numAcceleratedShapes;
	(*m_internalData->m_shapePointers)[shapeIndex] = convexShape;
	adl::ChNarrowphase<adl::TYPE_CL>::setShape(m_internalData->m_ShapeBuffer, convexShape, shapeIndex, 0.01f);

	m_internalData->m_numAcceleratedShapes = shapeIndex + 1;
	return shapeIndex;
}
// Returns the device-side rigid body buffer as a raw cl_mem handle.
cl_mem btGpuNarrowphaseAndSolver::getBodiesGpu()
{
	adl::Buffer<RigidBodyBase::Body>* bodies = m_internalData->m_bodyBufferGPU;
	return (cl_mem)bodies->m_ptr;
}
// Returns the device-side inertia buffer as a raw cl_mem handle.
cl_mem btGpuNarrowphaseAndSolver::getBodyInertiasGpu()
{
	adl::Buffer<RigidBodyBase::Inertia>* inertias = m_internalData->m_inertiaBufferGPU;
	return (cl_mem)inertias->m_ptr;
}
int btGpuNarrowphaseAndSolver::registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation , bool writeToGpu)
{
assert(m_internalData->m_numAcceleratedRigidBodies< (MAX_CONVEX_BODIES_CL-1));
RigidBodyBase::Body& body = m_internalData->m_bodyBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies];
float friction = 1.f;
float restitution = 0.f;
body.m_frictionCoeff = friction;
body.m_restituitionCoeff = restitution;
body.m_angVel = make_float4(0.f);
body.m_linVel = make_float4(0.f);
body.m_pos = make_float4(position[0],position[1],position[2],0.f);
body.m_quat = make_float4(orientation[0],orientation[1],orientation[2],orientation[3]);
body.m_shapeIdx = shapeIndex;
if (shapeIndex<0)
{
body.m_shapeType = CollisionShape::SHAPE_PLANE;
m_planeBodyIndex = m_internalData->m_numAcceleratedRigidBodies;
} else
{
body.m_shapeType = CollisionShape::SHAPE_CONVEX_HEIGHT_FIELD;
}
body.m_invMass = mass? 1.f/mass : 0.f;
if (writeToGpu)
m_internalData->m_bodyBufferGPU->write(&body,1,m_internalData->m_numAcceleratedRigidBodies);
RigidBodyBase::Inertia& shapeInfo = m_internalData->m_inertiaBufferCPU->m_ptr[m_internalData->m_numAcceleratedRigidBodies];
if (mass==0.f)
{
shapeInfo.m_initInvInertia = mtZero();
shapeInfo.m_invInertia = mtZero();
} else
{
assert(body.m_shapeIdx>=0);
//approximate using the aabb of the shape
Aabb aabb = (*m_internalData->m_shapePointers)[shapeIndex]->m_aabb;
float4 halfExtents = (aabb.m_max - aabb.m_min);
float4 localInertia;
float lx=2.f*halfExtents.x;
float ly=2.f*halfExtents.y;
float lz=2.f*halfExtents.z;
localInertia = make_float4( (mass/12.0f) * (ly*ly + lz*lz),
(mass/12.0f) * (lx*lx + lz*lz),
(mass/12.0f) * (lx*lx + ly*ly));
float4 invLocalInertia;
invLocalInertia.x = 1.f/localInertia.x;
invLocalInertia.y = 1.f/localInertia.y;
invLocalInertia.z = 1.f/localInertia.z;
invLocalInertia.w = 0.f;
shapeInfo.m_initInvInertia = mtZero();
shapeInfo.m_initInvInertia.m_row[0].x = invLocalInertia.x;
shapeInfo.m_initInvInertia.m_row[1].y = invLocalInertia.y;
shapeInfo.m_initInvInertia.m_row[2].z = invLocalInertia.z;
Matrix3x3 m = qtGetRotationMatrix( body.m_quat);
Matrix3x3 mT = mtTranspose( m );
shapeInfo.m_invInertia = mtMul( mtMul( m, shapeInfo.m_initInvInertia ), mT );
}
if (writeToGpu)
m_internalData->m_inertiaBufferGPU->write(&shapeInfo,1,m_internalData->m_numAcceleratedRigidBodies);
return m_internalData->m_numAcceleratedRigidBodies++;
}
void btGpuNarrowphaseAndSolver::writeAllBodiesToGpu()
{
m_internalData->m_bodyBufferGPU->write(m_internalData->m_bodyBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies);
m_internalData->m_inertiaBufferGPU->write( m_internalData->m_inertiaBufferCPU->m_ptr,m_internalData->m_numAcceleratedRigidBodies);
}
// Releases everything the constructor allocated. Nothing to do when the
// object was constructed without a CL device (m_internalData is 0). The CL
// device itself is not released here. The host device is deallocated last,
// after the buffers that were created on it.
btGpuNarrowphaseAndSolver::~btGpuNarrowphaseAndSolver(void)
{
	CustomDispatchData* d = m_internalData;
	if (!d)
		return;

	delete d->m_pBufPairsCPU;
	delete d->m_convexPairsOutGPU;
	delete d->m_planePairs;
	delete d->m_pBufContactOutGPU;
	delete d->m_inertiaBufferGPU;
	delete d->m_pBufContactOutCPU;
	delete d->m_shapePointers;
	adl::ChNarrowphase<adl::TYPE_CL>::deallocateShapeBuffer(d->m_ShapeBuffer);
	delete d->m_inertiaBufferCPU;
	adl::Solver<adl::TYPE_CL>::deallocateConstraint4( d->m_contactCGPU );
	adl::Solver<adl::TYPE_CL>::deallocateFrictionConstraint( d->m_frictionCGPU );
	delete d->m_bodyBufferGPU;
	adl::Solver<adl::TYPE_CL>::deallocate( d->m_solverDataGPU);
	delete d->m_bodyBufferCPU;
	adl::ChNarrowphase<adl::TYPE_CL>::deallocate(d->m_Data);
	adl::DeviceUtils::deallocate(d->m_deviceHost);
	delete d;
}
void btGpuNarrowphaseAndSolver::computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs)
{
BT_PROFILE("computeContactsAndSolver");
bool bGPU = (m_internalData != 0);
int maxBodyIndex = m_internalData->m_numAcceleratedRigidBodies;
if (!maxBodyIndex)
return;
int numOfConvexRBodies = maxBodyIndex;
adl::ChNarrowphaseBase::Config cfgNP;
cfgNP.m_collisionMargin = 0.01f;
int nContactOut = 0;
//printf("convexPairsOut.m_size = %d\n",m_internalData->m_convexPairsOutGPU->m_size);
adl::Buffer<int2> broadphasePairsGPU;
broadphasePairsGPU.m_ptr = (int2*)broadphasePairs;
broadphasePairsGPU.m_size = numBroadphasePairs;
broadphasePairsGPU.m_device = m_internalData->m_deviceCL;
bool useCulling = true;
if (useCulling)
{
BT_PROFILE("ChNarrowphase::culling");
adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);
numPairsOut = adl::ChNarrowphase<adl::TYPE_CL>::culling(
m_internalData->m_Data,
&broadphasePairsGPU,
numBroadphasePairs,
m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
m_internalData->m_convexPairsOutGPU,
cfgNP);
}
{
BT_PROFILE("ChNarrowphase::execute");
if (useCulling)
{
if (m_planeBodyIndex>=0)
{
BT_PROFILE("ChNarrowphase:: plane versus convex");
//todo: get rid of this dynamic allocation
int2* hostPairs = new int2[m_internalData->m_numAcceleratedRigidBodies-1];
int index=0;
for (int i=0;i<m_internalData->m_numAcceleratedRigidBodies;i++)
{
if (i!=m_planeBodyIndex)
{
hostPairs[index].x = m_planeBodyIndex;
hostPairs[index].y = i;
index++;
}
}
assert(m_internalData->m_numAcceleratedRigidBodies-1 == index);
m_internalData->m_planePairs->write(hostPairs,index);
adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);
delete[]hostPairs;
//convex versus plane
adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, m_internalData->m_planePairs, index, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
0,0,m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
}
//convex versus convex
adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, m_internalData->m_convexPairsOutGPU,numPairsOut, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
} else
{
adl::ChNarrowphase<adl::TYPE_CL>::execute(m_internalData->m_Data, &broadphasePairsGPU, numBroadphasePairs, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
}
adl::DeviceUtils::waitForCompletion(m_internalData->m_deviceCL);
}
if (!nContactOut)
return;
bool useSolver = true;//true;//false;
if (useSolver)
{
float dt=1./60.;
adl::SolverBase::ConstraintCfg csCfg( dt );
csCfg.m_enableParallelSolve = true;
csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
csCfg.m_staticIdx = m_planeBodyIndex;
bool exposeInternalBatchImplementation=true;
adl::Solver<adl::TYPE_HOST>::Data* cpuSolverData = 0;
if (exposeInternalBatchImplementation)
{
BT_PROFILE("Batching");
cpuSolverData = adl::Solver<adl::TYPE_HOST>::allocate( m_internalData->m_deviceHost, nContactOut);
adl::Buffer<Contact4>* contactsIn = m_internalData->m_pBufContactOutGPU;
const adl::Buffer<RigidBodyBase::Body>* bodyBuf = m_internalData->m_bodyBufferGPU;
void* additionalData = m_internalData->m_frictionCGPU;
const adl::Buffer<RigidBodyBase::Inertia>* shapeBuf = m_internalData->m_inertiaBufferGPU;
SolverData contactCOut = m_internalData->m_contactCGPU;
int nContacts = nContactOut;
bool useCPU=false;
if (useCPU)
{
BT_PROFILE("CPU batch");
{
BT_PROFILE("CPU sortContacts2");
sortContacts2( cpuSolverData, bodyBuf, contactsIn, additionalData, nContacts, csCfg );
}
CPUSolveData* dataCPU = (CPUSolveData*)cpuSolverData->m_parallelSolveData;
{
BT_PROFILE("CPU batchContacts2");
adl::Buffer<u32> n; n.setRawPtr( cpuSolverData->m_device, dataCPU->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
adl::Buffer<u32> offsets; offsets.setRawPtr( cpuSolverData->m_device, dataCPU->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
batchContacts2( cpuSolverData, contactsIn, nContacts, &n, &offsets, csCfg.m_staticIdx );
}
{
BT_PROFILE("CPU convertToConstraints2");
convertToConstraints2( cpuSolverData, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, csCfg );
}
{
BT_PROFILE("CPU -> GPU copy");
ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData;
dataGPU->m_numConstraints->write(dataCPU->m_n,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
dataGPU->m_offsets->write(dataCPU->m_offset,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
}
}
else
{
BT_PROFILE("GPU batch");
adl::Solver<adl::TYPE_CL>::Data* data = m_internalData->m_solverDataGPU;
{
if( data->m_contactBuffer )
{
if( data->m_contactBuffer->getSize() < nContacts )
{
BT_PROFILE("delete data->m_contactBuffer;");
delete data->m_contactBuffer;
data->m_contactBuffer = 0;
}
}
if( data->m_contactBuffer == 0 )
{
data->m_contactBuffer = new adl::Buffer<Contact4>( data->m_device, nContacts );
}
adl::Buffer<Contact4>* contactNative = contactsIn;
ParallelSolveData* nativeSolveData = (ParallelSolveData*)data->m_parallelSolveData;
{
ADLASSERT( data->m_device->m_type == adl::TYPE_CL );
adl::Buffer<RigidBodyBase::Body>* bodyNative = adl::BufferUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
adl::Buffer<Contact4>* contactNative = adl::BufferUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
const int sortAlignment = 512; // todo. get this out of sort
if( csCfg.m_enableParallelSolve )
{
ParallelSolveData* nativeSolveData = (ParallelSolveData*)data->m_parallelSolveData;
int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );
adl::Buffer<u32>* countsNative = nativeSolveData->m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost );
adl::Buffer<u32>* offsetsNative = nativeSolveData->m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost );
{ // 2. set cell idx
BT_PROFILE("GPU set cell idx");
struct CB
{
int m_nContacts;
int m_staticIdx;
float m_scale;
int m_nSplit;
};
ADLASSERT( sortSize%64 == 0 );
CB cdata;
cdata.m_nContacts = nContacts;
cdata.m_staticIdx = csCfg.m_staticIdx;
cdata.m_scale = 1.f/(adl::SolverBase::N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
cdata.m_nSplit = adl::SolverBase::N_SPLIT;
adl::Buffer<CB> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );
adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( contactNative ), adl::Launcher::BufferInfo( bodyNative ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) };
adl::Launcher launcher( data->m_device, data->m_setSortDataKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
launcher.setConst( constBuffer, cdata );
launcher.launch1D( sortSize, 64 );
}
bool gpuRadixSort=true;
if (gpuRadixSort)
{ // 3. sort by cell idx
BT_PROFILE("gpuRadixSort");
int n = adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT;
int sortBit = 32;
//if( n <= 0xffff ) sortBit = 16;
//if( n <= 0xff ) sortBit = 8;
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
} else
{
BT_PROFILE("cpu RadixSort");
adl::HostBuffer<adl::SortData> sortData(m_internalData->m_deviceHost,nContacts);
data->m_sortDataBuffer->read(sortData.m_ptr,nContacts);
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
adl::RadixSort<adl::TYPE_HOST>::Data* sData = adl::RadixSort<adl::TYPE_HOST>::allocate( m_internalData->m_deviceHost, nContacts );
adl::RadixSort<adl::TYPE_HOST>::execute( sData, sortData, nContacts );
adl::RadixSort<adl::TYPE_HOST>::deallocate( sData );
data->m_sortDataBuffer->write(sortData.m_ptr,nContacts);
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
}
bool gpuBoundSearch=true;
if (gpuBoundSearch)
{ // 4. find entries
BT_PROFILE("gpuBoundSearch");
adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, adl::BoundSearchBase::COUNT );
adl::PrefixScan<adl::TYPE_CL>::execute( data->m_scan, *countsNative, *offsetsNative,
adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
} else
{
BT_PROFILE("cpuBoundSearch");
adl::HostBuffer<adl::SortData> sortData(m_internalData->m_deviceHost,nContacts);
data->m_sortDataBuffer->read(sortData.m_ptr,nContacts);
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
adl::HostBuffer<u32> n0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
adl::HostBuffer<u32> offset0( m_internalData->m_deviceHost, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT );
for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
{
n0[i] = 0;
offset0[i] = 0;
}
for(int i=0; i<nContacts; i++)
{
int idx = sortData[i].m_key;
assert(idx>=0);
assert(idx<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
n0[idx]++;
}
// scan
int sum = 0;
for(int i=0; i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT; i++)
{
offset0[i] = sum;
sum += n0[i];
}
countsNative->write(n0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
offsetsNative->write(offset0.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( data->m_device );
}
{ // 5. sort constraints by cellIdx
{
BT_PROFILE("gpu m_reorderContactKernel");
adl::Buffer<int4> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );
int4 cdata; cdata.x = nContacts;
adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( contactNative ), adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( data->m_sortDataBuffer ) };
adl::Launcher launcher( data->m_device, data->m_reorderContactKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
launcher.setConst( constBuffer, cdata );
launcher.launch1D( nContacts, 64 );
}
}
}
adl::BufferUtils::unmap<false>( bodyNative, bodyBuf );
adl::BufferUtils::unmap<false>( contactNative, contactsIn );
}
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL);
{
BT_PROFILE("gpu m_copyConstraintKernel");
adl::Buffer<int4> constBuffer( data->m_device, 1, adl::BufferBase::BUFFER_CONST );
int4 cdata; cdata.x = nContacts;
adl::Launcher::BufferInfo bInfo[] = { adl::Launcher::BufferInfo( data->m_contactBuffer ), adl::Launcher::BufferInfo( contactNative ) };
adl::Launcher launcher( data->m_device, data->m_copyConstraintKernel );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(adl::Launcher::BufferInfo) );
launcher.setConst( constBuffer, cdata );
launcher.launch1D( nContacts, 64 );
adl::DeviceUtils::waitForCompletion( data->m_device );
}
bool compareGPU = false;
if (gpuBatchContacts)
{
BT_PROFILE("gpu batchContacts");
adl::Solver<adl::TYPE_CL>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx );
}
else
{
BT_PROFILE("cpu batchContacts2");
cpuSolverData->m_parallelSolveData = 0;//
ParallelSolveData* dataGPU = (ParallelSolveData*)m_internalData->m_solverDataGPU->m_parallelSolveData;
adl::Buffer<u32> numConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::Buffer<u32> offsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
{
BT_PROFILE("gpu->cpu read m_numConstraints");
dataGPU->m_numConstraints->read(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
dataGPU->m_offsets->read(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( data->m_device );
}
adl::Buffer<u32> gpunumConstraints(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::Buffer<u32> gpuoffsets(cpuSolverData->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
if (compareGPU)
{
adl::Buffer<Contact4> contactNativeCopy (data->m_device,contactNative->getSize());
contactNativeCopy.write(*contactNative,contactNative->getSize());
adl::DeviceUtils::waitForCompletion( data->m_device );
adl::Buffer<u32> tmpNumGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::Buffer<u32> tmpOffsetGPU(data->m_device,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
tmpNumGPU.write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
tmpOffsetGPU.write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( data->m_device );
BT_PROFILE("gpu batchContacts");
//adl::Solver<adl::TYPE_CL>::batchContacts( data, contactNative, nContacts, nativeSolveData->m_numConstraints, nativeSolveData->m_offsets, csCfg.m_staticIdx );
adl::Solver<adl::TYPE_CL>::batchContacts( data, &contactNativeCopy, nContacts, &tmpNumGPU, &tmpOffsetGPU, csCfg.m_staticIdx );
adl::DeviceUtils::waitForCompletion( data->m_device );
//compare now
tmpNumGPU.read(gpunumConstraints,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
tmpOffsetGPU.read(gpuoffsets,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( data->m_device );
}
CPUSolveData* dataCPU = (CPUSolveData*)cpuSolverData->m_parallelSolveData;
{
BT_PROFILE("cpu batchContacts2");
batchContacts2( cpuSolverData, contactNative, nContacts, &numConstraints, &offsets, csCfg.m_staticIdx );
}
if (compareGPU)
{
adl::DeviceUtils::waitForCompletion( data->m_device );
dataGPU->m_numConstraints->write(numConstraints.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
dataGPU->m_offsets->write(offsets.m_ptr,adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT);
adl::DeviceUtils::waitForCompletion( data->m_device );
for (int i=0;i<adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT;i++)
{
if (gpunumConstraints.m_ptr[i] != numConstraints.m_ptr[i])
{
printf("numConstraints error at %d, expected %d got %d\n",i,numConstraints.m_ptr[i],gpunumConstraints.m_ptr[i]);
}
if (gpuoffsets.m_ptr[i] != offsets.m_ptr[i])
{
printf("numConstraints error at %d, expected %d got %d\n",i,offsets.m_ptr[i],gpuoffsets.m_ptr[i]);
}
}
}
}
if (1)
{
BT_PROFILE("gpu convertToConstraints");
adl::Solver<adl::TYPE_CL>::convertToConstraints( data, bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, csCfg );
adl::DeviceUtils::waitForCompletion( data->m_device );
}
if (compareGPU)
{
adl::Buffer<Contact4> contactNativeCPU(cpuSolverData->m_device,contactNative->getSize());
contactNative->read(contactNativeCPU,nContacts);
adl::DeviceUtils::waitForCompletion( data->m_device );
for (int i=0;i<nContacts;i++)
{
//if (contactNativeCopyCPU.m_ptr[i].m_frictionCoeffCmp !=45874)// contactNativeCPU.m_ptr[i].m_batchIdx != contactNativeCopyCPU.m_ptr[i].m_batchIdx)
{
//if (.m_friction!=45874
//printf("not matching at %d, expected %d, got %d\n",i,contactNativeCPU.m_ptr[i].m_batchIdx,contactNativeCopyCPU.m_ptr[i].m_batchIdx);
}
}
}
}
}
} else
{
BT_PROFILE("GPU reorderConvertToConstraints");
adl::Solver<adl::TYPE_CL>::reorderConvertToConstraints(
m_internalData->m_solverDataGPU,
m_internalData->m_bodyBufferGPU,
m_internalData->m_inertiaBufferGPU,
m_internalData->m_pBufContactOutGPU,
m_internalData->m_contactCGPU,
m_internalData->m_frictionCGPU,
nContactOut,
csCfg );
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
}
if (1)
{
BT_PROFILE("GPU solveContactConstraint");
m_internalData->m_solverDataGPU->m_nIterations = 5;
adl::Solver<adl::TYPE_CL>::solveContactConstraint( m_internalData->m_solverDataGPU,
m_internalData->m_bodyBufferGPU,
m_internalData->m_inertiaBufferGPU,
m_internalData->m_contactCGPU,
0,
nContactOut );
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
}
if (cpuSolverData)
adl::Solver<adl::TYPE_HOST>::deallocate( cpuSolverData );
if (0)
{
BT_PROFILE("read body velocities back to CPU");
//read body updated linear/angular velocities back to CPU
m_internalData->m_bodyBufferGPU->read(
m_internalData->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
}
}
}

View File

@@ -0,0 +1,72 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#ifndef GPU_NARROWPHASE_SOLVER_H
#define GPU_NARROWPHASE_SOLVER_H
//#define MAX_CONVEX_BODIES_CL 8*1024
// Compile-time capacity limits for the OpenCL pipeline.
// Every replacement list that is an expression is wrapped in parentheses so the macro always
// expands as a single operand: without them `x / MAX_CONVEX_BODIES_CL` would parse as
// `x / 128 * 1024`, and `x % MAX_CONVEX_BODIES_CL` as `(x % 128) * 1024`.
// NOTE(review): the exact meaning of each limit (bodies, pairs per body, shapes) is inferred
// from the names; the code that consumes them is not part of this header.
#define MAX_CONVEX_BODIES_CL (128*1024)
#define MAX_PAIRS_PER_BODY_CL 16
#define MAX_CONVEX_SHAPES_CL 8192
// Derived limit: MAX_CONVEX_BODIES_CL times MAX_PAIRS_PER_BODY_CL.
#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL)
/*
#define MAX_CONVEX_BODIES_CL 1024
#define MAX_PAIRS_PER_BODY_CL 32
#define MAX_CONVEX_SHAPES_CL 8192
#define MAX_BROADPHASE_COLLISION_CL (MAX_CONVEX_BODIES_CL*MAX_PAIRS_PER_BODY_CL)
*/
// Forward declaration of adl::DeviceCL: this header only refers to the device through a
// pointer (the btGpuNarrowphaseAndSolver constructor argument), so the full type is not needed here.
namespace adl
{
struct DeviceCL;
};
// Forward declaration of the implementation data that btGpuNarrowphaseAndSolver keeps behind
// its m_internalData pointer; the struct is not defined in this header.
struct CustomDispatchData;
#include "../basic_initialize/btOpenCLInclude.h"
// Interface to the OpenCL narrowphase and constraint-solver stage of the rigid body pipeline
// (purpose inferred from the class and method names; the implementation is not in this header).
// All working state sits behind the opaque m_internalData pointer.
// NOTE(review): the class holds a raw pointer and declares a destructor but no copy
// constructor/assignment. If the destructor releases m_internalData (not visible here),
// copying an instance would be unsafe -- confirm and consider disabling copies.
class btGpuNarrowphaseAndSolver
{
protected:
// Opaque implementation data; CustomDispatchData is only forward-declared in this header.
CustomDispatchData* m_internalData;
// NOTE(review): meaning not visible in this header -- presumably the index of a shape
// registered internally; confirm against the .cpp.
int m_acceleratedCompanionShapeIndex;
// NOTE(review): presumably the body index of a static ground plane; confirm against the .cpp.
int m_planeBodyIndex;
public:
// Constructs the stage for the given adl OpenCL device.
// NOTE(review): whether the device pointer is stored or owned is not visible here.
btGpuNarrowphaseAndSolver(adl::DeviceCL* deviceCL);
// Virtual so that subclasses can be destroyed through a base pointer.
virtual ~btGpuNarrowphaseAndSolver(void);
// Registers a convex shape and returns an int.
// NOTE(review): presumably the returned value is the shape index that registerRigidBody
// expects as shapeIndex -- confirm.
int registerShape(class ConvexHeightField* convexShape);
// Registers a rigid body that uses a previously registered shape and returns an int
// (presumably the new body's index -- confirm).
// position / orientation: float arrays; assumes 3 position components and a 4-component
// quaternion -- TODO confirm the expected lengths in the implementation.
// writeToGpu: defaults to true; presumably false defers the upload so that many bodies can
// be sent at once with writeAllBodiesToGpu() -- confirm.
int registerRigidBody(int shapeIndex, float mass, const float* position, const float* orientation, bool writeToGpu = true);
// NOTE(review): presumably uploads every registered body to the device in one transfer; confirm.
void writeAllBodiesToGpu();
//btBroadphasePair* GetPair(btBroadphasePairArray& pairArray, int idxBodyA, int idxBodyB);
// Runs contact computation and the solver for the given broadphase pairs.
// broadphasePairs: OpenCL memory object holding the pairs; numBroadphasePairs: pair count.
// NOTE(review): the element layout of the pair buffer is not visible in this header.
// Virtual, so a subclass may replace the whole stage.
virtual void computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs);
// Accessors returning OpenCL memory objects (presumably the device-side rigid body and
// inertia buffers; ownership most likely stays with this object -- confirm before releasing).
cl_mem getBodiesGpu();
cl_mem getBodyInertiasGpu();
};
#endif //GPU_NARROWPHASE_SOLVER_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
-- Build-script aggregator: pulls in the per-vendor build scripts from the named subdirectories.
-- NOTE(review): presumably each directory defines its own OpenCL_gpu_rigidbody_pipeline_<vendor>
-- project guarded by a findOpenCL_<vendor>() check, as the AMD script does -- confirm for NVIDIA.
-- The Intel variant is currently disabled (commented out below).
include "AMD"
-- include "Intel"
include "NVIDIA"