diff --git a/Demos3/GpuDemos/GpuDemo.h b/Demos3/GpuDemos/GpuDemo.h index 0cfe70ef0..686eaba55 100644 --- a/Demos3/GpuDemos/GpuDemo.h +++ b/Demos3/GpuDemos/GpuDemo.h @@ -48,9 +48,9 @@ public: arraySizeZ(10), #else - arraySizeX(30), - arraySizeY(30), - arraySizeZ(30), + arraySizeX(1), + arraySizeY(10), + arraySizeZ(1), #endif m_useConcaveMesh(false), gapX(16.3), diff --git a/Demos3/bullet2/BasicDemo/main.cpp b/Demos3/bullet2/BasicDemo/main.cpp index c9265f958..3c9f77f45 100644 --- a/Demos3/bullet2/BasicDemo/main.cpp +++ b/Demos3/bullet2/BasicDemo/main.cpp @@ -64,7 +64,6 @@ public: class BasicDemo : public Bullet2RigidBodyDemo { - SimpleOpenGL3App* m_glApp; btRigidBody* m_pickedBody; btTypedConstraint* m_pickedConstraint; @@ -75,6 +74,9 @@ class BasicDemo : public Bullet2RigidBodyDemo public: + SimpleOpenGL3App* m_glApp; + + BasicDemo(SimpleOpenGL3App* app) :m_glApp(app), m_pickedBody(0), @@ -375,6 +377,28 @@ static void MyMouseButtonCallback(int button, int state, float x, float y) b3DefaultMouseButtonCallback(button,state,x,y); } +void MyKeyboardCallback(int key, int state) +{ + + if (key==B3G_ESCAPE && sDemo->m_glApp->m_window) + { + sDemo->m_glApp->m_window->setRequestExit(); + } + if (key=='w') + { + glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); + } + if (key=='s') + { + glPolygonMode( GL_FRONT_AND_BACK, GL_FILL); + } + +// if (sDemo) + // sDemo->keyboardCallback(key,state); + + b3DefaultKeyboardCallback(key,state); +} + int main(int argc, char* argv[]) { @@ -394,6 +418,7 @@ int main(int argc, char* argv[]) app->m_window->setMouseMoveCallback(MyMouseMoveCallback); app->m_window->setMouseButtonCallback(MyMouseButtonCallback); + app->m_window->setKeyboardCallback(MyKeyboardCallback); BasicDemo* demo = new BasicDemo(app); demo->initPhysics(); diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h new file mode 100644 index 000000000..8788ccbb4 --- /dev/null +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h @@ -0,0 +1,20 @@ + +#ifndef B3_BVH_SUBTREE_INFO_DATA_H +#define B3_BVH_SUBTREE_INFO_DATA_H + +typedef struct b3BvhSubtreeInfoData b3BvhSubtreeInfoData_t; + +struct b3BvhSubtreeInfoData +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes, points to the root of the subtree + int m_rootNodeIndex; + //4 bytes + int m_subtreeSize; + int m_padding[3]; +}; + +#endif //B3_BVH_SUBTREE_INFO_DATA_H + diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h new file mode 100644 index 000000000..2618da24b --- /dev/null +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h @@ -0,0 +1,126 @@ + + +#include "Bullet3Common/shared/b3Int4.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" +#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" + + + +// work-in-progress +void b3BvhTraversal( __global const b3Int4* pairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global b3Aabb* aabbs, + __global b3Int4* concavePairsOut, + __global volatile int* numConcavePairsOut, + __global const b3BvhSubtreeInfo* subtreeHeadersRoot, + __global const b3QuantizedBvhNode* quantizedNodesRoot, + __global const b3BvhInfo* bvhInfos, + int numPairs, + int maxNumConcavePairsCapacity, + int id) +{ + + int bodyIndexA = pairs[id].x; + int bodyIndexB = pairs[id].y; + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + //once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } + + if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) + return; + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + + if (shapeTypeB!=SHAPE_CONVEX_HULL && + shapeTypeB!=SHAPE_SPHERE && + shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS + ) + return; + + b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes]; + + b3Float4 bvhAabbMin = bvhInfo.m_aabbMin; + b3Float4 bvhAabbMax = bvhInfo.m_aabbMax; + b3Float4 bvhQuantization = bvhInfo.m_quantization; + int numSubtreeHeaders = bvhInfo.m_numSubTrees; + __global const b3BvhSubtreeInfoData* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; + __global const b3QuantizedBvhNodeData* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; + + + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + b3QuantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_minVec,false,bvhAabbMin, bvhAabbMax,bvhQuantization); + b3QuantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_maxVec,true ,bvhAabbMin, bvhAabbMax,bvhQuantization); + + for (int i=0;im_numVertices; + + const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),*dir); + float offset = b3Dot(pos,*dir); + for(int i=0;im_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + + +inline bool b3TestSepAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, + b3Float4ConstArg posA,b3QuatConstArg ornA, + b3Float4ConstArg posB,b3QuatConstArg ornB, + b3Float4* sep_axis, const b3Float4* verticesA, __global const b3Float4* verticesB,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + b3Project(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); + b3Project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); + + if(Max0m_numFaces; + // Test normals from hullA + for(int i=0;im_faceOffset+i].m_plane; + b3Float4 faceANormalWS = b3QuatRotate(ornA,normal); + if (b3Dot(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + curPlaneTests++; + float d; + if(!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((b3Dot(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + + + +bool b3FindSeparatingAxisEdgeEdge( const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, + b3Float4ConstArg posA1, + b3QuatConstArg ornA, + b3Float4ConstArg posB1, + b3QuatConstArg ornB, + b3Float4ConstArg DeltaC2, + const b3Float4* verticesA, + const b3Float4* uniqueEdgesA, + const b3GpuFace* facesA, + const int* indicesA, + __global const b3Float4* verticesB, + __global const b3Float4* uniqueEdgesB, + __global const b3GpuFace* facesB, + __global const int* indicesB, + b3Float4* sep, + float* dmin) +{ + + + b3Float4 posA = posA1; + posA.w = 0.f; + b3Float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0m_numUniqueEdges;e0++) + { + const b3Float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; + b3Float4 edge0World = b3QuatRotate(ornA,edge0); + + for(int e1=0;e1m_numUniqueEdges;e1++) + { + const b3Float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; + b3Float4 edge1World = b3QuatRotate(ornB,edge1); + + + b3Float4 crossje = b3Cross(edge0World,edge1World); + + curEdgeEdge++; + if(!b3IsAlmostZero(crossje)) + { + crossje = b3Normalized(crossje); + if (b3Dot(DeltaC2,crossje)<0) + crossje *= -1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + b3Project(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); + b3Project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); + + if(Max00.0f) + { + *sep = -(*sep); + } + return true; +} + +// work-in-progress +__kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3Float4* vertices, + __global const b3Float4* uniqueEdges, + __global const b3GpuFace* faces, + __global const int* indices, + __global const b3GpuChildShape* gpuChildShapes, + __global b3Aabb* aabbs, + __global b3Float4* concaveSeparatingNormalsOut, + int numConcavePairs, + int pairIdx + ) +{ + int i = pairIdx; +/* int i = get_global_id(0); + if (i>=numConcavePairs) + return; + int pairIdx = i; + */ + + int bodyIndexA = concavePairs[i].x; + int bodyIndexB = concavePairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& + collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + concavePairs[pairIdx].w = -1; + return; + } + + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + int numActualConcaveConvexTests = 0; + + int f = concavePairs[i].z; + + bool overlap = false; + + b3ConvexPolyhedronData convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + b3Float4 localCenter = b3MakeFloat4(0.f,0.f,0.f,0.f); + + b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + b3Float4 triMinAabb, triMaxAabb; + b3Aabb triAabb; + triAabb.m_minVec = b3MakeFloat4(1e30f,1e30f,1e30f,0.f); + triAabb.m_maxVec = b3MakeFloat4(-1e30f,-1e30f,-1e30f,0.f); + + b3Float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = indices[face.m_indexOffset+i]; + b3Float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] = vert; + localCenter += vert; + + triAabb.m_minVec = b3MinFloat4(triAabb.m_minVec,vert); + triAabb.m_maxVec = b3MaxFloat4(triAabb.m_maxVec,vert); + + } + + overlap = true; + overlap = (triAabb.m_minVec.x > aabbs[bodyIndexB].m_maxVec.x || triAabb.m_maxVec.x < aabbs[bodyIndexB].m_minVec.x) ? false : overlap; + overlap = (triAabb.m_minVec.z > aabbs[bodyIndexB].m_maxVec.z || triAabb.m_maxVec.z < aabbs[bodyIndexB].m_minVec.z) ? false : overlap; + overlap = (triAabb.m_minVec.y > aabbs[bodyIndexB].m_maxVec.y || triAabb.m_maxVec.y < aabbs[bodyIndexB].m_minVec.y) ? false : overlap; + + if (overlap) + { + float dmin = FLT_MAX; + int hasSeparatingAxis=5; + b3Float4 sepAxis=b3MakeFloat4(1,2,3,4); + + int localCC=0; + numActualConcaveConvexTests++; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + b3Float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + b3Float4 normal = b3MakeFloat4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + b3GpuFace facesA[B3_TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = b3Dot(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;im_escapeIndexOrTriangleIndex&~(y)); +} + +inline int b3IsLeaf(const b3QuantizedBvhNodeData* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; +} + +inline int b3GetEscapeIndex(const b3QuantizedBvhNodeData* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + +inline void b3QuantizeWithClamp(unsigned short* out, b3Float4ConstArg point2,int isMax, b3Float4ConstArg bvhAabbMin, b3Float4ConstArg bvhAabbMax, b3Float4ConstArg bvhQuantization) +{ + b3Float4 clampedPoint = b3MaxFloat4(point2,bvhAabbMin); + clampedPoint = b3MinFloat4 (clampedPoint, bvhAabbMax); + + b3Float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; + if (isMax) + { + out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1)); + out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1)); + out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1)); + } else + { + out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe)); + out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe)); + out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe)); + } + +} + + +inline int b3TestQuantizedAabbAgainstQuantizedAabbSlow( + const unsigned short int* aabbMin1, + const unsigned short int* aabbMax1, + const unsigned short int* aabbMin2, + const unsigned short int* aabbMax2) +{ + //int overlap = 1; + if (aabbMin1[0] > aabbMax2[0]) + return 0; + if (aabbMax1[0] < aabbMin2[0]) + return 0; + if (aabbMin1[1] > aabbMax2[1]) + return 0; + if (aabbMax1[1] < aabbMin2[1]) + return 0; + if (aabbMin1[2] > aabbMax2[2]) + return 0; + if (aabbMax1[2] < aabbMin2[2]) + return 0; + return 1; + //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; + //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; + //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap; + //return overlap; +} + + +#endif //B3_QUANTIZED_BVH_NODE_H diff --git a/src/Bullet3Common/shared/b3Float4.h b/src/Bullet3Common/shared/b3Float4.h index a8159e51f..fd92b641c 100644 --- a/src/Bullet3Common/shared/b3Float4.h +++ b/src/Bullet3Common/shared/b3Float4.h @@ -10,12 +10,30 @@ #define b3Dot3F4 b3Dot #define b3Cross3 b3Cross #define b3MakeFloat4 b3MakeVector3 + inline b3Vector3 b3Normalized(const b3Vector3& vec) + { + return vec.normalized(); + } inline b3Float4 b3FastNormalized3(b3Float4ConstArg v) { return v.normalized(); } + inline b3Float4 b3MaxFloat4 (const b3Float4& a, const b3Float4& b) + { + b3Float4 tmp = a; + tmp.setMax(b); + return tmp; + } + inline b3Float4 b3MinFloat4 (const b3Float4& a, const b3Float4& b) + { + b3Float4 tmp = a; + tmp.setMin(b); + return tmp; + } + + #else typedef float4 b3Float4; @@ -33,6 +51,11 @@ float4 b1 = b3MakeFloat4(v1.xyz,0.f); return cross(a1, b1); } + #define b3MinFloat4 min + #define b3MaxFloat4 max + + #define b3Normalized(a) normalize(a) + #endif diff --git a/src/Bullet3Common/shared/b3Int4.h b/src/Bullet3Common/shared/b3Int4.h index 41e049cb9..aa02d6bee 100644 --- a/src/Bullet3Common/shared/b3Int4.h +++ b/src/Bullet3Common/shared/b3Int4.h @@ -1,8 +1,11 @@ #ifndef B3_INT4_H #define B3_INT4_H +#ifdef __cplusplus + #include "Bullet3Common/b3Scalar.h" + B3_ATTRIBUTE_ALIGNED16(struct) b3UnsignedInt4 { B3_DECLARE_ALIGNED_ALLOCATOR(); @@ -51,5 +54,15 @@ B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y return v; } +#else + + +#define b3UnsignedInt4 uint4 +#define b3Int4 int4 +#define b3MakeInt4 (int4) +#define b3MakeUnsignedInt4 (uint4) + + +#endif //__cplusplus #endif //B3_INT4_H diff --git a/src/Bullet3Common/shared/b3PlatformDefinitions.h b/src/Bullet3Common/shared/b3PlatformDefinitions.h index 51af689f8..01243ed1f 100644 --- a/src/Bullet3Common/shared/b3PlatformDefinitions.h +++ b/src/Bullet3Common/shared/b3PlatformDefinitions.h @@ -8,9 +8,19 @@ struct MyTest #ifdef __cplusplus #define b3AtomicInc(a) ((*a)++) + +inline int b3AtomicAdd (volatile int *p, int val) +{ + int oldValue = *p; + int newValue = oldValue+val; + *p = newValue; + return oldValue; +} + #define __global #else #define b3AtomicInc atomic_inc +#define b3AtomicAdd atomic_add #define b3Fabs fabs #define b3Sqrt native_sqrt #define b3Sin native_sin diff --git a/src/Bullet3Common/shared/b3Quat.h b/src/Bullet3Common/shared/b3Quat.h index 8f2fe8301..81b688108 100644 --- a/src/Bullet3Common/shared/b3Quat.h +++ b/src/Bullet3Common/shared/b3Quat.h @@ -10,6 +10,10 @@ #define b3Quat b3Quaternion #define b3QuatConstArg const b3Quaternion& + inline b3Quat b3QuatInverse(b3QuatConstArg orn) + { + return orn.inverse(); + } inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) { diff --git a/src/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp b/src/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp index 598e8ce32..53846a6a4 100644 --- a/src/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp +++ b/src/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp @@ -6,7 +6,7 @@ #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" #include "Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h" #include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3CollidableData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" #include "Bullet3Common/b3Vector3.h" #include "Bullet3Dynamics/shared/b3ContactConstraint4.h" #include "Bullet3Dynamics/shared/b3Inertia.h" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index f08f284b4..0fdc65fe3 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -13,6 +13,10 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ +bool findSeparatingAxisOnGpu = true; + +bool bvhTraversalKernelGPU = true; +bool findConcaveSeparatingAxisKernelGPU = false;//true; ///This file was written by Erwin Coumans ///Separating axis rest based on work from Pierre Terdiman, see @@ -20,7 +24,7 @@ subject to the following restrictions: //#define B3_DEBUG_SAT_FACE -//#define CHECK_ON_HOST +#define CHECK_ON_HOST #ifdef CHECK_ON_HOST //#define PERSISTENT_CONTACTS_HOST @@ -65,6 +69,11 @@ typedef b3AlignedObjectArray b3VertexArray; #endif +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" + + + #define dot3F4 b3Dot GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) @@ -1197,7 +1206,7 @@ int clipHullHullSingle( int numPoints = 0; { - B3_PROFILE("extractManifold"); + // B3_PROFILE("extractManifold"); numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); } @@ -2723,6 +2732,9 @@ int computeContactConvexConvex2( } + + + void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* pairs, int nPairs, const b3OpenCLArray* bodyBuf, b3OpenCLArray* contactOut, int& nContacts, @@ -2898,15 +2910,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, - // oldHostContacts); + int contactIndex = 0;//computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); if (contactIndex>=0) { +// printf("convex convex contactIndex = %d\n",contactIndex); hostPairs[i].z = contactIndex; } // printf("plane-convex\n"); @@ -2932,7 +2942,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* contactOut->resize(0); } - return; + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + #else { @@ -2996,7 +3007,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* int numCompoundPairs = 0; - bool findSeparatingAxisOnGpu = true;//false; int numConcavePairs =0; { @@ -3038,65 +3048,172 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* if (treeNodesGPU->size() && treeNodesGPU->size()) { - B3_PROFILE("m_bvhTraversalKernel"); + if (bvhTraversalKernelGPU) + { + + B3_PROFILE("m_bvhTraversalKernel"); - numConcavePairs = m_numConcavePairsOut.at(0); + numConcavePairs = m_numConcavePairsOut.at(0); - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); - launcher.setBuffer( pairs->getBufferCL()); - launcher.setBuffer( bodyBuf->getBufferCL()); - launcher.setBuffer( gpuCollidables.getBufferCL()); - launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer( subTreesGPU->getBufferCL()); - launcher.setBuffer( treeNodesGPU->getBufferCL()); - launcher.setBuffer( bvhInfo->getBufferCL()); + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer( subTreesGPU->getBufferCL()); + launcher.setBuffer( treeNodesGPU->getBufferCL()); + launcher.setBuffer( bvhInfo->getBufferCL()); - launcher.setConst( nPairs ); - launcher.setConst( maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + numConcavePairs = m_numConcavePairsOut.at(0); + } else + { + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + //int maxTriConvexPairCapacity, + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + + int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... + + volatile int hostNumConcavePairsOut=0; + + // + for (int i=0;i maxTriConvexPairCapacity) { static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Rxceeded %d times the maxTriConvexPairCapacity (found %d but max is %d)\n", exceeded_maxTriConvexPairCapacity_count++, - numConcavePairs,maxTriConvexPairCapacity); + b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", + numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); numConcavePairs = maxTriConvexPairCapacity; } triangleConvexPairsOut.resize(numConcavePairs); if (numConcavePairs) { - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) - }; + if (findConcaveSeparatingAxisKernelGPU) + { + //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) + }; - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numConcavePairs ); + launcher.setConst( numConcavePairs ); - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } else + { + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + //numConcavePairs + //b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + //b3BufferInfoCL( bodyBuf->getBufferCL(),true), + //b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + // b3BufferInfoCL( convexData.getBufferCL(),true), + //b3BufferInfoCL( gpuVertices.getBufferCL(),true), + //b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + //b3BufferInfoCL( gpuFaces.getBufferCL(),true), + //b3BufferInfoCL( gpuIndices.getBufferCL(),true), + //b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + //b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + //b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + } // b3AlignedObjectArray cpuCompoundSepNormals; // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); // b3AlignedObjectArray cpuConcavePairs; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h index 2292ee834..629a0fce7 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h @@ -41,6 +41,9 @@ class b3Serializer; #define b3QuantizedBvhDataName "b3QuantizedBvhFloatData" #endif +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" + //http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp @@ -55,16 +58,10 @@ class b3Serializer; ///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. ///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -B3_ATTRIBUTE_ALIGNED16 (struct) b3QuantizedBvhNode +B3_ATTRIBUTE_ALIGNED16 (struct) b3QuantizedBvhNode : public b3QuantizedBvhNodeData { B3_DECLARE_ALIGNED_ALLOCATOR(); - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; - bool isLeafNode() const { //skipindex is negative (internal node), triangleindex >=0 (leafnode) @@ -116,20 +113,11 @@ B3_ATTRIBUTE_ALIGNED16 (struct) b3OptimizedBvhNode ///b3BvhSubtreeInfo provides info to gather a subtree of limited size -B3_ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo +B3_ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo : public b3BvhSubtreeInfoData { public: B3_DECLARE_ALIGNED_ALLOCATOR(); - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; - b3BvhSubtreeInfo() { //memset(&m_padding[0], 0, sizeof(m_padding)); @@ -501,14 +489,6 @@ private: ; -struct b3BvhSubtreeInfoData -{ - int m_rootNodeIndex; - int m_subtreeSize; - unsigned short m_quantizedAabbMin[3]; - unsigned short m_quantizedAabbMax[3]; -}; - struct b3OptimizedBvhNodeFloatData { b3Vector3FloatData m_aabbMinOrg; @@ -530,12 +510,6 @@ struct b3OptimizedBvhNodeDoubleData }; -struct b3QuantizedBvhNodeData -{ - unsigned short m_quantizedAabbMin[3]; - unsigned short m_quantizedAabbMax[3]; - int m_escapeIndexOrTriangleIndex; -}; struct b3QuantizedBvhFloatData { diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl index adc2b5d8c..faa413441 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl @@ -34,33 +34,6 @@ typedef struct } b3BvhInfo; -/* - bool isLeafNode() const - { - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (m_escapeIndexOrTriangleIndex >= 0); - } - int getEscapeIndex() const - { - btAssert(!isLeafNode()); - return -m_escapeIndexOrTriangleIndex; - } - int getTriangleIndex() const - { - btAssert(isLeafNode()); - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (m_escapeIndexOrTriangleIndex&~(y)); - } - int getPartId() const - { - btAssert(isLeafNode()); - // Get only the highest bits where the part index is stored - return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); - } -*/ - int getTriangleIndex(const btQuantizedBvhNode* rootNode) { unsigned int x=0; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl index df6938dd8..c9d00b5ad 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl @@ -401,7 +401,7 @@ bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const C float4* sep, float* dmin) { - int i = get_global_id(0); + float4 posA = posA1; posA.w = 0.f; @@ -452,7 +452,7 @@ bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const float4* sep, float* dmin) { - int i = get_global_id(0); + float4 posA = posA1; posA.w = 0.f; @@ -505,7 +505,7 @@ bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global float4* sep, float* dmin) { - int i = get_global_id(0); + float4 posA = posA1; posA.w = 0.f; @@ -607,7 +607,7 @@ bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global cons float4* sep, float* dmin) { - int i = get_global_id(0); + float4 posA = posA1; posA.w = 0.f; @@ -666,7 +666,7 @@ bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __glo float4* sep, float* dmin) { - int i = get_global_id(0); + float4 posA = posA1; posA.w = 0.f; diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp index ae53d25f7..5306a5ad9 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp @@ -27,7 +27,7 @@ b3LauncherCL::~b3LauncherCL() if (gDebugLauncherCL) { static int counter = 0; - printf("[%d] Finished launching OpenCL kernel %s [%d]\n", counter++,m_name); + printf("[%d] Finished launching OpenCL kernel %s\n", counter++,m_name); } }