From 3fe969c4ee17f59d8c8bcc2201fd3637bb052e92 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Fri, 13 Dec 2013 07:52:41 -0800 Subject: [PATCH] b3Solver -> pass pointer to source instead of 0 (was left over from a debugging session), thanks to David for the report Break up clipHullHullConcaveConvexKernel into multiple stages, so it might 'fit' in Apple's OpenCL implementation Implemented bvhTraversalKernel and findConcaveSeparatingAxis on CPU (debugging, possible future CPU version) --- Demos3/GpuDemos/main_opengl3core.cpp | 16 +- .../shared/b3FindConcaveSatAxis.h | 127 +++- .../b3ConvexHullContact.cpp | 618 +++++++++++------- .../b3ConvexHullContact.h | 1 + .../kernels/bvhTraversal.h | 26 - .../kernels/primitiveContacts.h | 4 + .../NarrowphaseCollision/kernels/sat.cl | 127 ++++ .../kernels/satClipHullContacts.cl | 8 +- .../kernels/satClipHullContacts.h | 12 +- .../NarrowphaseCollision/kernels/satKernels.h | 126 +++- src/Bullet3OpenCL/RigidBody/b3Solver.cpp | 2 +- .../RigidBody/kernels/batchingKernels.h | 4 + .../RigidBody/kernels/batchingKernelsNew.h | 4 + .../RigidBody/kernels/integrateKernel.h | 4 + .../RigidBody/kernels/solverSetup.h | 4 + .../RigidBody/kernels/solverSetup2.h | 4 + .../RigidBody/kernels/solverUtils.h | 4 + .../RigidBody/kernels/updateAabbsKernel.h | 4 + 18 files changed, 800 insertions(+), 295 deletions(-) diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index 5b4c3b2a7..303a77c84 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -101,7 +101,7 @@ enum }; b3AlignedObjectArray demoNames; -int selectedDemo = 1; +int selectedDemo = 0; GpuDemo::CreateFunc* allDemos[]= { //ConcaveCompound2Scene::MyCreateFunc, @@ -247,9 +247,21 @@ static void MyMouseButtonCallback(int button, int state, float x, float y) } extern bool useShadowMap; - +static bool wireframe=false; void MyKeyboardCallback(int key, int state) { + if (key=='w' && state) + { + wireframe=!wireframe; + if (wireframe) + { + glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); + } else + { + glPolygonMode( GL_FRONT_AND_BACK, GL_FILL ); + } + } + if (key=='s' && state) { useShadowMap=!useShadowMap; diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h index 88301aa03..bc7bdaa85 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h @@ -203,7 +203,98 @@ bool b3FindSeparatingAxisEdgeEdge( const b3ConvexPolyhedronData* hullA, __global return true; } -// work-in-progress + + +inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + b3Float4ConstArg posA, b3QuatConstArg ornA,b3Float4ConstArg posB, b3QuatConstArg ornB, + __global b3Float4* worldVertsA1, + __global b3Float4* worldNormalsA1, + __global b3Float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + __global const b3Float4* verticesA, + __global const b3GpuFace_t* facesA, + __global const int* indicesA, + __global const b3Float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + + __global b3Int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;facem_numFaces;face++) + { + const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal); + float d = b3Dot(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = b3TransformPoint(b,posB,ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const b3Float4 Normal = b3MakeFloat4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal); + + float d = b3Dot(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = b3TransformPoint(a, posA,ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + + __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs, __global const b3RigidBodyData* rigidBodies, __global const b3Collidable* collidables, @@ -215,6 +306,12 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs __global const b3GpuChildShape* gpuChildShapes, __global b3Aabb* aabbs, __global b3Float4* concaveSeparatingNormalsOut, + __global b3Int4* clippingFacesOut, + __global b3Vector3* worldVertsA1Out, + __global b3Vector3* worldNormalsA1Out, + __global b3Vector3* worldVertsB1Out, + __global int* hasSeparatingNormals, + int vertexFaceCapacity, int numConcavePairs, int pairIdx ) @@ -242,7 +339,7 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs return; } - + hasSeparatingNormals[i] = 0; int numFacesA = convexShapes[shapeIndexA].m_numFaces; int numActualConcaveConvexTests = 0; @@ -454,8 +551,34 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs if (hasSeparatingAxis) { + hasSeparatingNormals[i]=1; sepAxis.w = dmin; concaveSeparatingNormalsOut[pairIdx]=sepAxis; + + //now compute clipping faces A and B, and world-space clipping vertices A and B... + + float minDist = -1e30f; + float maxDist = 0.02f; + + b3FindClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1Out, + worldNormalsA1Out, + worldVertsB1Out, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + } else { //mark this pair as in-active diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 0fdc65fe3..9b7a109ff 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -16,7 +16,7 @@ subject to the following restrictions: bool findSeparatingAxisOnGpu = true; bool bvhTraversalKernelGPU = true; -bool findConcaveSeparatingAxisKernelGPU = false;//true; +bool findConcaveSeparatingAxisKernelGPU = true; ///This file was written by Erwin Coumans ///Separating axis rest based on work from Pierre Terdiman, see @@ -24,7 +24,7 @@ bool findConcaveSeparatingAxisKernelGPU = false;//true; //#define B3_DEBUG_SAT_FACE -#define CHECK_ON_HOST +//#define CHECK_ON_HOST #ifdef CHECK_ON_HOST //#define PERSISTENT_CONTACTS_HOST @@ -85,6 +85,7 @@ m_totalContactsOut(m_context, m_queue), m_sepNormals(m_context, m_queue), m_hasSeparatingNormals(m_context, m_queue), m_concaveSepNormals(m_context, m_queue), +m_concaveHasSeparatingNormals(m_context,m_queue), m_numConcavePairsOut(m_context, m_queue), m_gpuCompoundPairs(m_context, m_queue), m_gpuCompoundSepNormals(m_context, m_queue), @@ -2990,7 +2991,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* int concaveCapacity=maxTriConvexPairCapacity; m_concaveSepNormals.resize(concaveCapacity); - + m_concaveHasSeparatingNormals.resize(concaveCapacity); m_numConcavePairsOut.resize(0); m_numConcavePairsOut.push_back(0); @@ -3039,191 +3040,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* clFinish(m_queue); } - //now perform the tree query on GPU - { - - - - { - - if (treeNodesGPU->size() && treeNodesGPU->size()) - { - if (bvhTraversalKernelGPU) - { - - B3_PROFILE("m_bvhTraversalKernel"); - - - numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); - launcher.setBuffer( pairs->getBufferCL()); - launcher.setBuffer( bodyBuf->getBufferCL()); - launcher.setBuffer( gpuCollidables.getBufferCL()); - launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer( subTreesGPU->getBufferCL()); - launcher.setBuffer( treeNodesGPU->getBufferCL()); - launcher.setBuffer( bvhInfo->getBufferCL()); - - launcher.setConst( nPairs ); - launcher.setConst( maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - } else - { - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - //int maxTriConvexPairCapacity, - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - - int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut - - b3AlignedObjectArray treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... - - volatile int hostNumConcavePairsOut=0; - - // - for (int i=0;i maxTriConvexPairCapacity) - { - static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); - numConcavePairs = maxTriConvexPairCapacity; - } - triangleConvexPairsOut.resize(numConcavePairs); - if (numConcavePairs) - { - if (findConcaveSeparatingAxisKernelGPU) - { - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } else - { - - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - - //numConcavePairs - //b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - //b3BufferInfoCL( bodyBuf->getBufferCL(),true), - //b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - // b3BufferInfoCL( convexData.getBufferCL(),true), - //b3BufferInfoCL( gpuVertices.getBufferCL(),true), - //b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - //b3BufferInfoCL( gpuFaces.getBufferCL(),true), - //b3BufferInfoCL( gpuIndices.getBufferCL(),true), - //b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - //b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - //b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) - - b3AlignedObjectArray concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - } -// b3AlignedObjectArray cpuCompoundSepNormals; - // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); - // b3AlignedObjectArray cpuConcavePairs; - // triangleConvexPairsOut.copyToHost(cpuConcavePairs); - - - } - } - } - } + numCompoundPairs = m_numCompoundPairsOut.at(0); bool useGpuFindCompoundPairs=true; @@ -3442,8 +3259,252 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* } + int vertexFaceCapacity = 64; + + { + //now perform the tree query on GPU + + + + + if (treeNodesGPU->size() && treeNodesGPU->size()) + { + if (bvhTraversalKernelGPU) + { + + B3_PROFILE("m_bvhTraversalKernel"); + + + numConcavePairs = m_numConcavePairsOut.at(0); + + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer( subTreesGPU->getBufferCL()); + launcher.setBuffer( treeNodesGPU->getBufferCL()); + launcher.setBuffer( bvhInfo->getBufferCL()); + + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + numConcavePairs = m_numConcavePairsOut.at(0); + } else + { + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + //int maxTriConvexPairCapacity, + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + + int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... + + volatile int hostNumConcavePairsOut=0; + + // + for (int i=0;i maxTriConvexPairCapacity) + { + static int exceeded_maxTriConvexPairCapacity_count = 0; + b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", + numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); + numConcavePairs = maxTriConvexPairCapacity; + } + triangleConvexPairsOut.resize(numConcavePairs); + + if (numConcavePairs) + { + + + + + clippingFacesOutGPU.resize(numConcavePairs); + worldNormalsAGPU.resize(numConcavePairs); + worldVertsA1GPU.resize(vertexFaceCapacity*numConcavePairs); + worldVertsB1GPU.resize(vertexFaceCapacity*numConcavePairs); + + + if (findConcaveSeparatingAxisKernelGPU) + { + + /* + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + */ + + //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } else + { + + b3AlignedObjectArray clippingFacesOutCPU; + b3AlignedObjectArray worldVertsA1CPU; + b3AlignedObjectArray worldNormalsACPU; + b3AlignedObjectArray worldVertsB1CPU; + b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; + + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); + + b3GpuChildShape* childShapePointerCPU = 0; + if (cpuChildShapes.size()) + childShapePointerCPU = &cpuChildShapes.at(0); + + clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + worldNormalsACPU.resize(worldNormalsAGPU.size()); + worldVertsB1CPU.resize(worldVertsB1GPU.size()); + + for (int i=0;i cpuCompoundSepNormals; +// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); +// b3AlignedObjectArray cpuConcavePairs; +// triangleConvexPairsOut.copyToHost(cpuConcavePairs); + + + } + } + + + } if (numConcavePairs) { @@ -3494,45 +3555,130 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* if (contactClippingOnGpu) { - //B3_PROFILE("clipHullHullKernel"); - - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - //concave-convex contact clipping + //B3_PROFILE("clipHullHullKernel"); + bool breakupConcaveConvexKernel = false; + +#ifdef __APPLE__ + //actually, some Apple OpenCL platform/device combinations work fine... + breakupConcaveConvexKernel = true; +#endif + //concave-convex contact clipping if (numConcavePairs) { // printf("numConcavePairs = %d\n", numConcavePairs); // nContacts = m_totalContactsOut.at(0); // printf("nContacts before = %d\n", nContacts); - B3_PROFILE("clipHullHullConcaveConvexKernel"); - nContacts = m_totalContactsOut.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numConcavePairs ); - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - b3AlignedObjectArray cpuContacts; - contactOut->copyToHost(cpuContacts); + if (breakupConcaveConvexKernel) + { + + worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); + + + //clipFacesAndFindContacts + bool clipFacesAndFindContactsCPU = false; + if (clipFacesAndFindContactsCPU) + { + } else + { + + if (1) + { + + + B3_PROFILE("clipFacesAndFindContacts"); + //nContacts = m_totalContactsOut.at(0); + //int h = m_hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + + launcher.setConst( numConcavePairs ); + int debugMode = 0; + launcher.setConst( debugMode); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + //int bla = m_totalContactsOut.at(0); + } + } + //contactReduction + { + contactOut->reserve(nContacts+numConcavePairs); + + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + + } + //re-use? + + + } else + { + B3_PROFILE("clipHullHullConcaveConvexKernel"); + nContacts = m_totalContactsOut.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + b3AlignedObjectArray cpuContacts; + contactOut->copyToHost(cpuContacts); + } // printf("nContacts after = %d\n", nContacts); } @@ -3553,24 +3699,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* - int vertexFaceCapacity = 64; - worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - - clippingFacesOutGPU.resize(nPairs); - - worldNormalsAGPU.resize(nPairs); - - worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); - - worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); - - { B3_PROFILE("findClippingFacesKernel"); @@ -3608,13 +3742,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* ///clip face B against face A, reduce contacts and append them to a global contact array if (1) { - B3_PROFILE("clipFacesAndContactReductionKernel"); + B3_PROFILE("clipFacesAndFindContacts"); //nContacts = m_totalContactsOut.at(0); //int h = m_hasSeparatingNormals.at(0); //int4 p = clippingFacesOutGPU.at(0); b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), @@ -3633,23 +3765,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* launcher.setConst( nPairs ); int debugMode = 0; launcher.setConst( debugMode); - - /* - int serializationBytes = launcher.getSerializationBufferSize(); - unsigned char* buf = (unsigned char*)malloc(serializationBytes+1); - int actualWritten = launcher.serializeArguments(buf,serializationBytes+1); - FILE* f = fopen("clipFacesAndContactReductionKernel.bin","wb"); - fwrite(buf,actualWritten,1,f); - fclose(f); - free(buf); - printf("serializationBytes=%d, actualWritten=%d\n",serializationBytes,actualWritten); - */ - - int num = nPairs; - - launcher.launch1D( num); + int num = nPairs; + launcher.launch1D( num); clFinish(m_queue); - { + + { // nContacts = m_totalContactsOut.at(0); // printf("nContacts = %d\n",nContacts); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index 0e8a7d3d4..9d11b50ab 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -52,6 +52,7 @@ struct GpuSatCollision b3OpenCLArray m_sepNormals; b3OpenCLArray m_hasSeparatingNormals; b3OpenCLArray m_concaveSepNormals; + b3OpenCLArray m_concaveHasSeparatingNormals; b3OpenCLArray m_numConcavePairsOut; b3OpenCLArray m_gpuCompoundPairs; b3OpenCLArray m_gpuCompoundSepNormals; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h index d51084e41..4b3b49eae 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -29,32 +29,6 @@ static const char* bvhTraversalKernelCL= \ " int m_nodeOffset;\n" " int m_subTreeOffset;\n" "} b3BvhInfo;\n" -"/*\n" -" bool isLeafNode() const\n" -" {\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (m_escapeIndexOrTriangleIndex >= 0);\n" -" }\n" -" int getEscapeIndex() const\n" -" {\n" -" btAssert(!isLeafNode());\n" -" return -m_escapeIndexOrTriangleIndex;\n" -" }\n" -" int getTriangleIndex() const\n" -" {\n" -" btAssert(isLeafNode());\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (m_escapeIndexOrTriangleIndex&~(y));\n" -" }\n" -" int getPartId() const\n" -" {\n" -" btAssert(isLeafNode());\n" -" // Get only the highest bits where the part index is stored\n" -" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n" -" }\n" -"*/\n" "int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" "{\n" " unsigned int x=0;\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index 03f0480d1..a282d1eff 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -13,6 +13,7 @@ static const char* primitiveContactsKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -36,6 +37,9 @@ static const char* primitiveContactsKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl index c9d00b5ad..e94accf7c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl @@ -1353,6 +1353,97 @@ __kernel void findSeparatingAxisKernel( __global const int4* pairs, + +int findClippingFaces(const float4 separatingNormal, + const ConvexPolyhedronCL* hullA, + __global const ConvexPolyhedronCL* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + + // work-in-progress __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, __global const BodyData* rigidBodies, @@ -1365,6 +1456,12 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, __global const btGpuChildShape* gpuChildShapes, __global btAabbCL* aabbs, __global float4* concaveSeparatingNormalsOut, + __global int* concaveHasSeparatingNormals, + __global int4* clippingFacesOut, + __global float4* worldVertsA1GPU, + __global float4* worldNormalsAGPU, + __global float4* worldVertsB1GPU, + int vertexFaceCapacity, int numConcavePairs ) { @@ -1372,6 +1469,9 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, int i = get_global_id(0); if (i>=numConcavePairs) return; + + concaveHasSeparatingNormals[i] = 0; + int pairIdx = i; int bodyIndexA = concavePairs[i].x; @@ -1604,6 +1704,33 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, { sepAxis.w = dmin; concaveSeparatingNormalsOut[pairIdx]=sepAxis; + concaveHasSeparatingNormals[i]=1; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + + + findClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1GPU, + worldNormalsAGPU, + worldVertsB1GPU, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + + } else { //mark this pair as in-active diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl index aa4918b9a..98253c95c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -1669,9 +1669,7 @@ __kernel void findClippingFacesKernel( __global const int4* pairs, -__kernel void clipFacesAndFindContactsKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* separatingNormals, +__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, __global const int* hasSeparatingAxis, __global struct b3Contact4Data* globalContactsOut, __global int4* clippingFacesOut, @@ -1698,8 +1696,8 @@ __kernel void clipFacesAndFindContactsKernel( __global int4* pairs, if (hasSeparatingAxis[i]) { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; +// int bodyIndexA = pairs[i].x; + // int bodyIndexB = pairs[i].y; int numLocalContactsOut = 0; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 83a24bdf7..cbf9bce24 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -40,6 +40,7 @@ static const char* satClipKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -63,6 +64,9 @@ static const char* satClipKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" @@ -1859,9 +1863,7 @@ static const char* satClipKernelsCL= \ " }// if (im_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;facem_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x,\n" +" facesA[hullA->m_faceOffset+face].m_plane.y,\n" +" facesA[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" "// work-in-progress\n" "__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" " __global const BodyData* rigidBodies,\n" @@ -1482,12 +1570,19 @@ static const char* satKernelsCL= \ " __global const btGpuChildShape* gpuChildShapes,\n" " __global btAabbCL* aabbs,\n" " __global float4* concaveSeparatingNormalsOut,\n" +" __global int* concaveHasSeparatingNormals,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1GPU,\n" +" __global float4* worldNormalsAGPU,\n" +" __global float4* worldVertsB1GPU,\n" +" int vertexFaceCapacity,\n" " int numConcavePairs\n" " )\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numConcavePairs)\n" " return;\n" +" concaveHasSeparatingNormals[i] = 0;\n" " int pairIdx = i;\n" " int bodyIndexA = concavePairs[i].x;\n" " int bodyIndexB = concavePairs[i].y;\n" @@ -1691,6 +1786,27 @@ static const char* satKernelsCL= \ " {\n" " sepAxis.w = dmin;\n" " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +" concaveHasSeparatingNormals[i]=1;\n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" findClippingFaces(sepAxis,\n" +" &convexPolyhedronA,\n" +" &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" worldVertsA1GPU,\n" +" worldNormalsAGPU,\n" +" worldVertsB1GPU,\n" +" vertexFaceCapacity,\n" +" minDist, maxDist,\n" +" verticesA,\n" +" facesA,\n" +" indicesA,\n" +" vertices,\n" +" faces,\n" +" indices,\n" +" clippingFacesOut, pairIdx);\n" " } else\n" " { \n" " //mark this pair as in-active\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index b2bb087b4..de4aa794a 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -124,7 +124,7 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, { - cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH,false); + cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); b3Assert(solveContactProg); cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index 2b910591a..6c839074b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -25,6 +25,7 @@ static const char* batchingKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* batchingKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 91c4d828b..4daf95380 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -25,6 +25,7 @@ static const char* batchingKernelsNewCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* batchingKernelsNewCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h index 98479bda9..1146f0e57 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h @@ -25,6 +25,7 @@ static const char* integrateKernelCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* integrateKernelCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index d854dfe97..7f125298b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -25,6 +25,7 @@ static const char* solverSetupCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverSetupCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index c16c71685..d3c905995 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -25,6 +25,7 @@ static const char* solverSetup2CL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverSetup2CL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index 2476d1cab..e70b44373 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -25,6 +25,7 @@ static const char* solverUtilsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverUtilsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index 05a557f0e..01d6f8b45 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -15,6 +15,7 @@ static const char* updateAabbsKernelCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -38,6 +39,9 @@ static const char* updateAabbsKernelCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"