diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index 5b4c3b2a7..303a77c84 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -101,7 +101,7 @@ enum }; b3AlignedObjectArray demoNames; -int selectedDemo = 1; +int selectedDemo = 0; GpuDemo::CreateFunc* allDemos[]= { //ConcaveCompound2Scene::MyCreateFunc, @@ -247,9 +247,21 @@ static void MyMouseButtonCallback(int button, int state, float x, float y) } extern bool useShadowMap; - +static bool wireframe=false; void MyKeyboardCallback(int key, int state) { + if (key=='w' && state) + { + wireframe=!wireframe; + if (wireframe) + { + glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); + } else + { + glPolygonMode( GL_FRONT_AND_BACK, GL_FILL ); + } + } + if (key=='s' && state) { useShadowMap=!useShadowMap; diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h index 88301aa03..bc7bdaa85 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h @@ -203,7 +203,98 @@ bool b3FindSeparatingAxisEdgeEdge( const b3ConvexPolyhedronData* hullA, __global return true; } -// work-in-progress + + +inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + b3Float4ConstArg posA, b3QuatConstArg ornA,b3Float4ConstArg posB, b3QuatConstArg ornB, + __global b3Float4* worldVertsA1, + __global b3Float4* worldNormalsA1, + __global b3Float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + __global const b3Float4* verticesA, + __global const b3GpuFace_t* facesA, + __global const int* indicesA, + __global const b3Float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + + __global b3Int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;facem_numFaces;face++) + { + const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal); + float d = b3Dot(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = b3TransformPoint(b,posB,ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const b3Float4 Normal = b3MakeFloat4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal); + + float d = b3Dot(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = b3TransformPoint(a, posA,ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + + __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs, __global const b3RigidBodyData* rigidBodies, __global const b3Collidable* collidables, @@ -215,6 +306,12 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs __global const b3GpuChildShape* gpuChildShapes, __global b3Aabb* aabbs, __global b3Float4* concaveSeparatingNormalsOut, + __global b3Int4* clippingFacesOut, + __global b3Vector3* worldVertsA1Out, + __global b3Vector3* worldNormalsA1Out, + __global b3Vector3* worldVertsB1Out, + __global int* hasSeparatingNormals, + int vertexFaceCapacity, int numConcavePairs, int pairIdx ) @@ -242,7 +339,7 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs return; } - + hasSeparatingNormals[i] = 0; int numFacesA = convexShapes[shapeIndexA].m_numFaces; int numActualConcaveConvexTests = 0; @@ -454,8 +551,34 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs if (hasSeparatingAxis) { + hasSeparatingNormals[i]=1; sepAxis.w = dmin; concaveSeparatingNormalsOut[pairIdx]=sepAxis; + + //now compute clipping faces A and B, and world-space clipping vertices A and B... + + float minDist = -1e30f; + float maxDist = 0.02f; + + b3FindClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1Out, + worldNormalsA1Out, + worldVertsB1Out, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + } else { //mark this pair as in-active diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 0fdc65fe3..9b7a109ff 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -16,7 +16,7 @@ subject to the following restrictions: bool findSeparatingAxisOnGpu = true; bool bvhTraversalKernelGPU = true; -bool findConcaveSeparatingAxisKernelGPU = false;//true; +bool findConcaveSeparatingAxisKernelGPU = true; ///This file was written by Erwin Coumans ///Separating axis rest based on work from Pierre Terdiman, see @@ -24,7 +24,7 @@ bool findConcaveSeparatingAxisKernelGPU = false;//true; //#define B3_DEBUG_SAT_FACE -#define CHECK_ON_HOST +//#define CHECK_ON_HOST #ifdef CHECK_ON_HOST //#define PERSISTENT_CONTACTS_HOST @@ -85,6 +85,7 @@ m_totalContactsOut(m_context, m_queue), m_sepNormals(m_context, m_queue), m_hasSeparatingNormals(m_context, m_queue), m_concaveSepNormals(m_context, m_queue), +m_concaveHasSeparatingNormals(m_context,m_queue), m_numConcavePairsOut(m_context, m_queue), m_gpuCompoundPairs(m_context, m_queue), m_gpuCompoundSepNormals(m_context, m_queue), @@ -2990,7 +2991,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* int concaveCapacity=maxTriConvexPairCapacity; m_concaveSepNormals.resize(concaveCapacity); - + m_concaveHasSeparatingNormals.resize(concaveCapacity); m_numConcavePairsOut.resize(0); m_numConcavePairsOut.push_back(0); @@ -3039,191 +3040,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* clFinish(m_queue); } - //now perform the tree query on GPU - { - - - - { - - if (treeNodesGPU->size() && treeNodesGPU->size()) - { - if (bvhTraversalKernelGPU) - { - - B3_PROFILE("m_bvhTraversalKernel"); - - - numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); - launcher.setBuffer( pairs->getBufferCL()); - launcher.setBuffer( bodyBuf->getBufferCL()); - launcher.setBuffer( gpuCollidables.getBufferCL()); - launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer( subTreesGPU->getBufferCL()); - launcher.setBuffer( treeNodesGPU->getBufferCL()); - launcher.setBuffer( bvhInfo->getBufferCL()); - - launcher.setConst( nPairs ); - launcher.setConst( maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - } else - { - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - //int maxTriConvexPairCapacity, - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - - int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut - - b3AlignedObjectArray treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... - - volatile int hostNumConcavePairsOut=0; - - // - for (int i=0;i maxTriConvexPairCapacity) - { - static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); - numConcavePairs = maxTriConvexPairCapacity; - } - triangleConvexPairsOut.resize(numConcavePairs); - if (numConcavePairs) - { - if (findConcaveSeparatingAxisKernelGPU) - { - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } else - { - - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - - //numConcavePairs - //b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - //b3BufferInfoCL( bodyBuf->getBufferCL(),true), - //b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - // b3BufferInfoCL( convexData.getBufferCL(),true), - //b3BufferInfoCL( gpuVertices.getBufferCL(),true), - //b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - //b3BufferInfoCL( gpuFaces.getBufferCL(),true), - //b3BufferInfoCL( gpuIndices.getBufferCL(),true), - //b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - //b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - //b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) - - b3AlignedObjectArray concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - } -// b3AlignedObjectArray cpuCompoundSepNormals; - // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); - // b3AlignedObjectArray cpuConcavePairs; - // triangleConvexPairsOut.copyToHost(cpuConcavePairs); - - - } - } - } - } + numCompoundPairs = m_numCompoundPairsOut.at(0); bool useGpuFindCompoundPairs=true; @@ -3442,8 +3259,252 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* } + int vertexFaceCapacity = 64; + + { + //now perform the tree query on GPU + + + + + if (treeNodesGPU->size() && treeNodesGPU->size()) + { + if (bvhTraversalKernelGPU) + { + + B3_PROFILE("m_bvhTraversalKernel"); + + + numConcavePairs = m_numConcavePairsOut.at(0); + + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer( subTreesGPU->getBufferCL()); + launcher.setBuffer( treeNodesGPU->getBufferCL()); + launcher.setBuffer( bvhInfo->getBufferCL()); + + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + numConcavePairs = m_numConcavePairsOut.at(0); + } else + { + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + //int maxTriConvexPairCapacity, + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + + int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... + + volatile int hostNumConcavePairsOut=0; + + // + for (int i=0;i maxTriConvexPairCapacity) + { + static int exceeded_maxTriConvexPairCapacity_count = 0; + b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", + numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); + numConcavePairs = maxTriConvexPairCapacity; + } + triangleConvexPairsOut.resize(numConcavePairs); + + if (numConcavePairs) + { + + + + + clippingFacesOutGPU.resize(numConcavePairs); + worldNormalsAGPU.resize(numConcavePairs); + worldVertsA1GPU.resize(vertexFaceCapacity*numConcavePairs); + worldVertsB1GPU.resize(vertexFaceCapacity*numConcavePairs); + + + if (findConcaveSeparatingAxisKernelGPU) + { + + /* + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + */ + + //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } else + { + + b3AlignedObjectArray clippingFacesOutCPU; + b3AlignedObjectArray worldVertsA1CPU; + b3AlignedObjectArray worldNormalsACPU; + b3AlignedObjectArray worldVertsB1CPU; + b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; + + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); + + b3GpuChildShape* childShapePointerCPU = 0; + if (cpuChildShapes.size()) + childShapePointerCPU = &cpuChildShapes.at(0); + + clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + worldNormalsACPU.resize(worldNormalsAGPU.size()); + worldVertsB1CPU.resize(worldVertsB1GPU.size()); + + for (int i=0;i cpuCompoundSepNormals; +// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); +// b3AlignedObjectArray cpuConcavePairs; +// triangleConvexPairsOut.copyToHost(cpuConcavePairs); + + + } + } + + + } if (numConcavePairs) { @@ -3494,45 +3555,130 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* if (contactClippingOnGpu) { - //B3_PROFILE("clipHullHullKernel"); - - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - //concave-convex contact clipping + //B3_PROFILE("clipHullHullKernel"); + bool breakupConcaveConvexKernel = false; + +#ifdef __APPLE__ + //actually, some Apple OpenCL platform/device combinations work fine... + breakupConcaveConvexKernel = true; +#endif + //concave-convex contact clipping if (numConcavePairs) { // printf("numConcavePairs = %d\n", numConcavePairs); // nContacts = m_totalContactsOut.at(0); // printf("nContacts before = %d\n", nContacts); - B3_PROFILE("clipHullHullConcaveConvexKernel"); - nContacts = m_totalContactsOut.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numConcavePairs ); - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - b3AlignedObjectArray cpuContacts; - contactOut->copyToHost(cpuContacts); + if (breakupConcaveConvexKernel) + { + + worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); + + + //clipFacesAndFindContacts + bool clipFacesAndFindContactsCPU = false; + if (clipFacesAndFindContactsCPU) + { + } else + { + + if (1) + { + + + B3_PROFILE("clipFacesAndFindContacts"); + //nContacts = m_totalContactsOut.at(0); + //int h = m_hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + + launcher.setConst( numConcavePairs ); + int debugMode = 0; + launcher.setConst( debugMode); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + //int bla = m_totalContactsOut.at(0); + } + } + //contactReduction + { + contactOut->reserve(nContacts+numConcavePairs); + + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + + } + //re-use? + + + } else + { + B3_PROFILE("clipHullHullConcaveConvexKernel"); + nContacts = m_totalContactsOut.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + b3AlignedObjectArray cpuContacts; + contactOut->copyToHost(cpuContacts); + } // printf("nContacts after = %d\n", nContacts); } @@ -3553,24 +3699,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* - int vertexFaceCapacity = 64; - worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - - clippingFacesOutGPU.resize(nPairs); - - worldNormalsAGPU.resize(nPairs); - - worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); - - worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); - - { B3_PROFILE("findClippingFacesKernel"); @@ -3608,13 +3742,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* ///clip face B against face A, reduce contacts and append them to a global contact array if (1) { - B3_PROFILE("clipFacesAndContactReductionKernel"); + B3_PROFILE("clipFacesAndFindContacts"); //nContacts = m_totalContactsOut.at(0); //int h = m_hasSeparatingNormals.at(0); //int4 p = clippingFacesOutGPU.at(0); b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), @@ -3633,23 +3765,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* launcher.setConst( nPairs ); int debugMode = 0; launcher.setConst( debugMode); - - /* - int serializationBytes = launcher.getSerializationBufferSize(); - unsigned char* buf = (unsigned char*)malloc(serializationBytes+1); - int actualWritten = launcher.serializeArguments(buf,serializationBytes+1); - FILE* f = fopen("clipFacesAndContactReductionKernel.bin","wb"); - fwrite(buf,actualWritten,1,f); - fclose(f); - free(buf); - printf("serializationBytes=%d, actualWritten=%d\n",serializationBytes,actualWritten); - */ - - int num = nPairs; - - launcher.launch1D( num); + int num = nPairs; + launcher.launch1D( num); clFinish(m_queue); - { + + { // nContacts = m_totalContactsOut.at(0); // printf("nContacts = %d\n",nContacts); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index 0e8a7d3d4..9d11b50ab 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -52,6 +52,7 @@ struct GpuSatCollision b3OpenCLArray m_sepNormals; b3OpenCLArray m_hasSeparatingNormals; b3OpenCLArray m_concaveSepNormals; + b3OpenCLArray m_concaveHasSeparatingNormals; b3OpenCLArray m_numConcavePairsOut; b3OpenCLArray m_gpuCompoundPairs; b3OpenCLArray m_gpuCompoundSepNormals; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h index d51084e41..4b3b49eae 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -29,32 +29,6 @@ static const char* bvhTraversalKernelCL= \ " int m_nodeOffset;\n" " int m_subTreeOffset;\n" "} b3BvhInfo;\n" -"/*\n" -" bool isLeafNode() const\n" -" {\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (m_escapeIndexOrTriangleIndex >= 0);\n" -" }\n" -" int getEscapeIndex() const\n" -" {\n" -" btAssert(!isLeafNode());\n" -" return -m_escapeIndexOrTriangleIndex;\n" -" }\n" -" int getTriangleIndex() const\n" -" {\n" -" btAssert(isLeafNode());\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (m_escapeIndexOrTriangleIndex&~(y));\n" -" }\n" -" int getPartId() const\n" -" {\n" -" btAssert(isLeafNode());\n" -" // Get only the highest bits where the part index is stored\n" -" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n" -" }\n" -"*/\n" "int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" "{\n" " unsigned int x=0;\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index 03f0480d1..a282d1eff 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -13,6 +13,7 @@ static const char* primitiveContactsKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -36,6 +37,9 @@ static const char* primitiveContactsKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl index c9d00b5ad..e94accf7c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl @@ -1353,6 +1353,97 @@ __kernel void findSeparatingAxisKernel( __global const int4* pairs, + +int findClippingFaces(const float4 separatingNormal, + const ConvexPolyhedronCL* hullA, + __global const ConvexPolyhedronCL* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + + // work-in-progress __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, __global const BodyData* rigidBodies, @@ -1365,6 +1456,12 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, __global const btGpuChildShape* gpuChildShapes, __global btAabbCL* aabbs, __global float4* concaveSeparatingNormalsOut, + __global int* concaveHasSeparatingNormals, + __global int4* clippingFacesOut, + __global float4* worldVertsA1GPU, + __global float4* worldNormalsAGPU, + __global float4* worldVertsB1GPU, + int vertexFaceCapacity, int numConcavePairs ) { @@ -1372,6 +1469,9 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, int i = get_global_id(0); if (i>=numConcavePairs) return; + + concaveHasSeparatingNormals[i] = 0; + int pairIdx = i; int bodyIndexA = concavePairs[i].x; @@ -1604,6 +1704,33 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, { sepAxis.w = dmin; concaveSeparatingNormalsOut[pairIdx]=sepAxis; + concaveHasSeparatingNormals[i]=1; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + + + findClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1GPU, + worldNormalsAGPU, + worldVertsB1GPU, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + + } else { //mark this pair as in-active diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl index aa4918b9a..98253c95c 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -1669,9 +1669,7 @@ __kernel void findClippingFacesKernel( __global const int4* pairs, -__kernel void clipFacesAndFindContactsKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* separatingNormals, +__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, __global const int* hasSeparatingAxis, __global struct b3Contact4Data* globalContactsOut, __global int4* clippingFacesOut, @@ -1698,8 +1696,8 @@ __kernel void clipFacesAndFindContactsKernel( __global int4* pairs, if (hasSeparatingAxis[i]) { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; +// int bodyIndexA = pairs[i].x; + // int bodyIndexB = pairs[i].y; int numLocalContactsOut = 0; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 83a24bdf7..cbf9bce24 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -40,6 +40,7 @@ static const char* satClipKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -63,6 +64,9 @@ static const char* satClipKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" @@ -1859,9 +1863,7 @@ static const char* satClipKernelsCL= \ " }// if (im_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;facem_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x,\n" +" facesA[hullA->m_faceOffset+face].m_plane.y,\n" +" facesA[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" "// work-in-progress\n" "__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" " __global const BodyData* rigidBodies,\n" @@ -1482,12 +1570,19 @@ static const char* satKernelsCL= \ " __global const btGpuChildShape* gpuChildShapes,\n" " __global btAabbCL* aabbs,\n" " __global float4* concaveSeparatingNormalsOut,\n" +" __global int* concaveHasSeparatingNormals,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1GPU,\n" +" __global float4* worldNormalsAGPU,\n" +" __global float4* worldVertsB1GPU,\n" +" int vertexFaceCapacity,\n" " int numConcavePairs\n" " )\n" "{\n" " int i = get_global_id(0);\n" " if (i>=numConcavePairs)\n" " return;\n" +" concaveHasSeparatingNormals[i] = 0;\n" " int pairIdx = i;\n" " int bodyIndexA = concavePairs[i].x;\n" " int bodyIndexB = concavePairs[i].y;\n" @@ -1691,6 +1786,27 @@ static const char* satKernelsCL= \ " {\n" " sepAxis.w = dmin;\n" " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +" concaveHasSeparatingNormals[i]=1;\n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" findClippingFaces(sepAxis,\n" +" &convexPolyhedronA,\n" +" &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" worldVertsA1GPU,\n" +" worldNormalsAGPU,\n" +" worldVertsB1GPU,\n" +" vertexFaceCapacity,\n" +" minDist, maxDist,\n" +" verticesA,\n" +" facesA,\n" +" indicesA,\n" +" vertices,\n" +" faces,\n" +" indices,\n" +" clippingFacesOut, pairIdx);\n" " } else\n" " { \n" " //mark this pair as in-active\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index b2bb087b4..de4aa794a 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -124,7 +124,7 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, { - cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH,false); + cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); b3Assert(solveContactProg); cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index 2b910591a..6c839074b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -25,6 +25,7 @@ static const char* batchingKernelsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* batchingKernelsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 91c4d828b..4daf95380 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -25,6 +25,7 @@ static const char* batchingKernelsNewCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* batchingKernelsNewCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h index 98479bda9..1146f0e57 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h @@ -25,6 +25,7 @@ static const char* integrateKernelCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* integrateKernelCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index d854dfe97..7f125298b 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -25,6 +25,7 @@ static const char* solverSetupCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverSetupCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index c16c71685..d3c905995 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -25,6 +25,7 @@ static const char* solverSetup2CL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverSetup2CL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index 2476d1cab..e70b44373 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -25,6 +25,7 @@ static const char* solverUtilsCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -48,6 +49,9 @@ static const char* solverUtilsCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index 05a557f0e..01d6f8b45 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -15,6 +15,7 @@ static const char* updateAabbsKernelCL= \ "#ifdef __cplusplus\n" "#else\n" "#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" "#define b3Fabs fabs\n" "#define b3Sqrt native_sqrt\n" "#define b3Sin native_sin\n" @@ -38,6 +39,9 @@ static const char* updateAabbsKernelCL= \ " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " return cross(a1, b1);\n" " }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" "#endif \n" " \n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"