diff --git a/build/premake4.lua b/build/premake4.lua index 78bdeb312..8105683aa 100644 --- a/build/premake4.lua +++ b/build/premake4.lua @@ -92,6 +92,7 @@ include "../opencl/lds_bank_conflict" include "../opencl/reduce" include "../opencl/gpu_broadphase/test" + include "../opencl/gpu_sat/test" end \ No newline at end of file diff --git a/build/stringify.bat b/build/stringify.bat index 820661d04..0c04729f6 100644 --- a/build/stringify.bat +++ b/build/stringify.bat @@ -11,6 +11,9 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/parallel_primitives/ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sap.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapKernels.h" --stringname="sapCL" stringify premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sapFast.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/sat.cl" --headerfile="../opencl/gpu_sat/kernels/satKernels.h" --stringname="satKernelsCL" stringify +premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_sat/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify + pause \ No newline at end of file diff --git a/opencl/gpu_sat/host/ConvexHullContact.cpp b/opencl/gpu_sat/host/ConvexHullContact.cpp new file mode 100644 index 000000000..6bdf260b1 --- /dev/null +++ b/opencl/gpu_sat/host/ConvexHullContact.cpp @@ -0,0 +1,581 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +///This file was written by Erwin Coumans +///Separating axis rest based on work from Pierre Terdiman, see +///And contact clipping based on work from Simon Hobbs + +//#define BT_DEBUG_SAT_FACE + +#include "ConvexHullContact.h" +#include //memcpy +#include "btConvexPolyhedronCL.h" + +typedef btAlignedObjectArray btVertexArray; +#include "parallel_primitives/host/btQuickprof.h" + +#include //for FLT_MAX +#include "../basic_initialize/btOpenCLUtils.h" +#include "parallel_primitives/host/btLauncherCL.h" +//#include "AdlQuaternion.h" + +#include "../kernels/satKernels.h" +#include "../kernels/satClipHullContacts.h" +#include "parallel_primitives/host/btAabbUtil2.h" + +#define dot3F4 btDot + +GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) +:m_context(ctx), +m_device(device), +m_queue(q), +m_findSeparatingAxisKernel(0), +m_totalContactsOut(m_context, m_queue) +{ + m_totalContactsOut.push_back(0); + + cl_int errNum=0; + + if (1) + { + const char* src = satKernelsCL; + cl_program satProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,"","opencl/gpu_rigidbody_pipeline2/sat.cl"); + btAssert(errNum==CL_SUCCESS); + + m_findSeparatingAxisKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg ); + + + m_findCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg ); + + m_processCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg ); + btAssert(errNum==CL_SUCCESS); + } + + if (1) + { + const char* srcClip = satClipKernelsCL; + cl_program satClipContactsProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,"","opencl/gpu_rigidbody_pipeline2/satClipHullContacts.cl"); + btAssert(errNum==CL_SUCCESS); + + m_clipHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + m_clipCompoundsHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + + m_findClippingFacesKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + m_clipFacesAndContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + m_clipHullHullConcaveConvexKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + m_extractManifoldAndAddContactKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + m_newContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, + "newContactReductionKernel",&errNum,satClipContactsProg); + btAssert(errNum==CL_SUCCESS); + + + + + } else + { + m_clipHullHullKernel=0; + m_clipCompoundsHullHullKernel = 0; + m_findClippingFacesKernel = 0; + m_newContactReductionKernel=0; + m_clipFacesAndContactReductionKernel = 0; + m_clipHullHullConcaveConvexKernel = 0; + m_extractManifoldAndAddContactKernel = 0; + } + + +} + +GpuSatCollision::~GpuSatCollision() +{ + if (m_findSeparatingAxisKernel) + clReleaseKernel(m_findSeparatingAxisKernel); + + if (m_findCompoundPairsKernel) + clReleaseKernel(m_findCompoundPairsKernel); + + if (m_processCompoundPairsKernel) + clReleaseKernel(m_processCompoundPairsKernel); + + if (m_findClippingFacesKernel) + clReleaseKernel(m_findClippingFacesKernel); + + if (m_clipFacesAndContactReductionKernel) + clReleaseKernel(m_clipFacesAndContactReductionKernel); + if (m_newContactReductionKernel) + clReleaseKernel(m_newContactReductionKernel); + + if (m_clipHullHullKernel) + clReleaseKernel(m_clipHullHullKernel); + if (m_clipCompoundsHullHullKernel) + clReleaseKernel(m_clipCompoundsHullHullKernel); + + if (m_clipHullHullConcaveConvexKernel) + clReleaseKernel(m_clipHullHullConcaveConvexKernel); + if (m_extractManifoldAndAddContactKernel) + clReleaseKernel(m_extractManifoldAndAddContactKernel); +} + + + +void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray* pairs, int nPairs, + const btOpenCLArray* bodyBuf, + btOpenCLArray* contactOut, int& nContacts, + + const btOpenCLArray& convexData, + const btOpenCLArray& gpuVertices, + const btOpenCLArray& gpuUniqueEdges, + const btOpenCLArray& gpuFaces, + const btOpenCLArray& gpuIndices, + const btOpenCLArray& gpuCollidables, + const btOpenCLArray& gpuChildShapes, + + const btOpenCLArray& clAabbs, + btOpenCLArray& worldVertsB1GPU, + btOpenCLArray& clippingFacesOutGPU, + btOpenCLArray& worldNormalsAGPU, + btOpenCLArray& worldVertsA1GPU, + btOpenCLArray& worldVertsB2GPU, + int numObjects, + int maxTriConvexPairCapacity, + btOpenCLArray& triangleConvexPairsOut, + int& numTriConvexPairsOut + ) +{ + if (!nPairs) + return; + + BT_PROFILE("computeConvexConvexContactsGPUSAT"); + // printf("nContacts = %d\n",nContacts); + + btOpenCLArray sepNormals(m_context,m_queue); + sepNormals.resize(nPairs); + btOpenCLArray hasSeparatingNormals(m_context,m_queue); + hasSeparatingNormals.resize(nPairs); + + int concaveCapacity=maxTriConvexPairCapacity; + btOpenCLArray concaveSepNormals(m_context,m_queue); + concaveSepNormals.resize(concaveCapacity); + + btOpenCLArray numConcavePairsOut(m_context,m_queue); + numConcavePairsOut.push_back(0); + + int compoundPairCapacity=65536*10; + btOpenCLArray gpuCompoundPairs(m_context,m_queue); + gpuCompoundPairs.resize(compoundPairCapacity); + + btOpenCLArray gpuCompoundSepNormals(m_context,m_queue); + gpuCompoundSepNormals.resize(compoundPairCapacity); + + + btOpenCLArray gpuHasCompoundSepNormals(m_context,m_queue); + gpuHasCompoundSepNormals.resize(compoundPairCapacity); + + btOpenCLArray numCompoundPairsOut(m_context,m_queue); + numCompoundPairsOut.push_back(0); + + int numCompoundPairs = 0; + + bool findSeparatingAxisOnGpu = true;//false; + int numConcave =0; + + { + clFinish(m_queue); + if (findSeparatingAxisOnGpu) + { + + + BT_PROFILE("findSeparatingAxisKernel"); + btBufferInfoCL bInfo[] = { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( clAabbs.getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( triangleConvexPairsOut.getBufferCL()), + btBufferInfoCL( concaveSepNormals.getBufferCL()), + btBufferInfoCL( numConcavePairsOut.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_findSeparatingAxisKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + numConcave = numConcavePairsOut.at(0); + if (numConcave > maxTriConvexPairCapacity) + numConcave = maxTriConvexPairCapacity; + triangleConvexPairsOut.resize(numConcave); + + + + + { + BT_PROFILE("findCompoundPairsKernel"); + btBufferInfoCL bInfo[] = + { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( clAabbs.getBufferCL(),true), + btBufferInfoCL( gpuChildShapes.getBufferCL(),true), + btBufferInfoCL( gpuCompoundPairs.getBufferCL()), + btBufferInfoCL( numCompoundPairsOut.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_findCompoundPairsKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst( compoundPairCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + numCompoundPairs = numCompoundPairsOut.at(0); + //printf("numCompoundPairs =%d\n",numCompoundPairs ); + if (numCompoundPairs > compoundPairCapacity) + numCompoundPairs = compoundPairCapacity; + + gpuCompoundPairs.resize(numCompoundPairs); + gpuHasCompoundSepNormals.resize(numCompoundPairs); + gpuCompoundSepNormals.resize(numCompoundPairs); + + + if (numCompoundPairs) + { + + BT_PROFILE("processCompoundPairsKernel"); + btBufferInfoCL bInfo[] = + { + btBufferInfoCL( gpuCompoundPairs.getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( clAabbs.getBufferCL(),true), + btBufferInfoCL( gpuChildShapes.getBufferCL(),true), + btBufferInfoCL( gpuCompoundSepNormals.getBufferCL()), + btBufferInfoCL( gpuHasCompoundSepNormals.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_processCompoundPairsKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( numCompoundPairs ); + + int num = numCompoundPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + + + //printf("numConcave = %d\n",numConcave); + + }//if (findSeparatingAxisOnGpu) + + +// printf("hostNormals.size()=%d\n",hostNormals.size()); + //int numPairs = pairCount.at(0); + + + + } +#ifdef __APPLE__ + bool contactClippingOnGpu = true; +#else + bool contactClippingOnGpu = true; +#endif + + if (contactClippingOnGpu) + { + //BT_PROFILE("clipHullHullKernel"); + + + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + + //concave-convex contact clipping + + if (numConcave) + { + BT_PROFILE("clipHullHullConcaveConvexKernel"); + nContacts = m_totalContactsOut.at(0); + btBufferInfoCL bInfo[] = { + btBufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( concaveSepNormals.getBufferCL()), + btBufferInfoCL( contactOut->getBufferCL()), + btBufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + btLauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( numConcave ); + int num = numConcave; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + } + + + //convex-convex contact clipping + if (1) + { + BT_PROFILE("clipHullHullKernel"); + bool breakupKernel = false; + +#ifdef __APPLE__ + breakupKernel = true; +#endif + + if (breakupKernel) + { + + + + int vertexFaceCapacity = 64; + + + worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + + + clippingFacesOutGPU.resize(nPairs); + + + worldNormalsAGPU.resize(nPairs); + + + worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); + + + worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); + + + + { + BT_PROFILE("findClippingFacesKernel"); + btBufferInfoCL bInfo[] = { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( clippingFacesOutGPU.getBufferCL()), + btBufferInfoCL( worldVertsA1GPU.getBufferCL()), + btBufferInfoCL( worldNormalsAGPU.getBufferCL()), + btBufferInfoCL( worldVertsB1GPU.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_findClippingFacesKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( vertexFaceCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + + + + + + ///clip face B against face A, reduce contacts and append them to a global contact array + if (1) + { + BT_PROFILE("clipFacesAndContactReductionKernel"); + //nContacts = m_totalContactsOut.at(0); + //int h = hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + btBufferInfoCL bInfo[] = { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( contactOut->getBufferCL()), + btBufferInfoCL( clippingFacesOutGPU.getBufferCL()), + btBufferInfoCL( worldVertsA1GPU.getBufferCL()), + btBufferInfoCL( worldNormalsAGPU.getBufferCL()), + btBufferInfoCL( worldVertsB1GPU.getBufferCL()), + btBufferInfoCL( worldVertsB2GPU.getBufferCL()), + btBufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + + launcher.setConst( nPairs ); + int debugMode = 0; + launcher.setConst( debugMode); + + /* + int serializationBytes = launcher.getSerializationBufferSize(); + unsigned char* buf = (unsigned char*)malloc(serializationBytes+1); + int actualWritten = launcher.serializeArguments(buf,serializationBytes+1); + FILE* f = fopen("clipFacesAndContactReductionKernel.bin","wb"); + fwrite(buf,actualWritten,1,f); + fclose(f); + free(buf); + printf("serializationBytes=%d, actualWritten=%d\n",serializationBytes,actualWritten); + */ + + int num = nPairs; + + launcher.launch1D( num); + clFinish(m_queue); + { +// nContacts = m_totalContactsOut.at(0); + // printf("nContacts = %d\n",nContacts); + + contactOut->reserve(nContacts+nPairs); + + { + BT_PROFILE("newContactReductionKernel"); + btBufferInfoCL bInfo[] = + { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( contactOut->getBufferCL()), + btBufferInfoCL( clippingFacesOutGPU.getBufferCL()), + btBufferInfoCL( worldVertsB2GPU.getBufferCL()), + btBufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + btLauncherCL launcher(m_queue, m_newContactReductionKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + +// Contact4 pt = contactOut->at(0); + + // printf("nContacts = %d\n",nContacts); + } + } + } + else + { + + if (nPairs) + { + btBufferInfoCL bInfo[] = { + btBufferInfoCL( pairs->getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( sepNormals.getBufferCL()), + btBufferInfoCL( hasSeparatingNormals.getBufferCL()), + btBufferInfoCL( contactOut->getBufferCL()), + btBufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + btLauncherCL launcher(m_queue, m_clipHullHullKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } + + int nCompoundsPairs = gpuCompoundPairs.size(); + + if (nCompoundsPairs) + { + btBufferInfoCL bInfo[] = { + btBufferInfoCL( gpuCompoundPairs.getBufferCL(), true ), + btBufferInfoCL( bodyBuf->getBufferCL(),true), + btBufferInfoCL( gpuCollidables.getBufferCL(),true), + btBufferInfoCL( convexData.getBufferCL(),true), + btBufferInfoCL( gpuVertices.getBufferCL(),true), + btBufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + btBufferInfoCL( gpuFaces.getBufferCL(),true), + btBufferInfoCL( gpuIndices.getBufferCL(),true), + btBufferInfoCL( gpuChildShapes.getBufferCL(),true), + btBufferInfoCL( gpuCompoundSepNormals.getBufferCL(),true), + btBufferInfoCL( gpuHasCompoundSepNormals.getBufferCL(),true), + btBufferInfoCL( contactOut->getBufferCL()), + btBufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + btLauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); + launcher.setConst( nCompoundsPairs ); + int num = nCompoundsPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } + } + } + + } +} diff --git a/opencl/gpu_sat/host/ConvexHullContact.h b/opencl/gpu_sat/host/ConvexHullContact.h new file mode 100644 index 000000000..299a5d6ae --- /dev/null +++ b/opencl/gpu_sat/host/ConvexHullContact.h @@ -0,0 +1,85 @@ + +#ifndef _CONVEX_HULL_CONTACT_H +#define _CONVEX_HULL_CONTACT_H + +#include "parallel_primitives/host/btOpenCLArray.h" +#include "btRigidBodyCL.h" +#include "parallel_primitives/host/btAlignedObjectArray.h" +#include "btConvexUtility.h" +#include "btConvexPolyhedronCL.h" +#include "btCollidable.h" +#include "btContact4.h" +#include "parallel_primitives/host/btInt2.h" +#include "parallel_primitives/host/btInt4.h" + +//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" + + +struct btYetAnotherAabb +{ + union + { + float m_min[4]; + int m_minIndices[4]; + }; + union + { + float m_max[4]; + //int m_signedMaxIndices[4]; + //unsigned int m_unsignedMaxIndices[4]; + }; +}; + +struct GpuSatCollision +{ + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + cl_kernel m_findSeparatingAxisKernel; + cl_kernel m_findCompoundPairsKernel; + cl_kernel m_processCompoundPairsKernel; + + cl_kernel m_clipHullHullKernel; + cl_kernel m_clipCompoundsHullHullKernel; + + cl_kernel m_clipFacesAndContactReductionKernel; + cl_kernel m_findClippingFacesKernel; + + cl_kernel m_clipHullHullConcaveConvexKernel; + cl_kernel m_extractManifoldAndAddContactKernel; + cl_kernel m_newContactReductionKernel; + + + btOpenCLArray m_totalContactsOut; + + GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ); + virtual ~GpuSatCollision(); + + + void computeConvexConvexContactsGPUSAT( const btOpenCLArray* pairs, int nPairs, + const btOpenCLArray* bodyBuf, + btOpenCLArray* contactOut, int& nContacts, + const btOpenCLArray& hostConvexData, + const btOpenCLArray& vertices, + const btOpenCLArray& uniqueEdges, + const btOpenCLArray& faces, + const btOpenCLArray& indices, + const btOpenCLArray& gpuCollidables, + const btOpenCLArray& gpuChildShapes, + + const btOpenCLArray& clAabbs, + btOpenCLArray& worldVertsB1GPU, + btOpenCLArray& clippingFacesOutGPU, + btOpenCLArray& worldNormalsAGPU, + btOpenCLArray& worldVertsA1GPU, + btOpenCLArray& worldVertsB2GPU, + int numObjects, + int maxTriConvexPairCapacity, + btOpenCLArray& triangleConvexPairs, + int& numTriConvexPairsOut + ); + + +}; + +#endif //_CONVEX_HULL_CONTACT_H diff --git a/opencl/gpu_sat/host/btCollidable.h b/opencl/gpu_sat/host/btCollidable.h new file mode 100644 index 000000000..6209671de --- /dev/null +++ b/opencl/gpu_sat/host/btCollidable.h @@ -0,0 +1,38 @@ + +#ifndef BT_COLLIDABLE_H +#define BT_COLLIDABLE_H + +struct btCollidable +{ + int m_numChildShapes; + float m_radius; + int m_shapeType; + int m_shapeIndex; +}; + +struct btCollidableNew +{ + short int m_shapeType; + short int m_numShapes; + int m_shapeIndex; +}; + +struct btGpuChildShape +{ + float m_childPosition[4]; + float m_childOrientation[4]; + int m_shapeIndex; + int m_unused0; + int m_unused1; + int m_unused2; +}; + +struct btCompoundOverlappingPair +{ + int m_bodyIndexA; + int m_bodyIndexB; +// int m_pairType; + int m_childShapeIndexA; + int m_childShapeIndexB; +}; +#endif //BT_COLLIDABLE_H diff --git a/opencl/gpu_sat/host/btContact4.h b/opencl/gpu_sat/host/btContact4.h new file mode 100644 index 000000000..a14f60a2c --- /dev/null +++ b/opencl/gpu_sat/host/btContact4.h @@ -0,0 +1,42 @@ +#ifndef BT_CONTACT4_H +#define BT_CONTACT4_H + +#include "parallel_primitives/host/btVector3.h" + + +ATTRIBUTE_ALIGNED16(struct) btContact4 +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btVector3 m_worldPos[4]; + btVector3 m_worldNormal; +// float m_restituitionCoeff; +// float m_frictionCoeff; + unsigned short m_restituitionCoeffCmp; + unsigned short m_frictionCoeffCmp; + int m_batchIdx; + + int m_bodyAPtrAndSignBit; + int m_bodyBPtrAndSignBit; + + int getBodyA()const {return abs(m_bodyAPtrAndSignBit);} + int getBodyB()const {return abs(m_bodyBPtrAndSignBit);} + bool isBodyAFixed()const { return m_bodyAPtrAndSignBit<0;} + bool isBodyBFixed()const { return m_bodyBPtrAndSignBit<0;} + // todo. make it safer + int& getBatchIdx() { return m_batchIdx; } + const int& getBatchIdx() const { return m_batchIdx; } + float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp/(float)0xffff); } + void setRestituitionCoeff( float c ) { btAssert( c >= 0.f && c <= 1.f ); m_restituitionCoeffCmp = (unsigned short)(c*0xffff); } + float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp/(float)0xffff); } + void setFrictionCoeff( float c ) { btAssert( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (unsigned short)(c*0xffff); } + + float& getNPoints() { return m_worldNormal[3]; } + float getNPoints() const { return m_worldNormal[3]; } + + float getPenetration(int idx) const { return m_worldPos[idx][3]; } + + bool isInvalid() const { return (getBodyA()==0 || getBodyB()==0); } +}; + +#endif //BT_CONTACT4_H diff --git a/opencl/gpu_sat/host/btConvexPolyhedronCL.h b/opencl/gpu_sat/host/btConvexPolyhedronCL.h new file mode 100644 index 000000000..ee789f026 --- /dev/null +++ b/opencl/gpu_sat/host/btConvexPolyhedronCL.h @@ -0,0 +1,64 @@ +#ifndef CONVEX_POLYHEDRON_CL +#define CONVEX_POLYHEDRON_CL + +#include "parallel_primitives/host/btTransform.h" + +struct btGpuFace +{ + btVector4 m_plane; + int m_indexOffset; + int m_numIndices; +}; + +ATTRIBUTE_ALIGNED16(struct) btConvexPolyhedronCL +{ + btVector3 m_localCenter; + btVector3 m_extents; + btVector3 mC; + btVector3 mE; + + btScalar m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; + + + + inline void project(const btTransform& trans, const btVector3& dir, const btAlignedObjectArray& vertices, btScalar& min, btScalar& max) const + { + min = FLT_MAX; + max = -FLT_MAX; + int numVerts = m_numVertices; + + const btVector3 localDir = trans.getBasis().transpose()*dir; + const btVector3 localDi2 = quatRotate(trans.getRotation().inverse(),dir); + + btScalar offset = trans.getOrigin().dot(dir); + + for(int i=0;i max) max = dp; + } + if(min>max) + { + btScalar tmp = min; + min = max; + max = tmp; + } + min += offset; + max += offset; + } + +}; + +#endif //CONVEX_POLYHEDRON_CL \ No newline at end of file diff --git a/opencl/gpu_sat/host/btConvexUtility.cpp b/opencl/gpu_sat/host/btConvexUtility.cpp new file mode 100644 index 000000000..a86bf3b1e --- /dev/null +++ b/opencl/gpu_sat/host/btConvexUtility.cpp @@ -0,0 +1,513 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Erwin Coumans + + +#include "btConvexUtility.h" +#include "LinearMath/btConvexHullComputer.h" +#include "LinearMath/btGrahamScan2dConvexHull.h" +#include "LinearMath/btQuaternion.h" +#include "LinearMath/btHashMap.h" + +#include "../gpu_rigidbody_pipeline2/ConvexPolyhedronCL.h" + + + +btConvexUtility::~btConvexUtility() +{ +} + +bool btConvexUtility::initializePolyhedralFeatures(const btVector3* orgVertices, int numPoints, bool mergeCoplanarTriangles) +{ + + + + btConvexHullComputer conv; + conv.compute(&orgVertices[0].getX(), sizeof(btVector3),numPoints,0.f,0.f); + + btAlignedObjectArray faceNormals; + int numFaces = conv.faces.size(); + faceNormals.resize(numFaces); + btConvexHullComputer* convexUtil = &conv; + + + btAlignedObjectArray tmpFaces; + tmpFaces.resize(numFaces); + + int numVertices = convexUtil->vertices.size(); + m_vertices.resize(numVertices); + for (int p=0;pvertices[p]; + } + + + for (int i=0;ifaces[i]; + //printf("face=%d\n",face); + const btConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face]; + const btConvexHullComputer::Edge* edge = firstEdge; + + btVector3 edges[3]; + int numEdges = 0; + //compute face normals + + do + { + + int src = edge->getSourceVertex(); + tmpFaces[i].m_indices.push_back(src); + int targ = edge->getTargetVertex(); + btVector3 wa = convexUtil->vertices[src]; + + btVector3 wb = convexUtil->vertices[targ]; + btVector3 newEdge = wb-wa; + newEdge.normalize(); + if (numEdges<2) + edges[numEdges++] = newEdge; + + edge = edge->getNextEdgeOfFace(); + } while (edge!=firstEdge); + + btScalar planeEq = 1e30f; + + + if (numEdges==2) + { + faceNormals[i] = edges[0].cross(edges[1]); + faceNormals[i].normalize(); + tmpFaces[i].m_plane[0] = faceNormals[i].getX(); + tmpFaces[i].m_plane[1] = faceNormals[i].getY(); + tmpFaces[i].m_plane[2] = faceNormals[i].getZ(); + tmpFaces[i].m_plane[3] = planeEq; + + } + else + { + btAssert(0);//degenerate? + faceNormals[i].setZero(); + } + + for (int v=0;veq) + { + planeEq=eq; + } + } + tmpFaces[i].m_plane[3] = -planeEq; + } + + //merge coplanar faces and copy them to m_polyhedron + + btScalar faceWeldThreshold= 0.999f; + btAlignedObjectArray todoFaces; + for (int i=0;i coplanarFaceGroup; + int refFace = todoFaces[todoFaces.size()-1]; + + coplanarFaceGroup.push_back(refFace); + btMyFace& faceA = tmpFaces[refFace]; + todoFaces.pop_back(); + + btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]); + for (int j=todoFaces.size()-1;j>=0;j--) + { + int i = todoFaces[j]; + btMyFace& faceB = tmpFaces[i]; + btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]); + if (faceNormalA.dot(faceNormalB)>faceWeldThreshold) + { + coplanarFaceGroup.push_back(i); + todoFaces.remove(i); + } + } + + + bool did_merge = false; + if (coplanarFaceGroup.size()>1) + { + //do the merge: use Graham Scan 2d convex hull + + btAlignedObjectArray orgpoints; + btVector3 averageFaceNormal(0,0,0); + + for (int i=0;im_faces.push_back(tmpFaces[coplanarFaceGroup[i]]); + + btMyFace& face = tmpFaces[coplanarFaceGroup[i]]; + btVector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]); + averageFaceNormal+=faceNormal; + for (int f=0;f hull; + + averageFaceNormal.normalize(); + GrahamScanConvexHull2D(orgpoints,hull,averageFaceNormal); + + for (int i=0;i1e-6 || fabsf(v.y())>1e-6 || fabsf(v.z())>1e-6) return false; + return true; +} + +struct btInternalVertexPair +{ + btInternalVertexPair(short int v0,short int v1) + :m_v0(v0), + m_v1(v1) + { + if (m_v1>m_v0) + btSwap(m_v0,m_v1); + } + short int m_v0; + short int m_v1; + int getHash() const + { + return m_v0+(m_v1<<16); + } + bool equals(const btInternalVertexPair& other) const + { + return m_v0==other.m_v0 && m_v1==other.m_v1; + } +}; + +struct btInternalEdge +{ + btInternalEdge() + :m_face0(-1), + m_face1(-1) + { + } + short int m_face0; + short int m_face1; +}; + +// + +#ifdef TEST_INTERNAL_OBJECTS +bool btConvexUtility::testContainment() const +{ + for(int p=0;p<8;p++) + { + btVector3 LocalPt; + if(p==0) LocalPt = m_localCenter + btVector3(m_extents[0], m_extents[1], m_extents[2]); + else if(p==1) LocalPt = m_localCenter + btVector3(m_extents[0], m_extents[1], -m_extents[2]); + else if(p==2) LocalPt = m_localCenter + btVector3(m_extents[0], -m_extents[1], m_extents[2]); + else if(p==3) LocalPt = m_localCenter + btVector3(m_extents[0], -m_extents[1], -m_extents[2]); + else if(p==4) LocalPt = m_localCenter + btVector3(-m_extents[0], m_extents[1], m_extents[2]); + else if(p==5) LocalPt = m_localCenter + btVector3(-m_extents[0], m_extents[1], -m_extents[2]); + else if(p==6) LocalPt = m_localCenter + btVector3(-m_extents[0], -m_extents[1], m_extents[2]); + else if(p==7) LocalPt = m_localCenter + btVector3(-m_extents[0], -m_extents[1], -m_extents[2]); + + for(int i=0;i0.0f) + return false; + } + } + return true; +} +#endif + +void btConvexUtility::initialize() +{ + + btHashMap edges; + + btScalar TotalArea = 0.0f; + + m_localCenter.setValue(0, 0, 0); + for(int i=0;im_face0>=0); + btAssert(edptr->m_face1<0); + edptr->m_face1 = i; + } else + { + btInternalEdge ed; + ed.m_face0 = i; + edges.insert(vp,ed); + } + } + } + +#ifdef USE_CONNECTED_FACES + for(int i=0;im_face0>=0); + btAssert(edptr->m_face1>=0); + + int connectedFace = (edptr->m_face0==i)?edptr->m_face1:edptr->m_face0; + m_faces[i].m_connectedFaces[j] = connectedFace; + } + } +#endif//USE_CONNECTED_FACES + + for(int i=0;iMaxX) MaxX = pt.x(); + if(pt.y()MaxY) MaxY = pt.y(); + if(pt.z()MaxZ) MaxZ = pt.z(); + } + mC.setValue(MaxX+MinX, MaxY+MinY, MaxZ+MinZ); + mE.setValue(MaxX-MinX, MaxY-MinY, MaxZ-MinZ); + + + +// const btScalar r = m_radius / sqrtf(2.0f); + const btScalar r = m_radius / sqrtf(3.0f); + const int LargestExtent = mE.maxAxis(); + const btScalar Step = (mE[LargestExtent]*0.5f - r)/1024.0f; + m_extents[0] = m_extents[1] = m_extents[2] = r; + m_extents[LargestExtent] = mE[LargestExtent]*0.5f; + bool FoundBox = false; + for(int j=0;j<1024;j++) + { + if(testContainment()) + { + FoundBox = true; + break; + } + + m_extents[LargestExtent] -= Step; + } + if(!FoundBox) + { + m_extents[0] = m_extents[1] = m_extents[2] = r; + } + else + { + // Refine the box + const btScalar Step = (m_radius - r)/1024.0f; + const int e0 = (1< m_indices; + btScalar m_plane[4]; +}; + +ATTRIBUTE_ALIGNED16(class) btConvexUtility +{ + public: + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btVector3 m_localCenter; + btVector3 m_extents; + btVector3 mC; + btVector3 mE; + btScalar m_radius; + + btAlignedObjectArray m_vertices; + btAlignedObjectArray m_faces; + btAlignedObjectArray m_uniqueEdges; + + + btConvexUtility() + { + } + virtual ~btConvexUtility(); + + bool initializePolyhedralFeatures(const btVector3* orgVertices, int numVertices, bool mergeCoplanarTriangles=true); + + void initialize(); + bool testContainment() const; + + + +}; +#endif + \ No newline at end of file diff --git a/opencl/gpu_sat/host/btRigidBodyCL.h b/opencl/gpu_sat/host/btRigidBodyCL.h new file mode 100644 index 000000000..9bf0f633d --- /dev/null +++ b/opencl/gpu_sat/host/btRigidBodyCL.h @@ -0,0 +1,35 @@ +#ifndef BT_RIGID_BODY_CL +#define BT_RIGID_BODY_CL + +#include "parallel_primitives/host/btScalar.h" +#include "parallel_primitives/host/btMatrix3x3.h" + +ATTRIBUTE_ALIGNED16(struct) btRigidBodyCL +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + btVector3 m_pos; + btQuaternion m_quat; + btVector3 m_linVel; + btVector3 m_angVel; + + int m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; + + float getInvMass() const + { + return m_invMass; + } +}; + + +struct Inertia +{ + btMatrix3x3 m_invInertiaWorld; + btMatrix3x3 m_initInvInertia; +}; + + +#endif//BT_RIGID_BODY_CL diff --git a/opencl/gpu_sat/kernels/sat.cl b/opencl/gpu_sat/kernels/sat.cl new file mode 100644 index 000000000..3eb1f8faf --- /dev/null +++ b/opencl/gpu_sat/kernels/sat.cl @@ -0,0 +1,1262 @@ +//keep this enum in sync with the CPU version (in AdlCollisionShape.h) +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_CONCAVE_TRIMESH 5 +#define TRIANGLE_NUM_CONVEX_FACES 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 + + + +typedef unsigned int u32; + +///keep this in sync with btCollidable.h +typedef struct +{ + int m_numChildShapes; + int blaat2; + int m_shapeType; + int m_shapeIndex; + +} btCollidableGpu; + +typedef struct +{ + float4 m_childPosition; + float4 m_childOrientation; + int m_shapeIndex; + int m_unused0; + int m_unused1; + int m_unused2; +} btGpuChildShape; + + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} BodyData; + + +typedef struct +{ + float4 m_localCenter; + float4 m_extents; + float4 mC; + float4 mE; + + float m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; +} ConvexPolyhedronCL; + +typedef struct +{ + union + { + float4 m_min; + float m_minElems[4]; + int m_minIndices[4]; + }; + union + { + float4 m_max; + float m_maxElems[4]; + int m_maxIndices[4]; + }; +} btAabbCL; + +typedef struct +{ + float4 m_plane; + int m_indexOffset; + int m_numIndices; +} btGpuFace; + +#define make_float4 (float4) + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); + + +// float4 a1 = make_float4(a.xyz,0.f); +// float4 b1 = make_float4(b.xyz,0.f); + +// return cross(a1,b1); + +//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); + + // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); + + //return c; +} + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + v = make_float4(v.xyz,0.f); + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + +inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;im_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, __global const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;im_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA,const float4 ornA, + const float4 posB,const float4 ornB, + float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); + project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); + + if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) + return false; + return true; +} + + + +bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = hullA->m_numFaces; + // Test normals from hullA + for(int i=0;im_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + +bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* verticesA, + __global const float4* uniqueEdgesA, + __global const btGpuFace* facesA, + __global const int* indicesA, + const float4* verticesB, + const float4* uniqueEdgesB, + const btGpuFace* facesB, + const int* indicesB, + float4* sep, + float* dmin) +{ + int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = hullA->m_numFaces; + // Test normals from hullA + for(int i=0;im_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS *= -1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + +bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = qtRotate(ornA,edge0); + + for(int e1=0;e1m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = qtRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje *= -1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); + + if(Max00.0f) + { + *sep = -(*sep); + } + return true; +} + + +inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA,const float4 ornA, + const float4 posB,const float4 ornB, + float4* sep_axis, __global const float4* vertices,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0); + project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1); + + if(Max0m_numFaces; + // Test normals from hullA + for(int i=0;im_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + + curPlaneTests++; + + float d; + if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d)) + return false; + + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + + return true; +} + + + + +bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + float4* sep, + float* dmin) +{ + int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = qtRotate(ornA,edge0); + + for(int e1=0;e1m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = qtRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje*=-1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); + + if(Max00.0f) + { + *sep = -(*sep); + } + return true; +} + + +// work-in-progress +__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global const btGpuChildShape* gpuChildShapes, + __global volatile float4* gpuCompoundSepNormalsOut, + __global volatile int* gpuHasCompoundSepNormalsOut, + int numCompoundPairs + ) +{ + + int i = get_global_id(0); + if (i= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + gpuHasCompoundSepNormalsOut[i] = 0; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int hasSeparatingAxis = 5; + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + posA.w = 0.f; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = make_float4(1,0,0,0); + bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + + if (!sepB) + { + hasSeparatingAxis = 0; + } else//(!sepB) + { + bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + if (sepEE) + { + gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); + gpuHasCompoundSepNormalsOut[i] = 1; + }//sepEE + }//(!sepB) + }//(!sepA) + + + } + +} + +// work-in-progress +__kernel void findCompoundPairsKernel( __global const int2* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global const btGpuChildShape* gpuChildShapes, + __global volatile int4* gpuCompoundPairsOut, + __global volatile int* numCompoundPairsOut, + int numPairs, + int maxNumCompoundPairsCapacity + ) +{ + + int i = get_global_id(0); + + if (i vert.x) + triAabb.m_min.x = vert.x; + if (triAabb.m_min.y > vert.y) + triAabb.m_min.y = vert.y; + if (triAabb.m_min.z > vert.z) + triAabb.m_min.z = vert.z; + + if (triAabb.m_max.x < vert.x) + triAabb.m_max.x = vert.x; + if (triAabb.m_max.y < vert.y) + triAabb.m_max.y = vert.y; + if (triAabb.m_max.z < vert.z) + triAabb.m_max.z = vert.z; +#else + triAabb.m_min = min(triAabb.m_min,vert); + triAabb.m_max = max(triAabb.m_max,vert); +#endif + } + + overlap = true; + overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; + overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; + overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; + + if (overlap) + { + float dmin = FLT_MAX; + int hasSeparatingAxis=5; + float4 sepAxis=make_float4(1,2,3,4); + +#if 1 + + int localCC=0; + numActualConcaveConvexTests++; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; +//double-check next test +// if (numVertsIn < 2) +// return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + +int clipFaceAgainstHull(const float4 separatingNormal, __global const ConvexPolyhedronCL* hullA, + const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + __global const float4* vertices, + __global const btGpuFace* faces, + __global const int* indices, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + btGpuFace polyA = faces[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0m_vertexOffset+indices[polyA.m_indexOffset+e0]]; + const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;im_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + btGpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; + const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;im_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, + faces, + indices,localContactsOut,localContactCapacity); + } + + return numContactsOut; +} + + +int clipHullAgainstHullLocalA(const float4 separatingNormal, + const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* localContactsOut, + int localContactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, + verticesA,facesA,indicesA, + verticesB,facesB,indicesB, + localContactsOut,localContactCapacity); + } + + return numContactsOut; +} + +#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ +for(int offset=0; offset64) + nPoints = 64; + + float4 center = make_float4(0.f); + { + + for (int i=0;i a[ie].x )? a[0].x: a[ie].x; + a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; + a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; + a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w; + } + + idx[0] = (int)a[0].x & 0xff; + idx[1] = (int)a[0].y & 0xff; + idx[2] = (int)a[0].z & 0xff; + idx[3] = (int)a[0].w & 0xff; + } + } + + { + float2 h[64]; + PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); + REDUCE_MIN( h, nPoints ); + max00 = h[0]; + } + } + + contactIdx[0] = idx[0]; + contactIdx[1] = idx[1]; + contactIdx[2] = idx[2]; + contactIdx[3] = idx[3]; + +// if( max00.y < 0.0f ) +// contactIdx[0] = (int)max00.x; + + //does this sort happen on GPU too? + //std::sort( contactIdx, contactIdx+4 ); + + return 4; + } +} + + + +__kernel void extractManifoldAndAddContactKernel(__global const int2* pairs, + __global const BodyData* rigidBodies, + __global const float4* closestPointsWorld, + __global const float4* separatingNormalsWorld, + __global const int* contactCounts, + __global const int* contactOffsets, + __global Contact4* restrict contactsOut, + counter32_t nContactsOut, + int numPairs, + int pairIndex + ) +{ + int idx = get_global_id(0); + + if (idxm_worldNormal = normal; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = idx; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; + for (int i=0;im_worldPos[i] = localPoints[contactIdx[i]]; + } + GET_NPOINTS(*c) = nContacts; + } + } +} + + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + +void trMul(float4 translationA, Quaternion orientationA, + float4 translationB, Quaternion orientationB, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtMul(orientationA,orientationB); + *translationOut = transform(&translationB,&translationA,&orientationA); +} + +void computeContactPlaneConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const btGpuFace* faces, + __global Contact4* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numPairs) +{ + float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; + float radius = collidables[collidableIndexB].m_radius; + float4 posA1 = rigidBodies[bodyIndexA].m_pos; + float4 ornA1 = rigidBodies[bodyIndexA].m_quat; + float4 posB1 = rigidBodies[bodyIndexB].m_pos; + float4 ornB1 = rigidBodies[bodyIndexB].m_quat; + + bool hasCollision = false; + float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); + float planeConstant = planeEq.w; + float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; + { + float4 invPosA;Quaternion invOrnA; + trInverse(posA1,ornA1,&invPosA,&invOrnA); + trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); + } + float4 planeInConvexPos1; Quaternion planeInConvexOrn1; + { + float4 invPosB;Quaternion invOrnB; + trInverse(posB1,ornB1,&invPosB,&invOrnB); + trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); + } + float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius; + float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); + float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant; + hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold(); + if (hasCollision) + { + float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1; + float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1); + float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1); + float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance; + pOnB1.w = distance; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < numPairs) + { + __global Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormal = normalOnSurfaceB1; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_worldPos[0] = pOnB1; + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if (hasCollision) +} + + +__kernel void clipHullHullKernel( __global const int2* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global Contact4* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numPairs) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i 0.00001) + { + normalOnSurfaceB = diff / len; + } + float4 contactPosB = posB + normalOnSurfaceB*radiusB; + contactPosB.w = dist; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < numPairs) + { + __global Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormal = -normalOnSurfaceB; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_worldPos[0] = contactPosB; + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if ( len <= (radiusA+radiusB)) + + return; + }//SHAPE_SPHERE SHAPE_SPHERE + + if (hasSeparatingAxis[i]) + { + + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + + int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, + rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + //if ((dstIdx+nReducedContacts) < capacity) + { + __global Contact4* c = globalContactsOut+ dstIdx; + c->m_worldNormal = normal; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + + for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + + + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -gpuCompoundSepNormalsOut[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + //if ((dstIdx+nReducedContacts) < capacity) + { + __global Contact4* c = globalContactsOut+ dstIdx; + c->m_worldNormal = normal; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = gpuCompoundPairs[pairIndex].x; + int bodyB = gpuCompoundPairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + + for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (gpuHasCompoundSepNormalsOut[i]) + }// if (i 0.00001) + { + normalOnSurfaceB = diff / len; + } + float4 contactPosB = posB + normalOnSurfaceB*radiusB; + contactPosB.w = dist; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < numPairs) + { + __global Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormal = normalOnSurfaceB; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_worldPos[0] = contactPosB; + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if ( len <= (radiusA+radiusB)) + }//SHAPE_SPHERE SHAPE_SPHERE + }//if (i0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + //if ((dstIdx+nReducedContacts) < capacity) + { + __global Contact4* c = globalContactsOut+ dstIdx; + c->m_worldNormal = normal; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = concavePairsIn[pairIndex].x; + int bodyB = concavePairsIn[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + + for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (im_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + +int clipFaces(__global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + __global float4* worldVertsB2, + int capacityWorldVertsB2, + const float minDist, float maxDist, + __global int4* clippingFaces, + int pairIndex) +{ + int numContactsOut = 0; + + int closestFaceA = clippingFaces[pairIndex].x; + int closestFaceB = clippingFaces[pairIndex].y; + int numVertsInA = clippingFaces[pairIndex].z; + int numVertsInB = clippingFaces[pairIndex].w; + + int numVertsOut = 0; + + if (closestFaceA<0) + return numContactsOut; + + __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; + __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; + + + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e0=0) + { + + + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e00) + { + + __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; + float4 normal = -separatingNormals[i]; + + int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + +//#if 0 + + if (dstIdx < numPairs) + { + __global Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormal = normal; + c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + + switch (nReducedContacts) + { + case 4: + c->m_worldPos[3] = pointsIn[contactIdx.w]; + case 3: + c->m_worldPos[2] = pointsIn[contactIdx.z]; + case 2: + c->m_worldPos[1] = pointsIn[contactIdx.y]; + case 1: + c->m_worldPos[0] = pointsIn[contactIdx.x]; + default: + { + } + }; + + GET_NPOINTS(*c) = nReducedContacts; + + } + + +//#endif + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i= 0, so output intersection\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start >= 0, end < 0 so output intersection and end\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" }\n" +" firstVertex = endVertex;\n" +" ds = de;\n" +" }\n" +" return numVertsOut;\n" +"}\n" +"\n" +"\n" +"\n" +"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" +"{\n" +" \n" +" int ve;\n" +" float ds, de;\n" +" int numVertsOut = 0;\n" +"//double-check next test\n" +"// if (numVertsIn < 2)\n" +"// return 0;\n" +"\n" +" float4 firstVertex=pVtxIn[numVertsIn-1];\n" +" float4 endVertex = pVtxIn[0];\n" +" \n" +" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +"\n" +" for (ve = 0; ve < numVertsIn; ve++)\n" +" {\n" +" endVertex=pVtxIn[ve];\n" +"\n" +" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +"\n" +" if (ds<0)\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start < 0, end < 0, so output endVertex\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" else\n" +" {\n" +" // Start < 0, end >= 0, so output intersection\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start >= 0, end < 0 so output intersection and end\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" }\n" +" firstVertex = endVertex;\n" +" ds = de;\n" +" }\n" +" return numVertsOut;\n" +"}\n" +"\n" +"\n" +"int clipFaceAgainstHull(const float4 separatingNormal, __global const ConvexPolyhedronCL* hullA, \n" +" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +" float4* worldVertsB2, int capacityWorldVertsB2,\n" +" const float minDist, float maxDist,\n" +" __global const float4* vertices,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" float4* contactsOut,\n" +" int contactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +"\n" +" float4* pVtxIn = worldVertsB1;\n" +" float4* pVtxOut = worldVertsB2;\n" +" \n" +" int numVertsIn = numWorldVertsB1;\n" +" int numVertsOut = 0;\n" +"\n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;facem_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" faces[hullA->m_faceOffset+face].m_plane.x, \n" +" faces[hullA->m_faceOffset+face].m_plane.y, \n" +" faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" }\n" +" }\n" +" }\n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +"\n" +" btGpuFace polyA = faces[hullA->m_faceOffset+closestFaceA];\n" +"\n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" int numVerticesA = polyA.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" +" const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +" const float4 edge0 = a - b;\n" +" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +"\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = transform(&a,&posA,&ornA);\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" \n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" \n" +" //clip face\n" +" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +"\n" +" //btSwap(pVtxIn,pVtxOut);\n" +" float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsIn = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +"\n" +" \n" +" // only keep points that are behind the witness face\n" +" {\n" +" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float localPlaneEq = polyA.m_plane.w;\n" +" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +" for (int i=0;im_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x, \n" +" facesA[hullA->m_faceOffset+face].m_plane.y, \n" +" facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" }\n" +" }\n" +" }\n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +"\n" +" btGpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" +"\n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" int numVerticesA = polyA.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" +" const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +" const float4 edge0 = a - b;\n" +" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +"\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = transform(&a,&posA,&ornA);\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" \n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" \n" +" //clip face\n" +" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +"\n" +" //btSwap(pVtxIn,pVtxOut);\n" +" float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsIn = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +"\n" +" \n" +" // only keep points that are behind the witness face\n" +" {\n" +" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float localPlaneEq = polyA.m_plane.w;\n" +" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +" for (int i=0;im_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" +" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +"\n" +" {\n" +" const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +"\n" +" if (closestFaceB>=0)\n" +" {\n" +" numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" +" posA,ornA,\n" +" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" +" faces,\n" +" indices,localContactsOut,localContactCapacity);\n" +" }\n" +"\n" +" return numContactsOut;\n" +"}\n" +"\n" +"\n" +"int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" +" const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" const float4* verticesA,\n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB,\n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* localContactsOut,\n" +" int localContactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +"\n" +"\n" +" int closestFaceB=-1;\n" +" float dmax = -FLT_MAX;\n" +"\n" +" {\n" +" for(int face=0;facem_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" +" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +"\n" +" {\n" +" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +"\n" +" if (closestFaceB>=0)\n" +" {\n" +" numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" +" posA,ornA,\n" +" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" +" verticesA,facesA,indicesA,\n" +" verticesB,facesB,indicesB,\n" +" localContactsOut,localContactCapacity);\n" +" }\n" +"\n" +" return numContactsOut;\n" +"}\n" +"\n" +"#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; }\n" +"#define REDUCE_MIN(v, n) {int i=0;" +"for(int offset=0; offset64)\n" +" nPoints = 64;\n" +" \n" +" float4 center = make_float4(0.f);\n" +" {\n" +" \n" +" for (int i=0;i a[ie].x )? a[0].x: a[ie].x;\n" +" a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" +" a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" +" a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" +" }\n" +"\n" +" idx[0] = (int)a[0].x & 0xff;\n" +" idx[1] = (int)a[0].y & 0xff;\n" +" idx[2] = (int)a[0].z & 0xff;\n" +" idx[3] = (int)a[0].w & 0xff;\n" +" }\n" +" }\n" +"\n" +" {\n" +" float2 h[64];\n" +" PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" +" REDUCE_MIN( h, nPoints );\n" +" max00 = h[0];\n" +" }\n" +" }\n" +"\n" +" contactIdx[0] = idx[0];\n" +" contactIdx[1] = idx[1];\n" +" contactIdx[2] = idx[2];\n" +" contactIdx[3] = idx[3];\n" +"\n" +"// if( max00.y < 0.0f )\n" +"// contactIdx[0] = (int)max00.x;\n" +"\n" +" //does this sort happen on GPU too?\n" +" //std::sort( contactIdx, contactIdx+4 );\n" +"\n" +" return 4;\n" +" }\n" +"}\n" +"\n" +"\n" +"\n" +"__kernel void extractManifoldAndAddContactKernel(__global const int2* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const float4* closestPointsWorld,\n" +" __global const float4* separatingNormalsWorld,\n" +" __global const int* contactCounts,\n" +" __global const int* contactOffsets,\n" +" __global Contact4* restrict contactsOut,\n" +" counter32_t nContactsOut,\n" +" int numPairs,\n" +" int pairIndex\n" +" )\n" +"{\n" +" int idx = get_global_id(0);\n" +" \n" +" if (idxm_worldNormal = normal;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = idx;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +" for (int i=0;im_worldPos[i] = localPoints[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nContacts;\n" +" }\n" +" }\n" +"}\n" +"\n" +"\n" +"void trInverse(float4 translationIn, Quaternion orientationIn,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtInvert(orientationIn);\n" +" *translationOut = qtRotate(*orientationOut, -translationIn);\n" +"}\n" +"\n" +"void trMul(float4 translationA, Quaternion orientationA,\n" +" float4 translationB, Quaternion orientationB,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtMul(orientationA,orientationB);\n" +" *translationOut = transform(&translationB,&translationA,&orientationA);\n" +"}\n" +"\n" +"void computeContactPlaneConvex(int pairIndex,\n" +" int bodyIndexA, int bodyIndexB, \n" +" int collidableIndexA, int collidableIndexB, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const btGpuFace* faces,\n" +" __global Contact4* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numPairs)\n" +"{\n" +" float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +" float radius = collidables[collidableIndexB].m_radius;\n" +" float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" +" float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" +" float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" +" float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" +" \n" +" bool hasCollision = false;\n" +" float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +" float planeConstant = planeEq.w;\n" +" float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +" {\n" +" float4 invPosA;Quaternion invOrnA;\n" +" trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" +" trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +" }\n" +" float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" +" {\n" +" float4 invPosB;Quaternion invOrnB;\n" +" trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" +" trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); \n" +" }\n" +" float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" +" float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +" float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" +" hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" +" if (hasCollision)\n" +" {\n" +" float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1;\n" +" float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" +" float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" +" float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" +" pOnB1.w = distance;\n" +"\n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < numPairs)\n" +" {\n" +" __global Contact4* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormal = normalOnSurfaceB1;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +" c->m_worldPos[0] = pOnB1;\n" +" GET_NPOINTS(*c) = 1;\n" +" }//if (dstIdx < numPairs)\n" +" }//if (hasCollision)\n" +"}\n" +"\n" +"\n" +"__kernel void clipHullHullKernel( __global const int2* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global const float4* separatingNormals,\n" +" __global const int* hasSeparatingAxis,\n" +" __global Contact4* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numPairs)\n" +"{\n" +"\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" float4 worldVertsB1[64];\n" +" float4 worldVertsB2[64];\n" +" int capacityWorldVerts = 64; \n" +"\n" +" float4 localContactsOut[64];\n" +" int localContactCapacity=64;\n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +"\n" +" if (i 0.00001)\n" +" {\n" +" normalOnSurfaceB = diff / len;\n" +" }\n" +" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +" contactPosB.w = dist;\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < numPairs)\n" +" {\n" +" __global Contact4* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormal = -normalOnSurfaceB;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" c->m_worldPos[0] = contactPosB;\n" +" GET_NPOINTS(*c) = 1;\n" +" }//if (dstIdx < numPairs)\n" +" }//if ( len <= (radiusA+radiusB))\n" +"\n" +" return;\n" +" }//SHAPE_SPHERE SHAPE_SPHERE\n" +"\n" +" if (hasSeparatingAxis[i])\n" +" {\n" +"\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +"\n" +"\n" +" \n" +" int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" +" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +" worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +" minDist, maxDist,\n" +" vertices,faces,indices,\n" +" localContactsOut,localContactCapacity);\n" +" \n" +" if (numLocalContactsOut>0)\n" +" {\n" +" float4 normal = -separatingNormals[i];\n" +" int nPoints = numLocalContactsOut;\n" +" float4* pointsIn = localContactsOut;\n" +" int contactIdx[4];// = {-1,-1,-1,-1};\n" +"\n" +" contactIdx[0] = -1;\n" +" contactIdx[1] = -1;\n" +" contactIdx[2] = -1;\n" +" contactIdx[3] = -1;\n" +" \n" +" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" //if ((dstIdx+nReducedContacts) < capacity)\n" +" {\n" +" __global Contact4* c = globalContactsOut+ dstIdx;\n" +" c->m_worldNormal = normal;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +"\n" +" for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" }\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (hasSeparatingAxis[i])\n" +" }// if (i= 0)\n" +" {\n" +" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +" float4 newOrnA = qtMul(ornA,childOrnA);\n" +" posA = newPosA;\n" +" ornA = newOrnA;\n" +" } else\n" +" {\n" +" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" }\n" +" \n" +" if (childShapeIndexB>=0)\n" +" {\n" +" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" } else\n" +" {\n" +" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" +" }\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +"\n" +" \n" +"\n" +"\n" +" \n" +" int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" +" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +" minDist, maxDist,\n" +" vertices,faces,indices,\n" +" localContactsOut,localContactCapacity);\n" +" \n" +" if (numLocalContactsOut>0)\n" +" {\n" +" float4 normal = -gpuCompoundSepNormalsOut[i];\n" +" int nPoints = numLocalContactsOut;\n" +" float4* pointsIn = localContactsOut;\n" +" int contactIdx[4];// = {-1,-1,-1,-1};\n" +"\n" +" contactIdx[0] = -1;\n" +" contactIdx[1] = -1;\n" +" contactIdx[2] = -1;\n" +" contactIdx[3] = -1;\n" +" \n" +" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" //if ((dstIdx+nReducedContacts) < capacity)\n" +" {\n" +" __global Contact4* c = globalContactsOut+ dstIdx;\n" +" c->m_worldNormal = normal;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = gpuCompoundPairs[pairIndex].x;\n" +" int bodyB = gpuCompoundPairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +"\n" +" for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" }\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (gpuHasCompoundSepNormalsOut[i])\n" +" }// if (i 0.00001)\n" +" {\n" +" normalOnSurfaceB = diff / len;\n" +" }\n" +" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +" contactPosB.w = dist;\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < numPairs)\n" +" {\n" +" __global Contact4* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormal = normalOnSurfaceB;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" c->m_worldPos[0] = contactPosB;\n" +" GET_NPOINTS(*c) = 1;\n" +" }//if (dstIdx < numPairs)\n" +" }//if ( len <= (radiusA+radiusB))\n" +" }//SHAPE_SPHERE SHAPE_SPHERE\n" +" }//if (i0)\n" +" {\n" +" float4 normal = -separatingNormals[i];\n" +" int nPoints = numLocalContactsOut;\n" +" float4* pointsIn = localContactsOut;\n" +" int contactIdx[4];// = {-1,-1,-1,-1};\n" +"\n" +" contactIdx[0] = -1;\n" +" contactIdx[1] = -1;\n" +" contactIdx[2] = -1;\n" +" contactIdx[3] = -1;\n" +" \n" +" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" //if ((dstIdx+nReducedContacts) < capacity)\n" +" {\n" +" __global Contact4* c = globalContactsOut+ dstIdx;\n" +" c->m_worldNormal = normal;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = concavePairsIn[pairIndex].x;\n" +" int bodyB = concavePairsIn[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +"\n" +" for (int i=0;im_worldPos[i] = pointsIn[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" }\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (im_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" +" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const btGpuFace polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;facem_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" faces[hullA->m_faceOffset+face].m_plane.x,\n" +" faces[hullA->m_faceOffset+face].m_plane.y,\n" +" faces[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" for(int e0=0;e0m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" +"\n" +"\n" +"\n" +"int clipFaces(__global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" __global float4* worldVertsB2, \n" +" int capacityWorldVertsB2,\n" +" const float minDist, float maxDist,\n" +" __global int4* clippingFaces,\n" +" int pairIndex)\n" +"{\n" +" int numContactsOut = 0;\n" +" \n" +" int closestFaceA = clippingFaces[pairIndex].x;\n" +" int closestFaceB = clippingFaces[pairIndex].y;\n" +" int numVertsInA = clippingFaces[pairIndex].z;\n" +" int numVertsInB = clippingFaces[pairIndex].w;\n" +" \n" +" int numVertsOut = 0;\n" +" \n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +" \n" +" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +" \n" +" \n" +" \n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" \n" +" for(int e0=0;e0=0)\n" +" {\n" +" \n" +" \n" +" \n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" \n" +" for(int e0=0;e00)\n" +" {\n" +"\n" +" __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" +" float4 normal = -separatingNormals[i];\n" +" \n" +" int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +"//#if 0\n" +" \n" +" if (dstIdx < numPairs)\n" +" {\n" +" __global Contact4* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormal = normal;\n" +" c->m_coeffs = (u32)(0.f*0xffff) | ((u32)(0.7f*0xffff)<<16);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" \n" +" switch (nReducedContacts)\n" +" {\n" +" case 4:\n" +" c->m_worldPos[3] = pointsIn[contactIdx.w];\n" +" case 3:\n" +" c->m_worldPos[2] = pointsIn[contactIdx.z];\n" +" case 2:\n" +" c->m_worldPos[1] = pointsIn[contactIdx.y];\n" +" case 1:\n" +" c->m_worldPos[0] = pointsIn[contactIdx.x];\n" +" default:\n" +" {\n" +" }\n" +" };\n" +" \n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" \n" +" }\n" +" \n" +" \n" +"//#endif\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (hasSeparatingAxis[i])\n" +" }// if (im_numVertices;\n" +"\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;im_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"\n" +"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = hull->m_numVertices;\n" +"\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;im_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"\n" +"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA,const float4 ornA,\n" +" const float4 posB,const float4 ornB,\n" +" float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +"{\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +"\n" +" if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +" return false;\n" +" return true;\n" +"}\n" +"\n" +"\n" +"\n" +"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" \n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +"\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" int i = get_global_id(0);\n" +"\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;im_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS*=-1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"\n" +"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* verticesA, \n" +" __global const float4* uniqueEdgesA, \n" +" __global const btGpuFace* facesA,\n" +" __global const int* indicesA,\n" +" const float4* verticesB,\n" +" const float4* uniqueEdgesB, \n" +" const btGpuFace* facesB,\n" +" const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" int i = get_global_id(0);\n" +"\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;im_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS *= -1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"\n" +"\n" +"\n" +"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" int i = get_global_id(0);\n" +"\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +"\n" +" int curPlaneTests=0;\n" +"\n" +" int curEdgeEdge = 0;\n" +" // Test edges\n" +" for(int e0=0;e0m_numUniqueEdges;e0++)\n" +" {\n" +" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +" float4 edge0World = qtRotate(ornA,edge0);\n" +"\n" +" for(int e1=0;e1m_numUniqueEdges;e1++)\n" +" {\n" +" const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +" float4 edge1World = qtRotate(ornB,edge1);\n" +"\n" +"\n" +" float4 crossje = cross3(edge0World,edge1World);\n" +"\n" +" curEdgeEdge++;\n" +" if(!IsAlmostZero(crossje))\n" +" {\n" +" crossje = normalize3(crossje);\n" +" if (dot3F4(DeltaC2,crossje)<0)\n" +" crossje *= -1.f;\n" +"\n" +" float dist;\n" +" bool result = true;\n" +" {\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +" \n" +" if(Max00.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"\n" +"\n" +"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA,const float4 ornA,\n" +" const float4 posB,const float4 ornB,\n" +" float4* sep_axis, __global const float4* vertices,float* depth)\n" +"{\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" +"\n" +" if(Max0m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;im_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" \n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS*=-1.f;\n" +" \n" +" curPlaneTests++;\n" +" \n" +" float d;\n" +" if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" +" return false;\n" +" \n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +"\n" +"\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" \n" +" return true;\n" +"}\n" +"\n" +"\n" +"\n" +"\n" +"bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices, \n" +" __global const float4* uniqueEdges, \n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" int i = get_global_id(0);\n" +"\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +"\n" +" int curPlaneTests=0;\n" +"\n" +" int curEdgeEdge = 0;\n" +" // Test edges\n" +" for(int e0=0;e0m_numUniqueEdges;e0++)\n" +" {\n" +" const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" +" float4 edge0World = qtRotate(ornA,edge0);\n" +"\n" +" for(int e1=0;e1m_numUniqueEdges;e1++)\n" +" {\n" +" const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" +" float4 edge1World = qtRotate(ornB,edge1);\n" +"\n" +"\n" +" float4 crossje = cross3(edge0World,edge1World);\n" +"\n" +" curEdgeEdge++;\n" +" if(!IsAlmostZero(crossje))\n" +" {\n" +" crossje = normalize3(crossje);\n" +" if (dot3F4(DeltaC2,crossje)<0)\n" +" crossje*=-1.f;\n" +" \n" +" float dist;\n" +" bool result = true;\n" +" {\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +" \n" +" if(Max00.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"\n" +"\n" +"// work-in-progress\n" +"__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global volatile float4* gpuCompoundSepNormalsOut,\n" +" __global volatile int* gpuHasCompoundSepNormalsOut,\n" +" int numCompoundPairs\n" +" )\n" +"{\n" +"\n" +" int i = get_global_id(0);\n" +" if (i= 0)\n" +" {\n" +" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +" float4 newOrnA = qtMul(ornA,childOrnA);\n" +" posA = newPosA;\n" +" ornA = newOrnA;\n" +" } else\n" +" {\n" +" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" }\n" +" \n" +" if (childShapeIndexB>=0)\n" +" {\n" +" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" } else\n" +" {\n" +" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" +" }\n" +" \n" +" gpuHasCompoundSepNormalsOut[i] = 0;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" int hasSeparatingAxis = 5;\n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" posA.w = 0.f;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal = make_float4(1,0,0,0);\n" +" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +" hasSeparatingAxis = 4;\n" +" if (!sepA)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +"\n" +" if (!sepB)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else//(!sepB)\n" +" {\n" +" bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +" if (sepEE)\n" +" {\n" +" gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" +" gpuHasCompoundSepNormalsOut[i] = 1;\n" +" }//sepEE\n" +" }//(!sepB)\n" +" }//(!sepA)\n" +" \n" +" \n" +" }\n" +" \n" +"}\n" +"\n" +"// work-in-progress\n" +"__kernel void findCompoundPairsKernel( __global const int2* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global volatile int4* gpuCompoundPairsOut,\n" +" __global volatile int* numCompoundPairsOut,\n" +" int numPairs,\n" +" int maxNumCompoundPairsCapacity\n" +" )\n" +"{\n" +"\n" +" int i = get_global_id(0);\n" +"\n" +" if (i vert.x)\n" +" triAabb.m_min.x = vert.x;\n" +" if (triAabb.m_min.y > vert.y)\n" +" triAabb.m_min.y = vert.y;\n" +" if (triAabb.m_min.z > vert.z)\n" +" triAabb.m_min.z = vert.z;\n" +"\n" +" if (triAabb.m_max.x < vert.x)\n" +" triAabb.m_max.x = vert.x;\n" +" if (triAabb.m_max.y < vert.y)\n" +" triAabb.m_max.y = vert.y;\n" +" if (triAabb.m_max.z < vert.z)\n" +" triAabb.m_max.z = vert.z;\n" +"#else \n" +" triAabb.m_min = min(triAabb.m_min,vert); \n" +" triAabb.m_max = max(triAabb.m_max,vert); \n" +"#endif \n" +" }\n" +"\n" +" overlap = true;\n" +" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" +" \n" +" if (overlap)\n" +" {\n" +" float dmin = FLT_MAX;\n" +" int hasSeparatingAxis=5;\n" +" float4 sepAxis=make_float4(1,2,3,4);\n" +"\n" +"#if 1\n" +" \n" +" int localCC=0;\n" +" numActualConcaveConvexTests++;\n" +"\n" +" //a triangle has 3 unique edges\n" +" convexPolyhedronA.m_numUniqueEdges = 3;\n" +" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +" float4 uniqueEdgesA[3];\n" +" \n" +" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +"\n" +"\n" +" convexPolyhedronA.m_faceOffset = 0;\n" +" \n" +" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +" \n" +" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +" int indicesA[3+3+2+2+2];\n" +" int curUsedIndices=0;\n" +" int fidx=0;\n" +"\n" +" //front size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[0] = 0;\n" +" indicesA[1] = 1;\n" +" indicesA[2] = 2;\n" +" curUsedIndices+=3;\n" +" float c = face.m_plane.w;\n" +" facesA[fidx].m_plane.x = normal.x;\n" +" facesA[fidx].m_plane.y = normal.y;\n" +" facesA[fidx].m_plane.z = normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" //back size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[3]=2;\n" +" indicesA[4]=1;\n" +" indicesA[5]=0;\n" +" curUsedIndices+=3;\n" +" float c = dot(normal,verticesA[0]);\n" +" float c1 = -face.m_plane.w;\n" +" facesA[fidx].m_plane.x = -normal.x;\n" +" facesA[fidx].m_plane.y = -normal.y;\n" +" facesA[fidx].m_plane.z = -normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +"\n" +" bool addEdgePlanes = true;\n" +" if (addEdgePlanes)\n" +" {\n" +" int numVertices=3;\n" +" int prevVertex = numVertices-1;\n" +" for (int i=0;i +#include "../basic_initialize/btOpenCLUtils.h" +#include "../host/ConvexHullContact.h" + +#include "parallel_primitives/host/btVector3.h" +#include "parallel_primitives/host/btFillCL.h" +#include "parallel_primitives/host/btBoundSearchCL.h" +#include "parallel_primitives/host/btRadixSort32CL.h" +#include "parallel_primitives/host/btPrefixScanCL.h" +#include "parallel_primitives/host/CommandLineArgs.h" +#include "../host/ConvexHullContact.h" + +#include "parallel_primitives/host/btMinMax.h" +int g_nPassed = 0; +int g_nFailed = 0; +bool g_testFailed = 0; + +#define TEST_INIT g_testFailed = 0; +#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;} +#define TEST_REPORT(testName) printf("[%s] %s\n",(g_testFailed)?"X":"O", testName); if(g_testFailed) g_nFailed++; else g_nPassed++; +#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) + +cl_context g_context=0; +cl_device_id g_device=0; +cl_command_queue g_queue =0; +const char* g_deviceName = 0; + +void initCL(int preferredDeviceIndex, int preferredPlatformIndex) +{ + void* glCtx=0; + void* glDC = 0; + int ciErrNum = 0; + //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers) + + cl_device_type deviceType = CL_DEVICE_TYPE_ALL; + + g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + int numDev = btOpenCLUtils::getNumDevices(g_context); + if (numDev>0) + { + btOpenCLDeviceInfo info; + g_device= btOpenCLUtils::getDevice(g_context,0); + g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum); + oclCHECKERROR(ciErrNum, CL_SUCCESS); + btOpenCLUtils::printDeviceInfo(g_device); + btOpenCLUtils::getDeviceInfo(g_device,&info); + g_deviceName = info.m_deviceName; + } +} + +void exitCL() +{ + clReleaseCommandQueue(g_queue); + clReleaseContext(g_context); +} + + +inline void gpuConvexHullContactTest() +{ + TEST_INIT; + + TEST_ASSERT(1); + + GpuSatCollision* sat = new GpuSatCollision(g_context,g_device,g_queue); + + delete sat; + + TEST_REPORT( "gpuConvexHullContactTest" ); +} + +int main(int argc, char** argv) +{ + int preferredDeviceIndex = -1; + int preferredPlatformIndex = -1; + + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("deviceId", preferredDeviceIndex); + args.GetCmdLineArgument("platformId", preferredPlatformIndex); + + initCL(preferredDeviceIndex,preferredPlatformIndex); + + gpuConvexHullContactTest(); + + printf("%d tests passed\n",g_nPassed, g_nFailed); + if (g_nFailed) + { + printf("%d tests failed\n",g_nFailed); + } + printf("End, press \n"); + + getchar(); + + exitCL(); + +} + diff --git a/opencl/gpu_sat/test/premake4.lua b/opencl/gpu_sat/test/premake4.lua new file mode 100644 index 000000000..b4cbae720 --- /dev/null +++ b/opencl/gpu_sat/test/premake4.lua @@ -0,0 +1,46 @@ +function createProject(vendor) + hasCL = findOpenCL(vendor) + + if (hasCL) then + + project ("OpenCL_sat_test_" .. vendor) + + initOpenCL(vendor) + + language "C++" + + kind "ConsoleApp" + targetdir "../../../bin" + includedirs {"..","../.."} + + + files { + "main.cpp", + "../../basic_initialize/btOpenCLInclude.h", + "../../basic_initialize/btOpenCLUtils.cpp", + "../../basic_initialize/btOpenCLUtils.h", + "../host/ConvexHullContact.cpp", + "../host/ConvexHullContact.h", + "../../parallel_primitives/host/btFillCL.cpp", + "../../parallel_primitives/host/btFillCL.h", + "../../parallel_primitives/host/btBoundSearchCL.cpp", + "../../parallel_primitives/host/btBoundSearchCL.h", + "../../parallel_primitives/host/btPrefixScanCL.cpp", + "../../parallel_primitives/host/btPrefixScanCL.h", + "../../parallel_primitives/host/btRadixSort32CL.cpp", + "../../parallel_primitives/host/btRadixSort32CL.h", + "../../parallel_primitives/host/btAlignedAllocator.cpp", + "../../parallel_primitives/host/btAlignedAllocator.h", + "../../parallel_primitives/host/btAlignedObjectArray.h", + "../../parallel_primitives/host/btQuickprof.cpp", + "../../parallel_primitives/host/btQuickprof.h", + + } + + end +end + +createProject("AMD") +createProject("Intel") +createProject("NVIDIA") +createProject("Apple") \ No newline at end of file diff --git a/opencl/parallel_primitives/host/btFillCL.h b/opencl/parallel_primitives/host/btFillCL.h index a9303a73d..9de498b3d 100644 --- a/opencl/parallel_primitives/host/btFillCL.h +++ b/opencl/parallel_primitives/host/btFillCL.h @@ -4,83 +4,9 @@ #include "btOpenCLArray.h" #include "btScalar.h" -ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4 -{ - BT_DECLARE_ALIGNED_ALLOCATOR(); +#include "btInt2.h" +#include "btInt4.h" - union - { - struct - { - unsigned int x,y,z,w; - }; - struct - { - unsigned int s[4]; - }; - }; -}; - -ATTRIBUTE_ALIGNED16(struct) btInt4 -{ - BT_DECLARE_ALIGNED_ALLOCATOR(); - - union - { - struct - { - int x,y,z,w; - }; - struct - { - int s[4]; - }; - }; -}; - -struct btUnsignedInt2 -{ - union - { - struct - { - unsigned int x,y; - }; - struct - { - unsigned int s[2]; - }; - }; -}; - -struct btInt2 -{ - union - { - struct - { - int x,y; - }; - struct - { - int s[2]; - }; - }; -}; - -SIMD_FORCE_INLINE btInt4 btMakeInt4(int x, int y, int z, int w = 0) -{ - btInt4 v; - v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; - return v; -} - -SIMD_FORCE_INLINE btUnsignedInt4 btMakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0) -{ - btUnsignedInt4 v; - v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; - return v; -} class btFillCL { diff --git a/opencl/parallel_primitives/host/btInt2.h b/opencl/parallel_primitives/host/btInt2.h new file mode 100644 index 000000000..a0a2977d8 --- /dev/null +++ b/opencl/parallel_primitives/host/btInt2.h @@ -0,0 +1,35 @@ +#ifndef BT_INT2_H +#define BT_INT2_H + +struct btUnsignedInt2 +{ + union + { + struct + { + unsigned int x,y; + }; + struct + { + unsigned int s[2]; + }; + }; +}; + +struct btInt2 +{ + union + { + struct + { + int x,y; + }; + struct + { + int s[2]; + }; + }; +}; + + +#endif \ No newline at end of file diff --git a/opencl/parallel_primitives/host/btInt4.h b/opencl/parallel_primitives/host/btInt4.h new file mode 100644 index 000000000..c4539e481 --- /dev/null +++ b/opencl/parallel_primitives/host/btInt4.h @@ -0,0 +1,55 @@ +#ifndef BT_INT4_H +#define BT_INT4_H + +#include "btScalar.h" + +ATTRIBUTE_ALIGNED16(struct) btUnsignedInt4 +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + union + { + struct + { + unsigned int x,y,z,w; + }; + struct + { + unsigned int s[4]; + }; + }; +}; + +ATTRIBUTE_ALIGNED16(struct) btInt4 +{ + BT_DECLARE_ALIGNED_ALLOCATOR(); + + union + { + struct + { + int x,y,z,w; + }; + struct + { + int s[4]; + }; + }; +}; + +SIMD_FORCE_INLINE btInt4 btMakeInt4(int x, int y, int z, int w = 0) +{ + btInt4 v; + v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; + return v; +} + +SIMD_FORCE_INLINE btUnsignedInt4 btMakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0) +{ + btUnsignedInt4 v; + v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w; + return v; +} + + +#endif //BT_INT4_H