diff --git a/demo/gpudemo/main_opengl3core.cpp b/demo/gpudemo/main_opengl3core.cpp index 1f69a0e0f..1c51f8357 100644 --- a/demo/gpudemo/main_opengl3core.cpp +++ b/demo/gpudemo/main_opengl3core.cpp @@ -359,6 +359,8 @@ const char* g_deviceName = "blaat"; int main(int argc, char* argv[]) { + + printf("main start"); CommandLineArgs args(argc,argv); diff --git a/opencl/basic_initialize/btOpenCLUtils.cpp b/opencl/basic_initialize/btOpenCLUtils.cpp index af29461e7..539de319b 100644 --- a/opencl/basic_initialize/btOpenCLUtils.cpp +++ b/opencl/basic_initialize/btOpenCLUtils.cpp @@ -16,6 +16,10 @@ subject to the following restrictions: //original author: Roman Ponomarev //cleanup by Erwin Coumans +bool gDebugForceLoadingFromSource = false; +bool gDebugSkipLoadingBinary = false; + + #include #ifdef _WIN32 @@ -57,8 +61,6 @@ static const char* spPlatformVendor = #endif //_WIN32 #endif -bool gDebugForceLoadingFromSource = false; -bool gDebugSkipLoadingBinary = false; void MyFatalBreakAPPLE( const char * errstr , const void * private_info , @@ -519,7 +521,7 @@ static const char* strip2(const char* name, const char* pattern) return oriptr; } -cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg , const char* clFileNameForCaching) +cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg , const char* clFileNameForCaching, bool disableBinaryCaching) { const char* additionalMacros = additionalMacrosArg?additionalMacrosArg:""; @@ -530,7 +532,7 @@ cl_program btOpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_dev char binaryFileName[BT_MAX_STRING_LENGTH]; char* bla=0; - if (clFileNameForCaching && !(gDebugSkipLoadingBinary||gDebugForceLoadingFromSource) ) + if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary||gDebugForceLoadingFromSource) ) { char deviceName[256]; @@ -874,7 +876,7 @@ cl_kernel btOpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_devic if (!m_cpProgram) { - m_cpProgram = btOpenCLUtils_compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros,0); + m_cpProgram = btOpenCLUtils_compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros,0, false); } diff --git a/opencl/basic_initialize/btOpenCLUtils.h b/opencl/basic_initialize/btOpenCLUtils.h index a1c7fbd7c..29a732e41 100644 --- a/opencl/basic_initialize/btOpenCLUtils.h +++ b/opencl/basic_initialize/btOpenCLUtils.h @@ -41,7 +41,7 @@ void btOpenCLUtils_printDeviceInfo(cl_device_id device); cl_kernel btOpenCLUtils_compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog,const char* additionalMacros); //optional -cl_program btOpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros , const char* srcFileNameForCaching); +cl_program btOpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros , const char* srcFileNameForCaching, bool disableBinaryCaching); //the following optional APIs provide access using specific platform information int btOpenCLUtils_getNumPlatforms(cl_int* pErrNum); @@ -141,9 +141,9 @@ struct btOpenCLUtils } //optional - static inline cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0) + static inline cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0, bool disableBinaryCaching=false) { - return btOpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching); + return btOpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching, disableBinaryCaching); } //the following optional APIs provide access using specific platform information diff --git a/opencl/gpu_sat/host/ConvexHullContact.cpp b/opencl/gpu_sat/host/ConvexHullContact.cpp index 2f0c2806f..bcde25850 100644 --- a/opencl/gpu_sat/host/ConvexHullContact.cpp +++ b/opencl/gpu_sat/host/ConvexHullContact.cpp @@ -111,10 +111,11 @@ m_totalContactsOut(m_context, m_queue) if (1) { const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_sat/kernels/bvhTraversal.cl"); + //cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_sat/kernels/bvhTraversal.cl"); + cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,0,&errNum,"","opencl/gpu_sat/kernels/bvhTraversal.cl", true); btAssert(errNum==CL_SUCCESS); - m_bvhTraversalKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg); + m_bvhTraversalKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"-g"); btAssert(errNum==CL_SUCCESS); } @@ -329,6 +330,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray subTreesGPU(this->m_context,this->m_queue,numSubTrees); subTreesGPU.copyFromHost(bvhData[0]->getSubtreeInfoArray()); + + btVector3 bvhAabbMin = bvhData[0]->m_bvhAabbMin; btVector3 bvhAabbMax = bvhData[0]->m_bvhAabbMax; btVector3 bvhQuantization = bvhData[0]->m_bvhQuantization; @@ -357,7 +360,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray pairsOutCPU; triangleConvexPairsOut.copyToHost(pairsOutCPU); - + clFinish(m_queue); printf("np=%d\n", np); diff --git a/opencl/gpu_sat/kernels/bvhTraversal.cl b/opencl/gpu_sat/kernels/bvhTraversal.cl index 8411ac01e..00130a1b5 100644 --- a/opencl/gpu_sat/kernels/bvhTraversal.cl +++ b/opencl/gpu_sat/kernels/bvhTraversal.cl @@ -47,7 +47,7 @@ typedef struct } */ -int getTriangleIndex(__global const btQuantizedBvhNode* rootNode) +int getTriangleIndex(const btQuantizedBvhNode* rootNode) { unsigned int x=0; unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); @@ -55,13 +55,13 @@ int getTriangleIndex(__global const btQuantizedBvhNode* rootNode) return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); } -bool isLeaf(__global const btQuantizedBvhNode* rootNode) +int isLeaf(const btQuantizedBvhNode* rootNode) { //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0); + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; } -int getEscapeIndex(__global const btQuantizedBvhNode* rootNode) +int getEscapeIndex(const btQuantizedBvhNode* rootNode) { return -rootNode->m_escapeIndexOrTriangleIndex; } @@ -129,13 +129,30 @@ typedef struct } btAabbCL; -bool testQuantizedAabbAgainstQuantizedAabb(__private const unsigned short int* aabbMin1,__private const unsigned short int* aabbMax1,__global const unsigned short int* aabbMin2,__global const unsigned short int* aabbMax2) +int testQuantizedAabbAgainstQuantizedAabb( + const unsigned short int* aabbMin1, + const unsigned short int* aabbMax1, + const unsigned short int* aabbMin2, + const unsigned short int* aabbMax2) { - bool overlap = true; - overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap; - overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap; - overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap; - return overlap; + //int overlap = 1; + if (aabbMin1[0] > aabbMax2[0]) + return 0; + if (aabbMax1[0] < aabbMin2[0]) + return 0; + if (aabbMin1[1] > aabbMax2[1]) + return 0; + if (aabbMax1[1] < aabbMin2[1]) + return 0; + if (aabbMin1[2] > aabbMax2[2]) + return 0; + if (aabbMax1[2] < aabbMin2[2]) + return 0; + return 1; + //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; + //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; + //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap; + //return overlap; } @@ -176,87 +193,77 @@ __kernel void bvhTraversalKernel( __global const int2* pairs, int numPairs, int maxNumConcavePairsCapacity) { - - int i = get_global_id(0); + int id = get_global_id(0); + if (id>=numPairs) + return; - if (im_quantizedAabbMin,subtree->m_quantizedAabbMax); - if (overlap != 0) + btQuantizedBvhNode rootNode = quantizedNodes[curIndex]; + aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax); + isLeafNode = isLeaf(&rootNode); + if (aabbOverlap) { - int startNodeIndex = subtree->m_rootNodeIndex; - int endNodeIndex = subtree->m_rootNodeIndex+subtree->m_subtreeSize; - - int curIndex = startNodeIndex; - int subTreeSize = endNodeIndex - startNodeIndex; - __global const btQuantizedBvhNode* rootNode = &quantizedNodes[startNodeIndex]; - int escapeIndex; - bool isLeafNode; - unsigned aabbOverlap; - while (curIndex < endNodeIndex) + if (isLeafNode) { - aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); - isLeafNode = isLeaf(rootNode); - if (isLeafNode && aabbOverlap) + int triangleIndex = getTriangleIndex(&rootNode); + + int pairIdx = atomic_inc(numConcavePairsOut); + if (pairIdxprocessNode(rootNode->getPartId(),rootNode->getTriangleIndex()); - int triangleIndex = getTriangleIndex(rootNode); - int pairIdx = atomic_inc(numConcavePairsOut); - if (pairIdxm_escapeIndexOrTriangleIndex&~(y));\n" "}\n" "\n" -"bool isLeaf(__global const btQuantizedBvhNode* rootNode)\n" +"int isLeaf(const btQuantizedBvhNode* rootNode)\n" "{\n" " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0);\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" "}\n" " \n" -"int getEscapeIndex(__global const btQuantizedBvhNode* rootNode)\n" +"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" "{\n" " return -rootNode->m_escapeIndexOrTriangleIndex;\n" "}\n" @@ -131,13 +131,30 @@ static const char* bvhTraversalKernelCL= \ "} btAabbCL;\n" "\n" "\n" -"bool testQuantizedAabbAgainstQuantizedAabb(__private const unsigned short int* aabbMin1,__private const unsigned short int* aabbMax1,__global const unsigned short int* aabbMin2,__global const unsigned short int* aabbMax2)\n" +"int testQuantizedAabbAgainstQuantizedAabb(\n" +" const unsigned short int* aabbMin1,\n" +" const unsigned short int* aabbMax1,\n" +" const unsigned short int* aabbMin2,\n" +" const unsigned short int* aabbMax2)\n" "{\n" -" bool overlap = true;\n" -" overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap;\n" -" overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap;\n" -" overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap;\n" -" return overlap;\n" +" //int overlap = 1;\n" +" if (aabbMin1[0] > aabbMax2[0])\n" +" return 0;\n" +" if (aabbMax1[0] < aabbMin2[0])\n" +" return 0;\n" +" if (aabbMin1[1] > aabbMax2[1])\n" +" return 0;\n" +" if (aabbMax1[1] < aabbMin2[1])\n" +" return 0;\n" +" if (aabbMin1[2] > aabbMax2[2])\n" +" return 0;\n" +" if (aabbMax1[2] < aabbMin2[2])\n" +" return 0;\n" +" return 1;\n" +" //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" +" //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" +" //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" +" //return overlap;\n" "}\n" "\n" "\n" @@ -178,88 +195,78 @@ static const char* bvhTraversalKernelCL= \ " int numPairs,\n" " int maxNumConcavePairsCapacity)\n" "{\n" -"\n" -" int i = get_global_id(0);\n" +" int id = get_global_id(0);\n" +" if (id>=numPairs)\n" +" return;\n" " \n" -" if (im_quantizedAabbMin,subtree->m_quantizedAabbMax);\n" -" if (overlap != 0)\n" +" btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" +" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" +" isLeafNode = isLeaf(&rootNode);\n" +" if (aabbOverlap)\n" " {\n" -" int startNodeIndex = subtree->m_rootNodeIndex;\n" -" int endNodeIndex = subtree->m_rootNodeIndex+subtree->m_subtreeSize;\n" -"\n" -" int curIndex = startNodeIndex;\n" -" int subTreeSize = endNodeIndex - startNodeIndex;\n" -" __global const btQuantizedBvhNode* rootNode = &quantizedNodes[startNodeIndex];\n" -" int escapeIndex;\n" -" bool isLeafNode;\n" -" unsigned aabbOverlap;\n" -" while (curIndex < endNodeIndex)\n" +" if (isLeafNode)\n" " {\n" -" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);\n" -" isLeafNode = isLeaf(rootNode);\n" -" if (isLeafNode && aabbOverlap)\n" +" int triangleIndex = getTriangleIndex(&rootNode);\n" +" \n" +" int pairIdx = atomic_inc(numConcavePairsOut);\n" +" if (pairIdxprocessNode(rootNode->getPartId(),rootNode->getTriangleIndex());\n" -" int triangleIndex = getTriangleIndex(rootNode);\n" -" int pairIdx = atomic_inc(numConcavePairsOut);\n" -" if (pairIdx