diff --git a/build3/premake4.lua b/build3/premake4.lua index 75c88712d..4e6218f28 100644 --- a/build3/premake4.lua +++ b/build3/premake4.lua @@ -117,7 +117,7 @@ if not _OPTIONS["ios"] then include "../test/OpenCL/BasicInitialize" include "../test/OpenCL/KernelLaunch"-- - include "../test/OpenCL/BroadphaseCollision" +-- include "../test/OpenCL/BroadphaseCollision" -- include "../test/OpenCL/NarrowphaseCollision" include "../test/OpenCL/ParallelPrimitives" include "../test/OpenCL/RadixSortBenchmark" diff --git a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h index 0daf823bb..58192d5c3 100644 --- a/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h +++ b/src/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h @@ -21,7 +21,7 @@ struct b3Contact4Data int m_unused1; int m_unused2; - b3Float4 m_localPosA; +// b3Float4 m_localPosA; }; inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact) diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index 089e02832..88c35d189 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -31,7 +31,7 @@ static const char* primitiveContactsKernelsCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index 547b0e374..b6d09cbfd 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -63,7 +63,7 @@ static const char* satClipKernelsCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 66514fb9e..03c1c2af8 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -886,7 +886,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (b3GpuBatchContacts) { B3_PROFILE("gpu batchContacts"); - maxNumBatches = 250;//250; + maxNumBatches = 150;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); } else { diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 84537fa19..be17ecf22 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -17,7 +17,7 @@ subject to the following restrictions: #include "b3Solver.h" ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments -bool useNewBatchingKernel = true; +bool useNewBatchingKernel = false; #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index b26580bd4..aede516f1 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -43,7 +43,7 @@ static const char* batchingKernelsCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl index 90d580336..8a52c6817 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl @@ -201,50 +201,10 @@ __kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __ if (i!=numValidConstraints) { -// tmp = cs[i]; -// cs[i] = cs[numValidConstraints]; -// cs[numValidConstraints] = tmp; + tmp = cs[i]; + cs[i] = cs[numValidConstraints]; + cs[numValidConstraints] = tmp; -#ifdef CHECK_SIZE - tmp.m_worldPos[0] = cs[i].m_worldPos[0]; - tmp.m_worldPos[1] = cs[i].m_worldPos[1]; - tmp.m_worldPos[2] = cs[i].m_worldPos[2]; - tmp.m_worldPos[3] = cs[i].m_worldPos[3]; - tmp.m_worldNormal = cs[i].m_worldNormal; - tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp; - tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp; - tmp.m_batchIdx = cs[i].m_batchIdx; - tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit; - tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit; - tmp.m_childIndexA = cs[i].m_childIndexA; - tmp.m_childIndexB = cs[i].m_childIndexB; - - cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0]; - cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1]; - cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2]; - cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3]; - cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal; - cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp; - cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp; - cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx; - cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit; - cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit; - cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA; - cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB; - - cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0]; - cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1]; - cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2]; - cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3]; - cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal; - cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp; - cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp; - cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx; - cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit; - cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit; - cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA; - cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB; -#endif } diff --git a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 10f2c5dcc..ae4ce0987 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -43,7 +43,7 @@ static const char* batchingKernelsNewCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" @@ -202,47 +202,9 @@ static const char* batchingKernelsNewCL= \ " cs[i].m_batchIdx = batchIdx;\n" " if (i!=numValidConstraints)\n" " {\n" -"// tmp = cs[i];\n" -"// cs[i] = cs[numValidConstraints];\n" -"// cs[numValidConstraints] = tmp;\n" -"#ifdef CHECK_SIZE\n" -" tmp.m_worldPos[0] = cs[i].m_worldPos[0];\n" -" tmp.m_worldPos[1] = cs[i].m_worldPos[1];\n" -" tmp.m_worldPos[2] = cs[i].m_worldPos[2];\n" -" tmp.m_worldPos[3] = cs[i].m_worldPos[3];\n" -" tmp.m_worldNormal = cs[i].m_worldNormal;\n" -" tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp;\n" -" tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp;\n" -" tmp.m_batchIdx = cs[i].m_batchIdx;\n" -" tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit;\n" -" tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit;\n" -" tmp.m_childIndexA = cs[i].m_childIndexA;\n" -" tmp.m_childIndexB = cs[i].m_childIndexB;\n" -" cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0];\n" -" cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1];\n" -" cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2];\n" -" cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3];\n" -" cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal;\n" -" cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp;\n" -" cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp;\n" -" cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx;\n" -" cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit;\n" -" cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit;\n" -" cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA;\n" -" cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB;\n" -" cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0];\n" -" cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1];\n" -" cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2];\n" -" cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3];\n" -" cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal;\n" -" cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp;\n" -" cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp;\n" -" cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx;\n" -" cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit;\n" -" cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit;\n" -" cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA;\n" -" cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB;\n" -"#endif\n" +" tmp = cs[i];\n" +" cs[i] = cs[numValidConstraints];\n" +" cs[numValidConstraints] = tmp;\n" " }\n" " numValidConstraints++;\n" " \n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index 3cba942d1..717317008 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -43,7 +43,7 @@ static const char* solverSetupCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index 8dbadde70..a7a35ca93 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -43,7 +43,7 @@ static const char* solverSetup2CL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n" diff --git a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index 4c8a43843..d976ce8e6 100644 --- a/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -43,7 +43,7 @@ static const char* solverUtilsCL= \ " int m_childIndexB;\n" " int m_unused1;\n" " int m_unused2;\n" -" b3Float4 m_localPosA;\n" +"// b3Float4 m_localPosA;\n" "};\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "{\n"