added Samurai Monastry wavefront .obj
added alternative batching kernel (slow); tweaked controls a bit; added command-line options --selected_demo=<int> and --new_batching; started looking into parallel 3D SAP
This commit is contained in:
@@ -129,10 +129,10 @@ struct InternalDataRenderer : public GLInstanceRendererInternalData
|
|||||||
|
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
//m_cameraDistance -= deltay*0.1;
|
m_cameraDistance -= deltay*0.1;
|
||||||
btVector3 fwd = m_cameraTargetPosition-m_cameraPosition;
|
//btVector3 fwd = m_cameraTargetPosition-m_cameraPosition;
|
||||||
fwd.normalize();
|
//fwd.normalize();
|
||||||
m_cameraTargetPosition += fwd*deltay*0.1;
|
//m_cameraTargetPosition += fwd*deltay*0.1;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ class GLInstancingRenderer
|
|||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GLInstancingRenderer(int m_maxObjectCapacity, int maxShapeCapacityInBytes = 10*1024*1024);
|
GLInstancingRenderer(int m_maxObjectCapacity, int maxShapeCapacityInBytes = 56*1024*1024);
|
||||||
virtual ~GLInstancingRenderer();
|
virtual ~GLInstancingRenderer();
|
||||||
|
|
||||||
void init();
|
void init();
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernel
|
|||||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup.h" --stringname="solverSetupCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup.h" --stringname="solverSetupCL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup2.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup2.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/batchingKernels.cl" --headerfile="../opencl/gpu_rigidbody/kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/batchingKernels.cl" --headerfile="../opencl/gpu_rigidbody/kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify
|
||||||
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl" --headerfile="../opencl/gpu_rigidbody/kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify
|
||||||
|
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveContact.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveContact.h" --stringname="solveContactCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveContact.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveContact.h" --stringname="solveContactCL" stringify
|
||||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveFriction.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveFriction.h" --stringname="solveFrictionCL" stringify
|
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveFriction.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveFriction.h" --stringname="solveFrictionCL" stringify
|
||||||
|
|
||||||
|
|||||||
740
data/samurai_monastry.mtl
Normal file
740
data/samurai_monastry.mtl
Normal file
@@ -0,0 +1,740 @@
|
|||||||
|
# Blender MTL File: 'None'
|
||||||
|
# Material Count: 82
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:blinn5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:Arena:sda:sda:Material3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Arena_02:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Floor_04:Arena:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:blinn7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainEntrance:lambert2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainEntrance:MainTemple:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple3:Stairs_side_Material5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple3:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple3:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple3:blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple3:sda:Material3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:MainTemple4:sda:Material3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:SideTemple5:Material7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:blinn7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple1:pasted__blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple:MainTemple:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MainTemple:MainTemple:blinn5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Material
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MediumTemple:MediumTemple:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MediumTemple:MediumTemple:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MediumTemple:MediumTemple:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MediumTemple:MediumTemple:blinn5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl MediumTemple:SmallTemple:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple1:Out_Temple:lambert2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple1:Out_Temple:lambert3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple1:Out_Temple:lambert4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple1:Out_Temple:lambert6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple1:Out_Temple:lambert7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple:Out_Temple:lambert2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple:Out_Temple:lambert3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple:Out_Temple:lambert4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple:Out_Temple:lambert6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl Out_Temple:Out_Temple:lambert7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl PaperLanterns1:Paper_Lantern1:scene_material
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:Arena:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple1:Material2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple1:Material3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple1:Material7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple1:Material8SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple1:Material9SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple4:Material5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SideTemple4:Stairs_blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:SmallTemple1:phong2
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:side_temple_blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple03:side_temple_floor_blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple04:Arena:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple04:SideTemple1:Material2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple04:SmallTemple1:phong2
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple04:blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple04:blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:SideTemple:Material3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:SideTemple:Material5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:SideTemple:Material7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:SideTemple:Material8SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:SideTemple:Material9SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SideTemple:blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SmallTemple:SmallTemple:floor
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SmallTemple:SmallTemple:lambert2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SmallTemple:SmallTemple:polymsh47_XSIPOLYCLS_scene_material1
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl SmallTemple:SmallTemple:window
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl StoneLanterns:Lantern1:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn2SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn3SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn4SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn5SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn6SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl blinn7SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
|
|
||||||
|
newmtl pagodas:pagoda1:blinn1SG
|
||||||
|
Ns 96.078431
|
||||||
|
Ka 0.000000 0.000000 0.000000
|
||||||
|
Kd 0.640000 0.640000 0.640000
|
||||||
|
Ks 0.500000 0.500000 0.500000
|
||||||
|
Ni 1.000000
|
||||||
|
d 1.000000
|
||||||
|
illum 2
|
||||||
331426
data/samurai_monastry.obj
Normal file
331426
data/samurai_monastry.obj
Normal file
File diff suppressed because it is too large
Load Diff
@@ -64,11 +64,12 @@ btAlignedObjectArray<const char*> demoNames;
|
|||||||
int selectedDemo = 0;
|
int selectedDemo = 0;
|
||||||
GpuDemo::CreateFunc* allDemos[]=
|
GpuDemo::CreateFunc* allDemos[]=
|
||||||
{
|
{
|
||||||
ConcaveScene::MyCreateFunc,
|
GpuConvexScene::MyCreateFunc,
|
||||||
|
ConcaveScene::MyCreateFunc,
|
||||||
|
|
||||||
GpuConvexScene::MyCreateFunc,
|
GpuConvexScene::MyCreateFunc,
|
||||||
GpuCompoundScene::MyCreateFunc,
|
GpuCompoundScene::MyCreateFunc,
|
||||||
GpuConvexScene::MyCreateFunc,
|
|
||||||
|
|
||||||
GpuRigidBodyDemo::MyCreateFunc,
|
GpuRigidBodyDemo::MyCreateFunc,
|
||||||
|
|
||||||
@@ -295,7 +296,7 @@ sth_stash* initFont(GLPrimitiveRenderer* primRender)
|
|||||||
|
|
||||||
void Usage()
|
void Usage()
|
||||||
{
|
{
|
||||||
printf("\nprogram.exe [--cl_device=<int>] [--benchmark] [--disable_opencl] [--cl_platform=<int>] [--x_dim=<int>] [--y_dim=<num>] [--z_dim=<int>] [--x_gap=<float>] [--y_gap=<float>] [--z_gap=<float>] [--use_concave_mesh]\n");
|
printf("\nprogram.exe [--selected_demo=<int>] [--cl_device=<int>] [--benchmark] [--disable_opencl] [--cl_platform=<int>] [--x_dim=<int>] [--y_dim=<num>] [--z_dim=<int>] [--x_gap=<float>] [--y_gap=<float>] [--z_gap=<float>] [--use_concave_mesh] [--new_batching]\n");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -360,6 +361,7 @@ void DumpSimulationTime(FILE* f)
|
|||||||
}
|
}
|
||||||
///extern const char* g_deviceName;
|
///extern const char* g_deviceName;
|
||||||
const char* g_deviceName = "blaat";
|
const char* g_deviceName = "blaat";
|
||||||
|
extern bool useNewBatchingKernel;
|
||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
@@ -376,6 +378,10 @@ int main(int argc, char* argv[])
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
args.GetCmdLineArgument("selected_demo",selectedDemo);
|
||||||
|
|
||||||
|
useNewBatchingKernel = args.CheckCmdLineFlag("new_batching");
|
||||||
bool benchmark=args.CheckCmdLineFlag("benchmark");
|
bool benchmark=args.CheckCmdLineFlag("benchmark");
|
||||||
dump_timings=args.CheckCmdLineFlag("dump_timings");
|
dump_timings=args.CheckCmdLineFlag("dump_timings");
|
||||||
ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
|
ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
|
||||||
|
|||||||
@@ -20,6 +20,9 @@
|
|||||||
|
|
||||||
|
|
||||||
#include "OpenGLWindow/GLInstanceGraphicsShape.h"
|
#include "OpenGLWindow/GLInstanceGraphicsShape.h"
|
||||||
|
#define CONCAVE_GAPX 16
|
||||||
|
#define CONCAVE_GAPY 8
|
||||||
|
#define CONCAVE_GAPZ 16
|
||||||
|
|
||||||
|
|
||||||
GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(objLoader* obj)
|
GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(objLoader* obj)
|
||||||
@@ -157,8 +160,11 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
|||||||
//char* fileName = "data/plane100.obj";
|
//char* fileName = "data/plane100.obj";
|
||||||
//char* fileName = "data/teddy.obj";//"plane.obj";
|
//char* fileName = "data/teddy.obj";//"plane.obj";
|
||||||
// char* fileName = "data/sponza_closed.obj";//"plane.obj";
|
// char* fileName = "data/sponza_closed.obj";//"plane.obj";
|
||||||
char* fileName = "data/leoTest1.obj";
|
//char* fileName = "data/leoTest1.obj";
|
||||||
btVector3 shift(150,-100,-120);
|
char* fileName = "data/samurai_monastry.obj";
|
||||||
|
|
||||||
|
btVector3 shift(0,0,0);//150,-100,-120);
|
||||||
|
btVector4 scaling(10,10,10,1);//4,4,4,1);
|
||||||
FILE* f = 0;
|
FILE* f = 0;
|
||||||
|
|
||||||
char relativeFileName[1024];
|
char relativeFileName[1024];
|
||||||
@@ -189,7 +195,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
|||||||
|
|
||||||
{
|
{
|
||||||
GLInstanceGraphicsShape* shape = createGraphicsShapeFromWavefrontObj(objData);
|
GLInstanceGraphicsShape* shape = createGraphicsShapeFromWavefrontObj(objData);
|
||||||
btVector4 scaling(4,4,4,1);
|
|
||||||
|
|
||||||
btAlignedObjectArray<btVector3> verts;
|
btAlignedObjectArray<btVector3> verts;
|
||||||
for (int i=0;i<shape->m_numvertices;i++)
|
for (int i=0;i<shape->m_numvertices;i++)
|
||||||
@@ -216,7 +222,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
|||||||
int shapeId = ci.m_instancingRenderer->registerShape(&shape->m_vertices->at(0).xyzw[0], shape->m_numvertices, &shape->m_indices->at(0), shape->m_numIndices);
|
int shapeId = ci.m_instancingRenderer->registerShape(&shape->m_vertices->at(0).xyzw[0], shape->m_numvertices, &shape->m_indices->at(0), shape->m_numIndices);
|
||||||
btQuaternion orn(0,0,0,1);
|
btQuaternion orn(0,0,0,1);
|
||||||
|
|
||||||
btVector4 color(0,0,1,1.f);//0.5);//1.f
|
btVector4 color(0.3,0.3,1,1.f);//0.5);//1.f
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -246,6 +252,15 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
|||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
|
int curColor = 0;
|
||||||
|
btVector4 colors[4] =
|
||||||
|
{
|
||||||
|
btVector4(1,1,1,1),
|
||||||
|
btVector4(1,1,0.3,1),
|
||||||
|
btVector4(0.3,1,1,1),
|
||||||
|
btVector4(0.3,0.3,1,1),
|
||||||
|
};
|
||||||
|
|
||||||
btVector4 scaling(1,1,1,1);
|
btVector4 scaling(1,1,1,1);
|
||||||
int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling);
|
int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling);
|
||||||
for (int i=0;i<ci.arraySizeX;i++)
|
for (int i=0;i<ci.arraySizeX;i++)
|
||||||
@@ -257,10 +272,12 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
|||||||
float mass = 1;
|
float mass = 1;
|
||||||
|
|
||||||
//btVector3 position(-2*ci.gapX+i*ci.gapX,25+j*ci.gapY,-2*ci.gapZ+k*ci.gapZ);
|
//btVector3 position(-2*ci.gapX+i*ci.gapX,25+j*ci.gapY,-2*ci.gapZ+k*ci.gapZ);
|
||||||
btVector3 position(-(ci.arraySizeX/2)*ci.gapX+i*ci.gapX,50+j*ci.gapY,-(ci.arraySizeZ/2)*ci.gapZ+k*ci.gapZ);
|
btVector3 position(-(ci.arraySizeX/2)*CONCAVE_GAPX+i*CONCAVE_GAPX,50+j*CONCAVE_GAPY,-(ci.arraySizeZ/2)*CONCAVE_GAPZ+k*CONCAVE_GAPZ);
|
||||||
btQuaternion orn(1,0,0,0);
|
btQuaternion orn(1,0,0,0);
|
||||||
|
|
||||||
btVector4 color(0,1,0,1);
|
btVector4 color = colors[curColor];
|
||||||
|
curColor++;
|
||||||
|
curColor&=3;
|
||||||
|
|
||||||
int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling);
|
int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling);
|
||||||
int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index);
|
int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index);
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
|
|||||||
{
|
{
|
||||||
float mass = j==0? 0.f : 1.f;
|
float mass = j==0? 0.f : 1.f;
|
||||||
|
|
||||||
btVector3 position((j&1)+i*2.2,2+j*3.,(j&1)+k*2.2);
|
btVector3 position((j&1)+i*2.2,2+j*2.,(j&1)+k*2.2);
|
||||||
btQuaternion orn(1,0,0,0);
|
btQuaternion orn(1,0,0,0);
|
||||||
|
|
||||||
btVector4 color = colors[curColor];
|
btVector4 color = colors[curColor];
|
||||||
|
|||||||
@@ -20,7 +20,8 @@ m_smallAabbsGPU(ctx,q),
|
|||||||
m_largeAabbsGPU(ctx,q),
|
m_largeAabbsGPU(ctx,q),
|
||||||
m_overlappingPairs(ctx,q),
|
m_overlappingPairs(ctx,q),
|
||||||
m_gpuSmallSortData(ctx,q),
|
m_gpuSmallSortData(ctx,q),
|
||||||
m_gpuSmallSortedAabbs(ctx,q)
|
m_gpuSmallSortedAabbs(ctx,q),
|
||||||
|
m_currentBuffer(-1)
|
||||||
{
|
{
|
||||||
const char* sapSrc = sapCL;
|
const char* sapSrc = sapCL;
|
||||||
const char* sapFastSrc = sapFastCL;
|
const char* sapFastSrc = sapFastCL;
|
||||||
@@ -86,18 +87,87 @@ static bool TestAabbAgainstAabb2(const btVector3 &aabbMin1, const btVector3 &aab
|
|||||||
return overlap;
|
return overlap;
|
||||||
}
|
}
|
||||||
|
|
||||||
void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|
||||||
|
|
||||||
|
//http://stereopsis.com/radix.html
|
||||||
|
static unsigned int FloatFlip(float fl)
|
||||||
{
|
{
|
||||||
int axis = 0;//todo on GPU for now hardcode
|
unsigned int f = *(unsigned int*)&fl;
|
||||||
|
unsigned int mask = -(int)(f >> 31) | 0x80000000;
|
||||||
|
return f ^ mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
void btGpuSapBroadphase::init3dSap()
|
||||||
|
{
|
||||||
|
if (m_currentBuffer<0)
|
||||||
|
{
|
||||||
|
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||||
|
|
||||||
|
m_currentBuffer = 0;
|
||||||
|
for (int axis=0;axis<3;axis++)
|
||||||
|
{
|
||||||
|
for (int buf=0;buf<2;buf++)
|
||||||
|
{
|
||||||
|
int totalNumAabbs = m_allAabbsCPU.size();
|
||||||
|
m_sortedAxisCPU[axis][buf].resize(totalNumAabbs);
|
||||||
|
|
||||||
|
if (buf==m_currentBuffer)
|
||||||
|
{
|
||||||
|
for (int i=0;i<totalNumAabbs;i++)
|
||||||
|
{
|
||||||
|
m_sortedAxisCPU[axis][buf][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||||
|
m_sortedAxisCPU[axis][buf][i].m_value = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||||
|
{
|
||||||
|
btAssert(m_currentBuffer>=0);
|
||||||
|
if (m_currentBuffer<0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||||
|
|
||||||
|
for (int axis=0;axis<3;axis++)
|
||||||
|
{
|
||||||
|
for (int buf=0;buf<2;buf++)
|
||||||
|
{
|
||||||
|
btAssert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
m_currentBuffer = 1-m_currentBuffer;
|
||||||
|
|
||||||
|
for (int axis=0;axis<3;axis++)
|
||||||
|
{
|
||||||
|
int totalNumAabbs = m_allAabbsCPU.size();
|
||||||
|
for (int i=0;i<totalNumAabbs;i++)
|
||||||
|
{
|
||||||
|
m_sortedAxisCPU[axis][m_currentBuffer][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||||
|
m_sortedAxisCPU[axis][m_currentBuffer][i].m_value = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void btGpuSapBroadphase::calculateOverlappingPairsHost()
|
||||||
|
{
|
||||||
|
//test
|
||||||
|
//if (m_currentBuffer>=0)
|
||||||
|
// calculateOverlappingPairsHostIncremental3Sap();
|
||||||
|
|
||||||
|
int axis=0;
|
||||||
|
|
||||||
btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
||||||
|
|
||||||
|
|
||||||
if (forceHost)
|
|
||||||
{
|
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||||
|
|
||||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
|
||||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
int numSmallAabbs = m_smallAabbsCPU.size();
|
||||||
@@ -105,7 +175,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
{
|
{
|
||||||
//sync aabb
|
//sync aabb
|
||||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
||||||
m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
|
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -116,7 +186,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
{
|
{
|
||||||
//sync aabb
|
//sync aabb
|
||||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
||||||
m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
|
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -175,8 +245,15 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
m_overlappingPairs.resize(0);
|
m_overlappingPairs.resize(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
//init3dSap();
|
||||||
}
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void btGpuSapBroadphase::calculateOverlappingPairs()
|
||||||
|
{
|
||||||
|
int axis = 0;//todo on GPU for now hardcode
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -185,8 +262,8 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
if (syncOnHost)
|
if (syncOnHost)
|
||||||
{
|
{
|
||||||
BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
||||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
|
||||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||||
|
|
||||||
m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
|
m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
|
||||||
{
|
{
|
||||||
@@ -195,7 +272,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
{
|
{
|
||||||
//sync aabb
|
//sync aabb
|
||||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
||||||
m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
|
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -226,8 +303,8 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
if (syncOnHost)
|
if (syncOnHost)
|
||||||
{
|
{
|
||||||
BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
||||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
|
||||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||||
|
|
||||||
m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
|
m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
|
||||||
{
|
{
|
||||||
@@ -236,7 +313,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
{
|
{
|
||||||
//sync aabb
|
//sync aabb
|
||||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
||||||
m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
|
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -432,6 +509,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
|||||||
|
|
||||||
}//BT_PROFILE("GPU_RADIX SORT");
|
}//BT_PROFILE("GPU_RADIX SORT");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void btGpuSapBroadphase::writeAabbsToGpu()
|
void btGpuSapBroadphase::writeAabbsToGpu()
|
||||||
|
|||||||
@@ -24,6 +24,10 @@ class btGpuSapBroadphase
|
|||||||
|
|
||||||
class btRadixSort32CL* m_sorter;
|
class btRadixSort32CL* m_sorter;
|
||||||
|
|
||||||
|
///test for 3d SAP
|
||||||
|
btAlignedObjectArray<btSortData> m_sortedAxisCPU[3][2];
|
||||||
|
int m_currentBuffer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
btOpenCLArray<btSapAabb> m_allAabbsGPU;
|
btOpenCLArray<btSapAabb> m_allAabbsGPU;
|
||||||
@@ -45,7 +49,11 @@ class btGpuSapBroadphase
|
|||||||
btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||||
virtual ~btGpuSapBroadphase();
|
virtual ~btGpuSapBroadphase();
|
||||||
|
|
||||||
void calculateOverlappingPairs(bool forceHost=false);
|
void calculateOverlappingPairs();
|
||||||
|
void calculateOverlappingPairsHost();
|
||||||
|
|
||||||
|
void init3dSap();
|
||||||
|
void calculateOverlappingPairsHostIncremental3Sap();
|
||||||
|
|
||||||
void createProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
void createProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
||||||
void createLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
void createLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ subject to the following restrictions:
|
|||||||
|
|
||||||
#include "Solver.h"
|
#include "Solver.h"
|
||||||
|
|
||||||
|
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
|
||||||
|
bool useNewBatchingKernel = false;
|
||||||
|
|
||||||
#define SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl"
|
#define SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl"
|
||||||
#define SOLVER_SETUP2_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup2.cl"
|
#define SOLVER_SETUP2_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup2.cl"
|
||||||
@@ -24,6 +26,7 @@ subject to the following restrictions:
|
|||||||
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
||||||
|
|
||||||
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
||||||
|
#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl"
|
||||||
|
|
||||||
|
|
||||||
#include "../kernels/solverSetup.h"
|
#include "../kernels/solverSetup.h"
|
||||||
@@ -33,6 +36,9 @@ subject to the following restrictions:
|
|||||||
#include "../kernels/solveFriction.h"
|
#include "../kernels/solveFriction.h"
|
||||||
|
|
||||||
#include "../kernels/batchingKernels.h"
|
#include "../kernels/batchingKernels.h"
|
||||||
|
#include "../kernels/batchingKernelsNew.h"
|
||||||
|
|
||||||
|
|
||||||
#include "BulletCommon/btQuickprof.h"
|
#include "BulletCommon/btQuickprof.h"
|
||||||
#include "../../parallel_primitives/host/btLauncherCL.h"
|
#include "../../parallel_primitives/host/btLauncherCL.h"
|
||||||
#include "BulletCommon/btVector3.h"
|
#include "BulletCommon/btVector3.h"
|
||||||
@@ -94,7 +100,7 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
|||||||
const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
|
const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
|
||||||
|
|
||||||
m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
|
m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
|
||||||
m_contactBuffer = new btOpenCLArray<btContact4>(ctx,queue);
|
m_contactBuffer2 = new btOpenCLArray<btContact4>(ctx,queue);
|
||||||
|
|
||||||
m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
||||||
m_numConstraints->resize(N_SPLIT*N_SPLIT);
|
m_numConstraints->resize(N_SPLIT*N_SPLIT);
|
||||||
@@ -108,6 +114,8 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
|||||||
|
|
||||||
cl_int pErrNum;
|
cl_int pErrNum;
|
||||||
const char* batchKernelSource = batchingKernelsCL;
|
const char* batchKernelSource = batchingKernelsCL;
|
||||||
|
const char* batchKernelNewSource = batchingKernelsNewCL;
|
||||||
|
|
||||||
const char* solverSetupSource = solverSetupCL;
|
const char* solverSetupSource = solverSetupCL;
|
||||||
const char* solverSetup2Source = solverSetup2CL;
|
const char* solverSetup2Source = solverSetup2CL;
|
||||||
const char* solveContactSource = solveContactCL;
|
const char* solveContactSource = solveContactCL;
|
||||||
@@ -159,13 +167,20 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
|||||||
m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
|
m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
|
||||||
btAssert(m_batchingKernel);
|
btAssert(m_batchingKernel);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||||
|
btAssert(batchingNewProg);
|
||||||
|
|
||||||
|
m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||||
|
//m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
|
||||||
|
btAssert(m_batchingKernelNew);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Solver::~Solver()
|
Solver::~Solver()
|
||||||
{
|
{
|
||||||
delete m_sortDataBuffer;
|
delete m_sortDataBuffer;
|
||||||
delete m_contactBuffer;
|
delete m_contactBuffer2;
|
||||||
|
|
||||||
delete m_sort32;
|
delete m_sort32;
|
||||||
delete m_scan;
|
delete m_scan;
|
||||||
@@ -173,6 +188,7 @@ Solver::~Solver()
|
|||||||
|
|
||||||
|
|
||||||
clReleaseKernel(m_batchingKernel);
|
clReleaseKernel(m_batchingKernel);
|
||||||
|
clReleaseKernel(m_batchingKernelNew);
|
||||||
|
|
||||||
clReleaseKernel( m_solveContactKernel);
|
clReleaseKernel( m_solveContactKernel);
|
||||||
clReleaseKernel( m_solveFrictionKernel);
|
clReleaseKernel( m_solveFrictionKernel);
|
||||||
@@ -843,7 +859,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
|||||||
|
|
||||||
btBufferInfoCL bInfo[] = {
|
btBufferInfoCL bInfo[] = {
|
||||||
btBufferInfoCL( contacts->getBufferCL() ),
|
btBufferInfoCL( contacts->getBufferCL() ),
|
||||||
btBufferInfoCL( m_contactBuffer->getBufferCL() ),
|
btBufferInfoCL( m_contactBuffer2->getBufferCL()),
|
||||||
btBufferInfoCL( nNative->getBufferCL() ),
|
btBufferInfoCL( nNative->getBufferCL() ),
|
||||||
btBufferInfoCL( offsetsNative->getBufferCL() ),
|
btBufferInfoCL( offsetsNative->getBufferCL() ),
|
||||||
#ifdef BATCH_DEBUG
|
#ifdef BATCH_DEBUG
|
||||||
@@ -852,10 +868,22 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
BT_PROFILE("batchingKernel");
|
BT_PROFILE("batchingKernel");
|
||||||
btLauncherCL launcher( m_queue, m_batchingKernel);
|
//btLauncherCL launcher( m_queue, m_batchingKernel);
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
|
||||||
|
|
||||||
|
btLauncherCL launcher( m_queue, k);
|
||||||
|
if (!useNewBatchingKernel )
|
||||||
|
{
|
||||||
|
launcher.setBuffer( contacts->getBufferCL() );
|
||||||
|
}
|
||||||
|
launcher.setBuffer( m_contactBuffer2->getBufferCL() );
|
||||||
|
launcher.setBuffer( nNative->getBufferCL());
|
||||||
|
launcher.setBuffer( offsetsNative->getBufferCL());
|
||||||
|
|
||||||
//launcher.setConst( cdata );
|
//launcher.setConst( cdata );
|
||||||
launcher.setConst(staticIdx);
|
launcher.setConst(staticIdx);
|
||||||
|
|
||||||
@@ -899,7 +927,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// copy buffer to buffer
|
// copy buffer to buffer
|
||||||
btAssert(m_contactBuffer->size()==nContacts);
|
//btAssert(m_contactBuffer->size()==nContacts);
|
||||||
//contacts->copyFromOpenCLArray( *m_contactBuffer);
|
//contacts->copyFromOpenCLArray( *m_contactBuffer);
|
||||||
//clFinish(m_queue);//needed?
|
//clFinish(m_queue);//needed?
|
||||||
|
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ class Solver : public SolverBase
|
|||||||
|
|
||||||
int m_nIterations;
|
int m_nIterations;
|
||||||
cl_kernel m_batchingKernel;
|
cl_kernel m_batchingKernel;
|
||||||
|
cl_kernel m_batchingKernelNew;
|
||||||
cl_kernel m_solveContactKernel;
|
cl_kernel m_solveContactKernel;
|
||||||
cl_kernel m_solveFrictionKernel;
|
cl_kernel m_solveFrictionKernel;
|
||||||
cl_kernel m_contactToConstraintKernel;
|
cl_kernel m_contactToConstraintKernel;
|
||||||
@@ -106,7 +107,7 @@ class Solver : public SolverBase
|
|||||||
class btPrefixScanCL* m_scan;
|
class btPrefixScanCL* m_scan;
|
||||||
|
|
||||||
btOpenCLArray<btSortData>* m_sortDataBuffer;
|
btOpenCLArray<btSortData>* m_sortDataBuffer;
|
||||||
btOpenCLArray<btContact4>* m_contactBuffer;
|
btOpenCLArray<btContact4>* m_contactBuffer2;
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ struct btConfig
|
|||||||
int m_maxTriConvexPairCapacity;
|
int m_maxTriConvexPairCapacity;
|
||||||
|
|
||||||
btConfig()
|
btConfig()
|
||||||
:m_maxConvexBodies(64*1024),
|
:m_maxConvexBodies(32*1024),
|
||||||
m_maxConvexShapes(8192),
|
m_maxConvexShapes(8192),
|
||||||
m_maxVerticesPerFace(64),
|
m_maxVerticesPerFace(64),
|
||||||
m_maxFacesPerShape(64),
|
m_maxFacesPerShape(64),
|
||||||
@@ -26,7 +26,7 @@ struct btConfig
|
|||||||
m_maxConvexIndices(8192),
|
m_maxConvexIndices(8192),
|
||||||
m_maxConvexUniqueEdges(8192),
|
m_maxConvexUniqueEdges(8192),
|
||||||
m_maxCompoundChildShapes(8192),
|
m_maxCompoundChildShapes(8192),
|
||||||
m_maxTriConvexPairCapacity(64*1024)
|
m_maxTriConvexPairCapacity(512*1024)
|
||||||
{
|
{
|
||||||
m_maxBroadphasePairs = 16*m_maxConvexBodies;
|
m_maxBroadphasePairs = 16*m_maxConvexBodies;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,12 +17,14 @@
|
|||||||
#define SOLVER_CONTACT_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveContact.cl"
|
#define SOLVER_CONTACT_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveContact.cl"
|
||||||
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
||||||
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
||||||
|
#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl"
|
||||||
|
|
||||||
#include "../kernels/solverSetup.h"
|
#include "../kernels/solverSetup.h"
|
||||||
#include "../kernels/solverSetup2.h"
|
#include "../kernels/solverSetup2.h"
|
||||||
#include "../kernels/solveContact.h"
|
#include "../kernels/solveContact.h"
|
||||||
#include "../kernels/solveFriction.h"
|
#include "../kernels/solveFriction.h"
|
||||||
#include "../kernels/batchingKernels.h"
|
#include "../kernels/batchingKernels.h"
|
||||||
|
#include "../kernels/batchingKernelsNew.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -48,13 +50,13 @@ struct btGpuBatchingPgsSolverInternalData
|
|||||||
int m_nIterations;
|
int m_nIterations;
|
||||||
|
|
||||||
btOpenCLArray<btGpuConstraint4>* m_contactCGPU;
|
btOpenCLArray<btGpuConstraint4>* m_contactCGPU;
|
||||||
|
|
||||||
btOpenCLArray<unsigned int>* m_numConstraints;
|
btOpenCLArray<unsigned int>* m_numConstraints;
|
||||||
btOpenCLArray<unsigned int>* m_offsets;
|
btOpenCLArray<unsigned int>* m_offsets;
|
||||||
|
|
||||||
Solver* m_solverGPU;
|
Solver* m_solverGPU;
|
||||||
|
|
||||||
cl_kernel m_batchingKernel;
|
cl_kernel m_batchingKernel;
|
||||||
|
cl_kernel m_batchingKernelNew;
|
||||||
cl_kernel m_solveContactKernel;
|
cl_kernel m_solveContactKernel;
|
||||||
cl_kernel m_solveFrictionKernel;
|
cl_kernel m_solveFrictionKernel;
|
||||||
cl_kernel m_contactToConstraintKernel;
|
cl_kernel m_contactToConstraintKernel;
|
||||||
@@ -72,6 +74,11 @@ struct btGpuBatchingPgsSolverInternalData
|
|||||||
btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
|
btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
|
||||||
btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
|
btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
|
||||||
btOpenCLArray<btContact4>* m_pBufContactOutGPU;
|
btOpenCLArray<btContact4>* m_pBufContactOutGPU;
|
||||||
|
|
||||||
|
|
||||||
|
btAlignedObjectArray<unsigned int> m_idxBuffer;
|
||||||
|
btAlignedObjectArray<btSortData> m_sortData;
|
||||||
|
btAlignedObjectArray<btContact4> m_old;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -114,6 +121,7 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
|
|
||||||
cl_int pErrNum;
|
cl_int pErrNum;
|
||||||
const char* batchKernelSource = batchingKernelsCL;
|
const char* batchKernelSource = batchingKernelsCL;
|
||||||
|
const char* batchKernelNewSource = batchingKernelsNewCL;
|
||||||
const char* solverSetupSource = solverSetupCL;
|
const char* solverSetupSource = solverSetupCL;
|
||||||
const char* solverSetup2Source = solverSetup2CL;
|
const char* solverSetup2Source = solverSetup2CL;
|
||||||
const char* solveContactSource = solveContactCL;
|
const char* solveContactSource = solveContactCL;
|
||||||
@@ -166,7 +174,14 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
|||||||
btAssert(m_data->m_batchingKernel);
|
btAssert(m_data->m_batchingKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||||
|
btAssert(batchingNewProg);
|
||||||
|
|
||||||
|
m_data->m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||||
|
btAssert(m_data->m_batchingKernelNew);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -186,6 +201,7 @@ btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
|
|||||||
|
|
||||||
|
|
||||||
clReleaseKernel(m_data->m_batchingKernel);
|
clReleaseKernel(m_data->m_batchingKernel);
|
||||||
|
clReleaseKernel(m_data->m_batchingKernelNew);
|
||||||
|
|
||||||
clReleaseKernel( m_data->m_solveContactKernel);
|
clReleaseKernel( m_data->m_solveContactKernel);
|
||||||
clReleaseKernel( m_data->m_solveFrictionKernel);
|
clReleaseKernel( m_data->m_solveFrictionKernel);
|
||||||
@@ -406,7 +422,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
||||||
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
|
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
|
||||||
|
|
||||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
|
||||||
btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
||||||
|
|
||||||
void* additionalData = 0;//m_data->m_frictionCGPU;
|
void* additionalData = 0;//m_data->m_frictionCGPU;
|
||||||
@@ -419,16 +435,17 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
{
|
{
|
||||||
|
|
||||||
if( m_data->m_solverGPU->m_contactBuffer)
|
if( m_data->m_solverGPU->m_contactBuffer2)
|
||||||
{
|
{
|
||||||
m_data->m_solverGPU->m_contactBuffer->resize(nContacts);
|
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( m_data->m_solverGPU->m_contactBuffer == 0 )
|
if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
|
||||||
{
|
{
|
||||||
m_data->m_solverGPU->m_contactBuffer = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
|
m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
|
||||||
m_data->m_solverGPU->m_contactBuffer->resize(nContacts);
|
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||||
}
|
}
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
|
||||||
@@ -438,7 +455,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
|
//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
|
||||||
|
|
||||||
|
|
||||||
btOpenCLArray<btContact4>* contactNative = contactsIn;
|
|
||||||
const btOpenCLArray<btRigidBodyCL>* bodyNative = bodyBuf;
|
const btOpenCLArray<btRigidBodyCL>* bodyNative = bodyBuf;
|
||||||
|
|
||||||
|
|
||||||
@@ -477,7 +494,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
||||||
|
|
||||||
|
|
||||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||||
launcher.setConst( cdata.m_nContacts );
|
launcher.setConst( cdata.m_nContacts );
|
||||||
@@ -536,7 +553,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
btInt4 cdata;
|
btInt4 cdata;
|
||||||
cdata.x = nContacts;
|
cdata.x = nContacts;
|
||||||
|
|
||||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer->getBufferCL())
|
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||||
, btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
, btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||||
btLauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
btLauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||||
@@ -554,19 +571,18 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
|
|
||||||
clFinish(m_data->m_queue);
|
clFinish(m_data->m_queue);
|
||||||
|
|
||||||
|
|
||||||
if (nContacts)
|
if (nContacts)
|
||||||
{
|
{
|
||||||
BT_PROFILE("gpu m_copyConstraintKernel");
|
BT_PROFILE("gpu m_copyConstraintKernel");
|
||||||
|
btInt4 cdata; cdata.x = nContacts;
|
||||||
btInt4 cdata; cdata.x = nContacts;
|
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) };
|
||||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer->getBufferCL() ), btBufferInfoCL( contactNative->getBufferCL() ) };
|
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
launcher.setConst( cdata );
|
||||||
launcher.setConst( cdata );
|
launcher.launch1D( nContacts, 64 );
|
||||||
launcher.launch1D( nContacts, 64 );
|
clFinish(m_data->m_queue);
|
||||||
clFinish(m_data->m_queue);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool compareGPU = false;
|
bool compareGPU = false;
|
||||||
@@ -575,13 +591,13 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
if (gpuBatchContacts)
|
if (gpuBatchContacts)
|
||||||
{
|
{
|
||||||
BT_PROFILE("gpu batchContacts");
|
BT_PROFILE("gpu batchContacts");
|
||||||
maxNumBatches = 50;
|
maxNumBatches = 25;//250;
|
||||||
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
BT_PROFILE("cpu batchContacts");
|
BT_PROFILE("cpu batchContacts");
|
||||||
btAlignedObjectArray<btContact4> cpuContacts;
|
btAlignedObjectArray<btContact4> cpuContacts;
|
||||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
btOpenCLArray<btContact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||||
contactsIn->copyToHost(cpuContacts);
|
contactsIn->copyToHost(cpuContacts);
|
||||||
|
|
||||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||||
@@ -611,8 +627,11 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
numNonzeroGrid++;
|
numNonzeroGrid++;
|
||||||
//printf("cpu batch\n");
|
//printf("cpu batch\n");
|
||||||
|
|
||||||
int simdWidth = 32;
|
|
||||||
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
int simdWidth =64;//-1;//32;
|
||||||
|
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||||
|
|
||||||
|
|
||||||
maxNumBatches = btMax(numBatches,maxNumBatches);
|
maxNumBatches = btMax(numBatches,maxNumBatches);
|
||||||
|
|
||||||
clFinish(m_data->m_queue);
|
clFinish(m_data->m_queue);
|
||||||
@@ -622,7 +641,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
BT_PROFILE("m_contactBuffer->copyFromHost");
|
BT_PROFILE("m_contactBuffer->copyFromHost");
|
||||||
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -636,7 +655,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
|||||||
{
|
{
|
||||||
//BT_PROFILE("gpu convertToConstraints");
|
//BT_PROFILE("gpu convertToConstraints");
|
||||||
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
||||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer /*contactNative*/,
|
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
||||||
contactConstraintOut,
|
contactConstraintOut,
|
||||||
additionalData, nContacts,
|
additionalData, nContacts,
|
||||||
(SolverBase::ConstraintCfg&) csCfg );
|
(SolverBase::ConstraintCfg&) csCfg );
|
||||||
@@ -700,6 +719,13 @@ static bool sortfnc(const btSortData& a,const btSortData& b)
|
|||||||
return (a.m_key<b.m_key);
|
return (a.m_key<b.m_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
btAlignedObjectArray<int> bodyUsed;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
btAlignedObjectArray<unsigned int> idxBuffer;
|
btAlignedObjectArray<unsigned int> idxBuffer;
|
||||||
btAlignedObjectArray<btSortData> sortData;
|
btAlignedObjectArray<btSortData> sortData;
|
||||||
btAlignedObjectArray<btContact4> old;
|
btAlignedObjectArray<btContact4> old;
|
||||||
@@ -830,3 +856,301 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
|
|||||||
#endif
|
#endif
|
||||||
return batchIdx;
|
return batchIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Greedy CPU batching that orders contacts through an index permutation.
///
/// Repeatedly sweeps the not-yet-batched contacts and accepts each contact whose
/// non-static bodies are not already recorded in the file-scope `bodyUsed` list.
/// The list is cleared at the start of every sweep and again after every
/// `simdWidth` accepted contacts, so a body is unique only within such a run.
/// Accepted contacts receive the current batch index; finally `cs` is rewritten
/// in acceptance order, which is already grouped by ascending batch index.
///
/// @param cs             contacts to batch; reordered in place.
/// @param numConstraints number of contacts in `cs`.
/// @param simdWidth      number of accepted contacts after which the body list is reset.
/// @param staticIdx      body index treated as static (never blocks a contact).
/// @param numBodies      not used by this implementation.
/// @return number of sweeps performed, i.e. the number of batch indices handed out.
inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	BT_PROFILE("sortConstraintByBatch");

	// Nothing to batch. Returning here also avoids taking &array[0] of the
	// zero-sized scratch arrays below.
	if (numConstraints <= 0)
		return 0;

	// Each accepted contact records at most two bodies and the list is reset
	// after simdWidth accepted contacts, so 2*simdWidth entries suffice.
	bodyUsed.resize(2*simdWidth);
	for (int q=0;q<2*simdWidth;q++)
		bodyUsed[q]=0;

	int curBodyUsed = 0;

	m_data->m_sortData.resize(numConstraints);
	m_data->m_idxBuffer.resize(numConstraints);
	m_data->m_old.resize(numConstraints);

	unsigned int* idxSrc = &m_data->m_idxBuffer[0];

#if defined(_DEBUG)
	// Mark every contact as unbatched so the check at the end can catch misses.
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif
	// Start from the identity permutation.
	for(int i=0; i<numConstraints; i++)
		idxSrc[i] = i;

	// idxSrc[0..numValidConstraints) holds the contacts batched so far, in order.
	int numValidConstraints = 0;
	int batchIdx = 0;

	{
		BT_PROFILE("cpu batch innerloop");

		while( numValidConstraints < numConstraints)
		{
			int nCurrentBatch = 0;

			// clear flag
			for(int k=0; k<curBodyUsed; k++)
				bodyUsed[k] = 0;
			curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				int idx = idxSrc[i];
				btAssert( idx < numConstraints );

				// check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				// A negative value or the designated static index marks a body
				// that never blocks batching.
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;

				if (!aIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyA == bodyUsed[j])
						{
							aUnavailable=1;
							break;
						}
					}
				}

				// Body B only needs checking when body A did not already reject the contact.
				if (!aUnavailable && !bIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyB == bodyUsed[j])
						{
							bUnavailable=1;
							break;
						}
					}
				}

				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
					{
						bodyUsed[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed[curBodyUsed++] = bodyB;
					}

					cs[idx].getBatchIdx() = batchIdx;
					m_data->m_sortData[idx].m_key = batchIdx;
					m_data->m_sortData[idx].m_value = idx;

					// Move the accepted contact to the end of the batched prefix.
					if (i!=numValidConstraints)
					{
						btSwap(idxSrc[i], idxSrc[numValidConstraints]);
					}
					numValidConstraints++;

					nCurrentBatch++;
					if( nCurrentBatch == simdWidth )
					{
						// A full run of simdWidth contacts was formed: start a new run
						// (same batch index) with an empty body list.
						nCurrentBatch = 0;
						for(int k=0; k<curBodyUsed; k++)
							bodyUsed[k] = 0;
						curBodyUsed = 0;
					}
				}
			}

			batchIdx ++;
		}
	}

	{
		BT_PROFILE("quickSort");
		// Sorting is disabled: idxSrc is already in acceptance order, and batch
		// indices are handed out in non-decreasing order.
		//m_data->m_sortData.quickSort(sortfnc);
	}

	{
		BT_PROFILE("reorder");
		// reorder: copy the contacts aside, then write them back in permutation order
		memcpy( &m_data->m_old[0], cs, sizeof(btContact4)*numConstraints);

		for(int i=0; i<numConstraints; i++)
		{
			btAssert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
			int idx = m_data->m_sortData[idxSrc[i]].m_value;
			cs[i] = m_data->m_old[idx];
		}
	}

#if defined(_DEBUG)
	//		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<numConstraints; i++)
	{
		btAssert( cs[i].getBatchIdx() != -1 );
	}
#endif

	return batchIdx;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Greedy CPU batching that reorders the contacts directly (no index buffer).
///
/// Repeatedly sweeps the not-yet-batched tail of `cs` and accepts each contact
/// whose non-static bodies are not already recorded in the file-scope `bodyUsed`
/// list. The list is cleared at the start of every sweep and again after every
/// `simdWidth` accepted contacts. An accepted contact gets the current batch
/// index and is swapped to the end of the batched prefix, so `cs` ends up
/// grouped by ascending batch index.
///
/// @param cs             contacts to batch; reordered in place.
/// @param numConstraints number of contacts in `cs`.
/// @param simdWidth      number of accepted contacts after which the body list is reset.
/// @param staticIdx      body index treated as static (never blocks a contact).
/// @param numBodies      not used by this implementation.
/// @return number of sweeps performed, i.e. the number of batch indices handed out.
inline int btGpuBatchingPgsSolver::sortConstraintByBatch3( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	BT_PROFILE("sortConstraintByBatch");

	// Diagnostic high-water marks. Being function-local statics they are shared
	// by every call; a line is printed whenever a new maximum is observed.
	static int maxSwaps = 0;
	int numSwaps = 0;

	static int maxNumConstraints = 0;
	if (maxNumConstraints<numConstraints)
	{
		maxNumConstraints = numConstraints;
		printf("maxNumConstraints = %d\n",maxNumConstraints );
	}

	// Each accepted contact records at most two bodies and the list is reset
	// after simdWidth accepted contacts, so 2*simdWidth entries suffice.
	bodyUsed.resize(2*simdWidth);
	for (int q=0;q<2*simdWidth;q++)
		bodyUsed[q]=0;

	int curBodyUsed = 0;

	// This variant does not use the scratch arrays; they are emptied as before.
	m_data->m_sortData.resize(0);
	m_data->m_idxBuffer.resize(0);
	m_data->m_old.resize(0);

#if defined(_DEBUG)
	// Mark every contact as unbatched so the check at the end can catch misses.
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif

	// cs[0..numValidConstraints) holds the contacts batched so far.
	int numValidConstraints = 0;
	int batchIdx = 0;

	{
		BT_PROFILE("cpu batch innerloop");

		while( numValidConstraints < numConstraints)
		{
			int nCurrentBatch = 0;

			// clear flag
			for(int k=0; k<curBodyUsed; k++)
				bodyUsed[k] = 0;
			curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				btAssert( i < numConstraints );

				// check if it can go
				int bodyAS = cs[i].m_bodyAPtrAndSignBit;
				int bodyBS = cs[i].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				// A negative value or the designated static index marks a body
				// that never blocks batching.
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;

				if (!aIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyA == bodyUsed[j])
						{
							aUnavailable=1;
							break;
						}
					}
				}

				// Body B only needs checking when body A did not already reject the contact.
				if (!aUnavailable && !bIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyB == bodyUsed[j])
						{
							bUnavailable=1;
							break;
						}
					}
				}

				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
					{
						bodyUsed[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed[curBodyUsed++] = bodyB;
					}

					cs[i].getBatchIdx() = batchIdx;

					// Move the accepted contact to the end of the batched prefix.
					if (i!=numValidConstraints)
					{
						btSwap(cs[i],cs[numValidConstraints]);
						numSwaps++;
					}
					numValidConstraints++;

					nCurrentBatch++;
					if( nCurrentBatch == simdWidth )
					{
						// A full run of simdWidth contacts was formed: start a new run
						// (same batch index) with an empty body list.
						nCurrentBatch = 0;
						for(int k=0; k<curBodyUsed; k++)
							bodyUsed[k] = 0;
						curBodyUsed = 0;
					}
				}
			}

			batchIdx ++;
		}
	}

#if defined(_DEBUG)
	//		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<numConstraints; i++)
	{
		btAssert( cs[i].getBatchIdx() != -1 );
	}
#endif

	if (maxSwaps<numSwaps)
	{
		maxSwaps = numSwaps;
		printf("maxSwaps = %d\n", maxSwaps);
	}

	return batchIdx;
}
|
||||||
|
|||||||
@@ -12,10 +12,18 @@ class btGpuBatchingPgsSolver
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct btGpuBatchingPgsSolverInternalData* m_data;
|
struct btGpuBatchingPgsSolverInternalData* m_data;
|
||||||
|
|
||||||
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||||
|
|
||||||
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||||
|
inline int sortConstraintByBatch2( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||||
|
inline int sortConstraintByBatch3( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||||
btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
||||||
|
|
||||||
|
|||||||
@@ -63,10 +63,15 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
|||||||
{
|
{
|
||||||
|
|
||||||
//update worldspace AABBs from local AABB/worldtransform
|
//update worldspace AABBs from local AABB/worldtransform
|
||||||
setupGpuAabbsFull();
|
{
|
||||||
|
setupGpuAabbsFull();
|
||||||
|
}
|
||||||
|
|
||||||
//compute overlapping pairs
|
//compute overlapping pairs
|
||||||
m_data->m_broadphaseSap->calculateOverlappingPairs();
|
{
|
||||||
|
//m_data->m_broadphaseSap->calculateOverlappingPairsHost();
|
||||||
|
m_data->m_broadphaseSap->calculateOverlappingPairs();
|
||||||
|
}
|
||||||
|
|
||||||
//compute contact points
|
//compute contact points
|
||||||
|
|
||||||
|
|||||||
236
opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl
Normal file
236
opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl
Normal file
@@ -0,0 +1,236 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty.
|
||||||
|
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it freely,
|
||||||
|
subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
//Originally written by Erwin Coumans
|
||||||
|
|
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
|
||||||
|
|
||||||
|
#ifdef cl_ext_atomic_counters_32
|
||||||
|
#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
|
||||||
|
#else
|
||||||
|
#define counter32_t volatile __global int*
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SIMD_WIDTH 64
|
||||||
|
|
||||||
|
typedef unsigned int u32;
|
||||||
|
typedef unsigned short u16;
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
#define GET_GROUP_IDX get_group_id(0)
|
||||||
|
#define GET_LOCAL_IDX get_local_id(0)
|
||||||
|
#define GET_GLOBAL_IDX get_global_id(0)
|
||||||
|
#define GET_GROUP_SIZE get_local_size(0)
|
||||||
|
#define GET_NUM_GROUPS get_num_groups(0)
|
||||||
|
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
|
||||||
|
#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
|
||||||
|
#define AtomInc(x) atom_inc(&(x))
|
||||||
|
#define AtomInc1(x, out) out = atom_inc(&(x))
|
||||||
|
#define AppendInc(x, out) out = atomic_inc(x)
|
||||||
|
#define AtomAdd(x, value) atom_add(&(x), value)
|
||||||
|
#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
|
||||||
|
#define AtomXhg(x, value) atom_xchg ( &(x), value )
|
||||||
|
|
||||||
|
|
||||||
|
#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
|
||||||
|
|
||||||
|
#define make_float4 (float4)
|
||||||
|
#define make_float2 (float2)
|
||||||
|
#define make_uint4 (uint4)
|
||||||
|
#define make_int4 (int4)
|
||||||
|
#define make_uint2 (uint2)
|
||||||
|
#define make_int2 (int2)
|
||||||
|
|
||||||
|
|
||||||
|
#define max2 max
|
||||||
|
#define min2 min
|
||||||
|
|
||||||
|
|
||||||
|
#define WG_SIZE 64
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float4 m_worldPos[4];
|
||||||
|
float4 m_worldNormal;
|
||||||
|
u32 m_coeffs;
|
||||||
|
int m_batchIdx;
|
||||||
|
|
||||||
|
int m_bodyAPtrAndSignBit;//sign bit set for fixed objects
|
||||||
|
int m_bodyBPtrAndSignBit;
|
||||||
|
}Contact4;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int m_n;
|
||||||
|
int m_start;
|
||||||
|
int m_staticIdx;
|
||||||
|
int m_paddings[1];
|
||||||
|
} ConstBuffer;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int m_a;
|
||||||
|
int m_b;
|
||||||
|
u32 m_idx;
|
||||||
|
}Elem;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// batching on the GPU
|
||||||
|
// Trivial batching: within the range owned by this work group, contact i is
// given batch index i, so every contact ends up in a batch of its own.
// gN[group] is the number of contacts and gStart[group] the first contact of the
// range. Only work item 0 of each group writes; m_staticIdx is not used.
__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )
{
	const int groupId = GET_GROUP_IDX;
	const int localId = GET_LOCAL_IDX;

	const int count = gN[groupId];
	const int first = gStart[groupId];

	// All other work items of the group have nothing to do.
	if( localId != 0 )
		return;

	__global Contact4* contacts = gConstraints + first;
	for (int batchIndex=0; batchIndex<count; batchIndex++)
	{
		contacts[batchIndex].m_batchIdx = batchIndex;
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
#define CHECK_SIZE (WG_SIZE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Tests one bit of a bit set stored in CHECK_SIZE 32-bit words.
// idx is wrapped modulo the number of bits (32*CHECK_SIZE), so different
// indices can map to the same bit.
// Returns the masked word: non-zero when the bit is set, 0 otherwise.
u32 readBuf(__local u32* buff, int idx)
{
	const int slot = idx % (32*CHECK_SIZE);
	const int word = slot/32;
	const int bit = slot%32;
	return buff[word] & (1<<bit);
}
|
||||||
|
|
||||||
|
// Sets one bit of a bit set stored in CHECK_SIZE 32-bit words.
// idx is wrapped modulo the number of bits (32*CHECK_SIZE).
// The update is a plain (non-atomic) read-modify-write; tryWrite is the
// atomic counterpart.
void writeBuf(__local u32* buff, int idx)
{
	const int slot = idx % (32*CHECK_SIZE);
	const int word = slot/32;
	const int bit = slot%32;
	buff[word] |= (1<<bit);
	//atom_or( &buff[word], (1<<bit) );
}
|
||||||
|
|
||||||
|
// Atomically sets one bit of a bit set stored in CHECK_SIZE 32-bit words.
// idx is wrapped modulo the number of bits (32*CHECK_SIZE).
// Returns 1 when the bit was clear before this call (this caller set it),
// 0 when it was already set.
u32 tryWrite(__local u32* buff, int idx)
{
	const int slot = idx % (32*CHECK_SIZE);
	const int word = slot/32;
	const int bit = slot%32;
	// atom_or returns the word as it was before the OR.
	const u32 previous = (u32)atom_or( &buff[word], (1<<bit) );
	const u32 wasSet = (previous >> bit)&1;
	return wasSet == 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
// batching on the GPU
|
||||||
|
// Greedy batching of the contacts owned by one work group, done serially by
// work item 0 of that group.
//
// gN[group] is the number of contacts and gStart[group] the first contact of the
// group's range. The range is swept repeatedly; a contact is accepted when none
// of its non-static bodies is flagged in ldsFixedBuffer (a bit set addressed
// through readBuf/writeBuf, which wrap the body index, so unrelated bodies may
// share a flag). Accepted contacts get the current batch index and are swapped
// to the end of the already-batched prefix. The flags are cleared at the start
// of each sweep and after every SIMD_WIDTH accepted contacts.
// A body is static when its signed index is negative or equals staticIdx.
__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )
{
	const int groupId = GET_GROUP_IDX;
	const int localId = GET_LOCAL_IDX;
	const int numConstraints = gN[groupId];
	const int firstContact = gStart[groupId];

	__local u32 ldsFixedBuffer[CHECK_SIZE];

	// Only one work item per group performs the batching.
	if( localId != 0 )
		return;

	__global Contact4* contacts = &gConstraints[firstContact];

	// contacts[0..numBatched) have already been assigned a batch.
	int numBatched = 0;
	int batchIdx = 0;

	while( numBatched < numConstraints )
	{
		int runLength = 0;

		// clear flag
		for(int w=0; w<CHECK_SIZE; w++)
			ldsFixedBuffer[w] = 0;

		for(int c=numBatched; c<numConstraints; c++)
		{
			const int signedA = contacts[c].m_bodyAPtrAndSignBit;
			const int signedB = contacts[c].m_bodyBPtrAndSignBit;
			const int bodyA = abs(signedA);
			const int bodyB = abs(signedB);
			const bool fixedA = (signedA<0) || signedA==staticIdx;
			const bool fixedB = (signedB<0) || signedB==staticIdx;
			const int busyA = fixedA ? 0 : readBuf( ldsFixedBuffer, bodyA);
			const int busyB = fixedB ? 0 : readBuf( ldsFixedBuffer, bodyB);

			// One of the bodies is already taken in this run: retry in a later sweep.
			if( busyA!=0 || busyB!=0 )
				continue;

			if (!fixedA)
				writeBuf( ldsFixedBuffer, bodyA );
			if (!fixedB)
				writeBuf( ldsFixedBuffer, bodyB );

			contacts[c].m_batchIdx = batchIdx;

			// Move the accepted contact to the end of the batched prefix.
			if (c!=numBatched)
			{
				const Contact4 accepted = contacts[c];
				contacts[c] = contacts[numBatched];
				contacts[numBatched] = accepted;
			}
			numBatched++;

			runLength++;
			if( runLength == SIMD_WIDTH )
			{
				// A full run was formed: continue the sweep with empty flags.
				runLength = 0;
				for(int w=0; w<CHECK_SIZE; w++)
					ldsFixedBuffer[w] = 0;
			}
		}//for

		batchIdx ++;
	}//while

	//return batchIdx;
}
|
||||||
240
opencl/gpu_rigidbody/kernels/batchingKernelsNew.h
Normal file
240
opencl/gpu_rigidbody/kernels/batchingKernelsNew.h
Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||||
|
static const char* batchingKernelsNewCL= \
|
||||||
|
"/*\n"
|
||||||
|
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||||
|
"\n"
|
||||||
|
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||||
|
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||||
|
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||||
|
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||||
|
"subject to the following restrictions:\n"
|
||||||
|
"\n"
|
||||||
|
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||||
|
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||||
|
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||||
|
"*/\n"
|
||||||
|
"//Originally written by Erwin Coumans\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||||
|
"\n"
|
||||||
|
"#ifdef cl_ext_atomic_counters_32\n"
|
||||||
|
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||||
|
"#else\n"
|
||||||
|
"#define counter32_t volatile __global int*\n"
|
||||||
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
|
"#define SIMD_WIDTH 64\n"
|
||||||
|
"\n"
|
||||||
|
"typedef unsigned int u32;\n"
|
||||||
|
"typedef unsigned short u16;\n"
|
||||||
|
"typedef unsigned char u8;\n"
|
||||||
|
"\n"
|
||||||
|
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||||
|
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||||
|
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||||
|
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||||
|
"#define GET_NUM_GROUPS get_num_groups(0)\n"
|
||||||
|
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||||
|
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
|
||||||
|
"#define AtomInc(x) atom_inc(&(x))\n"
|
||||||
|
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
|
||||||
|
"#define AppendInc(x, out) out = atomic_inc(x)\n"
|
||||||
|
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||||
|
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||||
|
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||||
|
"\n"
|
||||||
|
"#define make_float4 (float4)\n"
|
||||||
|
"#define make_float2 (float2)\n"
|
||||||
|
"#define make_uint4 (uint4)\n"
|
||||||
|
"#define make_int4 (int4)\n"
|
||||||
|
"#define make_uint2 (uint2)\n"
|
||||||
|
"#define make_int2 (int2)\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#define max2 max\n"
|
||||||
|
"#define min2 min\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#define WG_SIZE 64\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"typedef struct \n"
|
||||||
|
"{\n"
|
||||||
|
" float4 m_worldPos[4];\n"
|
||||||
|
" float4 m_worldNormal;\n"
|
||||||
|
" u32 m_coeffs;\n"
|
||||||
|
" int m_batchIdx;\n"
|
||||||
|
"\n"
|
||||||
|
" int m_bodyAPtrAndSignBit;//sign bit set for fixed objects\n"
|
||||||
|
" int m_bodyBPtrAndSignBit;\n"
|
||||||
|
"}Contact4;\n"
|
||||||
|
"\n"
|
||||||
|
"typedef struct \n"
|
||||||
|
"{\n"
|
||||||
|
" int m_n;\n"
|
||||||
|
" int m_start;\n"
|
||||||
|
" int m_staticIdx;\n"
|
||||||
|
" int m_paddings[1];\n"
|
||||||
|
"} ConstBuffer;\n"
|
||||||
|
"\n"
|
||||||
|
"typedef struct \n"
|
||||||
|
"{\n"
|
||||||
|
" int m_a;\n"
|
||||||
|
" int m_b;\n"
|
||||||
|
" u32 m_idx;\n"
|
||||||
|
"}Elem;\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"// batching on the GPU\n"
|
||||||
|
"__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n"
|
||||||
|
"{\n"
|
||||||
|
" int wgIdx = GET_GROUP_IDX;\n"
|
||||||
|
" int lIdx = GET_LOCAL_IDX;\n"
|
||||||
|
" \n"
|
||||||
|
" const int m_n = gN[wgIdx];\n"
|
||||||
|
" const int m_start = gStart[wgIdx];\n"
|
||||||
|
" \n"
|
||||||
|
" if( lIdx == 0 )\n"
|
||||||
|
" {\n"
|
||||||
|
" for (int i=0;i<m_n;i++)\n"
|
||||||
|
" {\n"
|
||||||
|
" int srcIdx = i+m_start;\n"
|
||||||
|
" int batchIndex = i;\n"
|
||||||
|
" gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"#define CHECK_SIZE (WG_SIZE)\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"u32 readBuf(__local u32* buff, int idx)\n"
|
||||||
|
"{\n"
|
||||||
|
" idx = idx % (32*CHECK_SIZE);\n"
|
||||||
|
" int bitIdx = idx%32;\n"
|
||||||
|
" int bufIdx = idx/32;\n"
|
||||||
|
" return buff[bufIdx] & (1<<bitIdx);\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"void writeBuf(__local u32* buff, int idx)\n"
|
||||||
|
"{\n"
|
||||||
|
" idx = idx % (32*CHECK_SIZE);\n"
|
||||||
|
" int bitIdx = idx%32;\n"
|
||||||
|
" int bufIdx = idx/32;\n"
|
||||||
|
" buff[bufIdx] |= (1<<bitIdx);\n"
|
||||||
|
" //atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"u32 tryWrite(__local u32* buff, int idx)\n"
|
||||||
|
"{\n"
|
||||||
|
" idx = idx % (32*CHECK_SIZE);\n"
|
||||||
|
" int bitIdx = idx%32;\n"
|
||||||
|
" int bufIdx = idx/32;\n"
|
||||||
|
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||||
|
" return ((ans >> bitIdx)&1) == 0;\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
"\n"
|
||||||
|
"// batching on the GPU\n"
|
||||||
|
"__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n"
|
||||||
|
"{\n"
|
||||||
|
" int wgIdx = GET_GROUP_IDX;\n"
|
||||||
|
" int lIdx = GET_LOCAL_IDX;\n"
|
||||||
|
" const int numConstraints = gN[wgIdx];\n"
|
||||||
|
" const int m_start = gStart[wgIdx];\n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" if( lIdx == 0 )\n"
|
||||||
|
" {\n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" __global Contact4* cs = &gConstraints[m_start]; \n"
|
||||||
|
" \n"
|
||||||
|
" \n"
|
||||||
|
" int numValidConstraints = 0;\n"
|
||||||
|
" int batchIdx = 0;\n"
|
||||||
|
"\n"
|
||||||
|
" while( numValidConstraints < numConstraints)\n"
|
||||||
|
" {\n"
|
||||||
|
" int nCurrentBatch = 0;\n"
|
||||||
|
" // clear flag\n"
|
||||||
|
" \n"
|
||||||
|
" for(int i=0; i<CHECK_SIZE; i++) \n"
|
||||||
|
" ldsFixedBuffer[i] = 0; \n"
|
||||||
|
"\n"
|
||||||
|
" for(int i=numValidConstraints; i<numConstraints; i++)\n"
|
||||||
|
" {\n"
|
||||||
|
"\n"
|
||||||
|
" int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n"
|
||||||
|
" int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n"
|
||||||
|
" int bodyA = abs(bodyAS);\n"
|
||||||
|
" int bodyB = abs(bodyBS);\n"
|
||||||
|
" bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n"
|
||||||
|
" bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n"
|
||||||
|
" int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n"
|
||||||
|
" int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB);\n"
|
||||||
|
" \n"
|
||||||
|
" if( aUnavailable==0 && bUnavailable==0 ) // ok\n"
|
||||||
|
" {\n"
|
||||||
|
" if (!aIsStatic)\n"
|
||||||
|
" {\n"
|
||||||
|
" writeBuf( ldsFixedBuffer, bodyA );\n"
|
||||||
|
" }\n"
|
||||||
|
" if (!bIsStatic)\n"
|
||||||
|
" {\n"
|
||||||
|
" writeBuf( ldsFixedBuffer, bodyB );\n"
|
||||||
|
" }\n"
|
||||||
|
"\n"
|
||||||
|
" cs[i].m_batchIdx = batchIdx;\n"
|
||||||
|
"\n"
|
||||||
|
" if (i!=numValidConstraints)\n"
|
||||||
|
" {\n"
|
||||||
|
" //btSwap(cs[i],cs[numValidConstraints]);\n"
|
||||||
|
" \n"
|
||||||
|
" Contact4 tmp = cs[i];\n"
|
||||||
|
" cs[i] = cs[numValidConstraints];\n"
|
||||||
|
" cs[numValidConstraints] = tmp;\n"
|
||||||
|
" \n"
|
||||||
|
" }\n"
|
||||||
|
"\n"
|
||||||
|
" numValidConstraints++;\n"
|
||||||
|
" \n"
|
||||||
|
" nCurrentBatch++;\n"
|
||||||
|
" if( nCurrentBatch == SIMD_WIDTH)\n"
|
||||||
|
" {\n"
|
||||||
|
" nCurrentBatch = 0;\n"
|
||||||
|
" for(int i=0; i<CHECK_SIZE; i++) \n"
|
||||||
|
" ldsFixedBuffer[i] = 0;\n"
|
||||||
|
" \n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
" }//for\n"
|
||||||
|
" batchIdx ++;\n"
|
||||||
|
" }//while\n"
|
||||||
|
" }//if( lIdx == 0 )\n"
|
||||||
|
" \n"
|
||||||
|
" //return batchIdx;\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
;
|
||||||
Reference in New Issue
Block a user