Added the Samurai Monastery Wavefront .obj model (data/samurai_monastry.obj).
Added an alternative batching kernel (slow); tweaked the controls a bit; added command-line options --selected_demo=<int> and --new_batching; started looking into parallel 3D SAP.
This commit is contained in:
@@ -129,10 +129,10 @@ struct InternalDataRenderer : public GLInstanceRendererInternalData
|
||||
|
||||
} else
|
||||
{
|
||||
//m_cameraDistance -= deltay*0.1;
|
||||
btVector3 fwd = m_cameraTargetPosition-m_cameraPosition;
|
||||
fwd.normalize();
|
||||
m_cameraTargetPosition += fwd*deltay*0.1;
|
||||
m_cameraDistance -= deltay*0.1;
|
||||
//btVector3 fwd = m_cameraTargetPosition-m_cameraPosition;
|
||||
//fwd.normalize();
|
||||
//m_cameraTargetPosition += fwd*deltay*0.1;
|
||||
}
|
||||
} else
|
||||
{
|
||||
|
||||
@@ -44,7 +44,7 @@ class GLInstancingRenderer
|
||||
|
||||
|
||||
public:
|
||||
GLInstancingRenderer(int m_maxObjectCapacity, int maxShapeCapacityInBytes = 10*1024*1024);
|
||||
GLInstancingRenderer(int m_maxObjectCapacity, int maxShapeCapacityInBytes = 56*1024*1024);
|
||||
virtual ~GLInstancingRenderer();
|
||||
|
||||
void init();
|
||||
|
||||
@@ -21,6 +21,8 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernel
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup.h" --stringname="solverSetupCL" stringify
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solverSetup2.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solverSetup2.h" --stringname="solverSetup2CL" stringify
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/batchingKernels.cl" --headerfile="../opencl/gpu_rigidbody/kernels/batchingKernels.h" --stringname="batchingKernelsCL" stringify
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl" --headerfile="../opencl/gpu_rigidbody/kernels/batchingKernelsNew.h" --stringname="batchingKernelsNewCL" stringify
|
||||
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveContact.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveContact.h" --stringname="solveContactCL" stringify
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/solveFriction.cl" --headerfile="../opencl/gpu_rigidbody/kernels/solveFriction.h" --stringname="solveFrictionCL" stringify
|
||||
|
||||
|
||||
740
data/samurai_monastry.mtl
Normal file
740
data/samurai_monastry.mtl
Normal file
@@ -0,0 +1,740 @@
|
||||
# Blender MTL File: 'None'
|
||||
# Material Count: 82
|
||||
|
||||
newmtl Arena_02:Arena:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:Arena:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:Arena:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:Arena:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:Arena:blinn5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:Arena:sda:sda:Material3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Arena_02:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Floor_04:Arena:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:blinn7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainEntrance:lambert2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainEntrance:MainTemple:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple3:Stairs_side_Material5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple3:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple3:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple3:blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple3:sda:Material3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:MainTemple4:sda:Material3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:SideTemple5:Material7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:blinn7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple1:pasted__blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple:MainTemple:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MainTemple:MainTemple:blinn5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Material
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MediumTemple:MediumTemple:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MediumTemple:MediumTemple:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MediumTemple:MediumTemple:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MediumTemple:MediumTemple:blinn5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl MediumTemple:SmallTemple:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple1:Out_Temple:lambert2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple1:Out_Temple:lambert3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple1:Out_Temple:lambert4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple1:Out_Temple:lambert6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple1:Out_Temple:lambert7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple:Out_Temple:lambert2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple:Out_Temple:lambert3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple:Out_Temple:lambert4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple:Out_Temple:lambert6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl Out_Temple:Out_Temple:lambert7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl PaperLanterns1:Paper_Lantern1:scene_material
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:Arena:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple1:Material2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple1:Material3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple1:Material7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple1:Material8SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple1:Material9SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple4:Material5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SideTemple4:Stairs_blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:SmallTemple1:phong2
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:side_temple_blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple03:side_temple_floor_blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple04:Arena:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple04:SideTemple1:Material2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple04:SmallTemple1:phong2
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple04:blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple04:blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:SideTemple:Material3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:SideTemple:Material5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:SideTemple:Material7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:SideTemple:Material8SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:SideTemple:Material9SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SideTemple:blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SmallTemple:SmallTemple:floor
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SmallTemple:SmallTemple:lambert2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SmallTemple:SmallTemple:polymsh47_XSIPOLYCLS_scene_material1
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl SmallTemple:SmallTemple:window
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl StoneLanterns:Lantern1:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn2SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn3SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn4SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn5SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn6SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl blinn7SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
|
||||
newmtl pagodas:pagoda1:blinn1SG
|
||||
Ns 96.078431
|
||||
Ka 0.000000 0.000000 0.000000
|
||||
Kd 0.640000 0.640000 0.640000
|
||||
Ks 0.500000 0.500000 0.500000
|
||||
Ni 1.000000
|
||||
d 1.000000
|
||||
illum 2
|
||||
331426
data/samurai_monastry.obj
Normal file
331426
data/samurai_monastry.obj
Normal file
File diff suppressed because it is too large
Load Diff
@@ -64,11 +64,12 @@ btAlignedObjectArray<const char*> demoNames;
|
||||
int selectedDemo = 0;
|
||||
GpuDemo::CreateFunc* allDemos[]=
|
||||
{
|
||||
ConcaveScene::MyCreateFunc,
|
||||
GpuConvexScene::MyCreateFunc,
|
||||
ConcaveScene::MyCreateFunc,
|
||||
|
||||
GpuConvexScene::MyCreateFunc,
|
||||
GpuCompoundScene::MyCreateFunc,
|
||||
GpuConvexScene::MyCreateFunc,
|
||||
|
||||
|
||||
GpuRigidBodyDemo::MyCreateFunc,
|
||||
|
||||
@@ -295,7 +296,7 @@ sth_stash* initFont(GLPrimitiveRenderer* primRender)
|
||||
|
||||
// Prints a one-line summary of the supported command-line options to stdout.
// NOTE(review): two printf calls appear here; they look like the old and new
// sides of this diff hunk (the second adds --selected_demo and --new_batching),
// so only the second is expected to exist in the committed file -- confirm.
void Usage()
|
||||
{
|
||||
printf("\nprogram.exe [--cl_device=<int>] [--benchmark] [--disable_opencl] [--cl_platform=<int>] [--x_dim=<int>] [--y_dim=<num>] [--z_dim=<int>] [--x_gap=<float>] [--y_gap=<float>] [--z_gap=<float>] [--use_concave_mesh]\n");
|
||||
printf("\nprogram.exe [--selected_demo=<int>] [--cl_device=<int>] [--benchmark] [--disable_opencl] [--cl_platform=<int>] [--x_dim=<int>] [--y_dim=<num>] [--z_dim=<int>] [--x_gap=<float>] [--y_gap=<float>] [--z_gap=<float>] [--use_concave_mesh] [--new_batching]\n");
|
||||
};
|
||||
|
||||
|
||||
@@ -360,6 +361,7 @@ void DumpSimulationTime(FILE* f)
|
||||
}
|
||||
///extern const char* g_deviceName;
|
||||
const char* g_deviceName = "blaat";
|
||||
extern bool useNewBatchingKernel;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@@ -376,6 +378,10 @@ int main(int argc, char* argv[])
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
args.GetCmdLineArgument("selected_demo",selectedDemo);
|
||||
|
||||
useNewBatchingKernel = args.CheckCmdLineFlag("new_batching");
|
||||
bool benchmark=args.CheckCmdLineFlag("benchmark");
|
||||
dump_timings=args.CheckCmdLineFlag("dump_timings");
|
||||
ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
|
||||
|
||||
#include "OpenGLWindow/GLInstanceGraphicsShape.h"
|
||||
#define CONCAVE_GAPX 16
|
||||
#define CONCAVE_GAPY 8
|
||||
#define CONCAVE_GAPZ 16
|
||||
|
||||
|
||||
GLInstanceGraphicsShape* createGraphicsShapeFromWavefrontObj(objLoader* obj)
|
||||
@@ -157,8 +160,11 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
||||
//char* fileName = "data/plane100.obj";
|
||||
//char* fileName = "data/teddy.obj";//"plane.obj";
|
||||
// char* fileName = "data/sponza_closed.obj";//"plane.obj";
|
||||
char* fileName = "data/leoTest1.obj";
|
||||
btVector3 shift(150,-100,-120);
|
||||
//char* fileName = "data/leoTest1.obj";
|
||||
char* fileName = "data/samurai_monastry.obj";
|
||||
|
||||
btVector3 shift(0,0,0);//150,-100,-120);
|
||||
btVector4 scaling(10,10,10,1);//4,4,4,1);
|
||||
FILE* f = 0;
|
||||
|
||||
char relativeFileName[1024];
|
||||
@@ -189,7 +195,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
||||
|
||||
{
|
||||
GLInstanceGraphicsShape* shape = createGraphicsShapeFromWavefrontObj(objData);
|
||||
btVector4 scaling(4,4,4,1);
|
||||
|
||||
|
||||
btAlignedObjectArray<btVector3> verts;
|
||||
for (int i=0;i<shape->m_numvertices;i++)
|
||||
@@ -216,7 +222,7 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
||||
int shapeId = ci.m_instancingRenderer->registerShape(&shape->m_vertices->at(0).xyzw[0], shape->m_numvertices, &shape->m_indices->at(0), shape->m_numIndices);
|
||||
btQuaternion orn(0,0,0,1);
|
||||
|
||||
btVector4 color(0,0,1,1.f);//0.5);//1.f
|
||||
btVector4 color(0.3,0.3,1,1.f);//0.5);//1.f
|
||||
|
||||
|
||||
{
|
||||
@@ -246,6 +252,15 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
||||
|
||||
if (1)
|
||||
{
|
||||
int curColor = 0;
|
||||
btVector4 colors[4] =
|
||||
{
|
||||
btVector4(1,1,1,1),
|
||||
btVector4(1,1,0.3,1),
|
||||
btVector4(0.3,1,1,1),
|
||||
btVector4(0.3,0.3,1,1),
|
||||
};
|
||||
|
||||
btVector4 scaling(1,1,1,1);
|
||||
int colIndex = m_data->m_np->registerConvexHullShape(&cube_vertices[0],strideInBytes,numVertices, scaling);
|
||||
for (int i=0;i<ci.arraySizeX;i++)
|
||||
@@ -257,10 +272,12 @@ void ConcaveScene::setupScene(const ConstructionInfo& ci)
|
||||
float mass = 1;
|
||||
|
||||
//btVector3 position(-2*ci.gapX+i*ci.gapX,25+j*ci.gapY,-2*ci.gapZ+k*ci.gapZ);
|
||||
btVector3 position(-(ci.arraySizeX/2)*ci.gapX+i*ci.gapX,50+j*ci.gapY,-(ci.arraySizeZ/2)*ci.gapZ+k*ci.gapZ);
|
||||
btVector3 position(-(ci.arraySizeX/2)*CONCAVE_GAPX+i*CONCAVE_GAPX,50+j*CONCAVE_GAPY,-(ci.arraySizeZ/2)*CONCAVE_GAPZ+k*CONCAVE_GAPZ);
|
||||
btQuaternion orn(1,0,0,0);
|
||||
|
||||
btVector4 color(0,1,0,1);
|
||||
btVector4 color = colors[curColor];
|
||||
curColor++;
|
||||
curColor&=3;
|
||||
|
||||
int id = ci.m_instancingRenderer->registerGraphicsInstance(shapeId,position,orn,color,scaling);
|
||||
int pid = m_data->m_rigidBodyPipeline->registerPhysicsInstance(mass,position,orn,colIndex,index);
|
||||
|
||||
@@ -65,7 +65,7 @@ void GpuConvexScene::setupScene(const ConstructionInfo& ci)
|
||||
{
|
||||
float mass = j==0? 0.f : 1.f;
|
||||
|
||||
btVector3 position((j&1)+i*2.2,2+j*3.,(j&1)+k*2.2);
|
||||
btVector3 position((j&1)+i*2.2,2+j*2.,(j&1)+k*2.2);
|
||||
btQuaternion orn(1,0,0,0);
|
||||
|
||||
btVector4 color = colors[curColor];
|
||||
|
||||
@@ -20,7 +20,8 @@ m_smallAabbsGPU(ctx,q),
|
||||
m_largeAabbsGPU(ctx,q),
|
||||
m_overlappingPairs(ctx,q),
|
||||
m_gpuSmallSortData(ctx,q),
|
||||
m_gpuSmallSortedAabbs(ctx,q)
|
||||
m_gpuSmallSortedAabbs(ctx,q),
|
||||
m_currentBuffer(-1)
|
||||
{
|
||||
const char* sapSrc = sapCL;
|
||||
const char* sapFastSrc = sapFastCL;
|
||||
@@ -86,18 +87,87 @@ static bool TestAabbAgainstAabb2(const btVector3 &aabbMin1, const btVector3 &aab
|
||||
return overlap;
|
||||
}
|
||||
|
||||
void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
|
||||
|
||||
//http://stereopsis.com/radix.html
|
||||
// Turns the raw 32-bit pattern of a float into an unsigned key (the float
// radix-sort trick from the page linked above): when the sign bit is set every
// bit is inverted, otherwise only the sign bit is flipped.
// NOTE(review): the pointer cast below reads a float object through an
// unsigned int lvalue, which violates strict-aliasing rules; copying the bytes
// with memcpy would be the portable form -- confirm before changing.
static unsigned int FloatFlip(float fl)
|
||||
{
|
||||
// NOTE(review): the `int axis` line below is unused by this function and looks
// like the removed side of this diff hunk (old calculateOverlappingPairs body)
// -- confirm against the repository.
int axis = 0;//todo on GPU for now hardcode
|
||||
// Reinterpret the float's bits as an unsigned integer.
unsigned int f = *(unsigned int*)&fl;
|
||||
// Sign bit set -> mask is 0xFFFFFFFF; sign bit clear -> mask is 0x80000000.
unsigned int mask = -(int)(f >> 31) | 0x80000000;
|
||||
return f ^ mask;
|
||||
};
|
||||
|
||||
// One-time setup for the experimental per-axis sorted lists ("3d SAP" test).
// Runs only while m_currentBuffer is negative; otherwise it does nothing.
// It copies all AABBs from the GPU into m_allAabbsCPU, selects buffer 0 as the
// current buffer, sizes both buffers of each of the three axes to the AABB
// count, and fills only the current buffer with key/value pairs.
void btGpuSapBroadphase::init3dSap()
|
||||
{
|
||||
if (m_currentBuffer<0)
|
||||
{
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
m_currentBuffer = 0;
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
for (int buf=0;buf<2;buf++)
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
m_sortedAxisCPU[axis][buf].resize(totalNumAabbs);
|
||||
|
||||
// Only the current buffer gets entries; the other one is just resized.
if (buf==m_currentBuffer)
|
||||
{
|
||||
for (int i=0;i<totalNumAabbs;i++)
|
||||
{
|
||||
// key = FloatFlip of m_minIndices[axis] (presumably the AABB minimum on this
// axis -- TODO confirm against btSapAabb); value = index of the AABB.
m_sortedAxisCPU[axis][buf][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||
m_sortedAxisCPU[axis][buf][i].m_value = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Work-in-progress host-side step for the per-axis sorted lists.
// Expects m_currentBuffer >= 0 (in the code visible here that is only set by
// init3dSap()); asserts on it and returns early if it is still negative.
// It then refreshes m_allAabbsCPU from the GPU, asserts that every per-axis
// buffer has one entry per AABB, switches to the other buffer, and rewrites
// that buffer's key/value pairs for all three axes.
// No sorting or overlap-pair generation happens in this function as shown.
void btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
btAssert(m_currentBuffer>=0);
|
||||
// Guard for builds where btAssert does not stop execution.
if (m_currentBuffer<0)
|
||||
return;
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
// Sanity check: both buffers of every axis must match the AABB count.
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
for (int buf=0;buf<2;buf++)
|
||||
{
|
||||
btAssert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Toggle between buffer 0 and buffer 1.
m_currentBuffer = 1-m_currentBuffer;
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
for (int i=0;i<totalNumAabbs;i++)
|
||||
{
|
||||
// key = FloatFlip of m_minIndices[axis] (presumably the AABB minimum on this
// axis -- TODO confirm against btSapAabb); value = index of the AABB.
m_sortedAxisCPU[axis][m_currentBuffer][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i].m_value = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void btGpuSapBroadphase::calculateOverlappingPairsHost()
|
||||
{
|
||||
//test
|
||||
//if (m_currentBuffer>=0)
|
||||
// calculateOverlappingPairsHostIncremental3Sap();
|
||||
|
||||
int axis=0;
|
||||
|
||||
btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
||||
|
||||
|
||||
if (forceHost)
|
||||
{
|
||||
|
||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
||||
@@ -105,7 +175,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
{
|
||||
//sync aabb
|
||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
||||
m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
|
||||
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||
}
|
||||
}
|
||||
@@ -116,7 +186,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
{
|
||||
//sync aabb
|
||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
||||
m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
|
||||
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||
|
||||
}
|
||||
@@ -175,8 +245,15 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
m_overlappingPairs.resize(0);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
//init3dSap();
|
||||
|
||||
}
|
||||
|
||||
void btGpuSapBroadphase::calculateOverlappingPairs()
|
||||
{
|
||||
int axis = 0;//todo on GPU for now hardcode
|
||||
|
||||
|
||||
|
||||
{
|
||||
|
||||
@@ -185,8 +262,8 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
if (syncOnHost)
|
||||
{
|
||||
BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
|
||||
{
|
||||
@@ -195,7 +272,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
{
|
||||
//sync aabb
|
||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
||||
m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
|
||||
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||
}
|
||||
}
|
||||
@@ -226,8 +303,8 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
if (syncOnHost)
|
||||
{
|
||||
BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
||||
btAlignedObjectArray<btSapAabb> allHostAabbs;
|
||||
m_allAabbsGPU.copyToHost(allHostAabbs);
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
|
||||
{
|
||||
@@ -236,7 +313,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
{
|
||||
//sync aabb
|
||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
||||
m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
|
||||
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
||||
}
|
||||
}
|
||||
@@ -432,6 +509,7 @@ void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
|
||||
|
||||
}//BT_PROFILE("GPU_RADIX SORT");
|
||||
|
||||
|
||||
}
|
||||
|
||||
void btGpuSapBroadphase::writeAabbsToGpu()
|
||||
|
||||
@@ -24,6 +24,10 @@ class btGpuSapBroadphase
|
||||
|
||||
class btRadixSort32CL* m_sorter;
|
||||
|
||||
///test for 3d SAP
|
||||
btAlignedObjectArray<btSortData> m_sortedAxisCPU[3][2];
|
||||
int m_currentBuffer;
|
||||
|
||||
public:
|
||||
|
||||
btOpenCLArray<btSapAabb> m_allAabbsGPU;
|
||||
@@ -45,7 +49,11 @@ class btGpuSapBroadphase
|
||||
btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
|
||||
virtual ~btGpuSapBroadphase();
|
||||
|
||||
void calculateOverlappingPairs(bool forceHost=false);
|
||||
void calculateOverlappingPairs();
|
||||
void calculateOverlappingPairsHost();
|
||||
|
||||
void init3dSap();
|
||||
void calculateOverlappingPairsHostIncremental3Sap();
|
||||
|
||||
void createProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
||||
void createLargeProxy(const btVector3& aabbMin, const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
|
||||
|
||||
@@ -16,6 +16,8 @@ subject to the following restrictions:
|
||||
|
||||
#include "Solver.h"
|
||||
|
||||
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
|
||||
bool useNewBatchingKernel = false;
|
||||
|
||||
#define SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl"
|
||||
#define SOLVER_SETUP2_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup2.cl"
|
||||
@@ -24,6 +26,7 @@ subject to the following restrictions:
|
||||
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
||||
|
||||
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
||||
#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl"
|
||||
|
||||
|
||||
#include "../kernels/solverSetup.h"
|
||||
@@ -33,6 +36,9 @@ subject to the following restrictions:
|
||||
#include "../kernels/solveFriction.h"
|
||||
|
||||
#include "../kernels/batchingKernels.h"
|
||||
#include "../kernels/batchingKernelsNew.h"
|
||||
|
||||
|
||||
#include "BulletCommon/btQuickprof.h"
|
||||
#include "../../parallel_primitives/host/btLauncherCL.h"
|
||||
#include "BulletCommon/btVector3.h"
|
||||
@@ -94,7 +100,7 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
||||
const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
|
||||
|
||||
m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
|
||||
m_contactBuffer = new btOpenCLArray<btContact4>(ctx,queue);
|
||||
m_contactBuffer2 = new btOpenCLArray<btContact4>(ctx,queue);
|
||||
|
||||
m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
|
||||
m_numConstraints->resize(N_SPLIT*N_SPLIT);
|
||||
@@ -108,6 +114,8 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
||||
|
||||
cl_int pErrNum;
|
||||
const char* batchKernelSource = batchingKernelsCL;
|
||||
const char* batchKernelNewSource = batchingKernelsNewCL;
|
||||
|
||||
const char* solverSetupSource = solverSetupCL;
|
||||
const char* solverSetup2Source = solverSetup2CL;
|
||||
const char* solveContactSource = solveContactCL;
|
||||
@@ -159,13 +167,20 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
|
||||
m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
|
||||
btAssert(m_batchingKernel);
|
||||
}
|
||||
|
||||
{
|
||||
cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||
btAssert(batchingNewProg);
|
||||
|
||||
m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||
//m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
|
||||
btAssert(m_batchingKernelNew);
|
||||
}
|
||||
}
|
||||
|
||||
Solver::~Solver()
|
||||
{
|
||||
delete m_sortDataBuffer;
|
||||
delete m_contactBuffer;
|
||||
delete m_contactBuffer2;
|
||||
|
||||
delete m_sort32;
|
||||
delete m_scan;
|
||||
@@ -173,6 +188,7 @@ Solver::~Solver()
|
||||
|
||||
|
||||
clReleaseKernel(m_batchingKernel);
|
||||
clReleaseKernel(m_batchingKernelNew);
|
||||
|
||||
clReleaseKernel( m_solveContactKernel);
|
||||
clReleaseKernel( m_solveFrictionKernel);
|
||||
@@ -843,7 +859,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
|
||||
btBufferInfoCL bInfo[] = {
|
||||
btBufferInfoCL( contacts->getBufferCL() ),
|
||||
btBufferInfoCL( m_contactBuffer->getBufferCL() ),
|
||||
btBufferInfoCL( m_contactBuffer2->getBufferCL()),
|
||||
btBufferInfoCL( nNative->getBufferCL() ),
|
||||
btBufferInfoCL( offsetsNative->getBufferCL() ),
|
||||
#ifdef BATCH_DEBUG
|
||||
@@ -852,10 +868,22 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
BT_PROFILE("batchingKernel");
|
||||
btLauncherCL launcher( m_queue, m_batchingKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
//btLauncherCL launcher( m_queue, m_batchingKernel);
|
||||
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
|
||||
|
||||
btLauncherCL launcher( m_queue, k);
|
||||
if (!useNewBatchingKernel )
|
||||
{
|
||||
launcher.setBuffer( contacts->getBufferCL() );
|
||||
}
|
||||
launcher.setBuffer( m_contactBuffer2->getBufferCL() );
|
||||
launcher.setBuffer( nNative->getBufferCL());
|
||||
launcher.setBuffer( offsetsNative->getBufferCL());
|
||||
|
||||
//launcher.setConst( cdata );
|
||||
launcher.setConst(staticIdx);
|
||||
|
||||
@@ -899,7 +927,7 @@ void Solver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts,
|
||||
}
|
||||
|
||||
// copy buffer to buffer
|
||||
btAssert(m_contactBuffer->size()==nContacts);
|
||||
//btAssert(m_contactBuffer->size()==nContacts);
|
||||
//contacts->copyFromOpenCLArray( *m_contactBuffer);
|
||||
//clFinish(m_queue);//needed?
|
||||
|
||||
|
||||
@@ -94,6 +94,7 @@ class Solver : public SolverBase
|
||||
|
||||
int m_nIterations;
|
||||
cl_kernel m_batchingKernel;
|
||||
cl_kernel m_batchingKernelNew;
|
||||
cl_kernel m_solveContactKernel;
|
||||
cl_kernel m_solveFrictionKernel;
|
||||
cl_kernel m_contactToConstraintKernel;
|
||||
@@ -106,7 +107,7 @@ class Solver : public SolverBase
|
||||
class btPrefixScanCL* m_scan;
|
||||
|
||||
btOpenCLArray<btSortData>* m_sortDataBuffer;
|
||||
btOpenCLArray<btContact4>* m_contactBuffer;
|
||||
btOpenCLArray<btContact4>* m_contactBuffer2;
|
||||
|
||||
enum
|
||||
{
|
||||
|
||||
@@ -18,7 +18,7 @@ struct btConfig
|
||||
int m_maxTriConvexPairCapacity;
|
||||
|
||||
btConfig()
|
||||
:m_maxConvexBodies(64*1024),
|
||||
:m_maxConvexBodies(32*1024),
|
||||
m_maxConvexShapes(8192),
|
||||
m_maxVerticesPerFace(64),
|
||||
m_maxFacesPerShape(64),
|
||||
@@ -26,7 +26,7 @@ struct btConfig
|
||||
m_maxConvexIndices(8192),
|
||||
m_maxConvexUniqueEdges(8192),
|
||||
m_maxCompoundChildShapes(8192),
|
||||
m_maxTriConvexPairCapacity(64*1024)
|
||||
m_maxTriConvexPairCapacity(512*1024)
|
||||
{
|
||||
m_maxBroadphasePairs = 16*m_maxConvexBodies;
|
||||
}
|
||||
|
||||
@@ -17,12 +17,14 @@
|
||||
#define SOLVER_CONTACT_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveContact.cl"
|
||||
#define SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl"
|
||||
#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl"
|
||||
#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl"
|
||||
|
||||
#include "../kernels/solverSetup.h"
|
||||
#include "../kernels/solverSetup2.h"
|
||||
#include "../kernels/solveContact.h"
|
||||
#include "../kernels/solveFriction.h"
|
||||
#include "../kernels/batchingKernels.h"
|
||||
#include "../kernels/batchingKernelsNew.h"
|
||||
|
||||
|
||||
|
||||
@@ -48,13 +50,13 @@ struct btGpuBatchingPgsSolverInternalData
|
||||
int m_nIterations;
|
||||
|
||||
btOpenCLArray<btGpuConstraint4>* m_contactCGPU;
|
||||
|
||||
btOpenCLArray<unsigned int>* m_numConstraints;
|
||||
btOpenCLArray<unsigned int>* m_offsets;
|
||||
|
||||
Solver* m_solverGPU;
|
||||
|
||||
cl_kernel m_batchingKernel;
|
||||
cl_kernel m_batchingKernelNew;
|
||||
cl_kernel m_solveContactKernel;
|
||||
cl_kernel m_solveFrictionKernel;
|
||||
cl_kernel m_contactToConstraintKernel;
|
||||
@@ -72,6 +74,11 @@ struct btGpuBatchingPgsSolverInternalData
|
||||
btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
|
||||
btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
|
||||
btOpenCLArray<btContact4>* m_pBufContactOutGPU;
|
||||
|
||||
|
||||
btAlignedObjectArray<unsigned int> m_idxBuffer;
|
||||
btAlignedObjectArray<btSortData> m_sortData;
|
||||
btAlignedObjectArray<btContact4> m_old;
|
||||
};
|
||||
|
||||
|
||||
@@ -114,6 +121,7 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
||||
|
||||
cl_int pErrNum;
|
||||
const char* batchKernelSource = batchingKernelsCL;
|
||||
const char* batchKernelNewSource = batchingKernelsNewCL;
|
||||
const char* solverSetupSource = solverSetupCL;
|
||||
const char* solverSetup2Source = solverSetup2CL;
|
||||
const char* solveContactSource = solveContactCL;
|
||||
@@ -166,7 +174,14 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
|
||||
btAssert(m_data->m_batchingKernel);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
|
||||
btAssert(batchingNewProg);
|
||||
|
||||
m_data->m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
|
||||
btAssert(m_data->m_batchingKernelNew);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -186,6 +201,7 @@ btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
|
||||
|
||||
|
||||
clReleaseKernel(m_data->m_batchingKernel);
|
||||
clReleaseKernel(m_data->m_batchingKernelNew);
|
||||
|
||||
clReleaseKernel( m_data->m_solveContactKernel);
|
||||
clReleaseKernel( m_data->m_solveFrictionKernel);
|
||||
@@ -406,7 +422,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
csCfg.m_averageExtent = .2f;//@TODO m_averageObjExtent;
|
||||
csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
|
||||
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
||||
|
||||
btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
|
||||
|
||||
void* additionalData = 0;//m_data->m_frictionCGPU;
|
||||
@@ -419,16 +435,17 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
{
|
||||
|
||||
if( m_data->m_solverGPU->m_contactBuffer)
|
||||
if( m_data->m_solverGPU->m_contactBuffer2)
|
||||
{
|
||||
m_data->m_solverGPU->m_contactBuffer->resize(nContacts);
|
||||
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||
}
|
||||
|
||||
if( m_data->m_solverGPU->m_contactBuffer == 0 )
|
||||
if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
|
||||
{
|
||||
m_data->m_solverGPU->m_contactBuffer = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
|
||||
m_data->m_solverGPU->m_contactBuffer->resize(nContacts);
|
||||
m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
|
||||
m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
|
||||
}
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
|
||||
|
||||
@@ -438,7 +455,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
|
||||
|
||||
|
||||
btOpenCLArray<btContact4>* contactNative = contactsIn;
|
||||
|
||||
const btOpenCLArray<btRigidBodyCL>* bodyNative = bodyBuf;
|
||||
|
||||
|
||||
@@ -477,7 +494,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
|
||||
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setConst( cdata.m_nContacts );
|
||||
@@ -536,7 +553,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
btInt4 cdata;
|
||||
cdata.x = nContacts;
|
||||
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer->getBufferCL())
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
|
||||
, btBufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
|
||||
btLauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel);
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
@@ -554,19 +571,18 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
|
||||
|
||||
if (nContacts)
|
||||
{
|
||||
BT_PROFILE("gpu m_copyConstraintKernel");
|
||||
|
||||
btInt4 cdata; cdata.x = nContacts;
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer->getBufferCL() ), btBufferInfoCL( contactNative->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
clFinish(m_data->m_queue);
|
||||
}
|
||||
|
||||
{
|
||||
BT_PROFILE("gpu m_copyConstraintKernel");
|
||||
btInt4 cdata; cdata.x = nContacts;
|
||||
btBufferInfoCL bInfo[] = { btBufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), btBufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) };
|
||||
btLauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
|
||||
launcher.setConst( cdata );
|
||||
launcher.launch1D( nContacts, 64 );
|
||||
clFinish(m_data->m_queue);
|
||||
}
|
||||
|
||||
|
||||
bool compareGPU = false;
|
||||
@@ -575,13 +591,13 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (gpuBatchContacts)
|
||||
{
|
||||
BT_PROFILE("gpu batchContacts");
|
||||
maxNumBatches = 50;
|
||||
m_data->m_solverGPU->batchContacts( (btOpenCLArray<btContact4>*)contactNative, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
maxNumBatches = 25;//250;
|
||||
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
|
||||
} else
|
||||
{
|
||||
BT_PROFILE("cpu batchContacts");
|
||||
btAlignedObjectArray<btContact4> cpuContacts;
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_pBufContactOutGPU;
|
||||
btOpenCLArray<btContact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
|
||||
contactsIn->copyToHost(cpuContacts);
|
||||
|
||||
btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
|
||||
@@ -611,8 +627,11 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
numNonzeroGrid++;
|
||||
//printf("cpu batch\n");
|
||||
|
||||
int simdWidth = 32;
|
||||
int numBatches = sortConstraintByBatch( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||
|
||||
int simdWidth =64;//-1;//32;
|
||||
int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies); // on GPU
|
||||
|
||||
|
||||
maxNumBatches = btMax(numBatches,maxNumBatches);
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
@@ -622,7 +641,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
}
|
||||
{
|
||||
BT_PROFILE("m_contactBuffer->copyFromHost");
|
||||
m_data->m_solverGPU->m_contactBuffer->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
||||
m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -636,7 +655,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
{
|
||||
//BT_PROFILE("gpu convertToConstraints");
|
||||
m_data->m_solverGPU->convertToConstraints( bodyBuf,
|
||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer /*contactNative*/,
|
||||
shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
|
||||
contactConstraintOut,
|
||||
additionalData, nContacts,
|
||||
(SolverBase::ConstraintCfg&) csCfg );
|
||||
@@ -700,6 +719,13 @@ static bool sortfnc(const btSortData& a,const btSortData& b)
|
||||
return (a.m_key<b.m_key);
|
||||
}
|
||||
|
||||
|
||||
|
||||
btAlignedObjectArray<int> bodyUsed;
|
||||
|
||||
|
||||
|
||||
|
||||
btAlignedObjectArray<unsigned int> idxBuffer;
|
||||
btAlignedObjectArray<btSortData> sortData;
|
||||
btAlignedObjectArray<btContact4> old;
|
||||
@@ -830,3 +856,301 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
|
||||
#endif
|
||||
return batchIdx;
|
||||
}
|
||||
|
||||
|
||||
///Greedy CPU batching of contact constraints (index-permutation variant).
///
///Repeatedly sweeps the not-yet-assigned constraints and assigns the current batch index to every
///constraint whose non-static bodies are not already claimed in the current chunk. The list of claimed
///bodies is reset at the start of every sweep and additionally every time simdWidth constraints have been
///accepted; the batch index itself only advances once per sweep.
///Assignment is tracked through an index permutation (m_data->m_idxBuffer); the constraints themselves are
///copied once to m_data->m_old and written back to cs in assignment order at the end.
///
///@param cs             constraints to batch; reordered in place, getBatchIdx() is written for each
///@param numConstraints number of entries in cs; values <= 0 return 0 without touching cs
///@param simdWidth      number of accepted constraints after which the claimed-body list is reset;
///                      values <= 0 never trigger that reset
///@param staticIdx      body index treated as static in addition to bodies with a negative (sign-bit) index
///@param numBodies      unused
///@return number of batches created
inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	BT_PROFILE("sortConstraintByBatch");

	(void)numBodies;

	//nothing to batch; also avoids taking &array[0] of the empty scratch arrays below
	if (numConstraints <= 0)
		return 0;

	//each accepted constraint claims at most two bodies. With a positive simdWidth the list is reset after
	//simdWidth accepted constraints, otherwise it can grow to cover every constraint.
	const int scratchSize = (simdWidth > 0) ? 2*simdWidth : 2*numConstraints;
	bodyUsed.resize(scratchSize);
	for (int q=0;q<scratchSize;q++)
		bodyUsed[q]=0;

	//only bodyUsed[0..curBodyUsed) is ever read, so resetting the count is enough to clear the list
	int curBodyUsed = 0;

	m_data->m_sortData.resize(numConstraints);
	m_data->m_idxBuffer.resize(numConstraints);
	m_data->m_old.resize(numConstraints);

	unsigned int* idxSrc = &m_data->m_idxBuffer[0];

#if defined(_DEBUG)
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif
	for(int i=0; i<numConstraints; i++)
		idxSrc[i] = i;

	int numValidConstraints = 0;
	int batchIdx = 0;

	{
		BT_PROFILE("cpu batch innerloop");

		while( numValidConstraints < numConstraints)
		{
			int nCurrentBatch = 0;
			curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				int idx = idxSrc[i];
				btAssert( idx < numConstraints );

				// check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;

				//static bodies are never claimed, so they never cause a conflict
				bool conflict = false;
				if (!aIsStatic)
				{
					for (int j=0; j<curBodyUsed && !conflict; j++)
						conflict = (bodyA == bodyUsed[j]);
				}
				if (!conflict && !bIsStatic)
				{
					for (int j=0; j<curBodyUsed && !conflict; j++)
						conflict = (bodyB == bodyUsed[j]);
				}
				if (conflict)
					continue;

				if (!aIsStatic)
				{
					bodyUsed[curBodyUsed++] = bodyA;
				}
				if (!bIsStatic)
				{
					bodyUsed[curBodyUsed++] = bodyB;
				}

				cs[idx].getBatchIdx() = batchIdx;
				m_data->m_sortData[idx].m_key = batchIdx;
				m_data->m_sortData[idx].m_value = idx;

				//move the accepted index to the end of the already-assigned prefix
				if (i!=numValidConstraints)
				{
					btSwap(idxSrc[i], idxSrc[numValidConstraints]);
				}
				numValidConstraints++;

				nCurrentBatch++;
				if( nCurrentBatch == simdWidth )
				{
					//start a new chunk within the same batch index
					nCurrentBatch = 0;
					curBodyUsed = 0;
				}
			}

			batchIdx ++;
		}
	}

	{
		BT_PROFILE("reorder");
		// reorder: write the constraints back in the order in which they were assigned

		memcpy( &m_data->m_old[0], cs, sizeof(btContact4)*numConstraints);

		for(int i=0; i<numConstraints; i++)
		{
			btAssert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
			int idx = m_data->m_sortData[idxSrc[i]].m_value;
			cs[i] = m_data->m_old[idx];
		}
	}

#if defined(_DEBUG)
	//every constraint must have received a batch index
	for(int i=0; i<numConstraints; i++)
	{
		btAssert( cs[i].getBatchIdx() != -1 );
	}
#endif

	return batchIdx;
}
|
||||
|
||||
|
||||
///Greedy CPU batching of contact constraints (in-place swap variant).
///
///Repeatedly sweeps the not-yet-assigned constraints and assigns the current batch index to every
///constraint whose non-static bodies are not already claimed in the current chunk. An accepted constraint
///is swapped directly to the end of the already-assigned prefix of cs, so no index permutation or
///temporary copy of the constraints is needed.
///The list of claimed bodies is reset at the start of every sweep and additionally every time simdWidth
///constraints have been accepted; the batch index itself only advances once per sweep.
///
///Side effects: clears m_data->m_sortData, m_data->m_idxBuffer and m_data->m_old, and prints a line to
///stdout whenever a new maximum constraint count or swap count is observed (function-local statics).
///
///@param cs             constraints to batch; reordered in place, getBatchIdx() is written for each
///@param numConstraints number of entries in cs
///@param simdWidth      number of accepted constraints after which the claimed-body list is reset;
///                      values <= 0 never trigger that reset
///@param staticIdx      body index treated as static in addition to bodies with a negative (sign-bit) index
///@param numBodies      unused
///@return number of batches created
inline int btGpuBatchingPgsSolver::sortConstraintByBatch3( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	BT_PROFILE("sortConstraintByBatch");

	(void)numBodies;

	//debug statistics, kept across calls
	static int maxSwaps = 0;
	int numSwaps = 0;

	static int maxNumConstraints = 0;
	if (maxNumConstraints<numConstraints)
	{
		maxNumConstraints = numConstraints;
		printf("maxNumConstraints = %d\n",maxNumConstraints );
	}

	//each accepted constraint claims at most two bodies. With a positive simdWidth the list is reset after
	//simdWidth accepted constraints, otherwise it can grow to cover every constraint.
	const int scratchSize = (simdWidth > 0) ? 2*simdWidth : 2*btMax(numConstraints,0);
	bodyUsed.resize(scratchSize);
	for (int q=0;q<scratchSize;q++)
		bodyUsed[q]=0;

	//only bodyUsed[0..curBodyUsed) is ever read, so resetting the count is enough to clear the list
	int curBodyUsed = 0;

	m_data->m_sortData.resize(0);
	m_data->m_idxBuffer.resize(0);
	m_data->m_old.resize(0);

#if defined(_DEBUG)
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif

	int numValidConstraints = 0;
	int batchIdx = 0;

	{
		BT_PROFILE("cpu batch innerloop");

		while( numValidConstraints < numConstraints)
		{
			int nCurrentBatch = 0;
			curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				// check if it can go
				int bodyAS = cs[i].m_bodyAPtrAndSignBit;
				int bodyBS = cs[i].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;

				//static bodies are never claimed, so they never cause a conflict
				bool conflict = false;
				if (!aIsStatic)
				{
					for (int j=0; j<curBodyUsed && !conflict; j++)
						conflict = (bodyA == bodyUsed[j]);
				}
				if (!conflict && !bIsStatic)
				{
					for (int j=0; j<curBodyUsed && !conflict; j++)
						conflict = (bodyB == bodyUsed[j]);
				}
				if (conflict)
					continue;

				if (!aIsStatic)
				{
					bodyUsed[curBodyUsed++] = bodyA;
				}
				if (!bIsStatic)
				{
					bodyUsed[curBodyUsed++] = bodyB;
				}

				cs[i].getBatchIdx() = batchIdx;

				//move the accepted constraint to the end of the already-assigned prefix
				if (i!=numValidConstraints)
				{
					btSwap(cs[i],cs[numValidConstraints]);
					numSwaps++;
				}
				numValidConstraints++;

				nCurrentBatch++;
				if( nCurrentBatch == simdWidth )
				{
					//start a new chunk within the same batch index
					nCurrentBatch = 0;
					curBodyUsed = 0;
				}
			}

			batchIdx ++;
		}
	}

#if defined(_DEBUG)
	//every constraint must have received a batch index
	for(int i=0; i<numConstraints; i++)
	{
		btAssert( cs[i].getBatchIdx() != -1 );
	}
#endif

	if (maxSwaps<numSwaps)
	{
		maxSwaps = numSwaps;
		printf("maxSwaps = %d\n", maxSwaps);
	}

	return batchIdx;
}
|
||||
|
||||
@@ -12,10 +12,18 @@ class btGpuBatchingPgsSolver
|
||||
{
|
||||
protected:
|
||||
|
||||
|
||||
|
||||
struct btGpuBatchingPgsSolverInternalData* m_data;
|
||||
|
||||
void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
|
||||
|
||||
inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
inline int sortConstraintByBatch2( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
inline int sortConstraintByBatch3( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
|
||||
|
||||
|
||||
|
||||
void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf,
|
||||
btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
|
||||
|
||||
|
||||
@@ -63,10 +63,15 @@ void btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
{
|
||||
|
||||
//update worldspace AABBs from local AABB/worldtransform
|
||||
setupGpuAabbsFull();
|
||||
{
|
||||
setupGpuAabbsFull();
|
||||
}
|
||||
|
||||
//compute overlapping pairs
|
||||
m_data->m_broadphaseSap->calculateOverlappingPairs();
|
||||
{
|
||||
//m_data->m_broadphaseSap->calculateOverlappingPairsHost();
|
||||
m_data->m_broadphaseSap->calculateOverlappingPairs();
|
||||
}
|
||||
|
||||
//compute contact points
|
||||
|
||||
|
||||
236
opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl
Normal file
236
opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl
Normal file
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
Copyright (c) 2012 Advanced Micro Devices, Inc.
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Originally written by Erwin Coumans
|
||||
|
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
|
||||
|
||||
#ifdef cl_ext_atomic_counters_32
|
||||
#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
|
||||
#else
|
||||
#define counter32_t volatile __global int*
|
||||
#endif
|
||||
|
||||
#define SIMD_WIDTH 64
|
||||
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned short u16;
|
||||
typedef unsigned char u8;
|
||||
|
||||
#define GET_GROUP_IDX get_group_id(0)
|
||||
#define GET_LOCAL_IDX get_local_id(0)
|
||||
#define GET_GLOBAL_IDX get_global_id(0)
|
||||
#define GET_GROUP_SIZE get_local_size(0)
|
||||
#define GET_NUM_GROUPS get_num_groups(0)
|
||||
#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
|
||||
#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
|
||||
#define AtomInc(x) atom_inc(&(x))
|
||||
#define AtomInc1(x, out) out = atom_inc(&(x))
|
||||
#define AppendInc(x, out) out = atomic_inc(x)
|
||||
#define AtomAdd(x, value) atom_add(&(x), value)
|
||||
#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
|
||||
#define AtomXhg(x, value) atom_xchg ( &(x), value )
|
||||
|
||||
|
||||
#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
|
||||
|
||||
#define make_float4 (float4)
|
||||
#define make_float2 (float2)
|
||||
#define make_uint4 (uint4)
|
||||
#define make_int4 (int4)
|
||||
#define make_uint2 (uint2)
|
||||
#define make_int2 (int2)
|
||||
|
||||
|
||||
#define max2 max
|
||||
#define min2 min
|
||||
|
||||
|
||||
#define WG_SIZE 64
|
||||
|
||||
|
||||
|
||||
// Contact record as seen by the batching kernels.
// NOTE(review): the host fills these buffers from btContact4 arrays, so the field order and sizes
// presumably have to stay in sync with that host-side struct -- confirm before changing anything here.
typedef struct
{
	float4 m_worldPos[4];
	float4 m_worldNormal;
	u32 m_coeffs;
	int m_batchIdx;			// batch index assigned by the batching kernels

	int m_bodyAPtrAndSignBit;	// body A index; sign bit set for fixed objects
	int m_bodyBPtrAndSignBit;	// body B index; same encoding as body A
}Contact4;
|
||||
|
||||
// Four-int constant block (count, start offset, static body index, one padding int).
// NOTE(review): not referenced by the kernels visible in this part of the file; they take
// staticIdx as a plain argument and read count/start from gN/gStart instead.
typedef struct
{
	int m_n;
	int m_start;
	int m_staticIdx;
	int m_paddings[1];
} ConstBuffer;
|
||||
|
||||
// Pair of ints plus an unsigned index.
// NOTE(review): presumably (bodyA, bodyB, constraint index); not referenced by the kernels
// visible in this part of the file -- confirm usage before relying on the meaning.
typedef struct
{
	int m_a;
	int m_b;
	u32 m_idx;
}Elem;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// batching on the GPU
|
||||
// Brute-force batching: every constraint of a work-group's range gets its own batch.
// Work-group g handles the gN[g] constraints starting at gConstraints[gStart[g]] and writes
// m_batchIdx = 0, 1, 2, ... to them in order. Only the first work-item of each group does any work;
// m_staticIdx is not used.
__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )
{
	// single-threaded per work-group: all other work-items have nothing to do
	if( GET_LOCAL_IDX != 0 )
		return;

	const int group = GET_GROUP_IDX;
	const int count = gN[group];
	const int first = gStart[group];

	for (int k=0; k<count; k++)
	{
		gConstraints[ first+k ].m_batchIdx = k;
	}
}
|
||||
|
||||
|
||||
#define CHECK_SIZE (WG_SIZE)
|
||||
|
||||
|
||||
|
||||
|
||||
// Tests one bit of a bit set stored in buff (32 bits per word, 32*CHECK_SIZE bits in total).
// idx is wrapped modulo the bit-set size, so different indices can map to the same bit.
// Returns the masked word: non-zero (not necessarily 1) when the bit is set, 0 otherwise.
u32 readBuf(__local u32* buff, int idx)
{
	const int slot = idx % (32*CHECK_SIZE);
	const u32 mask = 1u << (slot%32);
	return buff[slot/32] & mask;
}
|
||||
|
||||
// Sets one bit of the local bitfield `buff` (CHECK_SIZE 32-bit words).
// `idx` is wrapped modulo 32*CHECK_SIZE exactly as in readBuf.
// This is a plain, non-atomic read-modify-write; tryWrite is the variant
// that goes through atom_or.
void writeBuf(__local u32* buff, int idx)
{
	const int bit = idx % (32*CHECK_SIZE);
	const int word = bit / 32;
	buff[word] |= (1 << (bit % 32));
}
|
||||
|
||||
// Atomically sets one bit of the local bitfield `buff` (CHECK_SIZE 32-bit
// words) and reports whether this call was the one that set it.
// `idx` is wrapped modulo 32*CHECK_SIZE exactly as in readBuf.
// Returns 1 if the bit was clear before the atom_or, 0 if it was already set.
u32 tryWrite(__local u32* buff, int idx)
{
	const int bit = idx % (32*CHECK_SIZE);
	const int shift = bit % 32;
	const int word = bit / 32;
	const u32 before = (u32)atom_or( &buff[word], (1 << shift) );
	return ((before >> shift) & 1) == 0;
}
|
||||
|
||||
|
||||
// Batching on the GPU, greedy variant.
//
// Each work-group handles the constraint range described by gN[group] (count)
// and gStart[group] (first index into gConstraints). Only work-item 0 of the
// group does any work. It repeatedly sweeps the constraints that have no
// batch yet; a constraint is accepted into the current batch when neither of
// its non-static bodies is flagged as used in ldsFixedBuffer. Accepted
// constraints get m_batchIdx = batchIdx and are swapped to the front of the
// unsorted region, so on exit the range is ordered by batch index.
//
// A body counts as static when its field is negative or equals staticIdx;
// static bodies are neither checked nor flagged.
// Body indices are wrapped modulo 32*CHECK_SIZE bits by readBuf/writeBuf, so
// two different bodies may share a flag; that only postpones a constraint to
// a later sweep.
// The final batch count is not returned or stored anywhere.
__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )
{
	const int groupId = GET_GROUP_IDX;
	const int localId = GET_LOCAL_IDX;
	const int numConstraints = gN[groupId];
	const int m_start = gStart[groupId];

	// One "body already used" bit per hashed body index.
	__local u32 ldsFixedBuffer[CHECK_SIZE];

	// All other work-items of the group are idle.
	if( localId != 0 )
		return;

	__global Contact4* cs = &gConstraints[m_start];

	int numSorted = 0;   // constraints [0, numSorted) already have their batch
	int batchIdx = 0;

	// Every sweep starts with cleared flags, so it accepts at least the first
	// remaining constraint and the loop always makes progress.
	while( numSorted < numConstraints )
	{
		int nCurrentBatch = 0;

		for( int w = 0; w < CHECK_SIZE; w++ )
			ldsFixedBuffer[w] = 0;

		for( int i = numSorted; i < numConstraints; i++ )
		{
			const int bodyAS = cs[i].m_bodyAPtrAndSignBit;
			const int bodyBS = cs[i].m_bodyBPtrAndSignBit;
			const int bodyA = abs(bodyAS);
			const int bodyB = abs(bodyBS);
			const bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
			const bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;

			const int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA );
			const int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB );

			// One of the bodies is already taken: leave it for a later sweep.
			if( aUnavailable != 0 || bUnavailable != 0 )
				continue;

			if( !aIsStatic )
				writeBuf( ldsFixedBuffer, bodyA );
			if( !bIsStatic )
				writeBuf( ldsFixedBuffer, bodyB );

			// Tag first, then swap, so the batch index travels with the record.
			cs[i].m_batchIdx = batchIdx;

			if( i != numSorted )
			{
				Contact4 held = cs[i];
				cs[i] = cs[numSorted];
				cs[numSorted] = held;
			}

			numSorted++;
			nCurrentBatch++;

			// After SIMD_WIDTH accepted constraints the flags are cleared but
			// batchIdx is not advanced, so the rest of this sweep keeps
			// filling the same batch against a fresh set of flags.
			// NOTE(review): presumably the solver handles a batch in
			// SIMD_WIDTH-sized chunks - confirm.
			if( nCurrentBatch == SIMD_WIDTH )
			{
				nCurrentBatch = 0;
				for( int w = 0; w < CHECK_SIZE; w++ )
					ldsFixedBuffer[w] = 0;
			}
		}

		batchIdx++;
	}
}
|
||||
240
opencl/gpu_rigidbody/kernels/batchingKernelsNew.h
Normal file
240
opencl/gpu_rigidbody/kernels/batchingKernelsNew.h
Normal file
@@ -0,0 +1,240 @@
|
||||
//This file is autogenerated from batchingKernelsNew.cl using stringify.bat (premake --stringify) in the build folder of this project. Do not edit it by hand; change the .cl file and regenerate it instead.
|
||||
static const char* batchingKernelsNewCL= \
|
||||
"/*\n"
|
||||
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
|
||||
"\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose, \n"
|
||||
"including commercial applications, and to alter it and redistribute it freely, \n"
|
||||
"subject to the following restrictions:\n"
|
||||
"\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Originally written by Erwin Coumans\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
|
||||
"\n"
|
||||
"#ifdef cl_ext_atomic_counters_32\n"
|
||||
"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n"
|
||||
"#else\n"
|
||||
"#define counter32_t volatile __global int*\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#define SIMD_WIDTH 64\n"
|
||||
"\n"
|
||||
"typedef unsigned int u32;\n"
|
||||
"typedef unsigned short u16;\n"
|
||||
"typedef unsigned char u8;\n"
|
||||
"\n"
|
||||
"#define GET_GROUP_IDX get_group_id(0)\n"
|
||||
"#define GET_LOCAL_IDX get_local_id(0)\n"
|
||||
"#define GET_GLOBAL_IDX get_global_id(0)\n"
|
||||
"#define GET_GROUP_SIZE get_local_size(0)\n"
|
||||
"#define GET_NUM_GROUPS get_num_groups(0)\n"
|
||||
"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
|
||||
"#define AtomInc(x) atom_inc(&(x))\n"
|
||||
"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
|
||||
"#define AppendInc(x, out) out = atomic_inc(x)\n"
|
||||
"#define AtomAdd(x, value) atom_add(&(x), value)\n"
|
||||
"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n"
|
||||
"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n"
|
||||
"\n"
|
||||
"#define make_float4 (float4)\n"
|
||||
"#define make_float2 (float2)\n"
|
||||
"#define make_uint4 (uint4)\n"
|
||||
"#define make_int4 (int4)\n"
|
||||
"#define make_uint2 (uint2)\n"
|
||||
"#define make_int2 (int2)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define max2 max\n"
|
||||
"#define min2 min\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define WG_SIZE 64\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" float4 m_worldPos[4];\n"
|
||||
" float4 m_worldNormal;\n"
|
||||
" u32 m_coeffs;\n"
|
||||
" int m_batchIdx;\n"
|
||||
"\n"
|
||||
" int m_bodyAPtrAndSignBit;//sign bit set for fixed objects\n"
|
||||
" int m_bodyBPtrAndSignBit;\n"
|
||||
"}Contact4;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_n;\n"
|
||||
" int m_start;\n"
|
||||
" int m_staticIdx;\n"
|
||||
" int m_paddings[1];\n"
|
||||
"} ConstBuffer;\n"
|
||||
"\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" int m_a;\n"
|
||||
" int m_b;\n"
|
||||
" u32 m_idx;\n"
|
||||
"}Elem;\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
"__kernel void CreateBatchesBruteForce( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n"
|
||||
"{\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" \n"
|
||||
" const int m_n = gN[wgIdx];\n"
|
||||
" const int m_start = gStart[wgIdx];\n"
|
||||
" \n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" for (int i=0;i<m_n;i++)\n"
|
||||
" {\n"
|
||||
" int srcIdx = i+m_start;\n"
|
||||
" int batchIndex = i;\n"
|
||||
" gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"#define CHECK_SIZE (WG_SIZE)\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"u32 readBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
" return buff[bufIdx] & (1<<bitIdx);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void writeBuf(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
" buff[bufIdx] |= (1<<bitIdx);\n"
|
||||
" //atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"u32 tryWrite(__local u32* buff, int idx)\n"
|
||||
"{\n"
|
||||
" idx = idx % (32*CHECK_SIZE);\n"
|
||||
" int bitIdx = idx%32;\n"
|
||||
" int bufIdx = idx/32;\n"
|
||||
" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n"
|
||||
" return ((ans >> bitIdx)&1) == 0;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"// batching on the GPU\n"
|
||||
"__kernel void CreateBatchesNew( __global Contact4* gConstraints, __global const u32* gN, __global const u32* gStart, int staticIdx )\n"
|
||||
"{\n"
|
||||
" int wgIdx = GET_GROUP_IDX;\n"
|
||||
" int lIdx = GET_LOCAL_IDX;\n"
|
||||
" const int numConstraints = gN[wgIdx];\n"
|
||||
" const int m_start = gStart[wgIdx];\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" __local u32 ldsFixedBuffer[CHECK_SIZE];\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" if( lIdx == 0 )\n"
|
||||
" {\n"
|
||||
" \n"
|
||||
" \n"
|
||||
" __global Contact4* cs = &gConstraints[m_start]; \n"
|
||||
" \n"
|
||||
" \n"
|
||||
" int numValidConstraints = 0;\n"
|
||||
" int batchIdx = 0;\n"
|
||||
"\n"
|
||||
" while( numValidConstraints < numConstraints)\n"
|
||||
" {\n"
|
||||
" int nCurrentBatch = 0;\n"
|
||||
" // clear flag\n"
|
||||
" \n"
|
||||
" for(int i=0; i<CHECK_SIZE; i++) \n"
|
||||
" ldsFixedBuffer[i] = 0; \n"
|
||||
"\n"
|
||||
" for(int i=numValidConstraints; i<numConstraints; i++)\n"
|
||||
" {\n"
|
||||
"\n"
|
||||
" int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n"
|
||||
" int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n"
|
||||
" int bodyA = abs(bodyAS);\n"
|
||||
" int bodyB = abs(bodyBS);\n"
|
||||
" bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n"
|
||||
" bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n"
|
||||
" int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n"
|
||||
" int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB);\n"
|
||||
" \n"
|
||||
" if( aUnavailable==0 && bUnavailable==0 ) // ok\n"
|
||||
" {\n"
|
||||
" if (!aIsStatic)\n"
|
||||
" {\n"
|
||||
" writeBuf( ldsFixedBuffer, bodyA );\n"
|
||||
" }\n"
|
||||
" if (!bIsStatic)\n"
|
||||
" {\n"
|
||||
" writeBuf( ldsFixedBuffer, bodyB );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" cs[i].m_batchIdx = batchIdx;\n"
|
||||
"\n"
|
||||
" if (i!=numValidConstraints)\n"
|
||||
" {\n"
|
||||
" //btSwap(cs[i],cs[numValidConstraints]);\n"
|
||||
" \n"
|
||||
" Contact4 tmp = cs[i];\n"
|
||||
" cs[i] = cs[numValidConstraints];\n"
|
||||
" cs[numValidConstraints] = tmp;\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" numValidConstraints++;\n"
|
||||
" \n"
|
||||
" nCurrentBatch++;\n"
|
||||
" if( nCurrentBatch == SIMD_WIDTH)\n"
|
||||
" {\n"
|
||||
" nCurrentBatch = 0;\n"
|
||||
" for(int i=0; i<CHECK_SIZE; i++) \n"
|
||||
" ldsFixedBuffer[i] = 0;\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }//for\n"
|
||||
" batchIdx ++;\n"
|
||||
" }//while\n"
|
||||
" }//if( lIdx == 0 )\n"
|
||||
" \n"
|
||||
" //return batchIdx;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
;
|
||||
Reference in New Issue
Block a user