Add the GPU rigid body pipeline from https://github.com/erwincoumans/experiments as a Bullet 3.x preview for Bullet 2.80

2012-03-05 00:54:32 +00:00
parent 73c4646b40
commit 571af41cf6
257 changed files with 55106 additions and 0 deletions
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.h
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.h
@@ -0,0 +1,77 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.  
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+#pragma once
+
+#include <Adl/Adl.h>
+#include <AdlPrimitives/Math/Math.h>
+
+namespace adl
+{
+
+// Placeholder type: declares an Option enum that currently has no
+// enumerators. Nothing else in this header references it.
+class FillBase
+{
+public:
+	enum Option {};
+};
+
+// Device-side fill primitive: writes one constant value into a range of an
+// int / int2 / int4 buffer. All entry points are static; the per-device state
+// lives in the Data object returned by allocate().
+template<DeviceType TYPE>
+class Fill
+{
+public:
+	typedef Launcher::BufferInfo BufferInfo;
+
+	// Arguments handed to the fill kernels. The field order matches ConstBuffer
+	// in FillKernels.cl and cbuffer CB in FillKernels.hlsl.
+	struct ConstData
+	{
+		int4 m_data;		// value to write; the int/int2 kernels read only .x / .x,.y
+		int m_offset;		// index of the first element to write
+		int m_n;			// number of elements to write
+		int m_padding[2];	// not read by the kernels
+	};
+
+	// Per-device state created by allocate().
+	struct Data
+	{
+		const Device* m_device;
+		Kernel* m_fillIntKernel;
+		Kernel* m_fillInt2Kernel;
+		Kernel* m_fillInt4Kernel;
+		Buffer<ConstData>* m_constBuffer;
+	};
+
+	// Looks up the three fill kernels on the device and creates the constant buffer.
+	static Data* allocate(const Device* deviceData);
+
+	// Deletes the constant buffer and the Data object.
+	static void deallocate(Data* data);
+
+	// Each overload sets src[offset .. offset+n-1] to value; n must be positive.
+	static void execute(Data* data, Buffer<int>& src, const int& value, int n, int offset = 0);
+	static void execute(Data* data, Buffer<int2>& src, const int2& value, int n, int offset = 0);
+	static void execute(Data* data, Buffer<int4>& src, const int4& value, int n, int offset = 0);
+};
+
+
+#include <AdlPrimitives/Fill/FillHost.inl>
+#include <AdlPrimitives/Fill/Fill.inl>
+
+};
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.inl
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/Fill.inl
@@ -0,0 +1,123 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.  
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+
+// File-local helper macros; all four are #undef'd again at the end of this file.
+// PATH is the kernel source location passed to getKernel(); KERNEL0..KERNEL2 are
+// the entry-point names defined in FillKernels.cl / FillKernels.hlsl.
+// The commented-out PATH below is an alternative relative location.
+//#define PATH "..\\..\\AdlPrimitives\\Fill\\FillKernels"
+#define PATH "..\\..\\opencl\\primitives\\AdlPrimitives\\Fill\\FillKernels"
+#define KERNEL0 "FillIntKernel"
+#define KERNEL1 "FillInt2Kernel"
+#define KERNEL2 "FillInt4Kernel"
+
+#include <AdlPrimitives/Fill/FillKernelsCL.h>
+#include <AdlPrimitives/Fill/FillKernelsDX11.h>
+
+
+// Creates the per-device state for Fill<TYPE>: resolves the three fill kernels
+// through the device and allocates a one-element constant buffer.
+// The returned object is heap-allocated; release it with deallocate().
+template<DeviceType TYPE>
+typename Fill<TYPE>::Data* Fill<TYPE>::allocate( const Device* device )
+{
+	ADLASSERT( TYPE == device->m_type );
+
+	// Embedded kernel sources, indexed by TYPE. Without
+	// ADL_LOAD_KERNEL_FROM_STRING a null source is handed to getKernel().
+#if defined(ADL_LOAD_KERNEL_FROM_STRING)
+	const char* src[] = {fillKernelsCL, fillKernelsDX11};
+#else
+	const char* src[] = {0,0};
+#endif
+	const char* kernelSource = src[TYPE];
+
+	Data* state = new Data;
+	state->m_device = device;
+	state->m_fillIntKernel = device->getKernel( PATH, KERNEL0, 0, kernelSource );
+	state->m_fillInt2Kernel = device->getKernel( PATH, KERNEL1, 0, kernelSource );
+	state->m_fillInt4Kernel = device->getKernel( PATH, KERNEL2, 0, kernelSource );
+	state->m_constBuffer = new Buffer<ConstData>( device, 1, BufferBase::BUFFER_CONST );
+
+	return state;
+}
+
+// Releases the state created by allocate(): the constant buffer and the Data
+// object itself. The kernel pointers are not released here.
+// A null pointer is accepted and ignored, matching plain `delete` and the host
+// specialization (whose allocate() returns 0).
+template<DeviceType TYPE>
+void Fill<TYPE>::deallocate( Data* data )
+{
+	if( data == 0 )
+		return;
+
+	delete data->m_constBuffer;
+	delete data;
+}
+
+// Sets src[offset .. offset+n-1] to `value` by running FillIntKernel,
+// launched with launch1D( n ).
+//   data   - state returned by allocate()
+//   src    - destination buffer
+//   value  - value written to every element of the range
+//   n      - number of elements to write; must be positive
+//   offset - index of the first element to write
+template<DeviceType TYPE>
+void Fill<TYPE>::execute(Data* data, Buffer<int>& src, const int& value, int n, int offset)
+{
+	ADLASSERT( n>0 );
+	ADLASSERT( offset>=0 );
+	// Same range check as the host implementation: the kernel only guards
+	// against gIdx >= n, not against running past the end of the buffer.
+	ADLASSERT( src.m_size >= offset+n );
+
+	ConstData constBuffer;
+	{
+		constBuffer.m_data = make_int4( value );
+		constBuffer.m_offset = offset;
+		constBuffer.m_n = n;
+		// Not read by the kernel, but cleared so no uninitialized bytes are uploaded.
+		constBuffer.m_padding[0] = 0;
+		constBuffer.m_padding[1] = 0;
+	}
+
+	{
+		BufferInfo bInfo[] = { BufferInfo( &src ) };
+
+		Launcher launcher( data->m_device, data->m_fillIntKernel );
+		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
+		launcher.setConst( *data->m_constBuffer, constBuffer );
+		launcher.launch1D( n );
+	}
+}
+
+// Sets src[offset .. offset+n-1] to `value` by running FillInt2Kernel,
+// launched with launch1D( n ).
+//   data   - state returned by allocate()
+//   src    - destination buffer
+//   value  - value written to every element of the range
+//   n      - number of elements to write; must be positive
+//   offset - index of the first element to write
+template<DeviceType TYPE>
+void Fill<TYPE>::execute(Data* data, Buffer<int2>& src, const int2& value, int n, int offset)
+{
+	ADLASSERT( n>0 );
+	ADLASSERT( offset>=0 );
+	// Same range check as the host implementation: the kernel only guards
+	// against gIdx >= n, not against running past the end of the buffer.
+	ADLASSERT( src.m_size >= offset+n );
+
+	ConstData constBuffer;
+	{
+		// The kernel reads only .x and .y; the remaining components are zeroed.
+		constBuffer.m_data = make_int4( value.x, value.y, 0, 0 );
+		constBuffer.m_offset = offset;
+		constBuffer.m_n = n;
+		// Not read by the kernel, but cleared so no uninitialized bytes are uploaded.
+		constBuffer.m_padding[0] = 0;
+		constBuffer.m_padding[1] = 0;
+	}
+
+	{
+		BufferInfo bInfo[] = { BufferInfo( &src ) };
+
+		Launcher launcher( data->m_device, data->m_fillInt2Kernel );
+		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
+		launcher.setConst( *data->m_constBuffer, constBuffer );
+		launcher.launch1D( n );
+	}
+}
+
+// Sets src[offset .. offset+n-1] to `value` by running FillInt4Kernel,
+// launched with launch1D( n ).
+//   data   - state returned by allocate()
+//   src    - destination buffer
+//   value  - value written to every element of the range
+//   n      - number of elements to write; must be positive
+//   offset - index of the first element to write
+template<DeviceType TYPE>
+void Fill<TYPE>::execute(Data* data, Buffer<int4>& src, const int4& value, int n, int offset)
+{
+	ADLASSERT( n>0 );
+	ADLASSERT( offset>=0 );
+	// Same range check as the host implementation: the kernel only guards
+	// against gIdx >= n, not against running past the end of the buffer.
+	ADLASSERT( src.m_size >= offset+n );
+
+	ConstData constBuffer;
+	{
+		constBuffer.m_data = value;
+		constBuffer.m_offset = offset;
+		constBuffer.m_n = n;
+		// Not read by the kernel, but cleared so no uninitialized bytes are uploaded.
+		constBuffer.m_padding[0] = 0;
+		constBuffer.m_padding[1] = 0;
+	}
+
+	{
+		BufferInfo bInfo[] = { BufferInfo( &src ) };
+
+		Launcher launcher( data->m_device, data->m_fillInt4Kernel );
+		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
+		launcher.setConst( *data->m_constBuffer, constBuffer );
+		launcher.launch1D( n );
+	}
+}
+
+// Remove the helper macros defined at the top of this file so they do not
+// leak into whatever includes it.
+#undef PATH
+#undef KERNEL0
+#undef KERNEL1
+#undef KERNEL2
+
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillHost.inl
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillHost.inl
@@ -0,0 +1,99 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.  
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+// Host (CPU) specialization of Fill: fills a HostBuffer with a plain loop.
+// It keeps no per-device state, so allocate() returns a null Data pointer and
+// deallocate() does nothing.
+template<>
+class Fill<TYPE_HOST>
+{
+public:
+	struct Data
+	{
+	};
+
+	static Data* allocate(const Device* deviceData) { return 0; }
+
+	static void deallocate(Data* data) {}
+
+	// Shared implementation for every element type: writes `value` into
+	// src[offset .. offset+n-1]. `data` is not used.
+	template<typename T>
+	static void executeImpl(Data* data, Buffer<T>& src, const T& value, int n, int offset = 0)
+	{
+		ADLASSERT( src.getType() == TYPE_HOST );
+		ADLASSERT( src.m_size >= offset+n );
+
+		HostBuffer<T>& hostDst = (HostBuffer<T>&)src;
+		const int end = offset+n;
+		for(int i=offset; i<end; ++i)
+			hostDst[i] = value;
+	}
+
+	// Typed entry points mirroring Fill<TYPE>::execute; each forwards to executeImpl.
+	static void execute(Data* data, Buffer<int>& src, const int& value, int n, int offset = 0)
+	{
+		executeImpl( data, src, value, n, offset );
+	}
+
+	static void execute(Data* data, Buffer<int2>& src, const int2& value, int n, int offset = 0)
+	{
+		executeImpl( data, src, value, n, offset );
+	}
+
+	static void execute(Data* data, Buffer<int4>& src, const int4& value, int n, int offset = 0)
+	{
+		executeImpl( data, src, value, n, offset );
+	}
+};
+
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.cl
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.cl
@@ -0,0 +1,81 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.  
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+
+// Extension requests and short-hand macros for OpenCL built-ins.
+// The kernels in this file use only GET_GLOBAL_IDX and make_int2.
+#pragma OPENCL EXTENSION cl_amd_printf : enable
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
+
+typedef unsigned int u32;
+#define GET_GROUP_IDX get_group_id(0)
+#define GET_LOCAL_IDX get_local_id(0)
+#define GET_GLOBAL_IDX get_global_id(0)
+#define GET_GROUP_SIZE get_local_size(0)
+#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
+#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
+#define AtomInc(x) atom_inc(&(x))
+#define AtomInc1(x, out) out = atom_inc(&(x))
+
+// Vector "constructors" spelled as casts, e.g. make_int2( a, b ) -> (int2)( a, b ).
+#define make_uint4 (uint4)
+#define make_uint2 (uint2)
+#define make_int2 (int2)
+
+// Argument block passed by value to each kernel below.
+// The field order matches Fill<TYPE>::ConstData in Fill.h.
+typedef struct
+{
+	int4 m_data;		// fill value; the int/int2 kernels read only .x / .x,.y
+	int m_offset;		// index of the first element to write
+	int m_n;			// number of elements to write
+	int m_padding[2];	// not read by any kernel in this file
+} ConstBuffer;
+
+
+// Writes cb.m_data.x to dstInt[ cb.m_offset+i ] for every work item i < cb.m_n.
+__kernel
+__attribute__((reqd_work_group_size(64,1,1)))
+void FillIntKernel(__global int* dstInt,
+					ConstBuffer cb)
+{
+	const int workItem = GET_GLOBAL_IDX;
+
+	// Work items at or beyond the requested count write nothing.
+	if( workItem >= cb.m_n )
+		return;
+
+	dstInt[ cb.m_offset+workItem ] = cb.m_data.x;
+}
+
+// Writes (cb.m_data.x, cb.m_data.y) to dstInt2[ cb.m_offset+i ] for every
+// work item i < cb.m_n.
+__kernel
+__attribute__((reqd_work_group_size(64,1,1)))
+void FillInt2Kernel(__global int2* dstInt2,
+					ConstBuffer cb)
+{
+	const int workItem = GET_GLOBAL_IDX;
+
+	// Work items at or beyond the requested count write nothing.
+	if( workItem >= cb.m_n )
+		return;
+
+	dstInt2[ cb.m_offset+workItem ] = make_int2( cb.m_data.x, cb.m_data.y );
+}
+
+// Writes cb.m_data to dstInt4[ cb.m_offset+i ] for every work item i < cb.m_n.
+__kernel
+__attribute__((reqd_work_group_size(64,1,1)))
+void FillInt4Kernel(__global int4* dstInt4,
+					ConstBuffer cb)
+{
+	const int workItem = GET_GLOBAL_IDX;
+
+	// Work items at or beyond the requested count write nothing.
+	if( workItem >= cb.m_n )
+		return;
+
+	dstInt4[ cb.m_offset+workItem ] = cb.m_data;
+}
+
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.hlsl
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernels.hlsl
@@ -0,0 +1,79 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.  
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+
+
+// Short-hand macros mirroring the names used in FillKernels.cl.
+// The kernels in this file use only DEFAULT_ARGS, GET_GLOBAL_IDX and make_int2.
+typedef uint u32;
+
+#define GET_GROUP_IDX groupIdx.x
+#define GET_LOCAL_IDX localIdx.x
+#define GET_GLOBAL_IDX globalIdx.x
+#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()
+#define GROUP_MEM_FENCE
+// Parameter list that supplies the globalIdx/localIdx/groupIdx names the macros above expand to.
+#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID
+#define AtomInc(x) InterlockedAdd(x, 1)
+#define AtomInc1(x, out) InterlockedAdd(x, 1, out)
+
+#define make_uint4 uint4
+#define make_uint2 uint2
+#define make_int2 int2
+
+
+// Fill arguments, bound to constant-buffer slot b0.
+// The field order matches Fill<TYPE>::ConstData in Fill.h.
+cbuffer CB : register( b0 )
+{
+	int4 m_data;		// fill value; the int/int2 kernels read only .x / .x,.y
+	int m_offset;		// index of the first element to write
+	int m_n;			// number of elements to write
+	int m_padding[2];	// not read by any kernel in this file
+};
+
+
+// Output of FillIntKernel, bound to u0 (the other kernels' outputs use u0 as well).
+RWStructuredBuffer<int> dstInt : register( u0 );
+
+// Writes m_data.x to dstInt[ m_offset+i ] for every thread i < m_n.
+[numthreads(64, 1, 1)]
+void FillIntKernel( DEFAULT_ARGS )
+{
+	const int threadIdx = GET_GLOBAL_IDX;
+
+	// Threads at or beyond the requested count write nothing.
+	if( threadIdx >= m_n )
+		return;
+
+	dstInt[ m_offset+threadIdx ] = m_data.x;
+}
+
+// Output of FillInt2Kernel, bound to u0 (the other kernels' outputs use u0 as well).
+RWStructuredBuffer<int2> dstInt2 : register( u0 );
+
+// Writes (m_data.x, m_data.y) to dstInt2[ m_offset+i ] for every thread i < m_n.
+[numthreads(64, 1, 1)]
+void FillInt2Kernel( DEFAULT_ARGS )
+{
+	const int threadIdx = GET_GLOBAL_IDX;
+
+	// Threads at or beyond the requested count write nothing.
+	if( threadIdx >= m_n )
+		return;
+
+	dstInt2[ m_offset+threadIdx ] = make_int2( m_data.x, m_data.y );
+}
+
+// Output of FillInt4Kernel, bound to u0 (the other kernels' outputs use u0 as well).
+RWStructuredBuffer<int4> dstInt4 : register( u0 );
+
+// Writes m_data to dstInt4[ m_offset+i ] for every thread i < m_n.
+[numthreads(64, 1, 1)]
+void FillInt4Kernel( DEFAULT_ARGS )
+{
+	const int threadIdx = GET_GLOBAL_IDX;
+
+	// Threads at or beyond the requested count write nothing.
+	if( threadIdx >= m_n )
+		return;
+
+	dstInt4[ m_offset+threadIdx ] = m_data;
+}
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsCL.h
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsCL.h
@@ -0,0 +1,71 @@
+// OpenCL source of the fill kernels, embedded as a C string.
+// Fill.inl hands this to getKernel() when ADL_LOAD_KERNEL_FROM_STRING is defined.
+// NOTE(review): presumably generated from FillKernels.cl - confirm before editing by hand.
+static const char* fillKernelsCL= \
+"/*\n"
+"		2011 Takahiro Harada\n"
+"*/\n"
+"\n"
+"#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
+"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
+"\n"
+"typedef unsigned int u32;\n"
+"#define GET_GROUP_IDX get_group_id(0)\n"
+"#define GET_LOCAL_IDX get_local_id(0)\n"
+"#define GET_GLOBAL_IDX get_global_id(0)\n"
+"#define GET_GROUP_SIZE get_local_size(0)\n"
+"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n"
+"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n"
+"#define AtomInc(x) atom_inc(&(x))\n"
+"#define AtomInc1(x, out) out = atom_inc(&(x))\n"
+"\n"
+"#define make_uint4 (uint4)\n"
+"#define make_uint2 (uint2)\n"
+"#define make_int2 (int2)\n"
+"\n"
+"typedef struct\n"
+"{\n"
+"	int4 m_data;\n"
+"	int m_offset;\n"
+"	int m_n;\n"
+"	int m_padding[2];\n"
+"} ConstBuffer;\n"
+"\n"
+"\n"
+"__kernel\n"
+"__attribute__((reqd_work_group_size(64,1,1)))\n"
+"void FillIntKernel(__global int* dstInt, \n"
+"					ConstBuffer cb)\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < cb.m_n )\n"
+"	{\n"
+"		dstInt[ cb.m_offset+gIdx ] = cb.m_data.x;\n"
+"	}\n"
+"}\n"
+"\n"
+"__kernel\n"
+"__attribute__((reqd_work_group_size(64,1,1)))\n"
+"void FillInt2Kernel(__global int2* dstInt2, \n"
+"					ConstBuffer cb)\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < cb.m_n )\n"
+"	{\n"
+"		dstInt2[ cb.m_offset+gIdx ] = make_int2( cb.m_data.x, cb.m_data.y );\n"
+"	}\n"
+"}\n"
+"\n"
+"__kernel\n"
+"__attribute__((reqd_work_group_size(64,1,1)))\n"
+"void FillInt4Kernel(__global int4* dstInt4, \n"
+"					ConstBuffer cb)\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < cb.m_n )\n"
+"	{\n"
+"		dstInt4[ cb.m_offset+gIdx ] = cb.m_data;\n"
+"	}\n"
+"}\n"
+"\n"
+;
--- a/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsDX11.h
+++ b/Extras/RigidBodyGpuPipeline/opencl/primitives/AdlPrimitives/Fill/FillKernelsDX11.h
@@ -0,0 +1,69 @@
+// HLSL source of the fill kernels, embedded as a C string.
+// Fill.inl hands this to getKernel() when ADL_LOAD_KERNEL_FROM_STRING is defined.
+// NOTE(review): presumably generated from FillKernels.hlsl - confirm before editing by hand.
+static const char* fillKernelsDX11= \
+"/*\n"
+"		2011 Takahiro Harada\n"
+"*/\n"
+"\n"
+"typedef uint u32;\n"
+"\n"
+"#define GET_GROUP_IDX groupIdx.x\n"
+"#define GET_LOCAL_IDX localIdx.x\n"
+"#define GET_GLOBAL_IDX globalIdx.x\n"
+"#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n"
+"#define GROUP_MEM_FENCE\n"
+"#define DEFAULT_ARGS uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID\n"
+"#define AtomInc(x) InterlockedAdd(x, 1)\n"
+"#define AtomInc1(x, out) InterlockedAdd(x, 1, out)\n"
+"\n"
+"#define make_uint4 uint4\n"
+"#define make_uint2 uint2\n"
+"#define make_int2 int2\n"
+"\n"
+"\n"
+"cbuffer CB : register( b0 )\n"
+"{\n"
+"	int4 m_data;\n"
+"	int m_offset;\n"
+"	int m_n;\n"
+"	int m_padding[2];\n"
+"};\n"
+"\n"
+"\n"
+"RWStructuredBuffer<int> dstInt : register( u0 );\n"
+"\n"
+"[numthreads(64, 1, 1)]\n"
+"void FillIntKernel( DEFAULT_ARGS )\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < m_n )\n"
+"	{\n"
+"		dstInt[ m_offset+gIdx ] = m_data.x;\n"
+"	}\n"
+"}\n"
+"\n"
+"RWStructuredBuffer<int2> dstInt2 : register( u0 );\n"
+"\n"
+"[numthreads(64, 1, 1)]\n"
+"void FillInt2Kernel( DEFAULT_ARGS )\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < m_n )\n"
+"	{\n"
+"		dstInt2[ m_offset+gIdx ] = make_int2( m_data.x, m_data.y );\n"
+"	}\n"
+"}\n"
+"\n"
+"RWStructuredBuffer<int4> dstInt4 : register( u0 );\n"
+"\n"
+"[numthreads(64, 1, 1)]\n"
+"void FillInt4Kernel( DEFAULT_ARGS )\n"
+"{\n"
+"	int gIdx = GET_GLOBAL_IDX;\n"
+"\n"
+"	if( gIdx < m_n )\n"
+"	{\n"
+"		dstInt4[ m_offset+gIdx ] = m_data;\n"
+"	}\n"
+"}\n"
+;